In [1]:
from tab_forge.dataset import Dataset
import pandas as pd

In [2]:
# Load the abalone data; the path is relative to the current working directory.
df = pd.read_csv("abalone.csv")

In [3]:
# Preview the first rows of the raw DataFrame.
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


Покрутим датасет, в котором зарегистрированы все признаки.

In [4]:
# Fully registered dataset: "Sex" is the only categorical feature, the seven
# measurement columns are numerical, and "Rings" is the regression target.
dataset = Dataset(
    df,
    target="Rings",
    task_type="regression",
    categorical_features=["Sex"],
    numerical_features=[
        "Length",
        "Diameter",
        "Height",
        "Whole weight",
        "Shucked weight",
        "Viscera weight",
        "Shell weight",
    ],
)

In [5]:
# Show the Dataset repr (sample count, feature count and task type).
dataset

Dataset(samples=4177, features=8, task='regression')

In [6]:
# Summary dict: shape, task type, target, counts of numerical / categorical /
# unregistered features, and missing values per column.
dataset.summary()

{'shape': (4177, 9),
 'task_type': 'regression',
 'target': 'Rings',
 'n_numerical': 7,
 'n_categorical': 1,
 'n_unregistered': 0,
 'missing_values': {'Sex': 0,
  'Length': 0,
  'Diameter': 0,
  'Height': 0,
  'Whole weight': 0,
  'Shucked weight': 0,
  'Viscera weight': 0,
  'Shell weight': 0,
  'Rings': 0}}

In [7]:
# List the names of all registered features (categorical and numerical).
# NOTE(review): in the recorded output the order differs from the order the
# names were passed to the constructor -- do not rely on it; confirm in the library.
dataset.get_registered_features()

['Shucked weight',
 'Sex',
 'Shell weight',
 'Height',
 'Diameter',
 'Length',
 'Whole weight',
 'Viscera weight']

In [8]:
# Print each registered feature group next to its (Russian) label:
# categorical features first, then numerical ones.
print(
    "Категориальные признаки",
    dataset.get_categorical_features(),
)
print(
    "Числовые признаки",
    dataset.get_numerical_features(),
)

Категориальные признаки ['Sex']
Числовые признаки ['Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight']


In [9]:
# Preview the registered view of the data; in the recorded output it holds the
# registered feature columns followed by the target column.
dataset.get_registered_data().head()

Unnamed: 0,Shucked weight,Sex,Shell weight,Height,Diameter,Length,Whole weight,Viscera weight,Rings
0,0.2245,M,0.15,0.095,0.365,0.455,0.514,0.101,15
1,0.0995,M,0.07,0.09,0.265,0.35,0.2255,0.0485,7
2,0.2565,F,0.21,0.135,0.42,0.53,0.677,0.1415,9
3,0.2155,M,0.155,0.125,0.365,0.44,0.516,0.114,10
4,0.0895,I,0.055,0.08,0.255,0.33,0.205,0.0395,7


In [10]:
# Shape of the registered view (rows, registered features + target).
dataset.get_registered_data().shape

(4177, 9)

Покрутим датасет, в котором зарегистрирована только часть признаков.

In [11]:
# Partially registered dataset: no categorical features and only five of the
# numerical columns are declared, so the remaining feature columns of `df`
# stay unregistered.
# Note: this rebinds `dataset`, replacing any instance created earlier.
dataset = Dataset(
    df,
    target="Rings",
    task_type="regression",
    categorical_features=[],
    numerical_features=[
        "Length",
        "Diameter",
        "Height",
        "Whole weight",
        "Shell weight",
    ],
)

In [12]:
# Show the Dataset repr for the partially registered dataset.
# NOTE(review): the recorded repr still reports features=8 although only 5
# features are registered -- presumably it counts all non-target columns; confirm.
dataset

Dataset(samples=4177, features=8, task='regression')

In [13]:
# Summary dict for the partially registered dataset; the 'n_unregistered'
# entry reports how many feature columns were left undeclared.
dataset.summary()

{'shape': (4177, 9),
 'task_type': 'regression',
 'target': 'Rings',
 'n_numerical': 5,
 'n_categorical': 0,
 'n_unregistered': 3,
 'missing_values': {'Sex': 0,
  'Length': 0,
  'Diameter': 0,
  'Height': 0,
  'Whole weight': 0,
  'Shucked weight': 0,
  'Viscera weight': 0,
  'Shell weight': 0,
  'Rings': 0}}

In [14]:
# List the registered feature names; unregistered columns are not included.
dataset.get_registered_features()

['Shell weight', 'Height', 'Diameter', 'Length', 'Whole weight']

In [15]:
# Print each registered feature group next to its (Russian) label:
# categorical features first, then numerical ones.
print(
    "Категориальные признаки",
    dataset.get_categorical_features(),
)
print(
    "Числовые признаки",
    dataset.get_numerical_features(),
)

Категориальные признаки []
Числовые признаки ['Length', 'Diameter', 'Height', 'Whole weight', 'Shell weight']


In [16]:
# Shape of the registered view (rows, registered features + target).
dataset.get_registered_data().shape

(4177, 6)

In [17]:
# Preview the registered view; in the recorded output the unregistered columns
# are absent and the target column is kept.
dataset.get_registered_data().head()

Unnamed: 0,Shell weight,Height,Diameter,Length,Whole weight,Rings
0,0.15,0.095,0.365,0.455,0.514,15
1,0.07,0.09,0.265,0.35,0.2255,7
2,0.21,0.135,0.42,0.53,0.677,9
3,0.155,0.125,0.365,0.44,0.516,10
4,0.055,0.08,0.255,0.33,0.205,7


In [18]:
# Preview the full data held by the dataset; in the recorded output this
# includes the unregistered columns as well.
dataset.get_data().head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [19]:
# Shape of the full data, for comparison with the registered view above.
dataset.get_data().shape

(4177, 9)