# Abalone Example

## Reading Data

In [13]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR

In [26]:
# Remote location of the abalone CSV file.
ABALONE_CSV_URL = 'https://goz39a.s3.eu-central-1.amazonaws.com/abalone.csv'

# header=None: the first row is treated as data, so columns get integer labels.
df = pd.read_csv(ABALONE_CSV_URL, header=None)

In [15]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [16]:
# Positional split: every column except the last goes to X, the last column to y.
# X mixes a string column with float columns, so it is necessarily an object array.
X = df.iloc[:, :-1].to_numpy()

# Take y from its own column so it keeps that column's dtype. Slicing it out of
# df.values would first upcast the whole mixed-type frame to a single object array,
# leaving y as dtype=object, which estimators may not recognise as a valid target.
y = df.iloc[:, -1].to_numpy()

print(X.shape, y.shape)

(4177, 8) (4177,)


## Variable Types

In [17]:
# Per-column dtypes as an array; the final entry belongs to the column used for y,
# so it is dropped to leave only the feature dtypes.
datatypes = df.dtypes.to_numpy()
X_type = datatypes[:-1]
print(X_type)

[dtype('O') dtype('float64') dtype('float64') dtype('float64')
 dtype('float64') dtype('float64') dtype('float64') dtype('float64')]


In [6]:
# Boolean masks over the feature dtypes: object columns are treated as
# categorical, float64 columns as numerical.
object_dtype = np.dtype('O')
float_dtype = np.dtype('float64')
categorical_ix = X_type == object_dtype
numerical_ix = X_type == float_dtype

# np.where with a single argument returns a 1-tuple of index arrays,
# which is why the positions are read with [0] below.
categorical_cols = np.where(categorical_ix)
numerical_cols = np.where(numerical_ix)

print('categorical cols:', categorical_cols[0])
print('numerical cols:', numerical_cols[0])

categorical cols: [0]
numerical cols: [1 2 3 4 5 6 7]


## Create Transformer

In [7]:
# Each step is (name, transformer, column positions):
# one-hot encode the categorical columns, min-max scale the numerical ones.
cat_step = ('cat', OneHotEncoder(), categorical_cols[0])
num_step = ('num', MinMaxScaler(), numerical_cols[0])
t = [cat_step, num_step]
col_transform = ColumnTransformer(transformers=t)

In [8]:
# Fit the column transformer on X and apply it in a single call.
X_transformed = col_transform.fit_transform(X)
transformed_shape = X_transformed.shape
print(transformed_shape)

(4177, 10)


In [9]:
# Wrap the transformed array in a DataFrame to preview its first five rows.
transformed_preview = pd.DataFrame(X_transformed)
transformed_preview.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,0.0,1.0,0.513514,0.521008,0.084071,0.181335,0.150303,0.132324,0.147982
1,0.0,0.0,1.0,0.371622,0.352941,0.079646,0.079157,0.066241,0.063199,0.068261
2,1.0,0.0,0.0,0.614865,0.613445,0.119469,0.239065,0.171822,0.185648,0.207773
3,0.0,0.0,1.0,0.493243,0.521008,0.110619,0.182044,0.14425,0.14944,0.152965
4,0.0,1.0,0.0,0.344595,0.336134,0.070796,0.071897,0.059516,0.05135,0.053313
