In [40]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import seaborn as sns
import sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, Normalizer, StandardScaler

In [41]:
# Load the Titanic passenger dataset through seaborn's dataset loader.
titanic_df = sns.load_dataset('titanic')

In [42]:
# Columns treated as continuous / numeric.
cont_features = [
    'age',
    'fare',
    'sibsp',
    'pclass',
]

# Categorical columns; these are the ones passed to pd.get_dummies.
cat_features = [
    'embarked',
    'class',
    'who',
    'sex',
]

In [43]:
# Impute missing values (SimpleImputer's default strategy) and standardise the
# two continuous columns that are used as model inputs.
# NOTE(review): the imputer and scaler are fitted on the full dataset, before
# the train/test split further down, so test-set statistics leak into these
# features. Fitting on the training rows only would avoid that.
for col in ('age', 'fare'):
    imputed = SimpleImputer().fit_transform(titanic_df[[col]])
    # ravel() turns the (n, 1) scaler output into a plain 1-D column.
    titanic_df[f'{col}_transformed'] = StandardScaler().fit_transform(imputed).ravel()

# One-hot encode the categorical columns (first level of each is dropped).
# Dummy columns left over from a previous run of this cell are removed first,
# so re-running the cell does not append duplicate columns.
cat_dummies = pd.get_dummies(titanic_df[cat_features], dtype=float, drop_first=True)
titanic_df = pd.concat(
    [titanic_df.drop(columns=cat_dummies.columns, errors='ignore'), cat_dummies],
    axis=1,
)

In [44]:
# Preview the frame after the scaled and one-hot encoded columns were added.
titanic_df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,...,alone,age_transformed,fare_transformed,embarked_Q,embarked_S,class_Second,class_Third,who_man,who_woman,sex_male
0,0,3,male,22.0,1,0,7.25,S,Third,man,...,False,-0.592481,-0.502445,0.0,1.0,0.0,1.0,1.0,0.0,1.0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,...,False,0.638789,0.786845,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1,3,female,26.0,0,0,7.925,S,Third,woman,...,True,-0.284663,-0.488854,0.0,1.0,0.0,1.0,0.0,1.0,0.0
3,1,1,female,35.0,1,0,53.1,S,First,woman,...,False,0.407926,0.42073,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,0,3,male,35.0,0,0,8.05,S,Third,man,...,True,0.407926,-0.486337,0.0,1.0,0.0,1.0,1.0,0.0,1.0


In [45]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split (and every output derived from it) reproducible across kernel restarts.
train_df, test_df = train_test_split(titanic_df, train_size=0.8, random_state=42)
print(train_df.shape, test_df.shape)

(712, 24) (179, 24)


In [66]:
class MyLogRegModel(nn.Module):
    """Logistic-regression style model built from a single linear layer.

    ``forward`` returns the raw output of the linear layer; no sigmoid is
    applied inside the module.
    """

    def __init__(self, num_params):
        """Create the layer mapping ``num_params`` input features to one output."""
        super().__init__()
        self.linear_layer = nn.Linear(in_features=num_params, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        logits = self.linear_layer(x)
        return logits


In [47]:
# Peek at the first rows of the training split.
train_df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,...,alone,age_transformed,fare_transformed,embarked_Q,embarked_S,class_Second,class_Third,who_man,who_woman,sex_male
400,1,3,male,39.0,0,0,7.925,S,Third,man,...,True,0.715743,-0.488854,0.0,1.0,0.0,1.0,1.0,0.0,1.0
63,0,3,male,4.0,3,2,27.9,S,Third,child,...,False,-1.977659,-0.086664,0.0,1.0,0.0,1.0,0.0,0.0,1.0
722,0,2,male,34.0,0,0,13.0,S,Second,man,...,True,0.330972,-0.386671,0.0,1.0,1.0,0.0,1.0,0.0,1.0
565,0,3,male,24.0,2,0,24.15,S,Third,man,...,False,-0.438572,-0.162169,0.0,1.0,0.0,1.0,1.0,0.0,1.0
628,0,3,male,26.0,0,0,7.8958,S,Third,man,...,True,-0.284663,-0.489442,0.0,1.0,0.0,1.0,1.0,0.0,1.0


In [48]:
# List every column available in the training split.
train_df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone', 'age_transformed', 'fare_transformed', 'embarked_Q',
       'embarked_S', 'class_Second', 'class_Third', 'who_man', 'who_woman',
       'sex_male'],
      dtype='object')

In [None]:
# Number of rows in the training split.
train_df.shape[0]

712

In [55]:
#torch.tensor(train_df['survived'].to_numpy())

In [57]:
class TitanicDataset(Dataset):
    """Map-style dataset exposing rows of a Titanic DataFrame as tensors.

    Each item is a dict with two entries:
      * ``'target'``   - the target-column value for that row
      * ``'features'`` - a float32 vector holding the selected feature columns

    Args:
        df: DataFrame containing the target column and all feature columns.
        feature_cols: Column names to use as model inputs. Defaults to
            ``DEFAULT_FEATURE_COLS`` when ``None``.
        target_col: Name of the label column. Defaults to ``'survived'``.

    Raises:
        KeyError: If the target column or any feature column is absent from ``df``.
    """

    # Feature columns used when the caller does not pass ``feature_cols``.
    DEFAULT_FEATURE_COLS = (
        'age_transformed', 'fare_transformed', 'embarked_Q',
        'embarked_S', 'class_Second', 'class_Third', 'who_man', 'who_woman',
        'sex_male',
    )

    def __init__(self, df, feature_cols=None, target_col='survived'):
        super().__init__()
        cols = list(self.DEFAULT_FEATURE_COLS if feature_cols is None else feature_cols)

        # Report every missing column at once instead of failing on the first.
        missing = [c for c in [target_col, *cols] if c not in df.columns]
        if missing:
            raise KeyError(f"TitanicDataset: columns missing from df: {missing}")

        self.df = df
        self.feature_cols = cols
        self.target = torch.tensor(df[target_col].to_numpy())
        self.features = torch.tensor(df[cols].to_numpy(), dtype=torch.float32)

    def __len__(self):
        """Return the number of rows held in the feature tensor."""
        # Measured on the tensor that __getitem__ indexes, so the two always agree.
        return self.features.shape[0]

    def __getitem__(self, index):
        """Return the ``{'target', 'features'}`` dict for position ``index``."""
        return {
            'target': self.target[index],
            'features': self.features[index],
        }
# Wrap the training split so it can be served through a DataLoader.
titanic_train_torch_dataset = TitanicDataset(df=train_df)

In [59]:
# Serve the training set in shuffled mini-batches of 16 examples.
train_loader = DataLoader(
    dataset=titanic_train_torch_dataset,
    batch_size=16,
    shuffle=True,
)

In [61]:
# Sanity check: shape of the feature tensor in one batch drawn from the loader.
next(iter(train_loader))['features'].shape

torch.Size([16, 9])

In [64]:
# Size of the last dimension of a feature batch, i.e. the per-example feature count.
next(iter(train_loader))['features'].shape[-1]

9

In [68]:
# Draw a single batch and reuse it both to size the model and to smoke-test it,
# instead of building a fresh shuffled iterator for each step.
sample_features = next(iter(train_loader))['features']
thisLogRegModel = MyLogRegModel(sample_features.shape[-1])

# Forward-pass smoke test: eval mode, no gradient tracking.
thisLogRegModel.eval()
with torch.no_grad():
    out = thisLogRegModel(sample_features)

In [70]:
# Shape of the model output for the sampled batch.
print(out.shape)

torch.Size([16, 1])
