# Neural Network / Titanic

In [1]:
import pandas as pd

In [2]:
# Path of the Titanic CSV, resolved relative to the notebook's working directory.
titanic_csv_path = "3 SEMESTER/Neural Networks/titanic-clean.csv"
df = pd.read_csv(titanic_csv_path)
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,0,3,1,22.0,1,0,S
1,1,1,0,38.0,1,0,C
2,1,3,0,26.0,0,0,S
3,1,1,0,35.0,1,0,S
4,0,3,1,35.0,0,0,S


In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(881, 7)

In [4]:
from sklearn.preprocessing import OneHotEncoder

In [5]:
# One-hot encode the "Embarked" column.
# handle_unknown="ignore": categories not seen during fit encode as an all-zero row
# instead of raising; sparse_output=False: transform returns a dense NumPy array.
encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore").fit(df[["Embarked"]])
encoder_feature_names = encoder.get_feature_names_out()
encoder_feature_names

array(['Embarked_C', 'Embarked_Q', 'Embarked_S'], dtype=object)

In [6]:
# Encode every row's "Embarked" value; one output column per category learned by the encoder.
matrix = encoder.transform(df[["Embarked"]])
matrix

array([[0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       ...,
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.]])

In [7]:
# Wrap the encoded array in a DataFrame with one named column per category.
# Reusing df's index keeps each encoded row tied to its source row, so an
# index-based merge/join still lines up when df's index is not the default 0..n-1.
df_encoded = pd.DataFrame(data=matrix, columns=encoder_feature_names, index=df.index)
df_encoded.head()

Unnamed: 0,Embarked_C,Embarked_Q,Embarked_S
0,0.0,0.0,1.0
1,1.0,0.0,0.0
2,0.0,0.0,1.0
3,0.0,0.0,1.0
4,0.0,0.0,1.0


In [8]:
# Original frame for comparison, before the encoded columns are attached.
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,0,3,1,22.0,1,0,S
1,1,1,0,38.0,1,0,C
2,1,3,0,26.0,0,0,S
3,1,1,0,35.0,1,0,S
4,0,3,1,35.0,0,0,S


In [9]:
# Attach the one-hot columns to the main frame, pairing rows by index label.
df = df.merge(df_encoded, left_index=True, right_index=True)
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked,Embarked_C,Embarked_Q,Embarked_S
0,0,3,1,22.0,1,0,S,0.0,0.0,1.0
1,1,1,0,38.0,1,0,C,1.0,0.0,0.0
2,1,3,0,26.0,0,0,S,0.0,0.0,1.0
3,1,1,0,35.0,1,0,S,0.0,0.0,1.0
4,0,3,1,35.0,0,0,S,0.0,0.0,1.0


In [10]:
# The raw "Embarked" column is redundant now that its one-hot columns exist.
# Rebind the name instead of using inplace=True so the frame is not mutated in place.
df = df.drop(columns=["Embarked"])
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked_C,Embarked_Q,Embarked_S
0,0,3,1,22.0,1,0,0.0,0.0,1.0
1,1,1,0,38.0,1,0,1.0,0.0,0.0
2,1,3,0,26.0,0,0,0.0,0.0,1.0
3,1,1,0,35.0,1,0,0.0,0.0,1.0
4,0,3,1,35.0,0,0,0.0,0.0,1.0


**OBS:** OneHotEncoder does the same thing as `get_dummies`, but in addition it stores the learned categories in its own object, so the same encoding can be reapplied to new data.

In [11]:
# "H" is not one of the categories learned during fit; handle_unknown="ignore"
# encodes it as an all-zero row. Passing a DataFrame with the fitted column name
# avoids scikit-learn's "X does not have valid feature names" warning that a
# bare nested list triggers.
encoder.transform(pd.DataFrame({"Embarked": ["H"]}))



array([[0., 0., 0.]])

In [12]:
# "C" is a category learned during fit, so its column is set to 1.
# Passing a DataFrame with the fitted column name avoids scikit-learn's
# "X does not have valid feature names" warning that a bare nested list triggers.
encoder.transform(pd.DataFrame({"Embarked": ["C"]}))



array([[1., 0., 0.]])

In [13]:
# Map a one-hot row back to its original category label.
encoder.inverse_transform([[1.,0.,0.]])

array([['C']], dtype=object)

### Scaling

#### MinMaxScaler

In [14]:
# State of the frame before scaling.
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked_C,Embarked_Q,Embarked_S
0,0,3,1,22.0,1,0,0.0,0.0,1.0
1,1,1,0,38.0,1,0,1.0,0.0,0.0
2,1,3,0,26.0,0,0,0.0,0.0,1.0
3,1,1,0,35.0,1,0,0.0,0.0,1.0
4,0,3,1,35.0,0,0,0.0,0.0,1.0


In [15]:
from sklearn.preprocessing import MinMaxScaler

In [16]:
# Columns whose values are not already within [0, 1] and therefore need rescaling.
features_to_scale = ["Pclass", "Age", "SibSp", "Parch"]

# clip=True clamps values transformed later into the [0, 1] range learned during fit.
# NOTE(review): the scaler is fitted on the whole dataset, before the train/test
# split, so test-set min/max values influence the scaling. Consider fitting on
# the training rows only.
scaler = MinMaxScaler(clip=True)
scaler.fit(df[features_to_scale])
df[features_to_scale] = scaler.transform(df[features_to_scale])
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked_C,Embarked_Q,Embarked_S
0,0,1.0,1,0.334159,0.125,0.0,0.0,0.0,1.0
1,1,0.0,0,0.581914,0.125,0.0,1.0,0.0,0.0
2,1,1.0,0,0.396098,0.0,0.0,0.0,0.0,1.0
3,1,0.0,0,0.53546,0.125,0.0,0.0,0.0,1.0
4,0,1.0,1,0.53546,0.0,0.0,0.0,0.0,1.0


In [17]:
# Scale one new sample (Pclass=3, Age=60, SibSp=0, Parch=0) with the fitted scaler.
# A DataFrame carrying the fitted column names avoids scikit-learn's
# "X does not have valid feature names" warning that a bare nested list triggers.
scaler.transform(pd.DataFrame([[3, 60, 0, 0]], columns=features_to_scale))



array([[1.        , 0.92257665, 0.        , 0.        ]])

### Train/Test Split

In [21]:
from sklearn.model_selection import train_test_split

In [22]:
# Features are every column except the target.
X = df.drop(columns=["Survived"])
# Select the target as a 1-D Series (single brackets). A one-column DataFrame
# (double brackets) makes scikit-learn emit a DataConversionWarning when the
# model is fitted, because estimators expect y with shape (n_samples,).
y = df["Survived"]

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [23]:
# Sanity check: (rows, columns) of the training and test feature sets.
print(X_train.shape, X_test.shape)

(704, 8) (177, 8)


### Neural Network

In [24]:
from sklearn.neural_network import MLPClassifier

In [25]:
# Train a multi-layer perceptron on the training split:
# two hidden layers of 20 units each, at most 300 training iterations,
# and a fixed seed so the run is repeatable.
model = MLPClassifier(
    hidden_layer_sizes=(20, 20),
    max_iter=300,
    random_state=42,
)
model.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [26]:
# Mean accuracy of the fitted classifier on the held-out test split.
model.score(X_test, y_test)

0.847457627118644