In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the Titanic test and train CSV files from the local ./datasets/titanic folder.
test_set = pd.read_csv("./datasets/titanic/test.csv")
train_set = pd.read_csv("./datasets/titanic/train.csv")

In [3]:
train_set.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
train_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [5]:
from sklearn.base import TransformerMixin , BaseEstimator

class DataFrameSelector(TransformerMixin , BaseEstimator):
    """Transformer that keeps only a configured set of columns.

    Parameters
    ----------
    attributes_names : list of column labels
        Passed to ``X[...]`` in ``transform`` to pick the columns to keep.
    """

    def __init__(self, attributes_names):
        self.attributes_names = attributes_names

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return the ``attributes_names`` selection of ``X``."""
        selected = X[self.attributes_names]
        return selected
    

# Inspired from stackoverflow.com/questions/25239958
class MostFrequentImputer(BaseEstimator, TransformerMixin):
    """Fill missing values in each column with that column's most frequent value."""

    def fit(self, X, y=None):
        """Store, for every column of ``X``, the first entry of its ``value_counts()``.

        The result is kept in ``most_frequent_`` as a Series indexed by
        ``X.columns``.
        """
        top_values = []
        for column in X:
            counts = X[column].value_counts()
            # value_counts() sorts by descending frequency, so index[0] is the mode.
            top_values.append(counts.index[0])
        self.most_frequent_ = pd.Series(top_values, index=X.columns)
        return self

    def transform(self, X, y=None):
        """Return ``X`` with missing entries filled from ``most_frequent_``."""
        filled = X.fillna(self.most_frequent_)
        return filled
        

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Numeric branch: select the numeric columns and fill missing values with the column mean.
num_pipeline=Pipeline([
    ('num',DataFrameSelector(["Age","SibSp","Parch","Fare"])),
    ('imputer',SimpleImputer(strategy="mean"))
])

# Categorical branch: select the categorical columns, fill missing values with
# the most frequent value, then one-hot encode into a dense array.
# NOTE(review): newer scikit-learn releases renamed OneHotEncoder's `sparse`
# argument to `sparse_output` — confirm against the installed version before upgrading.
cat_pipeline=Pipeline([
    ('cat',DataFrameSelector(["Pclass","Sex","Embarked"])),
    ('cat_imputer',MostFrequentImputer()),
    ('encoder',OneHotEncoder(sparse=False))
    ])

In [7]:
from sklearn.pipeline import FeatureUnion
# Combine the numeric and categorical pipelines into a single feature matrix.
full_pipeline=FeatureUnion(transformer_list=[
    ('num',num_pipeline),
    ('cat',cat_pipeline)
])


In [8]:
# Fit the preprocessing pipeline on the training set and transform it in one step.
train_set_prepared = full_pipeline.fit_transform(train_set)

In [9]:
# Only transform the test set: the pipeline must keep the imputation values and
# one-hot categories learned from the training set. Re-fitting on the test set
# would derive them from test data and could encode the test columns differently
# from the features the models were trained on.
test_set_prepared = full_pipeline.transform(test_set)

In [10]:
# Training target: the Survived column of the training frame.
y_train = train_set["Survived"]
train_set.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [11]:
# from sklearn.ensemble import RandomForestClassifier

# forest_clf = RandomForestClassifier(n_estimators=200,random_state=42)
# forest_clf.fit(train_set_prepared,y_train)

In [12]:
# from xgboost import XGBClassifier

# xgbooss_clf = XGBClassifier()
# xgbooss_clf.fit(train_set_prepared,y_train)

In [13]:
def save_titanic_pred(y, test_set, path="_prediction1.csv"):
    """Write survival predictions to a CSV file keyed by passenger id.

    Parameters
    ----------
    y : array-like
        Predicted ``Survived`` values, one per row of ``test_set``.
    test_set : pandas.DataFrame
        Frame whose ``PassengerId`` column supplies the row labels.
    path : str or path-like, default "_prediction1.csv"
        Destination CSV file. The default keeps the original hard-coded name;
        pass a different path to avoid overwriting an earlier prediction file.

    Returns
    -------
    pandas.DataFrame
        Single-column ``Survived`` frame indexed by ``PassengerId``.
    """
    passenger_ids = pd.Index(list(test_set["PassengerId"]), name="PassengerId")
    prediction = pd.DataFrame(y, columns=["Survived"], index=passenger_ids)
    prediction.to_csv(path)
    return prediction

In [14]:
# y_test = forest_clf.predict(test_set_prepared)

In [15]:
# y_test_xgboost = xgbooss_clf.predict(test_set_prepared)

In [16]:
# save_titanic_pred(y_test,test_set)

In [17]:
# save_titanic_pred(y_test_xgboost,test_set)


# Voting Classifier

In [18]:
# from sklearn.ensemble import VotingClassifier
# from sklearn.svm import SVC

# forest_clf_vtg=RandomForestClassifier(n_estimators=100, random_state=0,criterion='entropy')
# xgboost_clf_vtg=XGBClassifier()
# svc_clf = SVC(probability=True)

# voting_clf= VotingClassifier(
#     estimators=[('fc',forest_clf_vtg),('xg', xgboost_clf_vtg),('svc',svc_clf)],
#     voting='soft')
# voting_clf.fit(train_set_prepared,y_train)



In [19]:
# y_vtg_pred = voting_clf.predict(test_set_prepared) 
# save_titanic_pred(y_vtg_pred , test_set)

# Neural Network

In [20]:
from sklearn.preprocessing import StandardScaler
# Standardize the prepared training features; the fitted scaler `std` is kept
# so the same scaling can be applied to other data afterwards.
std = StandardScaler()
train_set_prepared_std1 = std.fit_transform(train_set_prepared)
train_set_prepared_std1

array([[-5.92480600e-01,  4.32793366e-01, -4.73673609e-01, ...,
        -4.82042680e-01, -3.07562343e-01,  6.15838425e-01],
       [ 6.38789012e-01,  4.32793366e-01, -4.73673609e-01, ...,
         2.07450510e+00, -3.07562343e-01, -1.62380254e+00],
       [-2.84663197e-01, -4.74545196e-01, -4.73673609e-01, ...,
        -4.82042680e-01, -3.07562343e-01,  6.15838425e-01],
       ...,
       [ 4.37434839e-15,  4.32793366e-01,  2.00893337e+00, ...,
        -4.82042680e-01, -3.07562343e-01,  6.15838425e-01],
       [-2.84663197e-01, -4.74545196e-01, -4.73673609e-01, ...,
         2.07450510e+00, -3.07562343e-01, -1.62380254e+00],
       [ 1.77062908e-01, -4.74545196e-01, -4.73673609e-01, ...,
        -4.82042680e-01,  3.25137334e+00, -1.62380254e+00]])

In [21]:
import tensorflow as tf

In [22]:
# X_valid = train_set_prepared_std1[750:891]
# y_valid = y_train[750:891]
# train_set_prepared_std1 = train_set_prepared_std1[0:749]
# y_train = y_train[0:749]

In [23]:
# Start from an empty Sequential model; layers are appended with ann.add().
ann = tf.keras.models.Sequential()

In [24]:
# Three fully connected hidden layers of 300 ReLU units each.
ann.add(tf.keras.layers.Dense(units=300,activation="relu"))
ann.add(tf.keras.layers.Dense(units=300,activation="relu"))
ann.add(tf.keras.layers.Dense(units=300,activation="relu"))
# Single sigmoid output unit for the binary Survived target.
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

In [29]:
# Plain SGD with a step size of 0.5. `learning_rate` is the supported keyword;
# the older `lr` alias is deprecated and is rejected by newer Keras releases.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.5)
# Binary cross-entropy matches the single sigmoid output; accuracy is reported per epoch.
ann.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [30]:
# Train on the standardized training features for 500 epochs in batches of 20.
# NOTE(review): no validation data is passed here, so any metric reported
# during training is measured on the training set only — consider holding out
# a validation split to detect overfitting.
ann.fit(train_set_prepared_std1, y_train, batch_size = 20, epochs = 500)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 15

Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 

Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 

Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<tensorflow.python.keras.callbacks.History at 0x220eea7c6d0>

In [None]:
def boolstr_to_floatstr(v):
    """Map the strings 'True' and 'False' to '1' and '0'.

    Any other value is returned unchanged.
    """
    return '1' if v == 'True' else '0' if v == 'False' else v

In [None]:
# Scale the prepared test features with the already-fitted scaler `std`
# (transform only, no re-fit), then run the network on them.
ann_predict1 = ann.predict(std.transform(test_set_prepared))

In [None]:
# Threshold the predicted values at 0.5 and cast the booleans straight to 0/1.
# The former np.vectorize(boolstr_to_floatstr) pass did nothing here: the array
# holds booleans, never the strings 'True'/'False', so every element fell
# through unchanged before the integer cast (and np.vectorize fails on empty input).
ann_predict1 = (ann_predict1 > 0.5)
new_data = ann_predict1.astype(int)

In [None]:
new_data

In [None]:
# Write the 0/1 predictions to CSV, labelled by the test set's PassengerId.
save_titanic_pred(new_data,test_set)