In [171]:
import warnings
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import pickle
from sklearn.externals import joblib

# Silence every warning category for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides any warning emitted while the
# pickled models are loaded further down — confirm that is intended.
warnings.filterwarnings('ignore')

In [172]:
# Restore the six persisted models from disk. The unpacking order below
# matches the order of `model_paths`, one variable per file.
# NOTE(review): joblib artifacts are pickle-based; only load files you trust.
model_paths = (
    'trainedmodels/logregmodel.pkl',
    'trainedmodels/randomforestmodel.pkl',
    'trainedmodels/gnbmodel.pkl',
    'trainedmodels/svcmodel.pkl',
    'trainedmodels/knnmodel.pkl',
    'trainedmodels/decisiontreemodel.pkl',
)
(
    logregmodel,
    randomforestmodel,
    gnbmodel,
    svcmodel,
    knnmodel,
    decisiontreemodel,
) = map(joblib.load, model_paths)

In [173]:
# `quest` is the frame that gets encoded and fed to the models below;
# `yo` is only read later for its 'PassengerId' column.
features_csv = "csvs/forsolving.csv"
passengers_csv = "csvs/test.csv"
quest = pd.read_csv(features_csv)
yo = pd.read_csv(passengers_csv)

In [174]:
# One-hot encode each categorical column in turn and append the indicator
# columns to the right of the frame (Sex first, then Embarked, then Title).
for column in ['Sex', 'Embarked', 'Title']:
    dummy = pd.get_dummies(quest[column])
    quest = pd.concat([quest, dummy], axis = 1)

# The raw categorical columns are no longer needed once encoded; 'CabinAlpha'
# is dropped without being encoded.
quest = quest.drop(['Sex', 'Embarked', 'Title', 'CabinAlpha'], axis = 1)


In [175]:
# Per-column flag: True when the column contains at least one missing value.
quest.isnull().any()


Pclass       False
Age          False
SibSp        False
Parch        False
Fare          True
HaveCabin    False
female       False
male         False
C            False
Q            False
S            False
Col          False
Don          False
Dr           False
Master       False
Miss         False
Mr           False
Mrs          False
Ms           False
Rev          False
dtype: bool

In [176]:
# Replace missing fares with the mean of the fares that are present
# (Series.mean() skips NaN by default).
fare_mean = quest['Fare'].mean()
quest['Fare'] = quest['Fare'].fillna(fare_mean)

In [177]:
# Re-run the per-column missing-value check after the 'Fare' imputation.
quest.isnull().any()


Pclass       False
Age          False
SibSp        False
Parch        False
Fare         False
HaveCabin    False
female       False
male         False
C            False
Q            False
S            False
Col          False
Don          False
Dr           False
Master       False
Miss         False
Mr           False
Mrs          False
Ms           False
Rev          False
dtype: bool

In [178]:
# Show the column labels of the encoded frame, in their current order.
quest.columns

Index(['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'HaveCabin', 'female',
       'male', 'C', 'Q', 'S', 'Col', 'Don', 'Dr', 'Master', 'Miss', 'Mr',
       'Mrs', 'Ms', 'Rev'],
      dtype='object')

In [179]:
# Keep a separate copy of the title indicator columns so they can be
# re-inserted into `quest` later in a different column order.
titles_in_data = ['Col', 'Don', 'Dr', 'Master', 'Miss', 'Mr', 'Mrs', 'Ms', 'Rev']
quest1 = quest.loc[:, titles_in_data]

In [180]:
# Preview the first rows of the title-indicator frame.
quest1.head()

Unnamed: 0,Col,Don,Dr,Master,Miss,Mr,Mrs,Ms,Rev
0,0,0,0,0,0,1,0,0,0
1,0,0,0,0,0,0,1,0,0
2,0,0,0,0,0,1,0,0,0
3,0,0,0,0,0,1,0,0,0
4,0,0,0,0,0,0,1,0,0


In [181]:
# Remove the title indicators from `quest`; the copy held in `quest1` is
# used to add them back further down.
observed_title_columns = ['Col', 'Don', 'Dr', 'Master', 'Miss', 'Mr', 'Mrs', 'Ms', 'Rev']
quest = quest.drop(labels=observed_title_columns, axis='columns')

In [182]:
# Preview the frame after the title indicator columns have been removed.
quest.head()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,HaveCabin,female,male,C,Q,S
0,3,34.5,0,0,7.8292,0,0,1,0,1,0
1,3,47.0,1,0,7.0,0,1,0,0,0,1
2,2,62.0,0,0,9.6875,0,0,1,0,1,0
3,3,27.0,0,0,8.6625,0,0,1,0,0,1
4,3,22.0,1,1,12.2875,0,1,0,0,0,1


In [183]:
# Append the title indicator columns in one fixed order. Titles that exist in
# `quest1` are copied across; every other title becomes an all-zero column.
# NOTE(review): this order presumably mirrors the feature layout the saved
# models were trained on — confirm against the training notebook.
title_order = [
    'Capt', 'Col', 'Don', 'Dr', 'Jonkheer', 'Lady', 'Major', 'Master',
    'Miss', 'Mlle', 'Mme', 'Mr', 'Mrs', 'Ms', 'Rev', 'Sir', 'the Countess',
]
for title in title_order:
    quest[title] = quest1[title] if title in quest1.columns else 0

In [184]:
# Preview the frame now that all title columns have been appended.
quest.head()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,HaveCabin,female,male,C,Q,...,Master,Miss,Mlle,Mme,Mr,Mrs,Ms,Rev,Sir,the Countess
0,3,34.5,0,0,7.8292,0,0,1,0,1,...,0,0,0,0,1,0,0,0,0,0
1,3,47.0,1,0,7.0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,2,62.0,0,0,9.6875,0,0,1,0,1,...,0,0,0,0,1,0,0,0,0,0
3,3,27.0,0,0,8.6625,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
4,3,22.0,1,1,12.2875,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [185]:
# `x_test` is a second name for the same DataFrame object as `quest`
# (plain assignment, no copy is made).
x_test = quest

In [186]:
# Run every loaded model on the same feature frame. The models are called in
# the order listed, and the results are unpacked in that same order.
fitted_models = (
    logregmodel,
    randomforestmodel,
    gnbmodel,
    svcmodel,
    knnmodel,
    decisiontreemodel,
)
(
    y_predict_lgm,
    y_predict_rfm,
    y_predict_gnb,
    y_predict_svc,
    y_predict_knn,
    y_predict_dt,
) = [model.predict(x_test) for model in fitted_models]

In [187]:
# Lay the six prediction vectors side by side, one column per model, and
# label the columns with the short model names.
# assumes each y_predict_* is a 1-D array of equal length — TODO confirm
model_labels = ['dt', 'gnb', 'knn', 'lgm', 'rfm', 'svc']
model_outputs = (
    y_predict_dt,
    y_predict_gnb,
    y_predict_knn,
    y_predict_lgm,
    y_predict_rfm,
    y_predict_svc,
)
y_pred = pd.DataFrame(np.column_stack(model_outputs), columns = model_labels)

Unnamed: 0,dt,gnb,knn,lgm,rfm,svc
0,0,0,0,0,0,0
1,0,1,0,1,0,0
2,1,0,0,0,0,0
3,1,0,0,0,1,0
4,0,1,0,1,0,1
5,0,0,0,0,0,0
6,0,0,0,1,0,0
7,0,0,1,0,0,1
8,1,1,0,1,1,1
9,0,0,0,0,0,0


In [188]:
# Load the persisted ensemble model; it is applied to the per-model
# prediction table `y_pred` in the next step.
# NOTE(review): joblib artifacts are pickle-based; only load files you trust.
ensemblemodel = joblib.load("trainedmodels/ensemblemodel.pkl")

In [189]:
# Final predictions: the ensemble model takes the six per-model predictions
# (the columns of `y_pred`) as its input features.
y_sol = ensemblemodel.predict(y_pred)

In [190]:
# Wrap the predictions in a DataFrame. No column names are given, so pandas
# assigns default integer labels; the column is read back later as
# submission[0].
submission = pd.DataFrame(y_sol)

In [191]:
# Display the ensemble predictions.
submission

Unnamed: 0,0
0,0
1,0
2,1
3,1
4,0
5,0
6,0
7,0
8,1
9,0


In [193]:

# Start the output frame from the passenger identifiers read from `yo`;
# the frame inherits the index of that column.
final = pd.DataFrame({'PassengerId': yo['PassengerId']})

In [194]:
# Attach the predictions to the passenger ids.
# `final` and `submission` originate from two different CSV files, so a plain
# Series assignment (which aligns on index labels) would silently write NaN
# for any row that does not line up. Check the row counts explicitly and
# assign by position instead, so a mismatch fails loudly rather than
# producing a corrupt submission file.
if len(submission) != len(final):
    raise ValueError(
        "prediction/passenger row count mismatch: "
        "%d predictions for %d passengers" % (len(submission), len(final))
    )
final['Survived'] = submission[0].values

In [197]:
# Write the result to disk; index=False keeps the row index out of the file
# so only the columns of `final` are written.
final.to_csv("csvs/submission.csv", index = False)