## I use a lot of techniques from this [notebook](https://www.kaggle.com/ihelon/titanic-hyperparameter-tuning-with-gridsearchcv)

# Importing and cleaning data

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from catboost import CatBoostClassifier

In [4]:
# Load the Kaggle Titanic training and test splits from the competition input directory.
train=pd.read_csv('../input/titanic/train.csv')
test=pd.read_csv('../input/titanic/test.csv')

In [5]:
# Stack train and test row-wise so cleaning and feature engineering are applied to both at once.
# ignore_index is not passed, so each frame's original row labels are kept in the result.
full=pd.concat([train,test])

In [6]:
# Count missing values per column of the combined frame.
full.isnull().sum()

In [7]:
# Remove the Cabin column entirely instead of imputing it
# (presumably because it is too sparse to be useful; check the null counts).
full = full.drop(columns=['Cabin'])

In [8]:
# Side-by-side histograms of Age and Fare, with missing values excluded.
fig, (age_ax, fare_ax) = plt.subplots(nrows=1, ncols=2, figsize=(24, 8))

age_ax.hist(full['Age'].dropna())
age_ax.set_title('Age')

fare_ax.hist(full['Fare'].dropna(), color='green')
fare_ax.set_title('Fare');

In [9]:
# Impute missing values: numeric columns get their median, Embarked gets its most frequent value.
# All statistics are computed over the combined train+test frame.
Age_median=full['Age'].median()
Fare_median=full['Fare'].median()
# Derive the mode from the data instead of hard-coding a port, consistent with the medians above.
# Series.mode() skips NaN and returns its values sorted, so the first entry is deterministic on ties.
Embarked_mode=full['Embarked'].mode().iloc[0]

fill_values={'Age':Age_median,'Fare':Fare_median,'Embarked':Embarked_mode}
for column,value in fill_values.items():
    full[column]=full[column].fillna(value)

In [10]:
# Re-check missing values after imputation; Survived is not imputed by the cells above.
full.isnull().sum()

In [11]:
# Keep only the last space-separated token of each ticket string.
Ticket_num = full['Ticket'].str.rsplit(' ', n=1).str[-1]

In [12]:
# Replace the ticket token with an integer code per distinct token.
# The tokens are strings here, so the codes follow the encoder's sorted class order,
# not the numeric value of the ticket.
le = LabelEncoder()
le.fit(Ticket_num)
Ticket_num = le.transform(Ticket_num)
full['Ticket'] = Ticket_num

In [13]:
# Extract the honorific from Name.
# Assumes names look like "Surname, Title. Given names" (TODO confirm against the data):
# the text after the comma starts with a space, so token 1 of the space split is the title.
first_name = full['Name'].str.split(',').str.get(1)
name_title = first_name.str.split(' ').str.get(1)

In [14]:
# Collapse rare honorifics into the three adult groups; titles not listed are left unchanged.
# "the" is presumably the first token of a multi-word title such as "the Countess." (verify).
title_aliases = {
    "Ms.": "Miss.",
    "Mlle.": "Miss.",
    "Mme.": "Mrs.",
    "the": "Mrs.",
    "Lady.": "Mrs.",
    "Dona.": "Mrs.",
    "Dr.": "Mr.",
    "Major.": "Mr.",
    "Col.": "Mr.",
    "Sir.": "Mr.",
    "Rev.": "Mr.",
    "Jonkheer.": "Mr.",
    "Capt.": "Mr.",
    "Don.": "Mr.",
}
name_title = name_title.replace(title_aliases)
full['Name_title'] = name_title

# The raw name is no longer needed once the title has been extracted.
full.drop(columns='Name', inplace=True)

In [15]:
# Integer codes for the remaining string-valued columns.
# Series.map turns any value missing from the mapping into NaN.
Sex_map = {'male': 0, 'female': 1}
Embarked_map = {'S': 0, 'C': 1, 'Q': 2}
Name_title_map = {'Mr.': 0, 'Mrs.': 1, 'Miss.': 2, 'Master.': 3}

for column, codes in (
    ('Sex', Sex_map),
    ('Embarked', Embarked_map),
    ('Name_title', Name_title_map),
):
    full[column] = full[column].map(codes)

In [16]:
# Combine the two relative counts into one feature, then drop the originals.
full['Family_size'] = full['SibSp'].add(full['Parch'])

full = full.drop(columns=['SibSp', 'Parch'])

In [17]:
# Standardise Fare to zero mean and unit variance using statistics from the combined frame
# (pandas' default sample standard deviation).
fare_mean = full['Fare'].mean()
fare_std = full['Fare'].std()
full['Fare'] = full['Fare'].sub(fare_mean).div(fare_std)
full['Fare']

In [18]:
# Display the fully preprocessed frame as a sanity check before modelling.
full

# Do machine learning.

In [19]:
# Split the combined frame back apart: rows with a Survived value are the training set,
# rows without one are the test set (whose empty label column is dropped).
has_label = full['Survived'].notna()
train = full.loc[has_label, :]
test = full.loc[~has_label, :].drop(columns='Survived')

In [20]:
# Target vector and feature matrix; PassengerId is an identifier, not a feature.
y = train['Survived']
X = train.drop(columns=['PassengerId', 'Survived'])

## Random Forest

In [34]:
# Random forest tuned over `max_features` with 5-fold cross-validation.
# class_weight="balanced" reweights classes inversely to their frequency in y.
# random_state is fixed so the forest (and therefore best_score_) is reproducible across runs.
model_RF=RandomForestClassifier(n_estimators=250,class_weight="balanced",random_state=42)
# 'auto' is deliberately not in the grid: for classifiers it was only an alias of 'sqrt',
# was deprecated in scikit-learn 1.1 and removed in 1.3, where passing it raises an error.
param_grid={'max_features':['sqrt', 'log2']}
grid_RF=GridSearchCV(model_RF,param_grid,cv=5)

In [35]:
# Run the cross-validated search; the trailing semicolon suppresses the estimator repr.
grid_RF.fit(X,y);

In [36]:
# Mean cross-validated score of the best parameter combination.
grid_RF.best_score_

In [37]:
# Predict on the test passengers with the best random forest found by the search.
# PassengerId is dropped so the columns match those of X.
for_predict_RF = test.drop(columns='PassengerId')
best_model_RF = grid_RF.best_estimator_
y_predict_RF = best_model_RF.predict(for_predict_RF)

## LogisticRegression

In [48]:
# Logistic regression tuned over regularisation strength and penalty type (5-fold CV).
# The liblinear solver is used because the grid includes the "l1" penalty.
model_LR = LogisticRegression(solver="liblinear", class_weight="balanced")
param_grid = {
    "C": [0.001, 0.01, 0.1, 1.],
    "penalty": ["l1", "l2"],
}
grid_LR = GridSearchCV(estimator=model_LR, param_grid=param_grid, cv=5)

In [49]:
# Run the cross-validated search; the trailing semicolon suppresses the estimator repr.
grid_LR.fit(X,y);

In [50]:
# Mean cross-validated score of the best parameter combination.
grid_LR.best_score_

In [41]:
# Predict on the test passengers with the best logistic regression found by the search.
for_predict_LR = test.drop(columns='PassengerId')
best_model_LR = grid_LR.best_estimator_
y_predict_LR = best_model_LR.predict(for_predict_LR)

## SVC

In [27]:
# Linear-kernel SVM tuned over the regularisation parameter C (5-fold CV).
# NOTE(review): probability=True is only needed for predict_proba, which the visible cells
# never call; confirm whether it is still required.
model_svc = SVC(
    kernel='linear',
    class_weight='balanced',
    probability=True,
)
param_grid = {'C': [0.001, 0.01, 0.1, 1.]}
grid_svc = GridSearchCV(estimator=model_svc, param_grid=param_grid, cv=5)

In [28]:
# Run the cross-validated search; the trailing semicolon suppresses the estimator repr.
grid_svc.fit(X,y);

In [29]:
# Mean cross-validated score of the best parameter combination.
grid_svc.best_score_

In [30]:
# Predict on the test passengers with the best SVM found by the search.
for_predict_svc = test.drop(columns='PassengerId')
best_model_svc = grid_svc.best_estimator_
y_predict_svc = best_model_svc.predict(for_predict_svc)

## AdaBoostClassifier

In [68]:
# AdaBoost with default settings, tuned over the learning rate (5-fold CV).
param_grid = {'learning_rate': [0.001, 0.01, 0.1, 1.]}
model_ADA = AdaBoostClassifier()
grid_ADA = GridSearchCV(estimator=model_ADA, param_grid=param_grid, cv=5)

In [69]:
# Run the cross-validated search; the trailing semicolon suppresses the estimator repr.
grid_ADA.fit(X,y);

In [70]:
# Mean cross-validated score of the best parameter combination.
grid_ADA.best_score_

In [71]:
# Predict on the test passengers with the best AdaBoost model found by the search.
for_predict_ADA = test.drop(columns='PassengerId')
best_model_ADA = grid_ADA.best_estimator_
y_predict_ADA = best_model_ADA.predict(for_predict_ADA)

## CatBoost

In [21]:
# Column positions apparently meant to be treated as categorical features.
# NOTE(review): this list is never passed to CatBoostClassifier in the visible cells, so it
# currently has no effect; confirm whether `cat_features=cat_features` was intended.
cat_features = [0, 1]

# CatBoost tuned over the learning rate (5-fold CV).
param_grid = {'learning_rate': [0.001, 0.01, 0.1, 1.]}
model_cat = CatBoostClassifier(verbose=False)
grid_cat = GridSearchCV(estimator=model_cat, param_grid=param_grid, cv=5)

In [22]:
# Run the cross-validated search; the trailing semicolon suppresses the estimator repr.
grid_cat.fit(X,y);

In [94]:
# Mean cross-validated score of the best parameter combination.
grid_cat.best_score_

In [95]:
# Predict on the test passengers with the best CatBoost model found by the search.
for_predict_cat = test.drop(columns='PassengerId')
best_model_cat = grid_cat.best_estimator_
y_predict_cat = best_model_cat.predict(for_predict_cat)

## Ensembling (majority vote)

In [99]:
# One column of hard predictions per model, one row per test passenger.
model_predictions = {
    'RF': y_predict_RF,
    'SVC': y_predict_svc,
    'LR': y_predict_LR,
    'ADA': y_predict_ADA,
    'CAT': y_predict_cat,
}
result_df = pd.DataFrame(model_predictions)

# The row-wise mode is the majority vote across the five models.
vote_df = result_df.mode(axis='columns')
vote_df

## Submit

In [100]:
# Build the submission positionally: row i of `test` pairs with row i of `vote_df`, because every
# model predicted on `test` in its row order. Raw arrays are used instead of
# pd.concat(axis=1), which aligns on row labels and silently produces NaNs or mismatched rows
# whenever the two frames' indexes differ.
PassengerId_df=pd.DataFrame({'PassengerId':test['PassengerId']})
assert len(PassengerId_df)==len(vote_df), "expected exactly one vote per test passenger"

submission_df=pd.DataFrame(
    # Column 0 of the row-wise mode holds the winning class for each passenger.
    {'Survived':vote_df[0].astype(int).to_numpy()},
    index=pd.Index(PassengerId_df['PassengerId'].to_numpy(),name='PassengerId'),
)
# The index is written as the first CSV column, giving the header "PassengerId,Survived".
submission_df.to_csv('submission.csv')