# Goal : Predict if someone survived the Titanic Shipwreck or not

In [None]:
# Author : Sagar Bapodara (PS : This is my first competition submission)

# Importing Dependencies

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
print('Dependencies Imported')

In [None]:
# Load the Kaggle Titanic training split into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../input/titanic/train.csv')

In [None]:
# Preview the first ten rows of the raw training data.
df.head(n=10)

In [None]:
# Remove, in place, the columns the author judged less correlated with the
# target; only the remaining columns are used from here on.
df.drop(
    columns=['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked', 'SibSp', 'Parch'],
    inplace=True,
)

# Basic Data Analysis

In [None]:
# Preview the first ten rows after the column drop.
df.head(n=10)

In [None]:
# Count the missing entries in every remaining column.
df.isna().sum()

In [None]:
# Summary statistics of the numeric columns, displayed as the cell output.
df.describe()

In [None]:
# Replace missing values in 'Age' with the mean of the 'Age' column.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the df['Age'] selection: that chained in-place form
# triggers a FutureWarning from pandas 2.1 and does not modify df once
# Copy-on-Write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].mean())

In [None]:
# One-hot encode 'Sex'; drop_first=True omits the first category's column so
# the encoding is not redundant.
l_sex_dummies = pd.get_dummies(data=df['Sex'], drop_first=True)

In [None]:
# Append the indicator column(s) to the right-hand side of the training frame.
df = pd.concat(objs=[df, l_sex_dummies], axis='columns')

In [None]:
# Preview the frame with the new indicator column(s) attached.
df.head(n=10)

In [None]:
# The textual 'Sex' column is now redundant with its encoded form; remove it in place.
df.drop(columns=['Sex'], inplace=True)

In [None]:
# Preview the frame after removing the original 'Sex' column.
df.head(n=10)

# Feature Engineering

In [None]:
# StandardScaler standardizes each feature by removing its mean and scaling
# to unit variance.
from sklearn.preprocessing import StandardScaler
# One scaler instance, referenced as `sd` by the scaling cells further down.
sd = StandardScaler()

In [None]:
# Names of the continuous columns that get standardized.
feature_scale = ['Age', 'Fare']
# Fit the scaler on the training values of those columns and overwrite them
# with their standardized versions.
scaled_values = sd.fit_transform(df[feature_scale])
df[feature_scale] = scaled_values

In [None]:
# Preview the first five rows after scaling (five is head()'s default).
df.head()

In [None]:
# Target vector: the 'Survived' label column.
y = df['Survived']
# Feature matrix: every remaining column except the label.
X = df.drop(columns=['Survived'])

# Implementing ML Model

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Candidate estimators and the hyper-parameter grid searched for each one.
# Every entry maps a display name to:
#   'model' - an unfitted estimator instance handed to GridSearchCV
#   'param' - the param_grid explored for that estimator
model_param = {
    'DecisionTreeClassifier': {
        'model': DecisionTreeClassifier(),
        'param': {
            'criterion': ['gini', 'entropy'],
        },
    },
    'KNeighborsClassifier': {
        'model': KNeighborsClassifier(),
        'param': {
            'n_neighbors': [5, 10, 15, 20, 25, 30, 35],
        },
    },
    'SVC': {
        'model': SVC(),
        'param': {
            'kernel': ['rbf', 'linear', 'poly', 'sigmoid'],
            'C': [0.1, 1, 10, 100],
        },
    },
    'RandomForestClassifier': {
        'model': RandomForestClassifier(),
        'param': {
            'n_estimators': [100],
            'criterion': ['gini', 'entropy'],
            # 'auto' is deliberately not listed: for classifiers it was an
            # alias of 'sqrt', was deprecated in scikit-learn 1.1 and removed
            # in 1.3, where it makes the grid-search fit fail.
            'max_features': ['sqrt', 'log2'],
        },
    },
}

In [None]:
# Run a 5-fold cross-validated grid search for every candidate in model_param
# and record, per model, the best score and the parameters that produced it.
scores = []
for model_name, mp in model_param.items():
    model_selection = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['param'],
        cv=5,
        return_train_score=False,
    )
    model_selection.fit(X, y)
    scores.append(
        dict(
            model=model_name,
            best_score=model_selection.best_score_,
            best_params=model_selection.best_params_,
        )
    )

In [None]:
# Tabulate the grid-search results, one row per model, and display the table.
df_model_score = pd.DataFrame(
    data=scores,
    columns=['model', 'best_score', 'best_params'],
)
df_model_score

#### Note: Both the SVC and the Random Forest models are trained below, and a separate submission file is written for each

# SVC Model

In [None]:
# Support-vector classifier with an RBF kernel and C=100
# (presumably the best SVC setting from the grid search; confirm against the score table).
model_svc = SVC(kernel='rbf', C=100)

In [None]:
# Train the SVC on the full training feature matrix and labels.
model_svc.fit(X=X, y=y)

In [None]:
# Load the Kaggle Titanic test split; kept intact because its PassengerId
# column is needed when building the submission.
df2 = pd.read_csv(filepath_or_buffer='../input/titanic/test.csv')

In [None]:
# Preview the first five rows of the raw test data (five is head()'s default).
df2.head()

In [None]:
# Working copy of the test data without the columns that were also removed
# from the training frame; df2 itself is left untouched.
df3 = df2.drop(
    columns=['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked', 'SibSp', 'Parch'],
)

In [None]:
# Count the missing entries in every column of the test features.
df3.isna().sum()

In [None]:
# Fill the missing values in the test dataset with each column's own mean.
# The results are assigned back to the columns instead of calling
# fillna(inplace=True) on a column selection: that chained in-place form
# triggers a FutureWarning from pandas 2.1 and does not modify df3 once
# Copy-on-Write is enabled.
df3['Age'] = df3['Age'].fillna(df3['Age'].mean())
df3['Fare'] = df3['Fare'].fillna(df3['Fare'].mean())

In [None]:
# Encode 'Sex' in the test features the same way as in the training frame:
# build the indicator column(s), attach them, then discard the text column.
l_sex_dummies = pd.get_dummies(data=df3['Sex'], drop_first=True)
df3 = pd.concat(objs=[df3, l_sex_dummies], axis='columns')
df3.drop(columns=['Sex'], inplace=True)

In [None]:
# Preview the first five rows of the encoded test features.
df3.head(n=5)

In [None]:
# Standardize the test features with the scaler that was already fitted on the
# training data. transform() (not fit_transform()) is used on purpose:
# refitting on the test set would scale it with a different mean/std than the
# features the models were trained on.
df3[feature_scale] = sd.transform(df3[feature_scale])

In [None]:
# Preview the first five rows of the scaled test features (five is head()'s default).
df3.head()

In [None]:
# SVC survival predictions for every row of the prepared test features.
y_predicted = model_svc.predict(X=df3)

In [None]:
# Pair each test passenger's id (from the untouched df2) with the SVC prediction.
submission = pd.DataFrame(
    data=dict(
        PassengerId=df2['PassengerId'],
        Survived=y_predicted,
    )
)

In [None]:
# Display the SVC submission table as the cell output.
submission

In [None]:
# Write the SVC submission to disk without the DataFrame index column.
submission.to_csv(path_or_buf='titanic_submission.csv', index=False)

# Random Forest Model

In [None]:
# Random forest of 100 trees using the entropy split criterion and log2
# feature sub-sampling.
model_randomforest = RandomForestClassifier(
    criterion='entropy',
    max_features='log2',
    n_estimators=100,
)

In [None]:
# Train the random forest on the full training feature matrix and labels.
model_randomforest.fit(X=X, y=y)

In [None]:
# Random-forest survival predictions for every row of the prepared test features.
y_predicted_randomforest = model_randomforest.predict(X=df3)

In [None]:
# Pair each test passenger's id (from the untouched df2) with the random
# forest's prediction. This must use y_predicted_randomforest; y_predicted
# holds the SVC output and would make this file a copy of the SVC submission.
submission = pd.DataFrame({
        "PassengerId": df2['PassengerId'],
        "Survived": y_predicted_randomforest
    })

In [None]:
# Display the random-forest submission table as the cell output.
submission

In [None]:
# Write the random-forest submission to disk without the DataFrame index column.
submission.to_csv(path_or_buf='titanic_submission_rf.csv', index=False)

## If you found this useful, kindly upvote and comment your views :) 