# Titanic: Machine Learning from Disaster

## Importing the Libraries and Dataset

In [60]:
import pandas as pd
import numpy as np

# Read the training and test CSVs (in that order) and preview the training frame.
df, df_test = map(pd.read_csv, ('titanic.csv', 'titanic_test.csv'))
df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## Preprocessing data

### Dropping columns

In [95]:
# Columns excluded from the feature set; the same list is reused when the
# test frame is preprocessed.
dropped_list = [
    'Name',
    'Age',
    'Fare',
    'Ticket',
    'PassengerId',
]
new_df = df.drop(columns=dropped_list)

In [96]:
# One-hot encode the 'Embarked' column. dtype=int keeps the indicator columns
# as 0/1 integers regardless of pandas version (pandas >= 2.0 would otherwise
# emit booleans), so they match the other integer-coded features.
new_df = pd.get_dummies(new_df, columns=['Embarked'], dtype=int)

### Replacing values

In [97]:
# Turn 'Cabin' into a has-cabin indicator: 1 where a cabin value is present,
# 0 where it is missing.
# The flag is derived from the untouched raw frame `df` (same row index as
# new_df) so the cell can be re-run safely: overwriting new_df['Cabin'] from
# itself would mark every row as 1 on a second run, because the 0/1 values
# written by the first run are all non-null. astype(int) also yields a proper
# integer column instead of an object column.
new_df['Cabin'] = df['Cabin'].notnull().astype(int)

In [98]:
# Integer codes for the 'Sex' column; reused for the test frame.
sex_value = {
    'male': 0,
    'female': 1,
}

# Series.map recodes the strings straight to integers. Unlike
# replace(dict), it does not depend on pandas' deprecated silent downcasting
# of the object column. Mapping from the raw frame `df` (same row index as
# new_df) keeps the cell safe to re-run. Any value not in sex_value becomes NaN.
new_df['Sex'] = df['Sex'].map(sex_value)

In [99]:
# Preview the first five preprocessed rows.
new_df.head(n=5)

Unnamed: 0,Survived,Pclass,Sex,SibSp,Parch,Cabin,Embarked_C,Embarked_Q,Embarked_S
0,0,3,0,1,0,0,0,0,1
1,1,1,1,1,0,1,1,0,0
2,1,3,1,0,0,0,0,0,1
3,1,1,1,1,0,1,0,0,1
4,0,3,0,0,0,0,0,0,1


### Assigning Train data

In [100]:
# Split the preprocessed frame into the target ('Survived') and the predictors
# (every remaining column).
y_train = new_df['Survived']
x_train = new_df.drop(columns=['Survived'])

## Model Building

### Importing knn model

In [101]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# Base estimator with default settings; the grid search varies its parameters.
knn = KNeighborsClassifier()

# Search space: k from 3 to 50 inclusive, with both neighbour-weighting schemes.
param_grid = dict(
    n_neighbors=np.arange(3, 51),
    weights=['distance', 'uniform'],
)

### KNN Parameter Selection

In [102]:
# Grid search over param_grid with cv=6, scoring each candidate by ROC AUC.
gscv = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=6,
)
gscv.fit(x_train, y_train)

GridSearchCV(cv=6, error_score='raise-deprecating',
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='warn', n_jobs=None,
             param_grid={'n_neighbors': array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
       20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
       37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]),
                         'weights': ['distance', 'uniform']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='roc_auc', verbose=0)

### Training Data

In [103]:
# Final model, fitted on the full training set with fixed hyper-parameters.
# NOTE(review): presumably n_neighbors=37 / weights='uniform' were read off
# gscv.best_params_ — confirm; the visible cells never display the search
# result, so these values are not tied to it.
final_knn_params = {'n_neighbors': 37, 'weights': 'uniform'}
knn2 = KNeighborsClassifier(**final_knn_params)
knn2.fit(x_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=37, p=2,
                     weights='uniform')

## Predicting Data

### Preprocessing Test Data

In [104]:
# Drop the same columns from the test frame that were dropped from the training frame.
new_dftest = df_test.drop(columns=dropped_list)

In [105]:
# One-hot encode the 'Embarked' column of the test frame. dtype=int keeps the
# indicator columns as 0/1 integers regardless of pandas version
# (pandas >= 2.0 would otherwise emit booleans).
new_dftest = pd.get_dummies(new_dftest, columns=['Embarked'], dtype=int)

In [106]:
# Turn 'Cabin' into a has-cabin indicator for the test frame: 1 where a cabin
# value is present, 0 where it is missing.
# The flag is derived from the untouched raw frame `df_test` (same row index
# as new_dftest) so the cell can be re-run safely: overwriting the column from
# itself would mark every row as 1 on a second run. astype(int) also yields an
# integer column instead of an object column.
new_dftest['Cabin'] = df_test['Cabin'].notnull().astype(int)

In [107]:
# Recode 'Sex' with the sex_value mapping. Series.map avoids the deprecated
# silent downcasting that replace(dict) relies on, and mapping from the raw
# frame `df_test` (same row index as new_dftest) keeps the cell safe to
# re-run. Any value not in sex_value becomes NaN.
new_dftest['Sex'] = df_test['Sex'].map(sex_value)

### Predicting Data and Saving Result

In [108]:
# Align the test features with the columns, and the column order, the model
# was fitted on. Train and test were one-hot encoded independently, so a
# category present in only one file would otherwise give missing or shifted
# columns. Dummy columns absent from the test frame are filled with 0, and
# columns the model never saw are dropped.
test_features = new_dftest.reindex(columns=x_train.columns, fill_value=0)
y_test = knn2.predict(test_features)

In [114]:
# Build the output frame: one 'Survived' prediction per test row, indexed by
# the test frame's PassengerId, then write it to CSV.
result = pd.DataFrame({'Survived': y_test}, index=df_test['PassengerId'])
result.to_csv('predictions.csv')