## Blending

### Importing Libraries


In [29]:
#importing necessary libraries
import numpy as np
import pandas as pd

### Reading the data

In [30]:
# Read the cleaned dataset (path is relative to the working directory)
data = pd.read_csv(filepath_or_buffer='data_cleaned.csv')

# Preview the first five rows
data.head(n=5)

Unnamed: 0,Survived,Age,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,SibSp_0,SibSp_1,...,Parch_0,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6,Embarked_C,Embarked_Q,Embarked_S
0,0,22.0,7.25,0,0,1,0,1,0,1,...,1,0,0,0,0,0,0,0,0,1
1,1,38.0,71.2833,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,1,0,0
2,1,26.0,7.925,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,0,0,1
3,1,35.0,53.1,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,1
4,0,35.0,8.05,0,0,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,1


### Separating the Dependent and Independent Variables

In [31]:
# Target: the column the models learn to predict
y = data['Survived']

# Features: every column except the target
x = data.drop(columns=['Survived'])

x.shape, y.shape

((891, 24), (891,))

### Defining the train and test sets

In [32]:
#import train-test split
from sklearn.model_selection import train_test_split as tts

# Hold out a test set; stratifying on y keeps the class proportions of the
# target the same in both parts, and the fixed seed makes the split repeatable
train_x, test_x, train_y, test_y = tts(
    x,
    y,
    stratify=y,
    random_state=9,
)
tuple(part.shape for part in (train_x, test_x, train_y, test_y))

((668, 24), (223, 24), (668,), (223,))

In [33]:
#importing the models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

### Model Training and Prediction

<img src="stacking image 5.png" alt="Drawing" style="width:300px;"/>

In [34]:
def blending(model, train_x, train_y, test_x, random_state=9):
    """Fit ``model`` on part of the training data and predict the rest.

    The training data is split once into a fitting part and a held-out
    validation part. ``model`` is fitted in place on the fitting part and is
    then used to predict both the validation part and ``test_x``. The
    validation score is printed as a side effect.

    Parameters
    ----------
    model : estimator exposing ``fit``, ``predict`` and ``score``
    train_x, train_y : features and target that are split into the
        fitting and validation parts
    test_x : features to predict with the fitted model
    random_state : seed forwarded to the train/validation split. Use the
        same value for every base model so that their validation
        predictions refer to the same rows.

    Returns
    -------
    (valid_pred, y_valid, test_pred) : predictions on the validation part,
        the true validation targets, and predictions on ``test_x``.
    """
    # Train/validation split; the seed decides which rows are held out
    x_train, x_valid, y_train, y_valid = tts(
        train_x, train_y, random_state=random_state
    )

    # Fit on the fitting part only, so the validation rows stay unseen
    model.fit(x_train, y_train)

    # Predictions on the held-out rows, plus the model's own score on them
    valid_pred = model.predict(x_valid)
    score = model.score(x_valid, y_valid)
    print('Validation Score', score)

    test_pred = model.predict(test_x)
    return valid_pred, y_valid, test_pred

In [35]:
# Base model 1: logistic regression with its default settings
LR = LogisticRegression()
M1_valid, M1_target, M1_test = blending(
    model=LR, train_x=train_x, train_y=train_y, test_x=test_x
)

Validation Score 0.77245508982


In [36]:
#Model 2 = Decision Tree
# Seeded so the result is reproducible: the tree permutes the features at
# random while searching for splits, so an unseeded tree can give a different
# validation score on every run.
DT = DecisionTreeClassifier(random_state=9)
M2_valid, M2_target, M2_test = blending(DT, train_x, train_y, test_x)

Validation Score 0.748502994012


In [37]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X=train_x)

train_x = scaler.transform(train_x)
test_x = scaler.transform(test_x)

In [38]:
#Model 3 = K Nearest Neighbour
knn=KNeighborsClassifier()
M3_valid, M3_target, M3_test = blending(knn, train_x, train_y, test_x)

Validation Score 0.760479041916


In [39]:
# Validation-set predictions of the three base models, one column per model;
# this frame is what the final model is fitted on
valid_prediction = dict(LR=M1_valid, DT=M2_valid, knn=M3_valid)
new_train = pd.DataFrame(data=valid_prediction)
new_train.head()

Unnamed: 0,LR,DT,knn
0,1,0,0
1,1,1,1
2,0,0,0
3,1,0,0
4,0,0,0


In [40]:
# Test-set predictions of the same three base models, in the same column
# order as new_train; this frame is what the final model is scored on
test_prediction = dict(LR=M1_test, DT=M2_test, knn=M3_test)
new_test = pd.DataFrame(data=test_prediction)
new_test.head()

Unnamed: 0,LR,DT,knn
0,0,0,0
1,1,1,1
2,0,1,0
3,0,0,0
4,0,0,0


In [43]:
# Final (meta) model: learn from the base models' validation predictions,
# then score it on their predictions for the held-out test set
model1 = KNeighborsClassifier().fit(new_train, M1_target)
model1.score(new_test, test_y)

0.73094170403587444