In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import chi2 , f_classif 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
import os
# Print the full path of every file found beneath the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

In [None]:
# Load the training features and attach the labels as a trailing 'class' column.
# NOTE(review): the competition CSVs are understood to ship WITHOUT a header
# row. With the read_csv default (header=0) the first sample of each file is
# consumed as column names and silently dropped; header=None keeps it.
# Confirm against the dataset description — if the files do carry a header,
# the columns will load as strings and model fitting will fail loudly.
df_train = pd.read_csv('../input/data-science-london-scikit-learn/train.csv', header=None)
train_labels = pd.read_csv('../input/data-science-london-scikit-learn/trainLabels.csv', header=None)

# Assign the single label column as a Series rather than a whole DataFrame.
df_train['class'] = train_labels.iloc[:, 0]
df_train

In [None]:
# Summary statistics for each numeric column of the training frame.
df_train.describe()

In [None]:
# Count the missing entries in each column of the training frame.
df_train.isna().sum()

In [None]:
# Show the dtype pandas inferred for each column.
df_train.dtypes

In [None]:
# The label was appended last, so every column before it is a feature.
label_position = df_train.shape[1] - 1
X = df_train.iloc[:, :label_position]
y = df_train.iloc[:, label_position]

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of
# df_train (features plus the 'class' label), drawn on an oversized canvas
# so the two-decimal annotations stay legible.
cor = df_train.corr()
fig, ax = plt.subplots(figsize=(40, 40))
sns.heatmap(cor, annot=True, fmt='.2f', cmap=plt.cm.Reds, ax=ax)
plt.show()

In [None]:
# Keep 90% of the labelled rows for training and hold out the rest for
# evaluation; the fixed seed makes the shuffled split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.9,
    shuffle=True,
    random_state=44,
)

# Gradient Boosting Classifier Model

In [None]:
# Fit a gradient-boosted tree ensemble and report its score on both splits.
GBCModel = GradientBoostingClassifier(
    n_estimators=250,
    max_depth=6,
    learning_rate=0.1,
    random_state=0,
).fit(X_train, y_train)

gbc_train_score = GBCModel.score(X_train, y_train)
gbc_test_score = GBCModel.score(X_test, y_test)
print('GBCModel Train Score is : ' , gbc_train_score)
print('GBCModel Test Score is : ' , gbc_test_score)

In [None]:
# Evaluate the gradient boosting model on the held-out split.
y_pred_GB = GBCModel.predict(X_test)
CM_GB = confusion_matrix(y_test, y_pred_GB)

# annot/fmt write the integer count into each cell. (heatmap's `center`
# argument is a numeric colormap midpoint, not an on/off flag, so it is not
# used here.)
ax = sns.heatmap(CM_GB, annot=True, fmt='d')
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set(xlabel='Predicted label', ylabel='True label',
       title='Gradient Boosting confusion matrix')
plt.show()

print('Confusion Matrix is\n', CM_GB)

# SVC Model 

In [None]:
# Support vector classifier with an RBF kernel. probability=True is needed so
# the model exposes predict_proba for the soft-voting ensemble built later.
svc_settings = dict(
    kernel='rbf',      # alternatives: linear, poly, sigmoid, precomputed
    C=1.0,
    gamma='auto',
    max_iter=400,      # hard cap on solver iterations
    probability=True,
)
SVCModel = SVC(**svc_settings)
SVCModel.fit(X_train, y_train)

svc_train_score = SVCModel.score(X_train, y_train)
svc_test_score = SVCModel.score(X_test, y_test)
print('SVCModel Train Score is : ' , svc_train_score)
print('SVCModel Test Score is : ' , svc_test_score)

In [None]:
# Evaluate the SVC model on the held-out split.
y_pred_SVC = SVCModel.predict(X_test)
# Build the matrix from the SVC predictions (not the gradient boosting ones),
# so the plot and printout below actually describe this model.
CM_SVC = confusion_matrix(y_test, y_pred_SVC)

# annot/fmt write the integer count into each cell. (heatmap's `center`
# argument is a numeric colormap midpoint, not an on/off flag, so it is not
# used here.)
ax = sns.heatmap(CM_SVC, annot=True, fmt='d')
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set(xlabel='Predicted label', ylabel='True label',
       title='SVC confusion matrix')
plt.show()

print('Confusion Matrix is\n', CM_SVC)

# KNN Model

In [None]:
# k-nearest-neighbours classifier voting over the 10 closest training points.
knn_settings = dict(
    n_neighbors=10,
    weights='uniform',   # alternative: distance
    algorithm='auto',    # alternatives: ball_tree, kd_tree, brute
)
KNNClassifierModel = KNeighborsClassifier(**knn_settings)
KNNClassifierModel.fit(X_train, y_train)

knn_train_score = KNNClassifierModel.score(X_train, y_train)
knn_test_score = KNNClassifierModel.score(X_test, y_test)
print('KNNClassifierModel Train Score is : ' , knn_train_score)
print('KNNClassifierModel Test Score is : ' , knn_test_score)

In [None]:
# Evaluate the KNN model on the held-out split.
y_pred_KNN = KNNClassifierModel.predict(X_test)
CM_KNN = confusion_matrix(y_test, y_pred_KNN)

# annot/fmt write the integer count into each cell. (heatmap's `center`
# argument is a numeric colormap midpoint, not an on/off flag, so it is not
# used here.)
ax = sns.heatmap(CM_KNN, annot=True, fmt='d')
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set(xlabel='Predicted label', ylabel='True label',
       title='KNN confusion matrix')
plt.show()

# Print the KNN matrix itself so the text output matches the plot above.
print('Confusion Matrix is\n', CM_KNN)

# Neural Network Model

In [None]:
# Multi-layer perceptron with two hidden layers (128 units, then 3 units).
mlp_settings = dict(
    hidden_layer_sizes=(128, 3),
    activation='relu',          # alternatives: identity, logistic, tanh
    solver='adam',              # alternatives: lbfgs, sgd
    learning_rate='constant',   # alternatives: invscaling, adaptive
    alpha=0.09,
    early_stopping=False,
    random_state=33,
)
MLPClassifierModel = MLPClassifier(**mlp_settings)
MLPClassifierModel.fit(X_train, y_train)

# Gather the scores and the fitted-model diagnostics, then print them in order.
mlp_report = [
    ('MLPClassifierModel Train Score is : ', MLPClassifierModel.score(X_train, y_train)),
    ('MLPClassifierModel Test Score is : ', MLPClassifierModel.score(X_test, y_test)),
    ('MLPClassifierModel loss is : ', MLPClassifierModel.loss_),
    ('MLPClassifierModel No. of iterations is : ', MLPClassifierModel.n_iter_),
    ('MLPClassifierModel No. of layers is : ', MLPClassifierModel.n_layers_),
    ('MLPClassifierModel last activation is : ', MLPClassifierModel.out_activation_),
]
for caption, value in mlp_report:
    print(caption, value)

In [None]:
# Evaluate the neural network on the held-out split.
y_pred_MLP = MLPClassifierModel.predict(X_test)
CM_MLP = confusion_matrix(y_test, y_pred_MLP)

# annot/fmt write the integer count into each cell. (heatmap's `center`
# argument is a numeric colormap midpoint, not an on/off flag, so it is not
# used here.)
ax = sns.heatmap(CM_MLP, annot=True, fmt='d')
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set(xlabel='Predicted label', ylabel='True label',
       title='MLP confusion matrix')
plt.show()

print('Confusion Matrix is\n', CM_MLP)


# Voting Model

In [None]:
# Combine the four models defined above into one soft-voting ensemble, which
# predicts from the summed class probabilities of its members.
ensemble_members = [
    ('GBCModel', GBCModel),
    ('SVModel', SVCModel),
    ('KNNModel', KNNClassifierModel),
    ('MLPModel', MLPClassifierModel),
]
VotingClassifierModel = VotingClassifier(estimators=ensemble_members, voting='soft')
VotingClassifierModel.fit(X_train, y_train)

voting_train_score = VotingClassifierModel.score(X_train, y_train)
voting_test_score = VotingClassifierModel.score(X_test, y_test)
print('VotingClassifierModel Train Score is : ' , voting_train_score)
print('VotingClassifierModel Test Score is : ' , voting_test_score)

In [None]:
# Evaluate the voting ensemble on the held-out split.
y_pred_V = VotingClassifierModel.predict(X_test)
CM_V = confusion_matrix(y_test, y_pred_V)
# annot/fmt write the integer count into each cell. (heatmap's `center`
# argument is a numeric colormap midpoint, not an on/off flag, so it is not
# used here.)
ax = sns.heatmap(CM_V, annot=True, fmt='d')
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set(xlabel='Predicted label', ylabel='True label',
       title='Voting ensemble confusion matrix')
plt.show()
print('Confusion Matrix is\n', CM_V)

In [None]:
# Load the unlabelled competition test set.
# NOTE(review): the competition CSVs are understood to ship WITHOUT a header
# row. With the read_csv default (header=0) the first test sample would be
# consumed as column names, leaving the submission one row short and every
# Id shifted by one; header=None keeps all rows. Confirm against the dataset
# description.
df_test = pd.read_csv('../input/data-science-london-scikit-learn/test.csv', header=None)
df_test

In [None]:
# Count the missing entries in each column of the test frame.
df_test.isna().sum()

In [None]:
# Every row and column of the test frame is used as model input.
x_predict = df_test.iloc[:]

In [None]:
# Predict test-set labels twice: once with the standalone SVC and once with
# the voting ensemble; each set of predictions feeds its own submission file.
y_pred_sv = SVCModel.predict(x_predict)
y_pred_v = VotingClassifierModel.predict(x_predict)

In [None]:
def _to_submission_frame(predictions):
    """Wrap predicted labels in a frame with a 'Solution' column and a 1-based 'Id' column."""
    frame = pd.DataFrame({'Solution': predictions})
    frame['Id'] = np.arange(1, len(frame) + 1)
    return frame


# One submission per model: the standalone SVC and the voting ensemble.
submission_sv = _to_submission_frame(y_pred_sv)
submission_v = _to_submission_frame(y_pred_v)

In [None]:
# Put 'Id' first, then write each submission without the pandas index.
submission_columns = ['Id', 'Solution']
submission_sv = submission_sv.loc[:, submission_columns]
submission_v = submission_v.loc[:, submission_columns]

for submission_frame, output_name in (
    (submission_sv, 'submission_sv.csv'),
    (submission_v, 'submission_v.csv'),
):
    submission_frame.to_csv(output_name, index=False)