# importing libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# loading datasets

In [None]:
train=pd.read_csv('../input/spaceship-titanic/train.csv')
train

In [None]:
train.drop(['PassengerId'], axis=1, inplace=True)

In [None]:
test=pd.read_csv('../input/spaceship-titanic/test.csv')
test

In [None]:
# Keep the passenger ids aside before the column is dropped from `test`;
# they are re-attached to each model's prediction frame later on.
# NOTE(review): the name `id` shadows the Python builtin of the same name.
id = test['PassengerId']

In [None]:
test.drop(['PassengerId'], axis=1, inplace=True)

In [None]:
sample_submission=pd.read_csv('../input/spaceship-titanic/sample_submission.csv')
sample_submission

# train dataset

In [None]:
train.describe()

In [None]:
train.columns

In [None]:
train.info()
# No column appears to contain stray special characters: every column was parsed into its expected dtype.

In [None]:
print('shape of train dataset is:', train.shape)
print('size of train dataset is:', train.size)
print('number of rows in train dataset:', len(train))
print('count of each column in train dataset is:', train.count())

# test

In [None]:
test.describe()

In [None]:
test.columns

In [None]:
test.info()
# No column appears to contain stray special characters: every column was parsed into its expected dtype.

In [None]:
# Summarise the dimensions of the test dataset (the labels now name the
# dataset that is actually being printed).
print('shape of test dataset is:', test.shape)
print('size of test dataset is:', test.size)
print('number of rows in test dataset:', len(test))
print('count of each column in test dataset is:', test.count())

# finding null values 

In [None]:
# Number of null values in each column of the train dataset.
train.isnull().sum()

In [None]:
import missingno as msno
msno.bar(train)
# Bar plot showing the number of non-null values in each column.

In [None]:
# Number of null values in each column of the test dataset.
test.isnull().sum()

In [None]:
msno.bar(test)
# Bar plot showing the number of non-null values in each column; the shortfall from the full row count is the number of nulls in that column.

# fill null values

In [None]:
#here we are replacing null values in columns which are float or int with median and columns which are object with mode.
def fillna(df):
    """Fill missing values in ``df`` in place.

    Numeric (non-boolean) columns are filled with the column median; every
    other column is filled with its most frequent value (mode).  A
    non-numeric column that is entirely null has no mode and is left
    unchanged.

    Args:
        df: pandas DataFrame to modify in place.

    Returns:
        None.
    """
    for col in df.columns:
        series = df[col]
        is_numeric = (pd.api.types.is_numeric_dtype(series)
                      and not pd.api.types.is_bool_dtype(series))
        if is_numeric:
            fill_value = series.median()
        else:
            modes = series.mode()
            if modes.empty:
                # An all-null column has no mode, so there is nothing to fill with.
                continue
            fill_value = modes.iloc[0]
        # Assign the result back rather than calling fillna(inplace=True) on
        # the column selection: that is chained assignment and is not
        # guaranteed to update ``df`` (pandas Copy-on-Write).
        df[col] = series.fillna(fill_value)

In [None]:
fillna (test)

In [None]:
test.isnull().sum()

In [None]:
fillna (train)

In [None]:
train.isnull().sum()

In [None]:
train.info()

# Data Visualisations

In [None]:
# Histogram with a KDE overlay for six columns of `train`, one per subplot.
# sns.distplot is deprecated in seaborn; histplot(kde=True, stat="density")
# is its documented replacement.
f, axes = plt.subplots(3, 2, figsize=(15,30), sharex=False)
column_positions = [4, 6, 7, 8, 9, 10]
colours = ["skyblue", "olive", "gold", "teal", "skyblue", "olive"]
# axes.flat walks the grid row by row, matching the original
# [0,0], [0,1], [1,0], ... placement.
for ax, position, colour in zip(axes.flat, column_positions, colours):
    sns.histplot(train.iloc[:, position], color=colour, kde=True,
                 stat="density", ax=ax)
plt.show()

In [None]:
# Violin plot for six columns of `train`, one per subplot.
# The column is passed as `x=` explicitly: a positional Series is read as
# `x` by older seaborn releases but as `data` by newer ones, which changes
# the orientation of the plot.
f, axes = plt.subplots(3, 2, figsize=(15,30), sharex=False)
column_positions = [4, 6, 7, 8, 9, 10]
colours = ["skyblue", "olive", "gold", "teal", "skyblue", "olive"]
# axes.flat walks the grid row by row, matching the original
# [0,0], [0,1], [1,0], ... placement.
for ax, position, colour in zip(axes.flat, column_positions, colours):
    sns.violinplot(x=train.iloc[:, position], color=colour, ax=ax)
plt.show()

In [None]:
sns.countplot(data=train, x='HomePlanet')
# This plot shows that most passengers are from Earth, followed by Europa, with Mars last.

In [None]:
train['HomePlanet'].value_counts().plot(kind='pie',autopct="%0.1f",figsize=(10,10))
#this pie chart shows the percentages of Passenger's home planet.

In [None]:
sns.countplot(data=train, x='CryoSleep')
# This plot shows that the majority of passengers are not in CryoSleep.

In [None]:
sns.countplot(data=train, x='Destination')
# The majority of passengers are heading to TRAPPIST-1e; 55 Cancri e is the second most common destination, and PSO J318.5-22 the least common.

In [None]:
sns.countplot(data=train, x='VIP')
# This graph shows that fewer than 1000 passengers are VIPs.

In [None]:
plt.figure(figsize=(20,20))
sns.countplot(data=train, y='Age')
# This graph shows how many passengers there are at each age on the spaceship. Age 27 is the most common age among the passengers.

In [None]:
pip install autoviz

In [None]:
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()
df_av = AV.AutoViz('../input/spaceship-titanic/train.csv')

# Applying label encoding

In [None]:
from sklearn import preprocessing
# Label-encode every object/bool column of `train`.  When the same column
# also exists in `test`, one encoder is fitted on the values of both frames
# and applied to both, so a given category gets the same integer code in
# train and test (independently fitted encoders can assign different codes).
# Columns that exist only in `train` are encoded from train alone.
for col in train.columns:
    if train[col].dtype == 'object' or train[col].dtype == 'bool':
        label_encoder = preprocessing.LabelEncoder()
        if col in test.columns:
            label_encoder.fit(pd.concat([train[col], test[col]]))
            test[col] = label_encoder.transform(test[col])
        else:
            label_encoder.fit(train[col])
        train[col] = label_encoder.transform(train[col])

In [None]:
train

In [None]:
# Label-encode every object/bool column of the test frame in place.
# NOTE(review): a fresh LabelEncoder is fitted on each test column here, so
# the integer codes are derived from the test values alone — confirm they
# line up with the codes used for the matching train columns before the
# models are asked to predict on `test`.
from sklearn import preprocessing
for i in test.columns:
    if test[i].dtype=='object' or test[i].dtype=='bool' :
            label_encoder=preprocessing.LabelEncoder()
            test[i]=label_encoder.fit_transform(test[i])

In [None]:
test

# computing correlations for the dataframe

In [None]:
# Call the method so the cell displays the pairwise correlation matrix
# rather than the bound method object.
train.corr()

In [None]:
plt.figure(figsize=(40,15))
a=sns.heatmap(train.corr(),annot=True)

# Feature selection

In [None]:
x=train.drop(['Transported'], axis=1)
y=train['Transported']

In [None]:
x

In [None]:
y

# test size is 0.3

In [None]:
# Split the data into training and hold-out (test) sets.
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=0)

In [None]:
x_train

In [None]:
x_test

In [None]:
y_train

In [None]:
y_test

# RandomForestClassifier

In [None]:
# Applying random forest classification
from sklearn.ensemble import RandomForestClassifier
rf_Classifier = RandomForestClassifier(n_estimators = 10,max_features=12, random_state = 0)
rf_Classifier.fit(x_train, y_train)

In [None]:
y_pred = rf_Classifier.predict(x_test)
y_pred

In [None]:
from sklearn.metrics import  confusion_matrix,accuracy_score
cm=confusion_matrix(y_test,y_pred)
print(cm)
rf_acc=round(accuracy_score(y_test,y_pred)*100)
rf_acc

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
d=rf_Classifier.predict(test)
da_rf=pd.DataFrame(d)
da_rf

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_rf = da_rf.iloc[:, 0].eq(1).to_frame('transported')
da_rf.insert(0, 'PassengerId', id)
da_rf = da_rf.set_index('PassengerId')
da_rf

# DecisionTreeClassifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
dt_classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
dt_classifier.fit(x_train, y_train)

In [None]:
y_pred = dt_classifier.predict(x_test)
y_pred

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
dt_acc=round(accuracy_score(y_test, y_pred)*100)
dt_acc

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
d=dt_classifier.predict(test)
da_dt=pd.DataFrame(d)
da_dt

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_dt = da_dt.iloc[:, 0].eq(1).to_frame('transported')
da_dt.insert(0, 'PassengerId', id)
da_dt = da_dt.set_index('PassengerId')
da_dt

# K Nearest Neighbors (KNN) classification

In [None]:
#K Nearest Neighbors (KNN) classification method.
from sklearn.neighbors import KNeighborsClassifier
knn_classifier=KNeighborsClassifier(n_neighbors=5,metric='minkowski',p=2)
knn_classifier.fit(x_train,y_train)

In [None]:
y_pred=knn_classifier.predict(x_test)
y_pred

In [None]:
from sklearn.metrics import  confusion_matrix,accuracy_score
cm=confusion_matrix(y_test,y_pred)
print(cm)
knn_acc=round(accuracy_score(y_test,y_pred)*100)
knn_acc

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
d=knn_classifier.predict(test)
da_knn=pd.DataFrame(d)
da_knn

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_knn = da_knn.iloc[:, 0].eq(1).to_frame('transported')
da_knn.insert(0, 'PassengerId', id)
da_knn = da_knn.set_index('PassengerId')
da_knn

# LogisticRegression

In [None]:
from sklearn.linear_model import LogisticRegression
lr_classifier = LogisticRegression(random_state = 0)
lr_classifier.fit(x_train, y_train)

In [None]:
y_pred = lr_classifier.predict(x_test)
y_pred

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
lr_acc=round(accuracy_score(y_test, y_pred)*100)
lr_acc

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
d=lr_classifier.predict(test)
da_lr=pd.DataFrame(d)
da_lr

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_lr = da_lr.iloc[:, 0].eq(1).to_frame('transported')
da_lr.insert(0, 'PassengerId', id)
da_lr = da_lr.set_index('PassengerId')
da_lr

# GaussianNB

In [None]:
from sklearn.naive_bayes import GaussianNB
gnb_classifier = GaussianNB()
gnb_classifier.fit(x_train, y_train)

In [None]:
y_pred = gnb_classifier.predict(x_test)
y_pred

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
gnb_acc=round(accuracy_score(y_test, y_pred)*100)
gnb_acc

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
d=gnb_classifier.predict(test)
da_gbn=pd.DataFrame(d)
da_gbn

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_gbn = da_gbn.iloc[:, 0].eq(1).to_frame('transported')
da_gbn.insert(0, 'PassengerId', id)
da_gbn = da_gbn.set_index('PassengerId')
da_gbn

# svc classifier algorithm

In [None]:
from sklearn.svm import SVC   #svc classifier algorithm
svc_classifier = SVC()#kernel = 'linear', random_state = 0
svc_classifier.fit(x_train, y_train)

In [None]:
y_pred=svc_classifier.predict(x_test)
y_pred

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
svc_acc=round(accuracy_score(y_test, y_pred)*100)
svc_acc

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
d=svc_classifier.predict(test)
da_svc=pd.DataFrame(d)
da_svc

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_svc = da_svc.iloc[:, 0].eq(1).to_frame('transported')
da_svc.insert(0, 'PassengerId', id)
da_svc = da_svc.set_index('PassengerId')
da_svc

# GradientBoostingClassifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1)
clf.fit(x_train, y_train)

In [None]:
y_pred_boost = clf.predict(x_test)
y_pred_boost

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_boost)
print(cm)
gb_acc=round(accuracy_score(y_test, y_pred_boost)*100)
print(gb_acc)
# Gradient boosting is the best-performing algorithm on this dataset.

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_boost))

In [None]:
d=clf.predict(test)
da_gb=pd.DataFrame(d)
da_gb

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_gb = da_gb.iloc[:, 0].eq(1).to_frame('transported')
da_gb.insert(0, 'PassengerId', id)
da_gb = da_gb.set_index('PassengerId')
da_gb

# AdaBoostClassifier

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
# Fit AdaBoost (SAMME variant, 100 estimators) on the training split.
# No synthetic make_classification sample is generated here: the model is
# trained on x_train / y_train only.
clf_ada = AdaBoostClassifier(n_estimators=100, random_state=0, algorithm='SAMME')
clf_ada.fit(x_train, y_train)

In [None]:
y_pred_adt = clf_ada.predict(x_test)
y_pred_adt

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_adt)
print(cm)
ad_acc=round(accuracy_score(y_test, y_pred_adt)*100)
print(ad_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_adt))

In [None]:
d=clf_ada.predict(test)
da_ab=pd.DataFrame(d)
da_ab

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_ab = da_ab.iloc[:, 0].eq(1).to_frame('transported')
da_ab.insert(0, 'PassengerId', id)
da_ab = da_ab.set_index('PassengerId')
da_ab

# lightgbm model

In [None]:
 import lightgbm as lgb
clf_lgb = lgb.LGBMClassifier()
clf_lgb.fit(x_train, y_train)
y_pred_lgbm = clf_lgb.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_lgbm)
print(cm)
lgb_acc=round(accuracy_score(y_test, y_pred_lgbm)*100)
print(lgb_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_lgbm))

In [None]:
d=clf_lgb.predict(test)
da_lgb=pd.DataFrame(d)
da_lgb

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_lgb = da_lgb.iloc[:, 0].eq(1).to_frame('transported')
da_lgb.insert(0, 'PassengerId', id)
da_lgb = da_lgb.set_index('PassengerId')
da_lgb

# xgb classifier

In [None]:
from xgboost import XGBClassifier
# 'binary:logistic' is the classification objective for a two-class target.
# The previous 'reg:linear' is a regression objective (and a deprecated name
# for 'reg:squarederror'), which is not appropriate for XGBClassifier.
xg_clf = XGBClassifier(objective ='binary:logistic', colsample_bytree = 0.3, learning_rate = 0.1,max_depth = 5, alpha = 10, n_estimators = 10)
xg_clf.fit(x_train,y_train)
y_pred_xgb = xg_clf.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_xgb)
print(cm)
xgb_acc=round(accuracy_score(y_test, y_pred_xgb)*100)
print(xgb_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_xgb))

In [None]:
d=xg_clf.predict(test)
da_xgb=pd.DataFrame(d)
da_xgb

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_xgb = da_xgb.iloc[:, 0].eq(1).to_frame('transported')
da_xgb.insert(0, 'PassengerId', id)
da_xgb = da_xgb.set_index('PassengerId')
da_xgb

# NuSVC

In [None]:
from sklearn.svm import NuSVC
nsvc=NuSVC()
nsvc.fit(x_train, y_train)
y_pred_nsvc = nsvc.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_nsvc)
print(cm)
nsvc_acc=round(accuracy_score(y_test, y_pred_nsvc)*100)
print(nsvc_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_nsvc))

In [None]:
d=nsvc.predict(test)
da_nsvc=pd.DataFrame(d)
da_nsvc

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_nsvc = da_nsvc.iloc[:, 0].eq(1).to_frame('transported')
da_nsvc.insert(0, 'PassengerId', id)
da_nsvc = da_nsvc.set_index('PassengerId')
da_nsvc

# Extra tree classifier

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
et_clf = ExtraTreesClassifier(n_estimators=100, random_state=0)
et_clf.fit(x_train, y_train)
y_pred_et = et_clf.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_et)
print(cm)
et_acc=round(accuracy_score(y_test, y_pred_et)*100)
print(et_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_et))

In [None]:
d=et_clf.predict(test)
da_et=pd.DataFrame(d)
da_et

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_et = da_et.iloc[:, 0].eq(1).to_frame('transported')
da_et.insert(0, 'PassengerId', id)
da_et = da_et.set_index('PassengerId')
da_et

# bagging classifier

In [None]:
from sklearn.ensemble import BaggingClassifier
bg_clf = BaggingClassifier()
bg_clf.fit(x_train,y_train)
y_pred_bg=bg_clf.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_bg)
print(cm)
bg_acc=round(accuracy_score(y_test, y_pred_bg)*100)
print(bg_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_bg))

In [None]:
d=bg_clf.predict(test)
da_bg=pd.DataFrame(d)
da_bg

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_bg = da_bg.iloc[:, 0].eq(1).to_frame('transported')
da_bg.insert(0, 'PassengerId', id)
da_bg = da_bg.set_index('PassengerId')
da_bg

# label propagation

In [None]:
from sklearn.semi_supervised import LabelPropagation
lp = LabelPropagation()
lp.fit(x_train,y_train)
y_pred_lp=lp.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_lp)
print(cm)
lp_acc=round(accuracy_score(y_test, y_pred_lp)*100)
print(lp_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_lp))

In [None]:
d=lp.predict(test)
da_lp=pd.DataFrame(d)
da_lp

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_lp = da_lp.iloc[:, 0].eq(1).to_frame('transported')
da_lp.insert(0, 'PassengerId', id)
da_lp = da_lp.set_index('PassengerId')
da_lp

# label spreading

In [None]:
from sklearn.semi_supervised import LabelSpreading
sp = LabelSpreading()
sp.fit(x_train,y_train)
y_pred_sp=sp.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_sp)
print(cm)
sp_acc=round(accuracy_score(y_test, y_pred_sp)*100)
print(sp_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_sp))

In [None]:
d=sp.predict(test)
da_sp=pd.DataFrame(d)
da_sp

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_sp = da_sp.iloc[:, 0].eq(1).to_frame('transported')
da_sp.insert(0, 'PassengerId', id)
da_sp = da_sp.set_index('PassengerId')
da_sp

# calibrated classifier cv

In [None]:
from sklearn.calibration import CalibratedClassifierCV
# Calibrate a Gaussian naive Bayes model with 3-fold cross-validation.
# The estimator is passed positionally because its keyword name differs
# between scikit-learn releases (`base_estimator` was renamed `estimator`).
base_clf = GaussianNB()
calibrated_clf = CalibratedClassifierCV(base_clf, cv=3)
calibrated_clf.fit(x_train, y_train)
y_pred_clf_cv=calibrated_clf.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_clf_cv)
print(cm)
clf_acc=round(accuracy_score(y_test, y_pred_clf_cv)*100)
print(clf_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_clf_cv))

In [None]:
d=calibrated_clf.predict(test)
da_cv=pd.DataFrame(d)
da_cv

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_cv = da_cv.iloc[:, 0].eq(1).to_frame('transported')
da_cv.insert(0, 'PassengerId', id)
da_cv = da_cv.set_index('PassengerId')
da_cv

# SGD classifier

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
clf_sgd = make_pipeline(StandardScaler(),SGDClassifier(max_iter=1000, tol=1e-3))
clf_sgd.fit(x_train, y_train)
y_pred_sgd=clf_sgd.predict(x_test)
    

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred_sgd)
print(cm)
sgd_acc=round(accuracy_score(y_test, y_pred_sgd)*100)
print(sgd_acc)

In [None]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_sgd))

In [None]:
d=clf_sgd.predict(test)
da_sgb=pd.DataFrame(d)
da_sgb

In [None]:
# Reshape the raw predictions into a frame indexed by PassengerId with a
# single boolean 'transported' column (True where the predicted label is 1).
da_sgb = da_sgb.iloc[:, 0].eq(1).to_frame('transported')
da_sgb.insert(0, 'PassengerId', id)
da_sgb = da_sgb.set_index('PassengerId')
da_sgb

# Accuracy of models

In [None]:
import numpy as np

plt.rcParams["figure.figsize"] = [27, 10]
plt.rcParams["figure.autolayout"] = True

# Dedicated names are used for the chart data: assigning to `x` / `y` here
# would overwrite the feature matrix and target defined in the
# feature-selection step and break any re-run of the modelling cells.
model_names=['RandomForest','DecisionTree','KNN','LogisticRegression','GaussianNB','svc','GradientBoosting','AdaBoost','LGBMClassifier','XGBClassifier','NuSVC','Extra tree classifier','BaggingClassifier','LabelPropagation','LabelSpreading','CalibratedClassifierCV','SGDClassifier']
accuracies=[rf_acc,dt_acc,knn_acc,lr_acc,gnb_acc,svc_acc,gb_acc,ad_acc,lgb_acc,xgb_acc,nsvc_acc,et_acc,bg_acc,lp_acc,sp_acc,clf_acc,sgd_acc]

width = 0.75
fig, ax = plt.subplots()

pps = ax.bar(model_names, accuracies, width, align='center')

# Write each model's accuracy just above the top of its bar.
for p in pps:
   height = p.get_height()
   ax.text(x=p.get_x() + p.get_width() / 2, y=height+.20,
      s="{}%".format(height),
      ha='center')
plt.title('Accuracy of models')
plt.show()

In [None]:
data = {'Algorithms': ['RandomForest','DecisionTree','KNN','LogisticRegression','GaussianNB','svc','GradientBoosting','AdaBoost','LGBMClassifier','XGBClassifier','NuSVC','Extra tree classifier','BaggingClassifier','LabelPropagation','LabelSpreading','CalibratedClassifierCV','SGDClassifier'],
        'Accuracies(%)': [rf_acc,dt_acc,knn_acc,lr_acc,gnb_acc,svc_acc,gb_acc,ad_acc,lgb_acc,xgb_acc,nsvc_acc,et_acc,bg_acc,lp_acc,sp_acc,clf_acc,sgd_acc]}

In [None]:
df = pd.DataFrame(data)
df