In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import models,layers
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import warnings
# Silence every warning category for the rest of the notebook session.
warnings.simplefilter('ignore')

# Load the competition training set from the Kaggle input directory.
train_df = pd.read_csv(filepath_or_buffer='/kaggle/input/spaceship-titanic/train.csv')

In [None]:
# Preview the first five rows of the training data.
train_df.head(n=5)

In [None]:
# Dimensions of the freshly loaded training frame as (rows, columns).
train_df.shape

In [None]:
# Descriptive statistics produced by DataFrame.describe() with its default arguments.
train_df.describe()

In [None]:
# Number of missing values in each column.
train_df.isna().sum()

In [None]:
# Many of the features contain missing values.

In [None]:
# Total number of missing cells across the whole frame.
train_df.isna().sum().sum()

In [None]:
# Share of missing values per column, expressed as a percentage of the row count.
train_df.isna().sum().div(train_df.shape[0]).mul(100)

In [None]:
# Frequency of each CryoSleep value (missing entries are excluded by default).
train_df.CryoSleep.value_counts()

In [None]:
# Columns imputed with their most frequent value.
for col in ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']:
    train_df[col] = train_df[col].fillna(train_df[col].mode()[0])

# Columns imputed with their median.
for col in ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']:
    train_df[col] = train_df[col].fillna(train_df[col].median())

In [None]:
# Shape check after imputation (fillna replaces values; it does not add or drop rows/columns).
train_df.shape

In [None]:
# Drop the columns that are not used as model features.
# errors='ignore' keeps the cell re-runnable: once the columns are gone, a second
# execution is a no-op instead of raising KeyError. `columns=` already implies the
# axis, so the redundant axis=1 is omitted.
train_df = train_df.drop(columns=['PassengerId', 'Cabin', 'Name'], errors='ignore')
train_df.head(1)

In [None]:
# Count the missing cells that remain after imputation and column removal.
train_df.isna().sum().sum()

In [None]:
# Show a single row of the cleaned frame.
train_df.head(n=1)

In [None]:
# Bar chart of passenger counts per home planet.
# The variable is passed by keyword: newer seaborn releases treat the first
# positional argument as `data`, not as the x variable.
sns.countplot(data=train_df, x='HomePlanet')
plt.show()

In [None]:
# Bar chart of passenger counts per CryoSleep value.
# Keyword arguments are required because the first positional argument of
# countplot is `data` in newer seaborn releases.
sns.countplot(data=train_df, x='CryoSleep')
plt.show()

In [None]:
# Number of distinct destinations.
train_df.Destination.nunique()

In [None]:
# Bar chart of passenger counts per destination.
# Keyword arguments are required because the first positional argument of
# countplot is `data` in newer seaborn releases.
sns.countplot(data=train_df, x='Destination')
plt.show()

In [None]:
# Age distribution: density-normalised histogram with a KDE overlay.
# sns.distplot is deprecated; histplot(kde=True, stat='density') is the
# replacement recommended by seaborn for the same kind of figure.
sns.histplot(x=train_df['Age'], kde=True, stat='density', color='blue')
plt.show()

In [None]:
# Bar chart of passenger counts per VIP value.
# Keyword arguments are required because the first positional argument of
# countplot is `data` in newer seaborn releases.
sns.countplot(data=train_df, x='VIP')
plt.show()

In [None]:
# RoomService distribution: density-normalised histogram with a KDE overlay.
# sns.distplot is deprecated; histplot is its replacement. distplot capped the
# automatic bin count at 50, so the bin count is fixed here to keep the figure
# readable instead of relying on histplot's uncapped automatic rule.
sns.histplot(x=train_df['RoomService'], kde=True, stat='density', bins=50)
plt.show()

In [None]:
# Class balance of the target column.
# Keyword arguments are required because the first positional argument of
# countplot is `data` in newer seaborn releases.
sns.countplot(data=train_df, x='Transported')
plt.show()

In [None]:
# Integer-encode the categorical/boolean columns, including the target.
train_cols = ['HomePlanet', 'CryoSleep', 'Destination', 'VIP', 'Transported']

# One fitted encoder is kept per column so each mapping can be inspected or
# inverted later (label_encoders[col].classes_). Refitting one shared encoder
# would discard every mapping except the last.
label_encoders = {}
for col in train_cols:
    le = LabelEncoder()
    train_df[col] = le.fit_transform(train_df[col])
    label_encoders[col] = le
# `le` remains bound to the encoder fitted on the last column ('Transported'),
# as before, because later cells reference `le`.

In [None]:
# Inspect the first three rows after label encoding.
train_df.head(n=3)

In [None]:
# Pairwise Pearson correlation between all columns (Pearson is the default method).
train_df.corr(method='pearson')

In [None]:
# Annotated correlation heatmap on a 10x8 inch figure.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(train_df.corr(), annot=True, cmap='BuPu', ax=ax)
plt.show()

In [None]:
# Separate the target from the features.
y = train_df.Transported
x = train_df.drop(columns=['Transported'])

# Hold out 20% of the rows for evaluation; the seed fixes the partition.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

In [None]:
# Print the shapes of the four split parts on one line.
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

In [None]:
# Preview the training features rather than dumping the whole frame.
x_train.head()

In [None]:
# Preview the held-out labels rather than dumping the whole series.
y_test.head()

In [None]:
# Rescale every feature to [0, 1] (MinMaxScaler's default feature_range).
mmc = MinMaxScaler()

# Learn min/max from the training split only, then apply the same scaling to both splits.
mmc.fit(x_train)
x_train = mmc.transform(x_train)
x_test = mmc.transform(x_test)

In [None]:
# Preview the first scaled training rows instead of echoing the whole array.
x_train[:5]

In [None]:
# Preview the first scaled evaluation rows instead of echoing the whole array.
x_test[:5]

In [None]:
# Random forest baseline: 200 trees, entropy split criterion.
# random_state pins the bootstrap and feature sampling so the reported accuracy
# is reproducible across Restart & Run All (same seed as the train/test split).
rfg = RandomForestClassifier(n_estimators=200, criterion='entropy', random_state=0)
rfg.fit(x_train, y_train)

In [None]:
# Random-forest class predictions for the held-out split.
y_pred1 = rfg.predict(X=x_test)

In [None]:
# Peek at the first five random-forest predictions.
y_pred1[:5]

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Fraction of held-out rows the random forest classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred1)

In [None]:
# Confusion matrix of the random-forest predictions (rows: true class, columns: predicted).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred1)
cm

In [None]:
# Heatmap of the confusion matrix.
# fmt='d' prints the cell counts as plain integers; the default '.2g' format
# renders three-digit counts in scientific notation (e.g. 7.1e+02).
sns.heatmap(cm, annot=True, fmt='d', cmap='viridis')
plt.show()

In [None]:
# Gradient-boosted trees: 200 boosting rounds with a learning rate of 0.001.
# NOTE(review): 200 rounds at this learning rate move the model very little
# from its starting point - confirm the value is intentional.
xgb = XGBClassifier(learning_rate=0.001, n_estimators=200)
xgb.fit(x_train, y_train)

In [None]:
# XGBoost class predictions for the held-out split, then a five-row preview.
y_pred2 = xgb.predict(X=x_test)
y_pred2[:5]

In [None]:
# Fraction of held-out rows XGBoost classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred2)

In [None]:
# Apply a neural network

In [None]:
# Fully connected binary classifier: five ReLU hidden layers that narrow
# towards a single sigmoid output unit.
# The input width is read from the training matrix instead of being hard-coded,
# so the model keeps working if the feature set changes. An explicit Input
# layer replaces passing input_shape to the first Dense layer.
ann = models.Sequential([
    layers.Input(shape=(x_train.shape[1],)),
    layers.Dense(101, activation='relu'),
    layers.Dense(72, activation='relu'),
    layers.Dense(49, activation='relu'),
    layers.Dense(31, activation='relu'),
    layers.Dense(7, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy objective optimised with RMSprop; accuracy is tracked per epoch.
ann.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# 19 passes over the training split in mini-batches of 200 rows.
ann.fit(x=x_train, y=y_train, epochs=19, batch_size=200)

In [None]:
# Network outputs for the held-out split.
y_pred3 = ann.predict(x=x_test)

In [None]:
# First five raw network outputs (the final layer uses a sigmoid activation).
y_pred3[:5]

In [None]:
# Turn the network outputs into boolean decisions using a 0.5 cut-off.
y_pred3 = np.greater(y_pred3, 0.5)
y_pred3[:5]

In [None]:
# Convert the boolean decisions to a flat 0/1 integer vector.
# A direct cast is used instead of LabelEncoder.fit_transform: an encoder fitted
# on the predictions maps whatever single value is present to 0, so an all-True
# prediction vector would be scored as all-negative. The cast also avoids
# refitting the shared `le` object on model output.
y_pred3 = np.asarray(y_pred3).astype(int).ravel()

In [None]:
# Preview the first encoded predictions instead of echoing the whole array.
y_pred3[:10]

In [None]:
# Fraction of held-out rows the neural network classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred3)


In [None]:
# Load the competition test set from the Kaggle input directory.
test_df = pd.read_csv(filepath_or_buffer='/kaggle/input/spaceship-titanic/test.csv')

In [None]:
# Preview the first five rows of the test data.
test_df.head(n=5)

In [None]:
# Keep an untouched deep copy of the test frame; PassengerId is read from it for the submission.
test = test_df.copy(deep=True)

In [None]:
# Drop the same non-feature columns that were removed from the training frame.
# errors='ignore' keeps the cell re-runnable: a second execution is a no-op
# instead of raising KeyError. `columns=` already implies the axis, so the
# redundant axis=1 is omitted.
test_df = test_df.drop(columns=['PassengerId', 'Cabin', 'Name'], errors='ignore')
test_df.head(1)

In [None]:
# NOTE(review): the fill values below are computed from the test set itself,
# not taken from the training data - confirm this is intended.

# Columns imputed with their most frequent value.
for col in ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']:
    test_df[col] = test_df[col].fillna(test_df[col].mode()[0])

# Columns imputed with their median.
for col in ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']:
    test_df[col] = test_df[col].fillna(test_df[col].median())

In [None]:
# Inspect the first five test rows after imputation.
test_df.head(n=5)

In [None]:
# Integer-encode the categorical/boolean feature columns of the test frame.
# NOTE(review): the encoder is refitted on the test values, so the integer codes
# only match the training codes if both sets contain exactly the same categories
# in each column - verify against the data.
test_cols = ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']
for col in test_cols:
    encoded = le.fit_transform(test_df[col])
    test_df[col] = encoded

In [None]:
# Inspect the first five test rows after label encoding.
test_df.head(n=5)

In [None]:
# Apply the already-fitted scaler to the test features; the result replaces test_df.
test_df = mmc.transform(X=test_df)

In [None]:
# Preview the first scaled test rows instead of echoing the whole array.
test_df[:5]

In [None]:
# Scale the full training feature set for the final model.
# The scaler is applied, NOT refitted: test_df was already transformed with the
# current `mmc` parameters, and refitting here would make the final model train
# and predict on differently scaled features.
x = mmc.transform(x)
x[:5]

In [None]:
# Larger fully connected binary classifier used for the submission: six ReLU
# hidden layers that narrow towards a single sigmoid output unit.
# The input width is read from the feature matrix instead of being hard-coded,
# and an explicit Input layer replaces passing input_shape to the first Dense layer.
nn = models.Sequential([
    layers.Input(shape=(x.shape[1],)),
    layers.Dense(1010, activation='relu'),
    layers.Dense(720, activation='relu'),
    layers.Dense(490, activation='relu'),
    layers.Dense(310, activation='relu'),
    layers.Dense(79, activation='relu'),
    layers.Dense(23, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy objective optimised with Adam; accuracy is tracked per epoch.
nn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 50 passes over the full training data in mini-batches of 50 rows.
nn.fit(x=x, y=y, epochs=50, batch_size=50)

In [None]:
# Network outputs for the competition test set, then a ten-row preview.
y_pred = nn.predict(x=test_df)
y_pred[:10]

In [None]:
# Turn the network outputs into boolean decisions using a 0.5 cut-off.
y_pred = np.greater(y_pred, 0.5)
y_pred[:10]

In [None]:
# Flatten the predictions to one dimension so they fit into a single DataFrame column.
y_pred = y_pred.ravel()
y_pred[:10]

In [None]:
# Pair each passenger id from the untouched test copy with its predicted label.
submission = pd.DataFrame(
    {
        'PassengerId': test['PassengerId'],
        'Transported': y_pred,
    }
)

In [None]:
# Write the submission file without the row index.
# index=False is the documented way to omit the index; index=None only worked
# because None happens to be falsy.
submission.to_csv('Titanic_spaceship_pred.csv', index=False)