In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Read the training split from the competition input directory and preview it.
train = pd.read_csv(filepath_or_buffer='../input/spaceship-titanic/train.csv')
train.head(5)

In [None]:
# Read the test split from the competition input directory and preview it.
test = pd.read_csv(filepath_or_buffer='../input/spaceship-titanic/test.csv')
test.head(5)

In [None]:
# Read the sample submission; it is reused later as the template for output files.
sub = pd.read_csv(filepath_or_buffer='../input/spaceship-titanic/sample_submission.csv')
sub.head(5)

In [None]:
# Report the row/column counts of the three loaded frames.
for label, frame in (('train', train), ('test', test), ('sample_submission', sub)):
    n_rows, n_cols = frame.shape
    print(f'{label} set have {n_rows} rows and {n_cols} columns.')

In [None]:
# Column dtypes of the training frame.
train.dtypes

In [None]:
# Number of distinct values per column of the training frame.
train.nunique()

In [None]:
# Remove PassengerId, Name and Cabin from both frames; the rest of the notebook
# does not use them as features.
dropped_cols = ['PassengerId', 'Name', 'Cabin']
train = train.drop(columns=dropped_cols)
test = test.drop(columns=dropped_cols)

In [None]:
# Missing-value count per column before any imputation.
train.isna().sum()

In [None]:
# Summary statistics, transposed so that each described column is one row.
train.describe().transpose()

In [None]:
# Treat a missing categorical value as its own 'missing' category.
#
# The frames are updated by assigning the filled columns back. Calling
# `frame[col].replace(..., inplace=True)` acts on a column selection, which
# does not reach the parent frame when pandas Copy-on-Write is active.
cat_cols_with_nan = ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']

train[cat_cols_with_nan] = train[cat_cols_with_nan].fillna('missing')
test[cat_cols_with_nan] = test[cat_cols_with_nan].fillna('missing')

In [None]:
# Most frequent Age value(s) in the training frame (mode() returns every tied value).
train['Age'].mode()

In [None]:
# Impute the numeric columns.
#
# All fill values are computed from the TRAINING frame only and then applied
# to both frames, so train and test go through the same transformation and no
# test-set statistic enters the preprocessing.
spend_cols = ['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']
spend_means = train[spend_cols].mean()

# The original cell hard-coded 24.0 here, presumably the mode displayed by the
# previous cell; computing it keeps the fill value tied to the data.
age_mode = train['Age'].mode().iloc[0]

# Assign the filled columns back instead of `frame[col].fillna(..., inplace=True)`:
# the in-place form acts on a column selection and does not update the parent
# frame when pandas Copy-on-Write is active.
train[spend_cols] = train[spend_cols].fillna(spend_means)
test[spend_cols] = test[spend_cols].fillna(spend_means)

train['Age'] = train['Age'].fillna(age_mode)
test['Age'] = test['Age'].fillna(age_mode)

In [None]:
# Missing-value count per column after imputation.
train.isna().sum()

## Data visualization:
#### **categorical features:**

In [None]:
def plot_category_donut(frame, column):
    """Build a donut chart showing how the values of one column are distributed.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding the column to plot.
    column : str
        Column name; it is also used as the chart title and as the text in
        the centre of the donut.

    Returns
    -------
    plotly.graph_objects.Figure
        The configured figure. The caller decides when to ``show()`` it.
    """
    # Only labels are passed (no `values`), so sector sizes reflect how often
    # each label occurs in the column.
    fig = go.Figure(data=[go.Pie(labels=frame[column], hole=.4)])
    fig.add_annotation(text=column,
                       x=0.5, y=0.5, showarrow=False, font_size=14, opacity=0.7,
                       font_family='monospace')
    fig.update_traces(hoverinfo='label+percent+value',
                      marker=dict(colors=['darkorange', 'blue'],
                                  line=dict(color='#000000', width=2)))
    fig.update_layout(
        font_family='monospace',
        title=dict(text=column, x=0.47, y=0.98,
                   font=dict(color='black', size=20)),
        legend=dict(orientation='v', traceorder='reversed'),
        hoverlabel=dict(bgcolor='white'))
    fig.update_traces(textposition='outside', textinfo='percent+label')
    return fig


# One donut per categorical feature, in the same order as the original cells.
for column in ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']:
    fig = plot_category_donut(train, column)
    fig.show()

#### **numerical features:**

In [None]:
def plot_feature_histogram(frame, column, color):
    """Build a count histogram with a marginal box plot for one numeric column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding the column to plot.
    column : str
        Column name; used for the x axis, its label and the chart title.
    color : str
        Bar colour (any colour string accepted by plotly).

    Returns
    -------
    plotly.graph_objects.Figure
        The configured figure. The caller decides when to ``show()`` it.
    """
    fig = px.histogram(frame, x=column, template='plotly_dark',
                       marginal='box', opacity=0.7, nbins=100,
                       color_discrete_sequence=[color],
                       barmode='group', histfunc='count')
    fig.update_layout(
        font_family='monospace',
        title=dict(text=f'{column} feature Distribution', x=0.53, y=0.95),
        xaxis_title_text=column,
        yaxis_title_text='Count',
        bargap=0.3,
    )
    return fig


# Feature -> bar colour, in the same order and with the same colours as the
# original one-cell-per-feature version.
SPEND_HIST_COLORS = {
    'RoomService': '#FF6692',
    'FoodCourt': '#FECB52',
    'ShoppingMall': '#FF97FF',
    'Spa': '#636EFA',
    'VRDeck': '#EF553B',
}

for column, color in SPEND_HIST_COLORS.items():
    fig = plot_feature_histogram(train, column, color)
    fig.show()

### Data pre-processing:

In [None]:
# Categorical feature columns; cast to str in both frames so every value in a
# column has the same Python type before label encoding.
cat = ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']
for frame in (train, test):
    frame[cat] = frame[cat].astype(str)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode each categorical column. The encoder is fitted on the
# training column and that same mapping is then applied to the test column,
# so a label that appears only in test raises an error.
le = LabelEncoder()
for col in cat:
    le.fit(train[col])
    train[col] = le.transform(train[col])
    test[col] = le.transform(test[col])

In [None]:
# Encode the target as 0/1 integers.
#
# A direct cast replaces the earlier bool -> str -> replace({'False': 0, 'True': 1})
# round trip. That version relied on `replace` silently downcasting an object
# column to int, and re-running the cell left the strings '0'/'1' in place.
# The cast is idempotent.
# NOTE(review): assumes Transported has no missing values in train — confirm
# with the null counts shown earlier.
train['Transported'] = train['Transported'].astype(int)

In [None]:
# Preview the training frame after encoding.
train.head()

In [None]:
# Annotated heatmap of the pairwise correlations between the columns of train.
corr_matrix = train.corr()
corr_fig, corr_ax = plt.subplots(figsize=(16, 10))
sns.heatmap(corr_matrix, annot=True, cmap="YlGnBu", ax=corr_ax)
plt.show()

### LGBMClassifier

In [None]:
# Separate the feature matrix from the target column.
X, y = train.drop(columns=['Transported']), train['Transported']

In [None]:
from lightgbm import LGBMClassifier, early_stopping, log_evaluation
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

# 5-fold cross-validation of a LightGBM classifier.
#
# The shuffle is seeded so that fold membership, and therefore the scores, are
# the same on every Restart & Run All.
CV_SEED = 42
folds = KFold(n_splits=5, shuffle=True, random_state=CV_SEED)
fold_scores = []

for fold, (trn_idx, val_idx) in enumerate(folds.split(X)):
    print(f"Fold: {fold}")
    # The held-out part of each fold is a validation set; it is named *_valid
    # so it is not confused with the real `test` frame.
    X_train, X_valid = X.iloc[trn_idx], X.iloc[val_idx]
    y_train, y_valid = y.iloc[trn_idx], y.iloc[val_idx]

    model = LGBMClassifier(n_estimators=2022, learning_rate=0.1)

    # Early stopping and log silencing go through callbacks. The
    # `early_stopping_rounds=` / `verbose=` keyword arguments of fit() are not
    # accepted by LightGBM 4 and later.
    model.fit(X_train, y_train,
              eval_set=[(X_valid, y_valid)],
              callbacks=[early_stopping(stopping_rounds=400, verbose=False),
                         log_evaluation(period=0)])
    y_pred = model.predict(X_valid)
    acc = accuracy_score(y_valid, y_pred)
    fold_scores.append(acc)

    print(f" accuracy_score: {acc}")
    print("-"*50)

# `model` is left bound to the classifier trained on the last fold.
print(f"mean accuracy_score: {sum(fold_scores) / len(fold_scores)}")

In [None]:
# Predict on the test frame with `model`, turn the 0/1 predictions into the
# strings 'False'/'True', and write the submission file.
label_names = {0: 'False', 1: 'True'}
lgb_labels = pd.Series(model.predict(test), index=sub.index).replace(label_names)
sub['Transported'] = lgb_labels
sub.to_csv('lgb.csv', index=False)

### PyCaret

In [None]:
! pip install pycaret

In [None]:
from pycaret.classification import setup, compare_models, blend_models, finalize_model, predict_model

In [None]:
def pycaret_model(train, target, test, n_select, fold, opt, session_id=None, use_gpu=True):
    """Select, blend and finalize PyCaret classifiers, then predict on ``test``.

    Parameters
    ----------
    train : pandas.DataFrame
        Training data, including the target column.
    target : str
        Name of the target column in ``train``.
    test : pandas.DataFrame
        Data to predict on with the finalized model.
    n_select : int
        Number of top models to keep from the comparison and blend together.
    fold : int
        Number of cross-validation folds for the comparison and the blend.
    opt : str
        Metric name used both to rank the compared models and to optimize
        the blend (e.g. ``'Accuracy'``).
    session_id : int, optional
        Seed forwarded to ``setup``; ``None`` keeps PyCaret's default.
    use_gpu : bool, default True
        Forwarded to ``setup``.

    Returns
    -------
    The ``'Label'`` column of the prediction frame PyCaret returns for ``test``.
    """
    print('Setup Your Data....')
    setup(data=train,
          target=target,
          silent=True,
          use_gpu=use_gpu,
          session_id=session_id)

    print('Comparing Models....')
    best = compare_models(sort=opt, n_select=n_select, fold=fold)
    # With n_select == 1 the comparison returns a single estimator, not a
    # list, while blend_models expects a list of estimators.
    if not isinstance(best, list):
        best = [best]

    print('Blending Models....')
    blended = blend_models(estimator_list=best, fold=fold, optimize=opt)
    # Called without data for the report PyCaret displays; the returned frame
    # is not needed.
    predict_model(blended)

    print('Finallizing Models....')
    final_model = finalize_model(blended)
    print('Done...!!!')

    pred_test = predict_model(final_model, test)
    return pred_test['Label']

In [None]:
# Blend the 3 best models by accuracy, using 5-fold cross-validation.
result = pycaret_model(
    train=train,
    target='Transported',
    test=test,
    n_select=3,
    fold=5,
    opt='Accuracy',
)

In [None]:
# Turn the 0/1 predictions into the strings 'False'/'True' and write the
# PyCaret submission file.
pycaret_labels = result.replace({0: 'False', 1: 'True'})
sub['Transported'] = pycaret_labels
sub.to_csv('pycaret_pred.csv', index=False)

<div class="alert alert-info">
<h4>If you like this notebook, please upvote it! 
     Thank you! :)</h4>
</div>