**IMPORT NECESSARY LIBRARIES**

In [None]:
!pip install verstack

In [None]:
!git clone https://github.com/analokmaus/kuma_utils.git

In [None]:
!pip3 install impyute

In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from impyute.imputation.cs import mice
from sklearn import preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.experimental import enable_iterative_imputer  # must stay ahead of the IterativeImputer import
from sklearn.impute import IterativeImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, PowerTransformer
from verstack import NaNImputer

from kuma_utils.preprocessing.imputer import LGBMImputer

warnings.filterwarnings('ignore')

In [None]:
# Read the three competition CSVs in one pass: the training frame, the test
# frame, and the sample-submission template (in that order).
Train, Test, Submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/spaceship-titanic/train.csv',
        '../input/spaceship-titanic/test.csv',
        '../input/spaceship-titanic/sample_submission.csv',
    )
)

In [None]:
# Peek at the first five rows of the training frame.
Train.head(n=5)

**Split categorical & numeric features**

In [None]:
# Object-dtype columns of the training frame are treated as categorical.
cat_features = Train.select_dtypes(include='object').columns.tolist()
# Every remaining column is treated as numeric. Iterating over Test.columns
# (rather than Train.columns) keeps any Train-only column out of the list.
col_num = [column for column in Test.columns if column not in cat_features]

In [None]:
# Power transforms are a family of parametric, monotonic transformations that
# are applied to make data more Gaussian-like.
#
# PowerTransformer is used by name rather than through the `preprocessing`
# module alias: a later cell rebinds `preprocessing` to a Preprocessing()
# instance, so `preprocessing.PowerTransformer()` raises AttributeError if
# this cell is re-run after that point.
#
# NOTE(review): this rescales Age and the spend columns *before*
# feature_engineering() thresholds Age at 18 and sums the spend columns --
# confirm that ordering is intended. The cell is also not idempotent:
# re-running it transforms already-transformed values.
scaler = PowerTransformer()
# Fit on the training data only, then apply the same fitted transform to Test.
Train[col_num] = scaler.fit_transform(Train[col_num])
Test[col_num] = scaler.transform(Test[col_num])

In [None]:
# Number of distinct values in each categorical column.
Train.loc[:, cat_features].nunique()

In [None]:
# Count missing values in the target column.
Train['Transported'].isna().sum()

In [None]:
class Preprocessing:
    """Stateless collection of the DataFrame preprocessing steps used in this notebook.

    Every method takes a DataFrame and returns a DataFrame.
    ``delete_columns``, ``Columns_repairer`` and ``feature_engineering``
    modify the frame they are given in place and return that same object;
    ``one_hot_encoding`` and the imputers return the result of the underlying
    library call.
    """

    # Mapping used to turn boolean flag columns into numeric 1/0.
    _FLAG_CODES = {True: 1, False: 0}

    def delete_columns(self, data):
        """Drop the ``PassengerId`` and ``Name`` columns from ``data`` in place and return it."""
        data.drop(columns=['PassengerId', 'Name'], inplace=True)
        return data

    def one_hot_encoding(self, data):
        """Return ``pd.get_dummies(data)``."""
        return pd.get_dummies(data)

    ##################################################
    # Famous techniques of imputing
    def verstack_nanimputer(self, data):
        """Impute missing values with verstack's ``NaNImputer`` and return its output."""
        return NaNImputer().impute(data)

    def LGBM_Imputer(self, data):
        """Impute with ``LGBMImputer(n_iter=500)``; the result is wrapped in a DataFrame with the input's columns."""
        columns = data.columns
        imputed = LGBMImputer(n_iter=500).fit_transform(data)
        return pd.DataFrame(imputed, columns=columns)

    def Mice_Imputer(self, data):
        """Impute with impyute's ``mice`` on ``data.values``.

        The result is wrapped in a DataFrame with the input's column names.
        The index is not passed to the constructor, so when ``mice`` returns
        a plain array the result gets a fresh default index.
        """
        columns = data.columns
        imputed = mice(data.values)
        return pd.DataFrame(imputed, columns=columns)
    #################################################

    def Columns_repairer(self, data):
        """Split ``Cabin`` and make the boolean flags numeric; mutates and returns ``data``.

        * ``Cabin`` (``"<Deck>/<Num>/<Side>"``) is split into the three
          columns ``Deck``, ``Num`` (float) and ``Side``, then dropped.
        * ``VIP`` and ``CryoSleep`` have True/False replaced by 1/0 and are
          cast to float, so missing values stay NaN.
        """
        data[['Deck', 'Num', 'Side']] = data['Cabin'].str.split('/', expand=True)
        data.drop(columns='Cabin', inplace=True)
        data['Num'] = data['Num'].astype(float)
        for flag in ('VIP', 'CryoSleep'):
            # Assign the result back instead of `data[flag].replace(..., inplace=True)`:
            # the in-place form acts on a temporary Series and does not update
            # the frame when pandas Copy-on-Write is active. The float cast
            # makes the dtype numeric regardless of pandas' replace downcasting.
            data[flag] = data[flag].replace(self._FLAG_CODES).astype(float)
        return data

    def feature_engineering(self, df):
        """Add derived feature columns to ``df`` in place and return it.

        Reads ``PassengerId``, ``Age``, ``Num`` and the five spend columns, so
        it must run after ``Columns_repairer`` and before ``delete_columns``.

        NOTE(review): ``Under_18`` compares ``Age`` against 18, which is only
        meaningful if ``Age`` is still in raw years when this is called.
        """
        # Plain chained addition: a NaN in any spend column makes the total NaN.
        df['TotalSpend'] = df['RoomService'] + df['FoodCourt'] + df['ShoppingMall'] + df['Spa'] + df['VRDeck']
        # PassengerId looks like "<group>_<number>"; the group id is parsed
        # once and stored under both column names the original produced.
        group_id = df['PassengerId'].str.split('_').str[0].astype(int)
        df['PassengerGroup'] = group_id
        df['Under_18'] = (df['Age'] < 18).astype(int)
        df['Group'] = group_id
        # One value_counts() for the whole column, then a lookup per row
        # (previously value_counts() was recomputed for every row).
        df['Group_size'] = df['Group'].map(df['Group'].value_counts())
        # Indicator flags for two cabin-number bands; a missing Num yields 0.
        df['Cabin_region3'] = ((df['Num'] >= 600) & (df['Num'] < 900)).astype(int)
        df['Cabin_region4'] = ((df['Num'] >= 900) & (df['Num'] < 1200)).astype(int)
        return df

# NOTE: this rebinds the name `preprocessing`, which the import cell bound to
# the sklearn.preprocessing module; from here on it refers to this instance.
preprocessing=Preprocessing()

In [None]:
# Split Cabin into Deck/Num/Side and recode the VIP/CryoSleep flags on both frames.
Train, Test = (preprocessing.Columns_repairer(frame) for frame in (Train, Test))

In [None]:
# Add the derived feature columns to both frames.
Train, Test = (preprocessing.feature_engineering(frame) for frame in (Train, Test))

In [None]:
# Drop PassengerId and Name from both frames now that the group features are derived.
Train, Test = (preprocessing.delete_columns(frame) for frame in (Train, Test))

In [None]:
# One-hot encode both frames with pd.get_dummies.
Train, Test = (preprocessing.one_hot_encoding(frame) for frame in (Train, Test))

In [None]:
def undummify(df, prefix_sep="_"):
    """Collapse one-hot (dummy) columns back into one label column per prefix.

    A column whose name contains ``prefix_sep`` is treated as a dummy column
    of the form ``<prefix><prefix_sep><label>``, where the prefix is the text
    before the first separator. For each such prefix the output holds one
    column, named after the prefix, containing for every row the label of the
    dummy column with the largest value. Columns without the separator are
    passed through unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column names.
    prefix_sep : str, default "_"
        Separator between prefix and label in dummy column names.

    Returns
    -------
    pandas.DataFrame
        One column per prefix / pass-through column, in order of first
        appearance. An input without columns yields an empty frame that keeps
        ``df``'s index.

    Notes
    -----
    A row whose dummy values are all equal (for example all zeros) still
    receives a label, because ``idxmax`` returns the first maximal column.
    """
    cols2collapse = {
        item.split(prefix_sep)[0]: (prefix_sep in item) for item in df.columns
    }
    series_list = []
    for col, needs_to_collapse in cols2collapse.items():
        if needs_to_collapse:
            # Select only columns that start with "<prefix><sep>". A substring
            # match (df.filter(like=col)) would also pull in unrelated columns
            # such as "VRDeck" when the prefix is "Deck".
            dummy_prefix = col + prefix_sep
            dummy_cols = [name for name in df.columns if name.startswith(dummy_prefix)]
            undummified = (
                df[dummy_cols]
                .idxmax(axis=1)
                .apply(lambda x: x.split(prefix_sep, maxsplit=1)[1])
                .rename(col)
            )
            series_list.append(undummified)
        else:
            series_list.append(df[col])
    if not series_list:
        # pd.concat raises on an empty list; return an empty frame instead.
        return pd.DataFrame(index=df.index)
    undummified_df = pd.concat(series_list, axis=1)
    return undummified_df

In [None]:
# Fill the remaining missing values in both frames with verstack's NaNImputer.
Train, Test = (preprocessing.verstack_nanimputer(frame) for frame in (Train, Test))

In [None]:
# Dummy-column groups that are folded back into a single label column each.
HomePlanet = ['HomePlanet_Europa', 'HomePlanet_Mars', 'HomePlanet_Earth']
Deck = ['Deck_B', 'Deck_C', 'Deck_D', 'Deck_E', 'Deck_F', 'Deck_G', 'Deck_T', 'Deck_A']
Destination = ['Destination_55 Cancri e', 'Destination_PSO J318.5-22', 'Destination_TRAPPIST-1e']

# Apply the same steps to Train, then Test: rebuild the three label columns
# from their dummies, then drop the dummies together with 'Side_S'.
for frame in (Train, Test):
    frame['Deck'] = undummify(frame[Deck])
    frame['Destination'] = undummify(frame[Destination])
    frame['HomePlanet'] = undummify(frame[HomePlanet])
    frame.drop(HomePlanet + Deck + Destination + ['Side_S'], axis=1, inplace=True)

In [None]:
# Integer codes for the label columns, keyed by column name.
label_codes = {
    'Destination': {'TRAPPIST-1e':1, 'PSO J318.5-22':0,'55 Cancri e':2},
    'HomePlanet': {'Europa':1, 'Mars':0,'Earth':2},
    'Deck': {'B':1, 'F':2, 'G':3, 'E':4, 'D':5, 'C':6, 'T':7,'A':0},
}

for frame in (Train, Test):
    for column, codes in label_codes.items():
        # Assign the result back instead of `frame[column].replace(..., inplace=True)`:
        # the in-place form acts on a temporary Series and does not update the
        # frame when pandas Copy-on-Write is active. The int cast gives a
        # numeric dtype regardless of pandas' replace downcasting, and raises
        # if a label without a code is left over. Re-running the cell is a no-op.
        frame[column] = frame[column].replace(codes).astype(int)

**Feature Correlation**

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of Train.
fig, ax = plt.subplots(figsize=(30, 30))
sns.heatmap(Train.corr(), annot=True, ax=ax);

**Split Data & Target**

In [None]:
# Separate the target from the features without mutating Train: `Train.pop`
# removed the column in place, so re-running the cell raised KeyError.
y = Train['Transported']
X = Train.drop(columns='Transported')

**Feature Importance With XGBClassifier**

In [None]:
# Fit an XGBoost classifier on all features and plot its feature importances.
# The former `metrics='accuracy'` keyword was removed: it is not an
# XGBClassifier parameter, so it was ignored apart from triggering a warning.
light = xgb.XGBClassifier()
light.fit(X, y)
importance = light.feature_importances_
results = pd.DataFrame({'Features': X.columns,
                        'Importances': importance})
# Ascending sort puts the most important feature at the top of the horizontal bar chart.
results = results.sort_values(by='Importances')
fig, ax = plt.subplots(figsize=(30, 30))
ax.barh(results['Features'], results['Importances'])
ax.set_xlabel('Importance percentages')
plt.show()

**Feature Selection With XGBClassifier**

In [None]:
import xgboost as xgb
from sklearn.feature_selection import RFE

# Recursive feature elimination around an XGBoost classifier: one feature is
# removed per step until 13 remain.
lgb = xgb.XGBClassifier()
selector = RFE(lgb, n_features_to_select=13, step=1).fit(X, y)
rank = selector.ranking_
feature_rank = pd.DataFrame({'Feature': X.columns, 'Rank': rank})
# Keep the names of the features ranked 1.
features = feature_rank.loc[feature_rank['Rank'] == 1, 'Feature'].tolist()
features

In [None]:
# Hold out 20% of the rows for validation, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=y,
)

**Let's take a look at the performance of XGBClassifier on our prepared data:**

In [None]:
# Baseline: default XGBoost classifier, scored on the held-out split.
model = xgb.XGBClassifier()
model.fit(X=X_train, y=y_train)
model.score(X=X_test, y=y_test)

# flaml :

In [None]:
!pip install flaml

In [None]:
# Create the FLAML AutoML object that is fitted and used for prediction in the following cells.
from flaml import AutoML
automl = AutoML()

In [None]:
# Run the AutoML search on the full training data (X, y), optimising accuracy
# within the configured time budget.
automl.fit(
    X,
    y,
    task="classification",
    metric='accuracy',
    time_budget=500,
)

In [None]:
# Predict on Test using exactly the training columns, in training order, so
# that a missing or reordered feature raises a KeyError here instead of being
# passed to the model unaligned.
Submission['Transported'] = automl.predict(Test[X.columns])
Submission.to_csv('./submission.csv', index=False)

**Make a Download Link:**

In [None]:
import base64
from IPython.display import HTML

def create_download_link(df, title = "Download CSV file", filename = "data.csv", index = True):
    """Return an IPython ``HTML`` object with a link that downloads ``df`` as a CSV file.

    The CSV text is base64-encoded and embedded in a ``data:`` URI, so the
    link works without writing a file to disk.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to serialise.
    title : str
        Link text. It is inserted into the markup as-is.
    filename : str
        Suggested name for the downloaded file. It is inserted as-is.
    index : bool, default True
        Whether to write the DataFrame index as the first CSV column. The
        default matches the previous behaviour.
    """
    csv = df.to_csv(index=index)
    payload = base64.b64encode(csv.encode()).decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(html.format(payload=payload,title=title,filename=filename))

# index=False so the downloaded file has the same columns as the
# submission.csv written with Submission.to_csv(..., index=False).
create_download_link(Submission, index=False)

# THANK YOU! PLEASE UPVOTE MY NOTEBOOK IF YOU FIND IT INTERESTING