In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found beneath the Kaggle input
# directory, then print one path per line.
input_file_paths = (
    os.path.join(root, fname)
    for root, _subdirs, files in os.walk('/kaggle/input')
    for fname in files
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install pycaret

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder
from pycaret.classification import *

In [None]:
# Load the labelled training split of the competition data and preview it.
train_csv_path = '../input/tabular-playground-series-apr-2021/train.csv'
train = pd.read_csv(train_csv_path)
train.head(n=5)

In [None]:
# Load the test split of the competition data and preview it.
test_csv_path = '../input/tabular-playground-series-apr-2021/test.csv'
test = pd.read_csv(test_csv_path)
test.head(n=5)

In [None]:
train_copy = train.copy()

In [None]:
train.drop('Survived', axis=1, inplace = True)

In [None]:
data_df = pd.concat([train,test],ignore_index=True)
data_df.head()

In [None]:
data_df.tail()

In [None]:
data_df[['First Name', 'Last Name']] = data_df.Name.str.split(',', expand=True,)

In [None]:
data_df.drop(['Name','First Name'], axis=1, inplace = True)

In [None]:
data_df.head()

In [None]:
data_df['Pclass'].value_counts()

In [None]:
data_age_median = data_df['Age'].median()

In [None]:
data_df['Age'] = data_df['Age'].fillna(data_age_median)

In [None]:
data_fare_median = data_df['Fare'].median()

In [None]:
data_df['Fare'] = data_df['Fare'].fillna(data_fare_median)

In [None]:
data_embarked_mode = data_df['Embarked'].mode()[0]

In [None]:
data_df['Embarked'] = data_df['Embarked'].fillna(data_embarked_mode)

In [None]:
data_df['Ticket'] = data_df['Ticket'].fillna('X')

In [None]:
data_df['Cabin'] = data_df['Cabin'].fillna('X')

In [None]:
data_df['Pclass'] = data_df['Pclass'].astype('object')

In [None]:
data_df.head()

In [None]:
scaler = StandardScaler()
label = LabelEncoder()

In [None]:
data_onehot = pd.get_dummies(data_df[['Sex','Embarked']])
data_onehot.head()

In [None]:
data_df = pd.concat([data_df,data_onehot], axis=1)
data_df.head()

In [None]:
data_df.drop(['Sex','Embarked'], axis=1, inplace = True)

In [None]:
data_df['Ticket'] = label.fit_transform(data_df['Ticket'])

In [None]:
data_df['Cabin'] = label.fit_transform(data_df['Cabin'])

In [None]:
data_df['Last Name'] = label.fit_transform(data_df['Last Name'])

In [None]:
data_df.head()

In [None]:
# Split the combined frame back into its train and test parts.
# The boundary is taken from the untouched training snapshot instead of a
# hard-coded 100000, so the split stays correct if the input size changes.
n_train_rows = len(train_copy)
# .copy() makes both parts independent frames rather than slices of data_df,
# so later column assignments cannot trigger SettingWithCopy issues.
train_df = data_df.iloc[:n_train_rows].copy()
test_df = data_df.iloc[n_train_rows:].copy()

In [None]:
train_df = pd.concat([train_df,train_copy['Survived']], axis=1)

In [None]:
train_df.tail()

In [None]:
# Reserve 5% of the labelled rows as a hold-out set that the modelling
# experiment never sees; the remaining 95% (seeded sample) is used for modelling.
modeling_rows = train_df.sample(frac=0.95, random_state=786)
data_unseen = train_df.drop(index=modeling_rows.index).reset_index(drop=True)
data = modeling_rows.reset_index(drop=True)
print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions: {data_unseen.shape}')

In [None]:
# Initialise the PyCaret classification experiment on the modeling split with
# 'Survived' as the target. Visible configuration: SibSp/Parch are declared
# numeric, Pclass is declared ordinal, PassengerId is ignored, 70% of `data`
# is used for training, normalize/transformation/fix_imbalance are switched
# on, session_id is fixed to 123, and experiment/plot/profile/data logging is
# enabled.
# NOTE(review): Pclass was only cast to object dtype earlier (its values were
# not converted to strings) while the ordinal levels here are the strings
# '1','2','3' -- confirm PyCaret matches them as intended.
# NOTE(review): use_gpu=True presumably requires a GPU-enabled session, and
# `silent` looks specific to the PyCaret 2.x setup() signature -- confirm
# against the installed version.
titanic_classification = setup(data= data, target='Survived', numeric_features =['SibSp','Parch'], ordinal_features = {'Pclass': ['1','2','3']},
                              ignore_features= ['PassengerId'],  train_size=0.7, normalize=True, transformation= True, fix_imbalance=True, use_gpu=True, 
                               session_id=123, log_experiment=True, log_plots=True, log_profile=True, log_data=True, profile=True, silent=True)

In [None]:
lightgbm = create_model('lightgbm')

In [None]:
tuned_lightgbm = tune_model(lightgbm)

In [None]:
plot_model(lightgbm, plot='feature')

In [None]:
plot_model(lightgbm, plot = 'confusion_matrix')

In [None]:
evaluate_model(lightgbm)

In [None]:
predict_model(lightgbm)

In [None]:
final_lightgbm = finalize_model(lightgbm)

In [None]:
predict_model(final_lightgbm)

In [None]:
unseen_predictions = predict_model(final_lightgbm, data=data_unseen)
unseen_predictions.head()

In [None]:
from pycaret.utils import check_metric
check_metric(unseen_predictions['Survived'], unseen_predictions['Label'], metric = 'Accuracy')

In [None]:
save_model(final_lightgbm,'Final lightgbm Model 16Apr2021')

In [None]:
saved_final_lightgbm = load_model('Final lightgbm Model 16Apr2021')

In [None]:
submission= predict_model(saved_final_lightgbm, data=test_df)
submission.head()

In [None]:
submission_copy = submission.copy()

In [None]:
submission_copy.drop(['Pclass', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare',
       'Cabin', 'Last Name', 'Sex_female', 'Sex_male', 'Embarked_C',
       'Embarked_Q', 'Embarked_S', 'Score'], axis=1, inplace = True)

In [None]:
submission_copy.rename(columns={'Label':'Survived'},inplace = True)

In [None]:
submission_copy.head()

In [None]:
submission_copy.to_csv('submission.csv', index=False)