In [1]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
import spacy
import tensorflow as tf
# Apply a dark-grid seaborn theme with the "husl" colour palette to all plots.
sns.set(
    style='darkgrid',
    palette='husl',
)

In [2]:
# Use one seed everywhere so that random operations are reproducible.
SEED = 1002


def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    TensorFlow's global generator with the same value.

    Parameters
    ----------
    seed : int
        Value handed to each library's seeding function.
    """
    # Local import: `random` is not part of the notebook's import cell.
    import random

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


seed_everything(SEED)

In [3]:
# Load the training and test CSV files into pandas DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("../input/titanic/train.csv", "../input/titanic/test.csv")
)

In [4]:
# Preview the first rows of the training data.
train.head()

In [5]:
# Print the column names, dtypes and non-null counts of the training data.
train.info()

In [8]:
!pip install pycaret
from pycaret.classification import *

In [58]:
# Fill missing Age and Fare values with the median of the same column,
# computed separately for each frame.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on a column selection: that form is chained
# assignment, which raises a FutureWarning in pandas 2.x and leaves the frame
# unchanged once copy-on-write is enabled.
for frame in (train, test):
    for column in ("Age", "Fare"):
        frame[column] = frame[column].fillna(frame[column].median())

In [59]:
# Drop duplicate training rows (the first occurrence is kept), then replace
# every remaining missing value in both frames with a single space.
train = train.drop_duplicates().fillna(" ")
test = test.fillna(" ")

In [60]:
def take_first_element(text):
    """Return the first element of ``text``.

    Parameters
    ----------
    text : str or other sequence
        Value whose leading element is wanted.

    Returns
    -------
    The first element of ``text`` (for a string, its first character).
    An empty input is returned unchanged instead of raising ``IndexError``.
    """
    # Guard against empty input, where indexing [0] would fail.
    if len(text) == 0:
        return text
    return text[0]

In [61]:
# Reduce the Ticket and Cabin values of both frames to their first character.
for frame in (train, test):
    for column in ('Ticket', 'Cabin'):
        frame[column] = frame[column].apply(take_first_element)

In [62]:
# Initialise the PyCaret experiment on the prepared training frame, with
# 'Survived' as the column to predict.
experiment = setup(
    data=train,
    target='Survived',
    session_id=42,
    experiment_name='kaggle-nlp1',
)

In [63]:
# Display the table of estimators available to the experiment.
models()

In [64]:
# Collect the index labels of the estimator table into a plain list.
models_list = models().index.tolist()

In [65]:
# Compare the candidate models, with the results sorted by AUC.
# NOTE(review): the return value is not stored; the model used below is
# chosen by hand in the next cell.
compare_models(sort="AUC")

In [111]:
# Train the model identified by 'gbc' (PyCaret's ID for the gradient boosting
# classifier).
my_model = create_model('gbc')

In [112]:
# Tune the hyperparameters of the trained model using tune_model's defaults.
tuned_model = tune_model(my_model)

In [113]:
# Plot the ROC / AUC curve of the tuned model.
plot_model(tuned_model, plot = 'auc')

In [114]:
# Plot the precision-recall curve of the tuned model.
plot_model(tuned_model, plot = 'pr')

In [115]:
# Plot the feature importances of the tuned model.
plot_model(tuned_model, plot='feature')

In [116]:
# Plot the confusion matrix of the tuned model.
plot_model(tuned_model, plot = 'confusion_matrix')

In [117]:
# Show PyCaret's evaluation view for the tuned model.
evaluate_model(tuned_model)

In [118]:
# Finalize the tuned model; per the PyCaret docs this refits it on the whole
# dataset given to setup(), including the hold-out split.
final_model = finalize_model(tuned_model)

In [119]:
# Score the finalized model without passing new data.
# NOTE(review): presumably this evaluates on the hold-out split, which the
# finalized model has already been fitted on, so the metrics would be
# optimistic. Confirm against the PyCaret docs.
predict_model(final_model)

In [120]:
# Generate predictions for the test frame with the finalized model.
unseen_predictions = predict_model(final_model, data=test)
# Preview the first rows of the prediction output.
unseen_predictions.head()

In [121]:
# predict_model() names the predicted-class column "Label" in PyCaret 2.x and
# "prediction_label" in PyCaret 3.x; accept either so this cell runs on both.
label_column = "Label" if "Label" in unseen_predictions.columns else "prediction_label"

# Coerce to numeric first so labels stored as strings ("0"/"1") also work,
# then threshold at 0.5 in one vectorised step. This gives the same 0/1
# integer array as the original per-row `1 if x >= 0.5 else 0`.
predicted_labels = pd.to_numeric(unseen_predictions[label_column])
test_result = (predicted_labels >= 0.5).astype(int).to_numpy()

In [124]:
# Sum of the 0/1 predictions, i.e. how many test rows were predicted as 1.
test_result.sum()

In [123]:
# Attach the predictions to the test frame and write the two submission
# columns to 'final.csv' without the row index.
test["Survived"] = test_result
csv_data = test.loc[:, ['PassengerId', 'Survived']]
csv_data.to_csv('final.csv', index=False)