In [None]:
import os
import sys
import warnings

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier

In [None]:
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

In [None]:
# Read the three competition CSV files into DataFrames.
train_df = pd.read_csv('/kaggle/input/titanic/train.csv')
test_df = pd.read_csv('/kaggle/input/titanic/test.csv')
gender_df = pd.read_csv('/kaggle/input/titanic/gender_submission.csv')

# Keep the test-set passenger ids aside: the column is dropped from test_df
# further down but is still needed to build the submission file.
passenger_id = test_df['PassengerId']

In [None]:
# Show the first rows of each loaded frame, one rendered table per frame.
for frame in (train_df, test_df, gender_df):
    display(frame.head())

In [None]:
# Columns that are not used as model features.
not_need_columns = ['PassengerId', 'Name', 'Cabin', 'Ticket']

# `columns=` already targets the column axis, so no separate `axis` argument
# is required. Each call returns a new frame that replaces the old binding.
train_df = train_df.drop(columns=not_need_columns)
test_df = test_df.drop(columns=not_need_columns)

In [None]:
# Show the first rows of both frames again, now without the dropped columns.
for frame in (train_df, test_df):
    display(frame.head())

In [None]:
class BaseLine:
    """Small preprocessing toolkit: label encoding and mean imputation."""

    def __init__(self):
        # One encoder instance per object; it maps categorical values to
        # integer codes.
        self.le = LabelEncoder()

    def transform_data(self, df):
        """Fit the encoder on ``df`` and return the encoded values.

        The encoder is fitted again on every call, so the mapping learned by
        a previous call is replaced.
        """
        encoded = self.le.fit_transform(df)
        return encoded

    def fillna_mean(self, df):
        """Return ``df`` with its missing entries replaced by ``df.mean()``."""
        mean_value = df.mean()
        return df.fillna(value=mean_value)


class BaseModel(BaseLine):
    """Adds a fit-then-predict convenience on top of the BaseLine helpers."""

    # No __init__ override: the inherited BaseLine.__init__ does all the
    # set-up, and an override that only forwards to super() adds nothing.

    def train_model_and_predict(self, model, X_train, y_train, X_test):
        """Fit ``model`` on the training data and predict for ``X_test``.

        Args:
            model: Object exposing ``fit(X, y)`` and ``predict(X)``.
            X_train: Training features passed to ``model.fit``.
            y_train: Training targets passed to ``model.fit``.
            X_test: Features passed to ``model.predict``.

        Returns:
            Whatever ``model.predict(X_test)`` returns.
        """
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        return predictions

In [None]:
bm = BaseModel()
categorical_cols = ['Sex', 'Embarked']
for col in categorical_cols:
    # Learn the category -> integer mapping from the training column only and
    # reuse that same fitted mapping for the test column. Re-fitting on the
    # test column would give it its own independent mapping, which matches the
    # training one only if both columns happen to contain the same categories.
    # A category that appears only in the test column now raises instead of
    # being silently mis-encoded.
    train_df[col] = bm.transform_data(train_df[col])
    test_df[col] = bm.le.transform(test_df[col])

# Impute missing numeric values with statistics from the training data only,
# so no information from the test set leaks into preprocessing and both frames
# are filled with the same value.
train_age_mean = train_df['Age'].mean()
train_fare_mean = train_df['Fare'].mean()
train_df['Age'] = train_df['Age'].fillna(train_age_mean)
test_df['Age'] = test_df['Age'].fillna(train_age_mean)
test_df['Fare'] = test_df['Fare'].fillna(train_fare_mean)

In [None]:
# DataFrame.info() prints its summary and returns None. Calling it as two
# separate statements avoids building a tuple, which would make the cell
# output end with a stray "(None, None)".
train_df.info()
test_df.info()

In [None]:
# Target as a 1-D Series. Selecting with a list (['Survived']) yields a
# one-column DataFrame, which scikit-learn estimators treat as a column
# vector and have to flatten, emitting a DataConversionWarning.
all_y_train = train_df['Survived']

# Every remaining column is used as a feature.
all_X_train = train_df.drop(columns=['Survived'])

In [None]:
# Hold out part of the labelled data for validation; the fixed seed makes the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    all_X_train,
    all_y_train,
    random_state=42,
)

In [None]:
# Validation accuracy of an AdaBoost baseline. The estimator is seeded so the
# reported score is reproducible across runs.
y_pred = bm.train_model_and_predict(AdaBoostClassifier(random_state=42),
                                    X_train, y_train, X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Validation accuracy of a random-forest baseline. The estimator is seeded so
# the reported score is reproducible across runs.
y_pred = bm.train_model_and_predict(RandomForestClassifier(random_state=42),
                                    X_train, y_train, X_test)
# Keyword arguments keep the (y_true, y_pred) roles explicit.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Validation accuracy of a single decision tree. The estimator is seeded so
# the reported score is reproducible across runs.
y_pred = bm.train_model_and_predict(DecisionTreeClassifier(random_state=42),
                                    X_train, y_train, X_test)
# Keyword arguments keep the (y_true, y_pred) roles explicit.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Final model: fit on every labelled row, then predict for test_df.
final_forest = RandomForestClassifier(n_estimators=25,
                                      max_depth=7,
                                      random_state=42)
result_pred = bm.train_model_and_predict(final_forest, all_X_train,
                                         all_y_train, test_df)

# NOTE(review): gender_df is loaded from gender_submission.csv, which looks
# like a sample submission rather than ground-truth labels. If so, this number
# is agreement with that file, not true test accuracy. Confirm.
reference_labels = gender_df.loc[:, ['Survived']]
accuracy_score(result_pred, reference_labels)

In [None]:
# Pair each saved test-set passenger id with its predicted label.
submission_columns = {"PassengerId": passenger_id, "Survived": result_pred}
submission = pd.DataFrame(submission_columns)

In [None]:
# Write the predictions to disk; index=False leaves the row index out of the file.
submission.to_csv(path_or_buf='submission.csv', index=False)

# Thanks for reading. Don't forget to upvote the work. Good luck kaggling!