In [827]:
import pandas as pd
import numpy as np
import re
from scipy import stats
from sklearn import linear_model as LM, preprocessing as pp, metrics, tree
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier as MLPC, MLPRegressor as MLPR
np.random.seed(1000)

In [828]:
class Model():
    """Train and score survival classifiers and fare regressors on Titanic data.

    Construction cleans the raw train/test frames, splits each into
    feature/target pairs for the two tasks (predict ``Survived``, predict
    ``Fare``) and standard-scales the features. ``SurvivalPredictions`` and
    ``FarePredictions`` then fit several scikit-learn estimators and print
    their metrics against the test split.

    Attributes set by ``__init__``:
        le, sc: the LabelEncoder / StandardScaler instances used.
        ogtrain, ogtest: the raw frames exactly as passed in.
        train, test: the cleaned frames returned by ``PreProcess``.
        survive_*/fare_* (``_Trn_``/``_Tst_``, ``_X``/``_y``): unscaled splits.
        scaled_*_X: the standard-scaled feature matrices.
    """

    # Raw columns removed by PreProcess once the derived columns exist.
    # Kept at class level so the static PreProcess can reach it without an
    # instance; it is still readable as ``self.uselessColumns``.
    uselessColumns = ['Survived?', 'Class/Dept', 'URL', 'Body','Ticket', 'Cabin', 'Died', 'Boat','Born', 'Occupation', 'Name']

    def __init__(self, trn, tst):
        """Clean, split and scale the given train (``trn``) / test (``tst``) frames."""
        self.le = pp.LabelEncoder()
        self.sc = StandardScaler()
        self.ogtrain = trn
        self.ogtest = tst
        self.train = self.PreProcess(self.ogtrain, self.le)
        self.test = self.PreProcess(self.ogtest, self.le)
        self.survive_Trn_X, self.survive_Trn_y = self.xySurvive(self.train)
        self.survive_Tst_X, self.survive_Tst_y = self.xySurvive(self.test)
        self.fare_Trn_X, self.fare_Trn_y = self.xyFare(self.train)
        self.fare_Tst_X, self.fare_Tst_y = self.xyFare(self.test)
        # The scaler is fitted on training features only and then applied to
        # the matching test features.
        self.scaled_Survive_Trn_X = self.sc.fit_transform(self.survive_Trn_X)
        self.scaled_Survive_Tst_X = self.sc.transform(self.survive_Tst_X)
        # The same scaler object is refitted for the fare task, so after
        # construction ``self.sc`` holds the fare-feature statistics.
        self.scaled_Fare_Trn_X = self.sc.fit_transform(self.fare_Trn_X)
        self.scaled_Fare_Tst_X = self.sc.transform(self.fare_Tst_X)

    @staticmethod
    def dept(val) -> int:
        """Return the first character of ``val`` as an int.

        Used on the 'Class/Dept' column to get the class number (1, 2, or 3).
        Raises ValueError when the first character is not a digit.
        """
        return int(val[0])

    @staticmethod
    def FixFares(val) -> float:
        """Convert a Pounds/Shillings/Pence fare to decimal pounds.

        ``val`` is split on whitespace into up to three tokens: pounds (first
        character, the currency sign, is dropped), shillings and pence (last
        character, the unit letter, is dropped). Returns NaN for missing or
        empty input.
        """
        if val is None:
            return np.nan
        parts = str(val).split()
        if not parts or parts[0] == 'nan':
            return np.nan
        if len(parts) == 1:
            return round(float(parts[0][1:]), 2)
        # 12 pence to the shilling, 240 pence to the pound.
        pence = float(parts[1][:-1]) * 12
        if len(parts) > 2:
            pence += float(parts[2][:-1])
        return float(parts[0][1:]) + round(pence / 240, 2)

    @staticmethod
    def Survived(val) -> float:
        """Map 'LOST' to 0 and 'SAVED' to 1; anything else becomes NaN."""
        if val == 'LOST':
            return 0
        if val == 'SAVED':
            return 1
        return np.nan

    @staticmethod
    def age(val) -> float:
        """Return ``val`` as a whole number, or NaN when it is missing.

        Numeric input is truncated with ``int``. Any other input is
        stringified and every non-digit character is removed before
        conversion; input with no digits at all yields NaN.
        """
        if val is None or str(val) == 'nan':
            return np.nan
        if isinstance(val, (int, float, np.integer, np.floating)):
            return int(val)
        # NOTE(review): stripping non-digits discards any unit suffix or
        # decimal point (e.g. '30.5' becomes 305) - confirm the raw format.
        digits = re.sub('[^0-9]', '', str(val))
        return int(digits) if digits else np.nan

    @staticmethod
    def PreProcess(ogdf: pd.DataFrame, le=None) -> pd.DataFrame:
        """Return a cleaned copy of ``ogdf``; the input frame is not modified.

        Steps: derive 'Survived' and 'Class', convert 'Fare' and 'Age' to
        numbers and fill their gaps with the column median, integer-encode
        'Joined', 'Gender' and 'Nationality', drop ``uselessColumns``, then
        drop any row that still contains a missing value.

        Args:
            ogdf: raw frame containing the columns named above.
            le: object exposing ``fit_transform``; a fresh LabelEncoder is
                created when omitted.
        """
        if le is None:
            le = pp.LabelEncoder()
        # reset_index must not be called with inplace=True here: that form
        # returns None instead of the frame.
        df = ogdf.copy(deep=True).reset_index(drop=True)
        df['Survived'] = df['Survived?'].apply(Model.Survived)
        df['Fare'] = df['Fare'].apply(Model.FixFares).astype(float)
        df['Fare'] = df['Fare'].fillna(df['Fare'].median())
        df['Age'] = df['Age'].apply(Model.age)
        df['Age'] = df['Age'].fillna(df['Age'].median())
        # NOTE(review): the encoder is refitted per column and per call, so
        # the integer codes of two frames only line up when both contain the
        # same set of categories - confirm for the train/test pair.
        for column in ('Joined', 'Gender', 'Nationality'):
            df[column] = le.fit_transform(df[column])
        df['Class'] = df['Class/Dept'].apply(Model.dept)
        df = df.drop(Model.uselessColumns, axis=1)
        return df.dropna()

    @staticmethod
    def xySurvive(df: pd.DataFrame) -> tuple:
        """Split ``df`` into (features, target) with 'Survived' as the target."""
        y = df['Survived']
        x = df.drop(['Survived'], axis=1)
        return (x, y)

    @staticmethod
    def xyFare(df: pd.DataFrame) -> tuple:
        """Split ``df`` into (features, target) with 'Fare' as the target."""
        y = df['Fare']
        x = df.drop(['Fare'], axis=1)
        return (x, y)

    def SurvivedScore(self, pred) -> None:
        """Print accuracy and confusion matrix of ``pred`` against the test labels."""
        accuracy = metrics.accuracy_score(self.survive_Tst_y, pred)
        # Format after scaling to a percentage so binary float noise
        # (e.g. 0.57 * 100 == 56.99999999999999) never reaches the output.
        print(f"Accuracy Score: {round(accuracy, 2) * 100:.1f}%")
        print(f"ConfusionMatrix:\n {metrics.confusion_matrix(self.survive_Tst_y, pred)}\n")

    def FareScore(self, pred) -> None:
        """Print R2 and mean squared error of ``pred`` against the test fares."""
        print(f"R2: {metrics.r2_score(self.fare_Tst_y, pred)}")
        print(f"MSE: {metrics.mean_squared_error(self.fare_Tst_y, pred)}\n")

    def SurvivalPredictions(self) -> None:
        """Fit each survival classifier in turn and print its report."""
        print("Survival Predictions:")
        print("___________________________________________\n")
        print("Decision Tree Classifier:")
        self.train_predict_survive(tree.DecisionTreeClassifier())
        print("___________________________________________\n")
        print("Multilayer Perceptron - solver: adam:")
        self.train_predict_survive(MLPC(solver='adam', max_iter=10000))
        print("___________________________________________\n")
        print("Multilayer Perceptron - solver: SGD:")
        self.train_predict_survive(MLPC(solver='sgd', max_iter=100000))
        print("___________________________________________\n")

    def FarePredictions(self) -> None:
        """Fit each fare regressor in turn and print its report."""
        print("\n____________________________________________________")
        print("____________________________________________________\n")
        print("\nFare Predictions:")
        print("___________________________________________\n")
        print("MultiLayer Perceptron Regressor - solver: adam, Activation: tanh")
        self.train_predict_fare(MLPR(max_iter=10000, activation='tanh', random_state=728, solver='adam', learning_rate='adaptive',))
        print("___________________________________________\n")
        print("Linear Regression:")
        self.train_predict_fare(LM.LinearRegression())
        print("___________________________________________\n")
        print("Decision Tree Regressor:")
        self.train_predict_fare(tree.DecisionTreeRegressor())
        print("___________________________________________\n")

    def train_predict_survive(self, classifier) -> None:
        """Fit ``classifier`` on the scaled survival features and print test metrics."""
        classifier.fit(self.scaled_Survive_Trn_X, self.survive_Trn_y)
        pred = classifier.predict(self.scaled_Survive_Tst_X)
        self.SurvivedScore(pred)
        print("ClassificationReport:")
        print(metrics.classification_report(self.survive_Tst_y, pred))

    def train_predict_fare(self, classifier) -> None:
        """Fit ``classifier`` (a regressor) on the scaled fare features and print test metrics."""
        classifier.fit(self.scaled_Fare_Trn_X, self.fare_Trn_y)
        pred = classifier.predict(self.scaled_Fare_Tst_X)
        self.FareScore(pred)
    

In [829]:
# Read both splits and discard the 'Unnamed: 0' column from each
# (presumably an index written out when the CSVs were saved - confirm),
# then build the model, which cleans and scales the data on construction.
ogTraindf = pd.read_csv('titanic_train.csv').drop(columns='Unnamed: 0')
ogTestdf = pd.read_csv('titanic_test.csv').drop(columns='Unnamed: 0')
model = Model(ogTraindf, ogTestdf)

In [830]:
# Fit and report the survival classifiers first, then the fare regressors;
# both methods print their metrics to the cell output.
model.SurvivalPredictions()
model.FarePredictions()

Survival Predictions:
___________________________________________

Decision Tree Classifier:
Accuracy Score: 74.0%
ConfusionMatrix:
 [[141  34]
 [ 35  56]]

ClassificationReport:
              precision    recall  f1-score   support

           0       0.80      0.81      0.80       175
           1       0.62      0.62      0.62        91

    accuracy                           0.74       266
   macro avg       0.71      0.71      0.71       266
weighted avg       0.74      0.74      0.74       266

___________________________________________

Multilayer Perceptron - solver: adam:
Accuracy Score: 80.0%
ConfusionMatrix:
 [[153  22]
 [ 31  60]]

ClassificationReport:
              precision    recall  f1-score   support

           0       0.83      0.87      0.85       175
           1       0.73      0.66      0.69        91

    accuracy                           0.80       266
   macro avg       0.78      0.77      0.77       266
weighted avg       0.80      0.80      0.80       266