## Decision Tree Classifier

In [106]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import random
np.random.seed(0)

dataset_files = ['titanic1_FMV_mean.csv', 
                 'titanic2_FMV_median.csv',
                 'titanic3_FMV_mode.csv',
                 'titanic4_FMV_randomMeanStd.csv',
                 'titanic5_FMV_randomMinMax.csv',
                 'titanic6_FMV_interpolation.csv',
                 'titanic7_FMV_drop.csv']

for dataset_file in dataset_files:
    df = pd.read_csv(dataset_file)
    X = df.drop('Survived', axis=1)  
    y = df['Survived']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
 
    print(f"Accuracy for {dataset_file}: {accuracy}")
    print(f'Accuracy: {accuracy:.2f}\n')
    


Accuracy for titanic1_FMV_mean.csv: 0.7453183520599251
Accuracy: 0.75

Accuracy for titanic2_FMV_median.csv: 0.7602996254681648
Accuracy: 0.76

Accuracy for titanic3_FMV_mode.csv: 0.7827715355805244
Accuracy: 0.78

Accuracy for titanic4_FMV_randomMeanStd.csv: 0.6891385767790262
Accuracy: 0.69

Accuracy for titanic5_FMV_randomMinMax.csv: 0.7940074906367042
Accuracy: 0.79

Accuracy for titanic6_FMV_interpolation.csv: 0.7565543071161048
Accuracy: 0.76

Accuracy for titanic7_FMV_drop.csv: 0.7464788732394366
Accuracy: 0.75



## Random Forest Classifier

In [108]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np
import random
np.random.seed(0)
dataset_files = ['titanic1_FMV_mean.csv', 
                 'titanic2_FMV_median.csv',
                 'titanic3_FMV_mode.csv',
                 'titanic4_FMV_randomMeanStd.csv',
                 'titanic5_FMV_randomMinMax.csv',
                 'titanic6_FMV_interpolation.csv',
                 'titanic7_FMV_drop.csv']

for dataset_file in dataset_files:
    df = pd.read_csv(dataset_file)
    X = df.drop('Survived', axis=1)  
    y = df['Survived']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
    test_size=0.3, random_state=42)

    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"Random Forest:")
    print(f"Accuracy for {dataset_file}: {accuracy}")
    print(f'Accuracy: {accuracy:.2f}\n')
    

Random Forest:
Accuracy for titanic1_FMV_mean.csv: 0.7378277153558053
Accuracy: 0.74

Random Forest:
Accuracy for titanic2_FMV_median.csv: 0.7677902621722846
Accuracy: 0.77

Random Forest:
Accuracy for titanic3_FMV_mode.csv: 0.7715355805243446
Accuracy: 0.77

Random Forest:
Accuracy for titanic4_FMV_randomMeanStd.csv: 0.7378277153558053
Accuracy: 0.74

Random Forest:
Accuracy for titanic5_FMV_randomMinMax.csv: 0.7715355805243446
Accuracy: 0.77

Random Forest:
Accuracy for titanic6_FMV_interpolation.csv: 0.7453183520599251
Accuracy: 0.75

Random Forest:
Accuracy for titanic7_FMV_drop.csv: 0.7793427230046949
Accuracy: 0.78



## Logistic Regression

In [109]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import random
np.random.seed(0)

dataset_files = ['titanic1_FMV_mean.csv', 
                 'titanic2_FMV_median.csv',
                 'titanic3_FMV_mode.csv',
                 'titanic4_FMV_randomMeanStd.csv',
                 'titanic5_FMV_randomMinMax.csv',
                 'titanic6_FMV_interpolation.csv',
                 'titanic7_FMV_drop.csv']

for dataset_file in dataset_files:
    df = pd.read_csv(dataset_file)
    X = df.drop('Survived', axis=1) 
    y = df['Survived']

    X_train, X_test, y_train, y_test = train_test_split(X, y, 
    test_size=0.3, random_state=42)

    model = LogisticRegression()
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"Logistic regression:")
    print(f"Accuracy for {dataset_file}: {accuracy}")
    print(f'Accuracy: {accuracy:.2f}\n')
    

Logistic regression:
Accuracy for titanic1_FMV_mean.csv: 0.7677902621722846
Accuracy: 0.77

Logistic regression:
Accuracy for titanic2_FMV_median.csv: 0.7715355805243446
Accuracy: 0.77

Logistic regression:
Accuracy for titanic3_FMV_mode.csv: 0.7715355805243446
Accuracy: 0.77

Logistic regression:
Accuracy for titanic4_FMV_randomMeanStd.csv: 0.7677902621722846
Accuracy: 0.77

Logistic regression:
Accuracy for titanic5_FMV_randomMinMax.csv: 0.704119850187266
Accuracy: 0.70

Logistic regression:
Accuracy for titanic6_FMV_interpolation.csv: 0.7677902621722846
Accuracy: 0.77

Logistic regression:
Accuracy for titanic7_FMV_drop.csv: 0.7699530516431925
Accuracy: 0.77

