## Decision Tree Classifier

In [66]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import random
np.random.seed(0)

# CSV inputs to evaluate. The file names suggest one missing-value handling
# strategy per file (mean, median, mode, ...) -- TODO confirm against the
# notebook that produced them.
dataset_files = [
    'hd1_FMV_mean.csv',
    'hd2_FMV_median.csv',
    'hd3_FMV_mode.csv',
    'hd4_FMV_stdmean.csv',
    'hd5_FMV_minmax.csv',
    'hd6_FMV_interpolation.csv',
    'hd7_FMV_drop.csv',
]

# Train and score one decision tree per input file.
for dataset_file in dataset_files:
    df = pd.read_csv(dataset_file)

    # 'CHAS' is the classification target; all remaining columns are features.
    y = df['CHAS']
    X = df.drop(columns='CHAS')

    # 70/30 hold-out split with a fixed seed, so every file is split the same way.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # NOTE(review): no random_state is passed here, so the tree draws from
    # NumPy's global RNG (seeded once via np.random.seed(0) at the top of this
    # cell). Results are repeatable per cell run but depend on file order.
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Header, full-precision accuracy, rounded accuracy, then a blank line.
    print(
        "Decision Tree",
        f"Accuracy for {dataset_file}: {accuracy}",
        f'Accuracy: {accuracy:.2f}\n',
        sep="\n",
    )


Decision Tree
Accuracy for hd1_FMV_mean.csv: 0.9144736842105263
Accuracy: 0.91

Decision Tree
Accuracy for hd2_FMV_median.csv: 0.9078947368421053
Accuracy: 0.91

Decision Tree
Accuracy for hd3_FMV_mode.csv: 0.9210526315789473
Accuracy: 0.92

Decision Tree
Accuracy for hd4_FMV_stdmean.csv: 0.9473684210526315
Accuracy: 0.95

Decision Tree
Accuracy for hd5_FMV_minmax.csv: 0.9276315789473685
Accuracy: 0.93

Decision Tree
Accuracy for hd6_FMV_interpolation.csv: 0.9342105263157895
Accuracy: 0.93

Decision Tree
Accuracy for hd7_FMV_drop.csv: 0.9159663865546218
Accuracy: 0.92



## Random Forest Classifier

In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np
import random
np.random.seed(0)

# CSV inputs to evaluate. The file names suggest one missing-value handling
# strategy per file -- TODO confirm against the notebook that produced them.
dataset_files = [
    'hd1_FMV_mean.csv',
    'hd2_FMV_median.csv',
    'hd3_FMV_mode.csv',
    'hd4_FMV_stdmean.csv',
    'hd5_FMV_minmax.csv',
    'hd6_FMV_interpolation.csv',
    'hd7_FMV_drop.csv',
]

# Fit a default random forest on each file and report hold-out accuracy.
for dataset_file in dataset_files:
    df = pd.read_csv(dataset_file)

    # Target is the 'CHAS' column; the feature matrix is everything else.
    X = df.drop(columns=['CHAS'])
    y = df['CHAS']

    # Same 70/30 split and seed for every file.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.3,
        random_state=42,
    )

    # NOTE(review): the forest is built without random_state, so it relies on
    # the global NumPy seed set by np.random.seed(0) at the top of this cell;
    # per-file results therefore depend on the order of dataset_files.
    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Three report lines followed by a blank separator line.
    print(
        "Random Forest:\n"
        f"Accuracy for {dataset_file}: {accuracy}\n"
        f"Accuracy: {accuracy:.2f}\n"
    )

Random Forest:
Accuracy for hd1_FMV_mean.csv: 0.9144736842105263
Accuracy: 0.91

Random Forest:
Accuracy for hd2_FMV_median.csv: 0.9276315789473685
Accuracy: 0.93

Random Forest:
Accuracy for hd3_FMV_mode.csv: 0.9276315789473685
Accuracy: 0.93

Random Forest:
Accuracy for hd4_FMV_stdmean.csv: 0.9342105263157895
Accuracy: 0.93

Random Forest:
Accuracy for hd5_FMV_minmax.csv: 0.9210526315789473
Accuracy: 0.92

Random Forest:
Accuracy for hd6_FMV_interpolation.csv: 0.9210526315789473
Accuracy: 0.92

Random Forest:
Accuracy for hd7_FMV_drop.csv: 0.9243697478991597
Accuracy: 0.92



## Logistic Regression

In [64]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import random
np.random.seed(0)

# CSV inputs to evaluate. The file names suggest one missing-value handling
# strategy per file -- TODO confirm against the notebook that produced them.
dataset_files = ['hd1_FMV_mean.csv',
                 'hd2_FMV_median.csv',
                 'hd3_FMV_mode.csv',
                 'hd4_FMV_stdmean.csv',
                 'hd5_FMV_minmax.csv',
                 'hd6_FMV_interpolation.csv',
                 'hd7_FMV_drop.csv']

# Fit a default logistic regression on each file and report hold-out accuracy.
for dataset_file in dataset_files:
    df = pd.read_csv(dataset_file)

    # 'CHAS' is the classification target; all remaining columns are features.
    X = df.drop('CHAS', axis=1)
    y = df['CHAS']

    # 70/30 hold-out split with a fixed seed, identical for every file.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # NOTE(review): all warnings are silenced at the top of this cell, so a
    # ConvergenceWarning from the default solver would go unnoticed. Consider
    # scaling the features or raising max_iter and re-enabling warnings.
    model = LogisticRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    # The previous f1_score(...) call was removed: f1_score is never imported
    # in this cell (NameError on a fresh kernel) and its result was unused.

    print("Logistic regression:")
    print(f"Accuracy for {dataset_file}: {accuracy}")
    print(f'Accuracy: {accuracy:.2f}\n')
    

Logistic regression:
Accuracy for hd1_FMV_mean.csv: 0.9342105263157895
Accuracy: 0.93

Logistic regression:
Accuracy for hd2_FMV_median.csv: 0.9342105263157895
Accuracy: 0.93

Logistic regression:
Accuracy for hd3_FMV_mode.csv: 0.9407894736842105
Accuracy: 0.94

Logistic regression:
Accuracy for hd4_FMV_stdmean.csv: 0.9407894736842105
Accuracy: 0.94

Logistic regression:
Accuracy for hd5_FMV_minmax.csv: 0.9407894736842105
Accuracy: 0.94

Logistic regression:
Accuracy for hd6_FMV_interpolation.csv: 0.9407894736842105
Accuracy: 0.94

Logistic regression:
Accuracy for hd7_FMV_drop.csv: 0.9243697478991597
Accuracy: 0.92

