## Decision Tree Classifier

In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.tree import DecisionTreeClassifier
import random
import numpy as np
np.random.seed(0)
dataset_files = ['hepatitis1_FMV_mean.csv',
                 'hepatitis2_FMV_median.csv',
                 'hepatitis3_FMV_mode.csv',
                 'hepatitis4_FMV_RandomStdMean.csv',
                 'hepatitis5_FMV_RandomMinMax.csv',
                 'hepatitis6_FMV_interpolation.csv',
                 'hepatitis7_FMV_drop.csv']

for dataset_file in dataset_files:
    df = pd.read_csv(dataset_file)
    X = df.drop('class', axis=1)  
    y = df['class']

    X_train, X_test, y_train, y_test = train_test_split(X, y, 
    test_size=0.7, random_state=42)
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    print(f"Decision Tree:")
    print(f'confusion matrix:')
    print(confusion_matrix(y_test,y_pred))
    print(classification_report(y_test,y_pred))
    print(f"{dataset_file}")
    print(f'Accuracy: {accuracy:.2f}')
    print(f'F1-Score: {f1:.2f}\n')
    

Decision Tree:
confusion matrix:
[[ 9 14]
 [16 70]]
              precision    recall  f1-score   support

           1       0.36      0.39      0.37        23
           2       0.83      0.81      0.82        86

    accuracy                           0.72       109
   macro avg       0.60      0.60      0.60       109
weighted avg       0.73      0.72      0.73       109

hepatitis1_FMV_mean.csv
Accuracy: 0.72
F1-Score: 0.37

Decision Tree:
confusion matrix:
[[ 9 14]
 [16 70]]
              precision    recall  f1-score   support

           1       0.36      0.39      0.37        23
           2       0.83      0.81      0.82        86

    accuracy                           0.72       109
   macro avg       0.60      0.60      0.60       109
weighted avg       0.73      0.72      0.73       109

hepatitis2_FMV_median.csv
Accuracy: 0.72
F1-Score: 0.37

Decision Tree:
confusion matrix:
[[ 9 14]
 [16 70]]
              precision    recall  f1-score   support

           1       0.36


## Random Forest Classifier

In [47]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
import random
import numpy as np
np.random.seed(0)
dataset_files = ['hepatitis1_FMV_mean.csv',
                 'hepatitis2_FMV_median.csv',
                 'hepatitis3_FMV_mode.csv',
                 'hepatitis4_FMV_RandomStdMean.csv',
                 'hepatitis5_FMV_RandomMinMax.csv',
                 'hepatitis6_FMV_interpolation.csv',
                 'hepatitis7_FMV_drop.csv']

for dataset_file in dataset_files:
    df = pd.read_csv(dataset_file)
    X = df.drop('class', axis=1)  
    y = df['class']

    X_train, X_test, y_train, y_test = train_test_split(X, y, 
    test_size=0.3, random_state=42)
    model = RandomForestClassifier()
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    print(f"\nRandom Forest Classifier:")
    print(f'confusion matrix:')
    print(confusion_matrix(y_test,y_pred))
    print(classification_report(y_test,y_pred))
    print(f"{dataset_file}")
    print(f'Accuracy: {accuracy:.2f}')
    print(f'F1-Score: {f1:.2f}\n')



Random Forest Classifier:
confusion matrix:
[[ 1  8]
 [ 3 35]]
              precision    recall  f1-score   support

           1       0.25      0.11      0.15         9
           2       0.81      0.92      0.86        38

    accuracy                           0.77        47
   macro avg       0.53      0.52      0.51        47
weighted avg       0.71      0.77      0.73        47

hepatitis1_FMV_mean.csv
Accuracy: 0.77
F1-Score: 0.15


Random Forest Classifier:
confusion matrix:
[[ 2  7]
 [ 2 36]]
              precision    recall  f1-score   support

           1       0.50      0.22      0.31         9
           2       0.84      0.95      0.89        38

    accuracy                           0.81        47
   macro avg       0.67      0.58      0.60        47
weighted avg       0.77      0.81      0.78        47

hepatitis2_FMV_median.csv
Accuracy: 0.81
F1-Score: 0.31


Random Forest Classifier:
confusion matrix:
[[ 2  7]
 [ 2 36]]
              precision    recall  f1-scor

## Logistic Regression

In [46]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.linear_model import LogisticRegression
import random
import numpy as np
np.random.seed(0)
import warnings
warnings.filterwarnings("ignore")

dataset_files = ['hepatitis1_FMV_mean.csv',
                 'hepatitis2_FMV_median.csv',
                 'hepatitis3_FMV_mode.csv',
                 'hepatitis4_FMV_RandomStdMean.csv',
                 'hepatitis5_FMV_RandomMinMax.csv',
                 'hepatitis6_FMV_interpolation.csv',
                 'hepatitis7_FMV_drop.csv']

for dataset_file in dataset_files:
    df = pd.read_csv(dataset_file)
    X = df.drop('class', axis=1) 
    y = df['class']

    X_train, X_test, y_train, y_test = train_test_split(X, y, 
    test_size=0.3, random_state=42)
    model = LogisticRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    print(f"\nLogistic Regression:")
    print(f'confusion matrix:')
    print(confusion_matrix(y_test,y_pred))
    print(classification_report(y_test,y_pred))
    print(f"{dataset_file}")
    print(f'Accuracy: {accuracy:.2f}')
    print(f'F1-Score: {f1:.2f}\n')
    


Logistic Regression:
confusion matrix:
[[ 3  6]
 [ 2 36]]
              precision    recall  f1-score   support

           1       0.60      0.33      0.43         9
           2       0.86      0.95      0.90        38

    accuracy                           0.83        47
   macro avg       0.73      0.64      0.66        47
weighted avg       0.81      0.83      0.81        47

hepatitis1_FMV_mean.csv
Accuracy: 0.83
F1-Score: 0.43


Logistic Regression:
confusion matrix:
[[ 3  6]
 [ 3 35]]
              precision    recall  f1-score   support

           1       0.50      0.33      0.40         9
           2       0.85      0.92      0.89        38

    accuracy                           0.81        47
   macro avg       0.68      0.63      0.64        47
weighted avg       0.79      0.81      0.79        47

hepatitis2_FMV_median.csv
Accuracy: 0.81
F1-Score: 0.40


Logistic Regression:
confusion matrix:
[[ 3  6]
 [ 3 35]]
              precision    recall  f1-score   support

  