<a href="https://colab.research.google.com/github/madhurendra0089/Assignment/blob/main/MRI__Histogram.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Libraries

In [None]:
import os
import glob
import numpy as np
import nibabel as nib
from scipy import ndimage
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score
# Default data folder on the mounted Google Drive.
# NOTE(review): in the visible cells this value is reassigned to
# '/content/drive/MyDrive/MRI_ED/' in the "Load Preprocessed Data" cell before
# any live code reads it — confirm whether this first assignment is still needed.
folderpath = "/content/drive/MyDrive/DATASET/preprocessed data/histogram/"

In [None]:
# Mount Google Drive at /content/drive so the .npy files referenced through
# `folderpath` can be read. This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Load Preprocessed Data
(The arrays were saved earlier as `.npy` files with NumPy for convenience.)

In [None]:
# Folder on the mounted Google Drive that holds the cached .npy arrays.
folderpath = '/content/drive/MyDrive/MRI_ED/'

# The raw train/val/test loading below is intentionally disabled; a later cell
# loads PCA-reduced arrays from the same folder instead. It used to be disabled
# with an opening ''' that was never closed, which made this cell a SyntaxError,
# so it is kept as ordinary comments here.
#
# x_train = np.load(folderpath+'x_train.npy')
# x_test = np.load(folderpath+'x_test.npy')
# x_val = np.load(folderpath+'x_val.npy')
#
# y_train = np.load(folderpath+'y_train.npy')
# y_test = np.load(folderpath+'y_test.npy')
# y_val = np.load(folderpath+'y_val.npy')
#
# print("#x_train:" + str(len(x_train))+" #y_train:" +str(len(y_train))+" #x_test:"+str(len(x_test))+" #y_test:"+ str(len(y_test))+" #x_val:"+str(len(x_val))+" #y_val:"+ str(len(y_val)))

#x_train:407 #y_train:407 #x_test:142 #y_test:142 #x_val:100 #y_val:100


In [None]:
# Disabled step (kept for provenance): merge the train/val/test arrays back into
# one data set and flatten every sample to a 1-D feature vector.
# It used to be disabled with an opening ''' that was never closed, which made
# this cell a SyntaxError, so it is kept as ordinary comments here.
#
# x = np.concatenate((np.concatenate((x_train, x_val)), x_test))
# x = np.array([i.flatten(order='C') for i in x])
# y = np.concatenate((np.concatenate((y_train, y_val)), y_test))
# print(x.shape, y.shape)

(649, 1048576) (649,)


In [None]:
# PCA
# Disabled step (kept for provenance): reduce the flattened samples with PCA,
# keeping 99% of the variance, then draw an 80/20 train/test split.
# It used to be disabled with an opening ''' that was never closed, which made
# this cell a SyntaxError, so it is kept as ordinary comments here.
#
# NOTE(review): PCA is fitted on the full data set *before* the split, so the
# test samples influence the projection — consider fitting on the training part
# only. The split also has no random_state, so it cannot be reproduced exactly.
#
# pca = PCA(n_components=0.99)
# x_pca = pca.fit_transform(x)
#
# x_train,x_test, y_train, y_test =train_test_split(x_pca, y, shuffle=True, test_size=0.2)
# print(x_pca.shape, x_train.shape, x_test.shape)


(649, 532) (519, 532) (130, 532)


In [None]:
# Load the cached PCA-reduced feature matrices from `folderpath`.
x_train = np.load(folderpath+'pca_x_train.npy')
x_test = np.load(folderpath+'pca_x_test.npy')
# NOTE(review): the model cells below use y_train / y_test, but no live cell in
# the visible notebook assigns them (the only assignments sit in disabled code).
# On a fresh kernel the first cross_val_score call would raise NameError.
# Presumably matching label arrays were saved next to these files — TODO: load
# them here and confirm they line up with the rows of x_train / x_test.

# **Model Implementation**

In [None]:
def performance(model, x=None, y=None):
    """Print classification metrics for a fitted model.

    Prints the confusion matrix, accuracy, precision, recall and F1 score of
    ``model.predict`` against the true labels. The scikit-learn metric
    functions are called with their default arguments.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    x : array-like, optional
        Features to evaluate on. Defaults to the notebook-level ``x_test``.
    y : array-like, optional
        True labels for ``x``. Defaults to the notebook-level ``y_test``.

    Raises
    ------
    ValueError
        If only one of ``x`` / ``y`` is supplied; mixing an explicit argument
        with the global fallback would silently pair mismatched data.

    Returns
    -------
    None
        The metrics are only printed, so calling this as the last line of a
        cell produces no extra cell output.
    """
    if (x is None) != (y is None):
        raise ValueError("x and y must be provided together")

    # Fall back to the notebook globals so existing `performance(model)` calls
    # keep working unchanged.
    features = x_test if x is None else x
    labels = y_test if y is None else y

    y_pred = model.predict(features)

    # model's performance
    print("Confusion Matrix:")
    print(confusion_matrix(labels, y_pred))

    print("Accuracy:", accuracy_score(labels, y_pred))
    print("Precision:", precision_score(labels, y_pred))
    print("Recall:", recall_score(labels, y_pred))
    print("F1 Score:", f1_score(labels, y_pred))

In [None]:
# Logistic regression baseline.
# 1) Estimate accuracy with 5-fold cross-validation on the training split.
# 2) Fit on the whole training split and print the held-out test metrics.
LR = LogisticRegression(max_iter=1000)
LR_cv = cross_val_score(
    estimator=LR,
    X=x_train,
    y=y_train,
    cv=5,
    scoring='accuracy',
)
print(LR_cv)
LR.fit(x_train, y_train)
performance(LR)

[0.82692308 0.72115385 0.77884615 0.77884615 0.81553398]
Confusion Matrix:
[[54 11]
 [ 9 56]]
Accuracy: 0.8461538461538461
Precision: 0.835820895522388
Recall: 0.8615384615384616
F1 Score: 0.8484848484848485


In [None]:
# Decision tree classifier
# random_state is pinned (42, the same seed the MLP cell uses) so that the
# cross-validation scores and test metrics are reproducible on a re-run.
DT = DecisionTreeClassifier(random_state=42)
# 5-fold cross-validated accuracy on the training split.
DT_cv = cross_val_score(DT, x_train, y_train, scoring='accuracy', cv=5)
print(DT_cv)
# Final fit on the whole training split, then held-out test metrics.
DT.fit(x_train, y_train)
performance(DT)

[0.57692308 0.64423077 0.57692308 0.58653846 0.59223301]
Confusion Matrix:
[[32 33]
 [27 38]]
Accuracy: 0.5384615384615384
Precision: 0.5352112676056338
Recall: 0.5846153846153846
F1 Score: 0.5588235294117647


In [None]:
# Random forest classifier
# random_state is pinned (42, the same seed the MLP cell uses) because the
# forest is a stochastic estimator; without a seed every run gives different
# cross-validation scores and test metrics.
RF = RandomForestClassifier(n_estimators=200, max_depth=4, random_state=42)
# 5-fold cross-validated accuracy on the training split.
RF_cv = cross_val_score(RF, x_train, y_train, scoring='accuracy', cv=5)
print(RF_cv)
# Final fit on the whole training split, then held-out test metrics.
RF.fit(x_train, y_train)
performance(RF)

[0.64423077 0.58653846 0.625      0.57692308 0.60194175]
Confusion Matrix:
[[35 30]
 [12 53]]
Accuracy: 0.676923076923077
Precision: 0.6385542168674698
Recall: 0.8153846153846154
F1 Score: 0.7162162162162162


In [None]:
# XGBoost classifier: 100 trees of depth 3 with a 0.1 learning rate.
XGB = xgb.XGBClassifier(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
)
# 5-fold cross-validated accuracy on the training split.
XGB_cv = cross_val_score(
    estimator=XGB,
    X=x_train,
    y=y_train,
    cv=5,
    scoring='accuracy',
)
print(XGB_cv)
# Final fit on the whole training split, then held-out test metrics.
XGB.fit(x_train, y_train)
performance(XGB)

[0.64423077 0.60576923 0.67307692 0.68269231 0.63106796]
Confusion Matrix:
[[41 24]
 [16 49]]
Accuracy: 0.6923076923076923
Precision: 0.6712328767123288
Recall: 0.7538461538461538
F1 Score: 0.7101449275362318


In [None]:
# Multi-layer perceptron tuned with an exhaustive grid search (3-fold CV).
mlp = MLPClassifier(random_state=42, max_iter=1000)

# 2 layer layouts x 1 activation x 2 alpha values = 4 candidates.
param_grid = dict(
    hidden_layer_sizes=[(512, 256, 64, 32), (512, 256, 128, 64)],
    activation=['relu'],
    alpha=[0.00001, 0.0001],
)
grid_search = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,  # use every available core
    verbose=1,
)
grid_search.fit(x_train, y_train)

# Winning estimator from the search; reused by a later ensemble cell.
best_model = grid_search.best_estimator_
print(f"Best Hyperparameters: {best_model}")

performance(best_model)

Fitting 3 folds for each of 4 candidates, totalling 12 fits
Best Hyperparameters: MLPClassifier(alpha=1e-05, hidden_layer_sizes=(512, 256, 64, 32), max_iter=1000,
              random_state=42)
Confusion Matrix:
[[51 14]
 [10 55]]
Accuracy: 0.8153846153846154
Precision: 0.7971014492753623
Recall: 0.8461538461538461
F1 Score: 0.8208955223880597


In [None]:
# Support vector classifier with scikit-learn's default settings.
SVC_ = SVC()
# 5-fold cross-validated accuracy on the training split.
SVC_cv = cross_val_score(
    estimator=SVC_,
    X=x_train,
    y=y_train,
    cv=5,
    scoring='accuracy',
)
print(SVC_cv)
# Final fit on the whole training split, then held-out test metrics.
SVC_.fit(x_train, y_train)
performance(SVC_)

[0.75961538 0.72115385 0.75961538 0.71153846 0.7961165 ]
Confusion Matrix:
[[50 15]
 [11 54]]
Accuracy: 0.8
Precision: 0.782608695652174
Recall: 0.8307692307692308
F1 Score: 0.8059701492537313


# **Ensemble**

Paper: Decision Tree + SVC + XGBoost (hard voting)

In [None]:

# Members of the ensemble: the decision tree, SVC and XGBoost estimators
# created in the earlier model cells.
model1, model2, model3 = DT, SVC_, XGB

# Hard voting: the ensemble predicts the majority class label of its members.
ensemble_model = VotingClassifier(
    estimators=[
        ('model1', model1),
        ('model2', model2),
        ('model3', model3),
    ],
    voting='hard',
)

# Fit on the training split, then print the held-out test metrics.
ensemble_model.fit(x_train, y_train)

performance(ensemble_model)


Confusion Matrix:
[[44 21]
 [12 53]]
Accuracy: 0.7461538461538462
Precision: 0.7162162162162162
Recall: 0.8153846153846154
F1 Score: 0.762589928057554


Mine: ensembles built around Logistic Regression and SVC (hard voting)

In [None]:

# Members of the ensemble: the logistic regression, SVC and XGBoost estimators
# created in the earlier model cells.
model1, model2, model3 = LR, SVC_, XGB

# Hard voting: the ensemble predicts the majority class label of its members.
ensemble_model = VotingClassifier(
    estimators=[
        ('model1', model1),
        ('model2', model2),
        ('model3', model3),
    ],
    voting='hard',
)

# Fit on the training split, then print the held-out test metrics.
ensemble_model.fit(x_train, y_train)

performance(ensemble_model)


Confusion Matrix:
[[52 13]
 [ 7 58]]
Accuracy: 0.8461538461538461
Precision: 0.8169014084507042
Recall: 0.8923076923076924
F1 Score: 0.8529411764705882


In [None]:

# Members of the ensemble: the logistic regression and SVC estimators plus the
# best MLP found by the grid search, all created in earlier cells.
model1, model2, model3 = LR, SVC_, best_model

# Hard voting: the ensemble predicts the majority class label of its members.
ensemble_model = VotingClassifier(
    estimators=[
        ('model1', model1),
        ('model2', model2),
        ('model3', model3),
    ],
    voting='hard',
)

# Fit on the training split, then print the held-out test metrics.
ensemble_model.fit(x_train, y_train)

performance(ensemble_model)


Confusion Matrix:
[[53 12]
 [10 55]]
Accuracy: 0.8307692307692308
Precision: 0.8208955223880597
Recall: 0.8461538461538461
F1 Score: 0.8333333333333334
