In [23]:
from collections import Counter

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Compare eight classifiers on the cardiotocography data.
# The data is split, standardised and PCA-reduced exactly once; every
# classifier is then fitted and scored on those same arrays.

#configuration
SEED = 42            # seed for the train/test split and the stochastic estimators
TEST_SIZE = 0.2      # fraction of rows held out for evaluation
PCA_VARIANCE = 0.95  # fraction of variance the PCA projection must retain
average = "macro"    # averaging mode handed to roc_auc_score

#load data
df1 = pd.read_excel('CTG.xls', "Raw Data")

#drop missing values
df = df1.dropna()

#x,y data
# NOTE(review): .iloc is 0-based, so the 1:2126 row slice skips the first row
# that survives dropna() -- presumably meant to skip a non-data row; verify
# against the sheet, otherwise one sample is silently discarded.
# NOTE(review): features are every column from position 3 up to (excluding)
# the last two; confirm none of them is derived from the target column.
X = df.iloc[1:2126, 3:-2].values
y = df.iloc[1:2126, -1].values

#split the dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

#feature scaling (statistics are learned from the training rows only)
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#principal component analysis (fitted once, on the scaled training rows)
pca = PCA(PCA_VARIANCE)
pca.fit(X_train)
X_train = pca.transform(X_train)
X_test = pca.transform(X_test)

#label binarizer
# Fitted on all labels rather than on y_test alone, so a class predicted by a
# model is never encoded as an all-zero row just because it is missing from
# the test split.
lb = LabelBinarizer()
lb.fit(y)
y_test_bin = lb.transform(y_test)


def evaluate_classifier(name, clf, X_fit, y_fit, X_eval, y_eval_bin,
                        binarizer, average="macro"):
    """Fit one classifier and print its evaluation on the held-out data.

    Prints, in order: ``name``, the ROC AUC score, and the classification
    report. Both metrics are computed on binarized labels.

    Parameters
    ----------
    name : str
        Heading printed before the metrics.
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
    X_fit, y_fit : array-like
        Training features and training labels.
    X_eval : array-like
        Held-out features passed to ``clf.predict``.
    y_eval_bin : array-like
        Held-out labels already transformed by ``binarizer``.
    binarizer : LabelBinarizer
        Fitted binarizer used to encode the predictions.
    average : str, default "macro"
        Forwarded to ``roc_auc_score``.

    Returns
    -------
    float
        The ROC AUC value that was printed.
    """
    clf.fit(X_fit, y_fit)
    # NOTE: the AUC is computed from hard class predictions, not from
    # decision scores or probabilities.
    y_pred_bin = binarizer.transform(clf.predict(X_eval))
    auc = roc_auc_score(y_eval_bin, y_pred_bin, average=average)

    #metrics evaluation
    print(name)
    print(auc)
    print(classification_report(y_eval_bin, y_pred_bin))
    return auc


#classifiers
# Estimators that accept a seed get SEED so that a fresh Restart & Run All
# reproduces the same numbers; the others keep their original construction.
svc = SVC(kernel='rbf')
mlp = MLPClassifier(random_state=SEED)
p = Perceptron()
d = DecisionTreeClassifier(random_state=SEED)
r = RandomForestClassifier(random_state=SEED)
gb = GradientBoostingClassifier(random_state=SEED)
ab = AdaBoostClassifier(random_state=SEED)
knn = KNeighborsClassifier()

classifiers = [
    ("SVC", svc),
    ("MLP", mlp),
    ("Perceptron", p),
    ("Decisiontree", d),
    ("Random forest", r),
    ("Gradientboost", gb),
    ("Adaboost", ab),
    ("KNN", knn),
]

#fit and evaluate every classifier on the same preprocessed split
auc_scores = {}
for clf_name, clf in classifiers:
    auc_scores[clf_name] = evaluate_classifier(
        clf_name, clf, X_train, y_train, X_test, y_test_bin, lb, average=average
    )
    


SVC
0.9845377798380271
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       324
           1       0.95      0.95      0.95        66
           2       1.00      1.00      1.00        35

   micro avg       0.99      0.99      0.99       425
   macro avg       0.98      0.98      0.98       425
weighted avg       0.99      0.99      0.99       425
 samples avg       0.99      0.99      0.99       425

MLP
0.9761869447545188
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       324
           1       0.95      0.92      0.94        66
           2       1.00      1.00      1.00        35

   micro avg       0.98      0.98      0.98       425
   macro avg       0.98      0.97      0.98       425
weighted avg       0.98      0.98      0.98       425
 samples avg       0.98      0.98      0.98       425

Perceptron
0.9687962191305423
              precision    recall  f1-score   support


In [None]:
# SESHA SAI SREEVANI KAPPAGANTULA
#N11264916
#SSK785
#CLASSIFICATION ON CARDIOTOCOGRAPHY