<a href="https://colab.research.google.com/github/sreemolgagarin/ML-project_2/blob/main/elm_Imbalanced_Classificatio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def data_prep(file_name):
    """Load a CSV file and split it into scaled features and raw targets.

    Every column except the last is treated as a feature; the last column
    is the target.  The features are standardized with scikit-learn's
    ``StandardScaler`` fitted on the whole table, while the target column
    is returned exactly as read.

    Args:
        file_name: Location of the CSV file, passed to ``pandas.read_csv``.

    Returns:
        Tuple ``(x, y)``: the standardized feature matrix and the last
        column of the table.
    """
    from sklearn.preprocessing import StandardScaler

    table = pd.read_csv(file_name).to_numpy()
    features, target = table[:, :-1], table[:, -1]
    return StandardScaler().fit_transform(features), target
        

In [None]:
def find_clusters(X, no_of_clusters):
    """Run k-means on ``X`` and report the assignment and the centres.

    Args:
        X: Samples to cluster, handed unchanged to ``KMeans.fit``.
        no_of_clusters: Number of clusters requested from k-means.

    Returns:
        Tuple ``(clusters, centroids)`` holding the fitted model's
        ``labels_`` and ``cluster_centers_`` attributes, in that order.
    """
    from sklearn.cluster import KMeans

    # k-means++ seeding, as before; no random_state is set.
    model = KMeans(init='k-means++', n_clusters=no_of_clusters)
    model.fit(X)
    return model.labels_, model.cluster_centers_

    

In [None]:
def select_samples(X_train, y_train, clusters, centroids, k=105):
    """Keep, for every cluster, the ``k`` samples closest to its centroid.

    Args:
        X_train: 2-D array-like of training samples, one row per sample.
        y_train: Labels aligned with the rows of ``X_train``.
        clusters: Integer cluster label of every row of ``X_train``.  Each
            label must be the row index of that cluster's centre in
            ``centroids`` (the pairing produced by ``KMeans.labels_`` and
            ``KMeans.cluster_centers_``).
        centroids: 2-D array with one cluster centre per row.
        k: Maximum number of samples kept per cluster.  Defaults to 105,
            the value that was previously hard-coded.

    Returns:
        Tuple ``(knn, knn_class)``: the selected samples, grouped cluster
        by cluster in ascending label order and nearest-first inside each
        cluster, and their labels flattened to 1-D.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    clusters = np.reshape(clusters, -1)
    centroids = np.asarray(centroids)

    chosen = []
    for cluster in np.unique(clusters):
        members = np.flatnonzero(clusters == cluster)
        # The label doubles as the centroid's row index, so no
        # nearest-centroid search is needed to find the cluster centre.
        center = centroids[int(cluster)]
        distances = np.linalg.norm(X_train[members] - center, axis=1)
        # A stable sort keeps ties in original row order, making the
        # selection deterministic.
        nearest_first = np.argsort(distances, kind="stable")
        chosen.append(members[nearest_first[:k]])

    if chosen:
        selected = np.concatenate(chosen)
    else:
        # No samples at all: return empty selections instead of failing.
        selected = np.empty(0, dtype=int)
    return X_train[selected], np.reshape(y_train[selected], -1)
            
            

In [None]:
def SM_Oversampling(selected_samples,class_label):
    """Oversample the given samples with SMOTE.

    The labels are cast to ``int`` before resampling.  SMOTE is configured
    with ``sampling_strategy=.69`` and ``random_state=0``.

    Args:
        selected_samples: Feature matrix to resample.
        class_label: NumPy array of labels aligned with ``selected_samples``.

    Returns:
        Tuple ``(X_os, y_os)`` as produced by ``SMOTE.fit_resample``.
    """
    from imblearn.over_sampling import SMOTE

    int_labels = class_label.astype(int)
    sampler = SMOTE(random_state=0, sampling_strategy=.69)
    resampled_X, resampled_y = sampler.fit_resample(selected_samples, int_labels)
    return resampled_X, resampled_y

In [None]:
import seaborn as sn
from sklearn.metrics import confusion_matrix
def confusion_mat(y,y_predicted):
    """Draw the confusion matrix of ``y`` versus ``y_predicted`` as a heatmap.

    The true labels are flattened to 1-D, the matrix is computed with
    scikit-learn's ``confusion_matrix`` and wrapped in a two-row,
    two-column DataFrame before being rendered on a new 3x2 figure.
    Nothing is returned; the caller is expected to show the figure.

    Args:
        y: True labels (any shape; flattened before use).
        y_predicted: Predicted labels.
    """
    class_names = ['Clss0','Class1']
    counts = confusion_matrix(np.reshape(y,(-1)), y_predicted)
    table = pd.DataFrame(counts, index=class_names, columns=list(class_names))
    plt.figure(figsize=(3,2))
    sn.heatmap(table, cmap="YlGnBu", annot=True, fmt="d")

In [None]:
from sklearn.model_selection import train_test_split,KFold
# Load the dataset: X holds the standardized feature columns, Y the last
# CSV column (see data_prep).
X,Y = data_prep("cardiotocographyC10.csv")
# Hold out 20% of the rows as the test partition.
# NOTE(review): the split is neither seeded nor stratified, so the class
# balance of both partitions changes from run to run -- confirm this is
# acceptable for an imbalanced dataset.
train_X,test_X,train_y,test_y = train_test_split(X, Y, test_size=0.2)
print("Numer of samples in initial training set",train_X.shape[0])

In [None]:
# Group the training features into 10 k-means clusters ...
clusters, centroids = find_clusters(train_X,no_of_clusters=10)
# ... then keep only the samples nearest each cluster centre (as called
# here, select_samples keeps at most 105 samples per cluster).
selected_samples,class_label = select_samples(train_X,train_y,clusters,centroids)
print("Done")

In [None]:
# Report the per-class sample counts of the selected training subset.
# np.unique returns the counts in sorted-label order, so which entry is the
# majority class depends on the label values.
print("Number of instances in Majority and minority class before applying SMOTE:" )
label,count = np.unique(class_label,return_counts=True)
print(count)
# Oversample the selected subset with SMOTE (see SM_Oversampling).
X_os,y_os = SM_Oversampling(selected_samples,class_label)
# Same report for the oversampled data.
print("Number of instances in Majority and minority class after SMOTE are:" )
label,count = np.unique(y_os,return_counts=True)
print(count)

In [None]:
from elm import ElmClssifier
from OSELM import OSElmClassifier

ModuleNotFoundError: ignored

In [None]:
# Four models: ELM / OS-ELM for the plain selected samples, and the "_s"
# pair for the SMOTE-oversampled data.
# The constructor argument is presumably the hidden-layer size -- TODO
# confirm against the elm / OSELM modules.
# NOTE(review): oselm_s uses 30 while the other three use 20 -- confirm
# that the difference is intentional.
elm=ElmClssifier(20)
oselm = OSElmClassifier(20)
elm_s=ElmClssifier(20)
oselm_s = OSElmClassifier(30)

# Initial fit: the first pair on the selected samples, the "_s" pair on the
# oversampled samples.
# NOTE(review): elm and elm_s are fitted here but are only referenced
# afterwards by the disabled (triple-quoted) code below.
elm.fit(selected_samples,class_label)
oselm.fit(selected_samples,class_label)
elm_s.fit(X_os,y_os)
oselm_s.fit(X_os,y_os)
n_batches = 5
count = 0
# KFold is used only to cut the held-out set into n_batches chunks; with
# its default settings the test folds are consecutive and not shuffled.
# train_index is never used.
kf = KFold(n_splits=n_batches)
for train_index, test_index in kf.split(test_X):
    X_test,y_test = test_X[test_index],test_y[test_index]
    # Sequential update of both OS-ELM models with the current chunk.
    # NOTE(review): this also runs for the final chunk, so the models are
    # scored below on data they have just been trained on -- confirm that
    # this is the intended evaluation protocol.
    oselm.train(X_test,y_test)
    oselm_s.train(X_test,y_test)
    # Only the last chunk is evaluated.
    if count == (n_batches-1):
        # Disabled: evaluation of the two models trained without SMOTE.
        '''print("Prediction Result of OSELM without SMOTE:")
        y_pred = oselm.predict(X_test,y_test)
        print(oselm.score(y_test))
        confusion_mat(y_test,y_pred)
        plt.show()
        print("Prediction Result of ELM without SMOTE:")
        y_pred_elm = elm.predict(X_test,y_test)
        print(elm.score(y_test))
        confusion_mat(y_test,y_pred_elm) 
        plt.show()
        print()'''
        print("Prediction Result of OSELM with SMOTE:")
        # score() is called right after predict(); presumably it relies on
        # state stored by predict() -- TODO confirm in OSELM.
        y_pred = oselm_s.predict(X_test,y_test)
        print(oselm_s.score(y_test))
        confusion_mat(y_test,y_pred)
        plt.show()
        # Disabled: evaluation of the plain ELM trained with SMOTE.
        '''print()
        print("Presdiction Result of ELM with SMOTE:")
        y_pred_elm = elm_s.predict(X_test,y_test)
        print(elm_s.score(y_test))
        confusion_mat(y_test,y_pred_elm)
        plt.show()'''
        break;
    count += 1

In [None]:
# Compare ELM and OS-ELM, both fitted on the SMOTE-oversampled samples.
# The constructor argument is presumably the hidden-layer size -- TODO
# confirm against the elm / OSELM modules.
elm=ElmClssifier(30)
oselm = OSElmClassifier(30)
print("Results of ELM and OSELM with SMOTE:")
elm.fit(X_os,y_os)
oselm.fit(X_os,y_os)
n_batches = 10
# KFold only serves to cut the held-out set into n_batches chunks; the
# training indices it yields are not used.
kf = KFold(n_splits=n_batches)
for count, (train_index, test_index) in enumerate(kf.split(test_X)):
    X_test,y_test = test_X[test_index],test_y[test_index]
    # Sequential OS-ELM update with the current chunk.
    # NOTE(review): this includes the final chunk, which is also the one
    # scored below -- confirm that this is the intended protocol.
    oselm.train(X_test,y_test)
    # Only the last chunk is evaluated.
    if count == (n_batches-1):
        print("Presdiction Result of OSELM:")
        y_pred = oselm.predict(X_test,y_test)
        print(oselm.score(y_test))
        # The matrix used to be computed and silently discarded; print it
        # so the result actually appears in the cell output.
        print(confusion_matrix(y_test,y_pred))
        print("Presdiction Result of ELM:")
        y_pred_elm = elm.predict(X_test,y_test)
        print(elm.score(y_test))
        print(confusion_matrix(y_test,y_pred_elm))

In [None]:

def make_toy(x):
    """Generate a noisy ``x * cos(x)`` regression target for ``x``.

    Gaussian noise drawn from NumPy's global random state is scaled by
    ``0.5 * sqrt(x)`` and added to ``x * cos(x)``.

    Args:
        x: 1-D NumPy array of input positions.

    Returns:
        Tuple ``(x, y)`` with both arrays reshaped to column vectors of
        shape ``(n, 1)``.
    """
    noise_scale = 0.5 * np.sqrt(x)
    signal = x * np.cos(x)
    noisy = signal + noise_scale * np.random.randn(x.shape[0])
    return x.reshape(-1, 1), noisy.reshape(-1, 1)


In [None]:
# StandardScaler is otherwise only imported inside data_prep, so it has to
# be imported here as well; without this the cell fails with a NameError.
from sklearn.preprocessing import StandardScaler

stdsc = StandardScaler()
# Toy regression problem on x in [0.25, 20) with step 0.1.
x = np.arange(0.25,20,0.1)
xtoy, ytoy = make_toy(x)
# Inputs and targets are standardized independently; each fit_transform
# call refits the scaler, so stdsc ends up holding the target statistics.
xtoy, ytoy= stdsc.fit_transform(xtoy), stdsc.fit_transform(ytoy)
# Disabled alternative: build a second range (20.25 to 30) as the test set
# and scale it together with the training range.
'''x = np.arange(20.25,30,0.1)
xtoy_test, ytoy_test = make_toy(x)
print(xtoy_test.shape)
xtoy, ytoy = np.concatenate((xtoy_train,xtoy_test),axis=0),np.concatenate((ytoy_train,ytoy_test),axis=0)
print(xtoy.shape)
xtoy, ytoy= stdsc.fit_transform(xtoy), stdsc.fit_transform(ytoy)
n = xtoy_train.shape[0]
xtrain, ytrain = xtoy[:n,:],ytoy[:n,:]
xtest, ytest = xtoy[n:-1,:],ytoy[n:-1,:]'''
# 40% of the points are used for training.
xtrain, xtest, ytrain, ytest = train_test_split(xtoy, ytoy, test_size=0.6)
print("# samples in training set:",xtrain.shape[0])
# NOTE(review): the size reported as "Test set" is that of the complete toy
# set (xtoy), not of xtest; the plots also treat the full curve as the
# test data -- confirm this is intended.
print("# samples in Test set:",xtoy.shape[0])
plt.plot(xtoy, ytoy)
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Test Data')
#plt.legend()
plt.show()

In [None]:
# Fit an OS-ELM regressor on the training part of the toy data.
# The constructor argument is presumably the hidden-layer size -- TODO
# confirm against the OSELM module.
oselm = OSElmClassifier(50)
oselm.regression_fit(xtrain, ytrain )
# Plot the complete toy curve (training points included) as the reference.
plt.plot(xtoy, ytoy,label = 'Test set')
# regression_predict also receives the targets; presumably it uses them
# only for scoring -- TODO confirm in OSELM.
plt.plot(xtoy,oselm.regression_predict(xtoy, ytoy),label = 'OSELM Prediction')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Test data Vs OSELM Prediction')
plt.legend()
plt.show()