In [644]:
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

In [645]:
class PreprocessData:
    def NomalizeData(Data):
        Data=np.array(Data)
        lenght = len(Data)
        normalizedData = []
        
        for i in range (lenght):
            normalizedData.append(float((Data[i] - min(Data) ) / ( max(Data) - min(Data)) ))
            
        return normalizedData
    
    def TestAndTrain(Data , PercentageOfTrainData):
        PercentageOfTrainData = float(PercentageOfTrainData / 100)
        Train_DataFrame = Data.sample(frac=PercentageOfTrainData)
        Test_DataFrame =Data.drop(Train_DataFrame.index)
        
        return Train_DataFrame , Test_DataFrame
    

In [646]:
Data = pd.read_excel('Detection of COVID-19 Infection from Routine Blood Exams_dataset.xlsx')
Data["SESSO"].replace({'M': 0.,'F': 1.}, inplace=True)

In [647]:
def Suprimun_Tnorm(Relation, data):
    ror = np.vstack((Relation, data))
    sup_norm = np.max(np.min(ror, axis=0))
    return sup_norm


def composition(relation , data):
    composed = np.array(list(map(lambda x: list(map(lambda y: Suprimun_Tnorm(relation[x], data[y]),range(data.shape[0]))),range(relation.shape[0]))))
    return composed


def split_by_class(TrainData):
    Data0 = TrainData[TrainData['TARGET']==0]
    target0 = Data0['TARGET']
    Data0 = Data0.drop('TARGET',axis=1)
    Data0=(Data0-Data0.min())/(Data0.max()-Data0.min())
    Data1 = TrainData[TrainData['TARGET']==1]
    target1 = Data1['TARGET']
    Data1 = Data1.drop('TARGET',axis=1)
    Data1=(Data1-Data1.min())/(Data1.max()-Data1.min())
    Data2 = TrainData[TrainData['TARGET']==2]
    target2 = Data2['TARGET']
    Data2 = Data2.drop('TARGET',axis=1)
    Data2=(Data2-Data2.min())/(Data2.max()-Data2.min())
    return Data0 , Data1, Data2 , target0, target1, target2 



In [648]:
#fill misiing values 
imputer= KNNImputer(n_neighbors=4)
Data = imputer.fit_transform(Data[['SESSO', 'AGE','WBC','Piastrine','Neutrofili','Linfociti','Monociti','Eosinofili','Basofili','PCR','AST','ALT','ALP','GGT','LDH','TARGET']])
Data = pd.DataFrame(Data, columns = ['SESSO', 'AGE','WBC','Piastrine','Neutrofili','Linfociti','Monociti','Eosinofili','Basofili','PCR','AST','ALT','ALP','GGT','LDH','TARGET'])
# Data = pd.concat([Data1, Data2,Data0])






In [649]:
#split to teset and train 
# Dataset1 = PreprocessData.NomalizeData(Data)
Dataset = PreprocessData.TestAndTrain(Data , 80)

TrainData = Dataset[0]
TestData = Dataset[1]
targetTest = TestData['TARGET']
TestData = TestData.drop('TARGET',axis=1)
TestData=(TestData-TestData.min())/(TestData.max()-TestData.min())

In [650]:
Data0 , Data1, Data2 , target0, target1, target2  = split_by_class(TrainData)
    

In [651]:
min0 = Data0.min(axis=0)
max0 = Data0.max(axis=0)
min1 = Data1.min(axis=0)
max1 = Data1.max(axis=0)
min2 = Data2.min(axis=0)
max2 = Data2.max(axis=0)

In [652]:
# method 1 create similarity matrix based on 
# avg0 = (min0 + max0 )/2
# avg0['SESSO']=0
# avg1 = (min1 + max1 )/2
# avg1['SESSO']=0
# avg2 = (min2 + max2 )/2
# avg1['SESSO']=0
# avg0 = np.array(avg0)
# avg1 = np.array(avg1)
# avg2 = np.array(avg2)


#method 2 , create similarity matrix based on mean of the class 
avg0 = Data0.mean()
avg0['SESSO']=0
avg1 = Data1.mean()
avg1['SESSO']=0
avg2 = Data2.mean()
avg1['SESSO']=0
avg0 = np.array(avg0)
avg1 = np.array(avg1)
avg2 = np.array(avg2)


In [653]:
avg0

array([0.        , 0.61423506, 0.32420029, 0.44602997, 0.23274039,
       0.31875763, 0.19383641, 0.13095238, 0.20833333, 0.17992498,
       0.07453802, 0.07854952, 0.05869688, 0.06648075, 0.33017042])

In [654]:
avg1

array([0.        , 0.68557692, 0.3476    , 0.3098    , 0.35322086,
       0.28317308, 0.35833333, 0.07395833, 0.0125    , 0.22378056,
       0.14761691, 0.19309476, 0.11133673, 0.08606378, 0.30522615])

In [655]:
avg2

array([0.13559322, 0.65806927, 0.26852619, 0.28926769, 0.3170987 ,
       0.27627119, 0.34222333, 0.13220339, 0.07768362, 0.28758956,
       0.11150257, 0.17235369, 0.11526728, 0.11414797, 0.33952942])

In [656]:
RelationMatrix =[ avg0,avg1,avg2]

In [657]:
RelationMatrix = np.array(RelationMatrix)

In [658]:
RelationMatrix.shape

(3, 15)

In [659]:
RelationMatrix

array([[0.        , 0.61423506, 0.32420029, 0.44602997, 0.23274039,
        0.31875763, 0.19383641, 0.13095238, 0.20833333, 0.17992498,
        0.07453802, 0.07854952, 0.05869688, 0.06648075, 0.33017042],
       [0.        , 0.68557692, 0.3476    , 0.3098    , 0.35322086,
        0.28317308, 0.35833333, 0.07395833, 0.0125    , 0.22378056,
        0.14761691, 0.19309476, 0.11133673, 0.08606378, 0.30522615],
       [0.13559322, 0.65806927, 0.26852619, 0.28926769, 0.3170987 ,
        0.27627119, 0.34222333, 0.13220339, 0.07768362, 0.28758956,
        0.11150257, 0.17235369, 0.11526728, 0.11414797, 0.33952942]])

In [660]:
TestData = np.array(TestData)
SimilarityMatrix = composition(RelationMatrix , TestData)

In [661]:

predict = []
for i in range(SimilarityMatrix.shape[1]):
    a = np.argmax(SimilarityMatrix[:,i])
    predict.append(a)

In [662]:
atcutal= np.array(targetTest)
predict = np.array(predict)

In [663]:
confusion_matrix(atcutal, predict )

array([[ 9,  9,  0],
       [15,  9,  1],
       [ 8,  5,  0]])

In [664]:
accuracy_score(atcutal, predict)

0.32142857142857145

In [668]:
#change 3 class to two class , class 0 and class 1
actual1 =np.zeros(len(predict))
pred1 =np.zeros(len(predict))
for i in range(len(predict)):
    if(predict[i] == 2 ):
        pred1[i]=1
    else:
        pred1[i]=predict[i]
    if(atcutal[i]==2):
        actual1[i]=1
    else:
        actual1[i] = atcutal[i]

In [669]:
accuracy_score(actual1, pred1)

0.42857142857142855

In [670]:
confusion_matrix(actual1, pred1 )

array([[ 9,  9],
       [23, 15]])