In [160]:
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

In [161]:
class PreprocessData:
    def NomalizeData(Data):
        Data=np.array(Data)
        lenght = len(Data)
        normalizedData = []
        
        for i in range (lenght):
            normalizedData.append(float((Data[i] - min(Data) ) / ( max(Data) - min(Data)) ))
            
        return normalizedData
    
    def TestAndTrain(Data , PercentageOfTrainData):
        PercentageOfTrainData = float(PercentageOfTrainData / 100)
        Train_DataFrame = Data.sample(frac=PercentageOfTrainData)
        Test_DataFrame =Data.drop(Train_DataFrame.index)
        
        return Train_DataFrame , Test_DataFrame
    

In [162]:
Data = pd.read_excel('Detection of COVID-19 Infection from Routine Blood Exams_dataset.xlsx')
Data["SESSO"].replace({'M': 0.,'F': 1.}, inplace=True)

In [163]:
def Suprimun_Tnorm(Relation, data):
    ror = np.vstack((Relation, data))
    sup_norm = np.max(np.min(ror, axis=0))
    return sup_norm


def composition(relation , data):
    composed = np.array(list(map(lambda x: list(map(lambda y: Suprimun_Tnorm(relation[x], data[y]),range(data.shape[0]))),range(relation.shape[0]))))
    return composed


def split_by_class(TrainData):
    Data0 = TrainData[TrainData['TARGET']==0]
    target0 = Data0['TARGET']
    Data0 = Data0.drop('TARGET',axis=1)
    Data0=(Data0-Data0.min())/(Data0.max()-Data0.min())
    Data1 = TrainData[TrainData['TARGET']==1]
    target1 = Data1['TARGET']
    Data1 = Data1.drop('TARGET',axis=1)
    Data1=(Data1-Data1.min())/(Data1.max()-Data1.min())
    Data2 = TrainData[TrainData['TARGET']==2]
    target2 = Data2['TARGET']
    Data2 = Data2.drop('TARGET',axis=1)
    Data2=(Data2-Data2.min())/(Data2.max()-Data2.min())
    return Data0 , Data1, Data2 , target0, target1, target2 



In [164]:
#fill misiing values 
imputer= KNNImputer(n_neighbors=4)
Data = imputer.fit_transform(Data[['SESSO', 'AGE','WBC','Piastrine','Neutrofili','Linfociti','Monociti','Eosinofili','Basofili','PCR','AST','ALT','ALP','GGT','LDH','TARGET']])
Data = pd.DataFrame(Data, columns = ['SESSO', 'AGE','WBC','Piastrine','Neutrofili','Linfociti','Monociti','Eosinofili','Basofili','PCR','AST','ALT','ALP','GGT','LDH','TARGET'])
# Data = pd.concat([Data1, Data2,Data0])






In [165]:
#split to teset and train 
# Dataset1 = PreprocessData.NomalizeData(Data)
Dataset = PreprocessData.TestAndTrain(Data , 80)

TrainData = Dataset[0]
TestData = Dataset[1]
targetTest = TestData['TARGET']
TestData = TestData.drop('TARGET',axis=1)
TestData=(TestData-TestData.min())/(TestData.max()-TestData.min())

In [166]:
Data0 , Data1, Data2 , target0, target1, target2  = split_by_class(TrainData)
    

In [167]:
min0 = Data0.min(axis=0)
max0 = Data0.max(axis=0)
min1 = Data1.min(axis=0)
max1 = Data1.max(axis=0)
min2 = Data2.min(axis=0)
max2 = Data2.max(axis=0)

In [168]:
# method 1 create similarity matrix based on numeric range 
# avg0 = (min0 + max0 )/2
# avg0['SESSO']=0
# avg1 = (min1 + max1 )/2
# avg1['SESSO']=0
# avg2 = (min2 + max2 )/2
# avg1['SESSO']=0
# avg0 = np.array(avg0)
# avg1 = np.array(avg1)
# avg2 = np.array(avg2)

var0 = np.var(Data0)
var1 = np.var(Data1)
var2 = np.var(Data2)

var0 = np.array(var0)
var1 = np.array(var1)
var2 = np.array(var2)

#method 2 , create similarity matrix based on mean of the class 
avg0 = Data0.mean()
avg0['SESSO']=0
avg1 = Data1.mean()
avg1['SESSO']=0
avg2 = Data2.mean()
avg1['SESSO']=0
avg0 = np.array(avg0)
avg1 = np.array(avg1)
avg2 = np.array(avg2)


In [169]:
var0.shape

(15,)

In [170]:
avg1.shape

(15,)

In [171]:
avg2

array([0.18644068, 0.66912307, 0.26427467, 0.37129237, 0.31684945,
       0.26813559, 0.37768362, 0.13474576, 0.08757062, 0.28777488,
       0.10870945, 0.18626479, 0.27609607, 0.24978619, 0.33239914])

In [172]:
# RelationMatrix =[ avg0,avg1,avg2]
RelationMatrix =[ var0,var1,var2]

In [173]:
RelationMatrix = np.array(RelationMatrix)

In [174]:
RelationMatrix.shape

(3, 15)

In [175]:
RelationMatrix

array([[0.24940512, 0.05104034, 0.04365102, 0.03821551, 0.03173455,
        0.0422683 , 0.02393545, 0.01811916, 0.12976837, 0.05431602,
        0.01971466, 0.03758211, 0.012059  , 0.01364626, 0.04650559],
       [0.22085068, 0.03060411, 0.02535514, 0.03475495, 0.03435983,
        0.01470908, 0.04713165, 0.0287992 , 0.0120464 , 0.03401927,
        0.02105163, 0.02140683, 0.01739943, 0.02258097, 0.02675911],
       [0.15168055, 0.0603217 , 0.04683758, 0.04432855, 0.05393579,
        0.03066263, 0.03793418, 0.04120799, 0.03211481, 0.05740114,
        0.02090188, 0.04290647, 0.03958623, 0.03948783, 0.04488176]])

In [176]:
TestData = np.array(TestData)
SimilarityMatrix = composition(RelationMatrix , TestData)

In [177]:

predict = []
for i in range(SimilarityMatrix.shape[1]):
    a = np.argmax(SimilarityMatrix[:,i])
    predict.append(a)

In [178]:
atcutal= np.array(targetTest)
predict = np.array(predict)

In [179]:
confusion_matrix(atcutal, predict )

array([[12,  0,  8],
       [ 4,  0, 19],
       [ 4,  0,  9]])

In [180]:
accuracy_score(atcutal, predict)

0.375

In [181]:
#change 3 class to two class , class 0 and class 1
actual1 =np.zeros(len(predict))
pred1 =np.zeros(len(predict))
for i in range(len(predict)):
    if(predict[i] == 2 ):
        pred1[i]=1
    else:
        pred1[i]=predict[i]
    if(atcutal[i]==2):
        actual1[i]=1
    else:
        actual1[i] = atcutal[i]

In [182]:
accuracy_score(actual1, pred1)

0.7142857142857143

In [183]:
confusion_matrix(actual1, pred1 )

array([[12,  8],
       [ 8, 28]])