In [458]:
import pandas as pd
import numpy as np
from scipy.stats import kurtosis
from scipy import fftpack as fft
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score

import warnings

In [459]:
warnings.filterwarnings("ignore")

In [460]:
class Patient():
    def __init__(self,cgm):
        self.cgm = cgm
        
    def preprocess(self):
        #drop rows with 30% of values missing
        self.cgm=self.cgm.loc[self.cgm.isnull().mean(axis=1)<0.3,:]
        
        #drop last column as it has many missing values for all patients
        self.cgm=self.cgm.iloc[:,:30]
        
        #reset the indices
        self.cgm.reset_index(inplace=True,drop=True)
        
        #interpolate the remaining missing values
        self.cgm.interpolate(method='polynomial',order=3,inplace=True)
        self.cgm.bfill(inplace=True)
        self.cgm.ffill(inplace=True)
        self.cgm=self.cgm.astype('float64')
        
    def fft(self,):
        ndarr = fft.rfft(self.cgm, n=5, axis=1)
        df= pd.DataFrame(data=ndarr)
        df.columns=['fft'+str(i) for i in range(1,df.shape[1]+1)]
        return df
        
    def rolling_mean(self,win,olap):
        df=self.cgm.rolling(window=win,axis=1).apply(np.mean).dropna(axis=1).iloc[:,::olap]
        df.columns=['rm'+str(i) for i in range(1,df.shape[1]+1)]
        return df
    
    def kurtosis(self,win,olap):
        df=self.cgm.rolling(window=win,axis=1).apply(kurtosis).dropna(axis=1).iloc[:,::olap]
        df.columns=['kt'+str(i) for i in range(1,df.shape[1]+1) ]
        return df

    def stdev(self,win,olap):
        df=self.cgm.rolling(window=win,axis=1).apply(np.std).dropna(axis=1).iloc[:,::olap]
        df.columns=['st'+str(i) for i in range(1,df.shape[1]+1)]
        return df
    
    def featureMatrix(self):
        self.preprocess()
        df=pd.concat([self.fft(),self.rolling_mean(10,5),self.stdev(10,5),self.kurtosis(10,5)],axis=1)
        return df

In [461]:
p1m=Patient(pd.read_csv('mealData1.csv'))
p1nm=Patient(pd.read_csv('Nomeal1.csv'))

p2m=Patient(pd.read_csv('mealData2.csv'))
p2nm=Patient(pd.read_csv('Nomeal2.csv'))

p3m=Patient(pd.read_csv('mealData3.csv'))
p3nm=Patient(pd.read_csv('Nomeal3.csv'))

p3m=Patient(pd.read_csv('mealData3.csv'))
p3nm=Patient(pd.read_csv('Nomeal3.csv'))

p4m=Patient(pd.read_csv('mealData4.csv'))
p4nm=Patient(pd.read_csv('Nomeal4.csv'))

p5m=Patient(pd.read_csv('mealData5.csv'))
p5nm=Patient(pd.read_csv('Nomeal5.csv'))

In [462]:
p1m=p1m.featureMatrix()
p1nm=p1nm.featureMatrix()

p2m=p2m.featureMatrix()
p2nm=p2nm.featureMatrix()

p3m=p3m.featureMatrix()
p3nm=p3nm.featureMatrix()

p4m=p4m.featureMatrix()
p4nm=p4nm.featureMatrix()

p5m=p5m.featureMatrix()
p5nm=p5nm.featureMatrix()

In [463]:
alldata=p1m.append([p1nm,p2m,p2nm,p3m,p3nm,p4m,p4nm,p5m,p5nm])
mdata=p1m.append([p2m,p3m,p4m,p5m])
nmdata=p1nm.append([p2nm,p3nm,p4nm,p5nm])

In [464]:
stdscaler = StandardScaler()
mat = stdscaler.fit_transform(alldata)
p = PCA(n_components=5)
p.fit(mat)

mdata=pd.DataFrame(p.transform(mdata))
nmdata=pd.DataFrame(p.transform(nmdata))

mdata['label'] = 1
nmdata['label'] = 0

alldata=mdata.append(nmdata)

In [465]:
data=alldata.iloc[:,:5]
labels=alldata.iloc[:,5]

In [466]:
X_train, X_validation, Y_train, Y_validation = train_test_split(data, labels, test_size=0.20, shuffle=True)

In [467]:
model = MLPClassifier(hidden_layer_sizes=(100,60),learning_rate='adaptive',random_state=7)
results=[]
kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='f1')
results.append(cv_results)
print('%f (%f)' % (cv_results.mean(), cv_results.std()))

0.613961 (0.107355)


In [468]:
model.fit(X_train,Y_train)
predictions = model.predict(X_validation)

In [469]:
pscore = precision_score(Y_validation, predictions, average='binary')
rscore = recall_score(Y_validation, predictions, average='binary')
f1score = f1_score(Y_validation, predictions, average='binary')
print(pscore,rscore,f1score)

0.5053763440860215 1.0 0.6714285714285714


In [470]:
#def predictclass(sample):
    