In [20]:
import numpy as np
import pandas as pd
import datetime
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle


In [21]:
# Feature extraction
# tmax - tm
# CGM_max - CGM_min
# max CGM velocity, time at which the velocity is max
# FFT - half sinusoidal - get two most dominant frequency buckets
# windowed mean - window size = 6: you will get 4 and 5 means
# take middle 5 means - window size = 3

def absorption_time(row):
    """Return 5 times the index label of the row's peak reading.

    A row with exactly 30 readings is searched only over positions 6..29;
    a row of any other length is searched in full. NaN readings are skipped
    when locating the maximum.

    The result is derived from the index *label* returned by ``idxmax``
    (converted with ``int``), so the row's labels must be integer-like.
    The factor 5 presumably reflects a 5-minute sampling interval; the
    caller stores this value in a column named 'absorption_time (mins)'.
    """
    search_span = row.iloc[6:30] if row.size == 30 else row
    peak_label = search_span.idxmax(skipna=True)
    return int(peak_label) * 5

def CGM_max_velocity(row):
    """Find the largest rate of change along a row of readings.

    The velocity at each position is a finite difference that assumes a
    spacing of 5 between consecutive readings (presumably minutes):
    one-sided at the first and last position, centred everywhere else.

    Parameters
    ----------
    row : pandas.Series
        Readings in acquisition order.

    Returns
    -------
    tuple
        ``(max_velocity, 5 * position)`` of the largest velocity; on ties
        the earliest position wins. ``(None, None)`` is returned when no
        velocity can be computed, i.e. the row has fewer than two readings
        or every difference is NaN.
    """
    n = row.size
    if n < 2:
        # A difference needs two readings; without this guard the
        # one-sided difference at position 0 would index past the end.
        return (None, None)

    vmax = None
    vmaxtime = None
    for i in range(n):
        if i == 0:
            v = (row.iloc[1] - row.iloc[0]) / 5
        elif i == n - 1:
            v = (row.iloc[i] - row.iloc[i - 1]) / 5
        else:
            v = (row.iloc[i + 1] - row.iloc[i - 1]) / 10

        if pd.isna(v):
            # Skip undefined velocities everywhere. Previously a NaN at
            # position 0 became the running maximum and could never be
            # replaced, because every comparison against NaN is False.
            continue

        if vmax is None or v > vmax:
            vmax = v
            vmaxtime = i * 5
    return (vmax, vmaxtime)

def CGM_FFT(row):
    """Return the frequencies of the two strongest non-DC FFT bins.

    The row is transformed with ``np.fft.fft`` and each bin's power is the
    squared magnitude of its coefficient. Bin 0 is excluded from the scan.
    Frequencies come from ``np.fft.fftfreq`` with a sample spacing of 300
    (presumably seconds, i.e. 5 minutes).

    Returns
    -------
    tuple
        ``(strongest_freq, second_strongest_freq)``. An entry stays ``None``
        when no bin with power above zero was found for it (for example
        when the row contains NaN).

    NOTE(review): for real-valued input the spectrum is mirror-symmetric,
    so the second value normally comes out as the negative of the first --
    confirm whether that is the intended feature.
    """
    spectrum = np.fft.fft(row)
    power = np.square(spectrum.real) + np.square(spectrum.imag)
    freqs = np.fft.fftfreq(row.size, d=300)

    best_power, best_freq = 0, None
    second_power, second_freq = 0, None

    # Walk every bin except DC, keeping a running top two by power.
    for bin_power, bin_freq in zip(power[1:], freqs[1:]):
        if bin_power > best_power:
            # New leader: the previous leader drops to second place.
            second_power, second_freq = best_power, best_freq
            best_power, best_freq = bin_power, bin_freq
        elif bin_power > second_power:
            second_power, second_freq = bin_power, bin_freq

    return (best_freq, second_freq)

# take mean of middle 5 windows of 3 length
def windowed_mean(row):
    """Return the means of five consecutive 3-reading windows.

    The windows cover 15 readings starting at position 7 for a row of
    exactly 30 readings, and at position 4 for any other row length.

    Returns
    -------
    tuple
        Five window means, in order of position.
    """
    start = 7 if row.size == 30 else 4
    middle = row.iloc[start:start + 15]
    return tuple(
        (middle.iloc[offset] + middle.iloc[offset + 1] + middle.iloc[offset + 2]) / 3
        for offset in range(0, 15, 3)
    )

def extract_data(input_df):
    """Build the feature table for a frame of readings (one sample per row).

    Each row of ``input_df`` is passed to the feature helpers defined in
    this notebook. The resulting frame has these columns, in this order:
    'absorption_time (mins)', 'CGM_range', 'CGM_max_vel',
    'CGM_max_vel_time', 'CGM_max_freq', 'CGM_max2_freq',
    'CGM_wm1' .. 'CGM_wm5'.
    """
    features = pd.DataFrame()

    # Scalar features: one value per row.
    features['absorption_time (mins)'] = input_df.apply(absorption_time, axis=1)
    features['CGM_range'] = input_df.apply(
        lambda readings: readings.max() - readings.min(), axis=1
    )

    # Tuple-valued features: transpose the per-row tuples into columns.
    peak_velocity, peak_velocity_time = zip(*input_df.apply(CGM_max_velocity, axis=1))
    features['CGM_max_vel'] = peak_velocity
    features['CGM_max_vel_time'] = peak_velocity_time

    first_freq, second_freq = zip(*input_df.apply(CGM_FFT, axis=1))
    features['CGM_max_freq'] = first_freq
    features['CGM_max2_freq'] = second_freq

    wm1, wm2, wm3, wm4, wm5 = zip(*input_df.apply(windowed_mean, axis=1))
    features['CGM_wm1'] = wm1
    features['CGM_wm2'] = wm2
    features['CGM_wm3'] = wm3
    features['CGM_wm4'] = wm4
    features['CGM_wm5'] = wm5

    return features

In [22]:
# Read the headerless test readings and coerce every column to numeric.
test_df = pd.read_csv(
    './test.csv', header=None, skipinitialspace=True
).apply(pd.to_numeric)

# Derive the feature table from the raw readings.
test_ext_df = extract_data(test_df)

In [23]:
# Display the numeric test readings loaded from ./test.csv.
test_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,248.0,241.0,231.0,220.0,222.0,222.0,222.0,223.0,224.0,228.0,...,270.0,277.0,274.0,269.0,267.0,267.0,274.0,284.0,283.0,278.0
1,83.0,87.0,100.0,112.0,121.0,125.0,130.0,132.0,123.0,112.0,...,67.0,71.0,75.0,74.0,72.0,70.0,67.0,74.0,77.0,81.0
2,201.0,194.0,188.0,183.0,181.0,176.0,170.0,168.0,169.0,172.0,...,210.0,213.0,212.0,216.0,213.0,210.0,210.0,209.0,210.0,209.0
3,125.0,122.0,123.0,130.0,149.0,162.0,169.0,179.0,192.0,203.0,...,200.0,196.0,189.0,183.0,179.0,177.0,173.0,165.0,153.0,152.0
4,55.0,58.0,70.0,77.0,87.0,99.0,108.0,112.0,120.0,127.0,...,132.0,140.0,147.0,157.0,172.0,175.0,183.0,191.0,198.0,200.0
5,198.0,194.0,197.0,199.0,201.0,205.0,206.0,206.0,204.0,197.0,...,155.0,147.0,139.0,132.0,126.0,123.0,123.0,111.0,109.0,107.0
6,147.0,148.0,149.0,153.0,167.0,184.0,196.0,201.0,212.0,223.0,...,216.0,224.0,235.0,241.0,239.0,245.0,272.0,262.0,253.0,248.0
7,209.0,222.0,226.0,237.0,238.0,251.0,250.0,247.0,247.0,244.0,...,226.0,228.0,231.0,229.0,217.0,202.0,187.0,181.0,177.0,174.0
8,77.0,83.0,92.0,107.0,115.0,125.0,130.0,132.0,131.0,134.0,...,130.0,131.0,130.0,127.0,125.0,124.0,127.0,128.0,128.0,128.0
9,194.0,201.0,198.0,190.0,180.0,169.0,163.0,157.0,155.0,161.0,...,159.0,162.0,162.0,158.0,145.0,147.0,151.0,155.0,152.0,148.0


In [24]:
# Display the feature table produced by extract_data for the test readings.
test_ext_df

Unnamed: 0,absorption_time (mins),CGM_range,CGM_max_vel,CGM_max_vel_time,CGM_max_freq,CGM_max2_freq,CGM_wm1,CGM_wm2,CGM_wm3,CGM_wm4,CGM_wm5
0,105,64.0,2.3,55,0.000139,-0.000139,222.000000,225.000000,244.000000,269.666667,270.000000
1,35,66.0,2.5,10,0.000139,-0.000139,125.333333,122.333333,78.333333,68.000000,73.666667
2,85,48.0,2.4,55,0.000139,-0.000139,175.666667,169.666667,191.333333,209.333333,213.666667
3,60,91.0,3.2,20,0.000139,-0.000139,160.000000,191.333333,212.000000,200.000000,183.666667
4,115,145.0,2.5,85,0.000139,-0.000139,98.000000,119.666667,132.333333,133.000000,158.666667
5,30,99.0,0.6,20,0.000139,-0.000139,204.000000,202.333333,178.666667,155.000000,132.333333
6,100,125.0,3.3,95,0.000139,-0.000139,182.333333,212.000000,217.666667,218.666667,238.333333
7,25,77.0,2.6,0,0.000139,-0.000139,246.333333,246.000000,230.333333,225.666667,225.666667
8,45,57.0,2.4,10,0.000139,-0.000139,123.333333,132.333333,130.333333,130.333333,127.333333
9,5,56.0,1.4,0,0.000278,-0.000278,170.666667,157.666667,168.666667,161.000000,155.000000


In [25]:
# Load the saved model from disk and predict one label per feature row.
# NOTE: pickle.load can execute arbitrary code while deserialising; only
# load model files that come from a trusted source.
filename = 'finalized_model.sav'
with open(filename, 'rb') as model_file:
    # The context manager closes the file handle even if unpickling fails.
    loaded_model = pickle.load(model_file)
result = loaded_model.predict(test_ext_df.to_numpy())
# Write the predictions as integers, one per line, with no header row.
np.savetxt("Result.csv", result, delimiter=",", fmt="%d")

In [26]:
# Result.csv is written by np.savetxt without a header row, so header=None
# keeps the first prediction as data instead of consuming it as a column name.
file = pd.read_csv(r'Result.csv', header=None)

In [27]:
# Display the predictions read back from Result.csv.
file

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0
9,0
