In [None]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
from sklearn.preprocessing import MinMaxScaler

# Load the tab-separated MU recording into a DataFrame.
# header=0: the file's first line is consumed as a header row and its names
# are replaced by the ones given below.
# NOTE(review): if MU.txt has no header line, its first record is dropped
# here -- confirm against the file. Switching to header=None would also shift
# every row label by one, and later cells look rows up by label.
df = pd.read_csv(
    "data/MU.txt",
    sep="\t",
    header=0,
    names=["id", "event", "device", "channel", "code", "size", "data"],
)

# Preview the first rows
df.head()

In [None]:
# Parse each comma-separated sample string into a numeric float array rather
# than an array of strings, so later numeric steps do not depend on implicit
# string-to-float conversion. Values that are already arrays are only cast,
# which makes this cell safe to re-run.
df['data'] = df['data'].apply(
    lambda x: np.asarray(x, dtype=float)
    if isinstance(x, np.ndarray)
    else np.array(x.split(','), dtype=float)
)


In [None]:
# Group the rows by channel name.
grouped = df.groupby('channel')

# get_group returns a slice of df; take an explicit copy so the columns added
# to fp1 in later cells do not trigger SettingWithCopyWarning and cannot be
# confused with writes to df.
fp1 = grouped.get_group('FP1').copy()


In [None]:
# Median of the 'size' column over the FP1 rows; later cells use it (truncated
# to int) as the common length every signal is resampled to.
size_column = fp1["size"]
median_fp1 = size_column.median()
print(median_fp1)

In [None]:
# Count the missing values in every column of the full frame.
missing_mask = df.isna()
nan_in_df = missing_mask.sum()
print(nan_in_df)

In [None]:
# One shared MinMaxScaler; fit_transform refits it on every call, so each
# signal is min-max scaled on its own, as an (n, 1) column.
scaler = MinMaxScaler()
fp1['data_scaled'] = fp1['data'].apply(
    lambda signal: scaler.fit_transform(signal.reshape(-1, 1))
)

In [None]:
# Flatten each (n, 1) scaled array into a flat list of values.
fp1['data_scaled'] = fp1['data_scaled'].apply(
    lambda column: [value for row in column for value in row]
)

In [None]:
def interpolate_row(data, new_size):
    """Linearly resample a 1-D sequence of samples to ``new_size`` points.

    The old and the new sample positions are both spread evenly over [0, 1],
    so the first and last samples are kept and the points in between are
    obtained by piecewise-linear interpolation.

    Parameters
    ----------
    data : sequence
        1-D sequence of numbers (anything ``np.asarray(..., dtype=float)``
        accepts, e.g. a list or an ndarray).
    new_size : int
        Number of samples in the result.

    Returns
    -------
    numpy.ndarray
        Float array of length ``new_size``. An array is returned in every
        case, including when ``data`` already has the requested length.

    Raises
    ------
    ValueError
        If ``data`` is empty while ``new_size`` is not 0, or if ``data``
        cannot be converted to floats.
    """
    values = np.asarray(data, dtype=float)
    old_size = len(values)
    if old_size == new_size:
        # Nothing to resample; still hand back an ndarray for a uniform type.
        return values
    if old_size == 0:
        raise ValueError("cannot interpolate an empty sequence")
    x_old = np.linspace(0, 1, old_size)
    x_new = np.linspace(0, 1, new_size)
    # np.interp performs 1-D piecewise-linear interpolation and also accepts a
    # single sample point (the value is then repeated).
    return np.interp(x_new, x_old, values)


In [None]:
# Preview the first five FP1 rows.
fp1.head(n=5)

In [None]:
# Resample every scaled FP1 signal to the median length (truncated to int).
# The source column is addressed by name instead of by position 7, and the
# result is built with apply instead of growing an empty, dtype-less Series
# row by row; both of the old forms are deprecated in pandas.
fp1['new_data'] = fp1['data_scaled'].apply(
    lambda signal: interpolate_row(signal, int(median_fp1))
)


In [None]:
# Preview the first five FP1 rows again.
fp1.head(n=5)

The same steps can be applied to any other channel; the `preprocess` function below bundles them.

In [None]:
def preprocess(tempdf):
    """Min-max scale and length-normalise every signal of a frame.

    Each entry of the ``data`` column is scaled on its own with a
    MinMaxScaler, flattened to a list, and then linearly resampled with
    ``interpolate_row`` to the median of the ``size`` column (truncated to
    int).

    Parameters
    ----------
    tempdf : pandas.DataFrame
        Frame with a ``size`` column and a ``data`` column holding one 1-D
        sequence of samples per row.

    Returns
    -------
    pandas.DataFrame
        A copy of ``tempdf`` with two extra columns: ``data_scaled`` (list of
        scaled values) and ``new_data`` (resampled signal). The input frame is
        left untouched, so passing a groupby slice raises no
        SettingWithCopyWarning.
    """
    result = tempdf.copy()
    median = result['size'].median()

    # A scaler owned by this call, so the function does not depend on a
    # global defined in another cell. fit_transform refits it for every row.
    row_scaler = MinMaxScaler()

    def _scale(signal):
        # Cast to float and shape as an (n, 1) column, which is the layout the
        # scaler expects; then flatten the result back into a list of values.
        column = np.asarray(signal, dtype=float).reshape(-1, 1)
        return list(row_scaler.fit_transform(column).ravel())

    # Normalization
    result['data_scaled'] = result['data'].apply(_scale)

    # Resampling: the column is addressed by name rather than by position 7.
    # int(median) stays inside the callable so an empty frame still passes
    # through without evaluating int() on a NaN median.
    result['new_data'] = result['data_scaled'].apply(
        lambda signal: interpolate_row(signal, int(median))
    )
    return result
    


In [None]:
    #Grouping
# Select the FP2 rows and run the shared pipeline on them. The group is copied
# first because get_group returns a slice of df, and adding columns to a slice
# triggers SettingWithCopyWarning. preprocess computes the median of 'size'
# itself, so no separate median is needed here.
fp2 = preprocess(grouped.get_group('FP2').copy())

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the first FP1 signal. .iloc selects by position: fp1 keeps the row
# labels of df, so a label lookup of 0 only works when df's row 0 happens to
# be an FP1 row and raises KeyError otherwise.
plt.plot(fp1['new_data'].iloc[0])
plt.title("FP1: first signal (scaled, resampled)")
plt.show()

In [None]:
# Plot the first FP2 signal. .iloc selects by position: fp2 keeps the row
# labels of df, so a label lookup of 1 only works when df's row 1 happens to
# be an FP2 row and raises KeyError otherwise.
# NOTE(review): assumes the row labelled 1 was meant as "the first FP2 row" --
# confirm.
plt.plot(fp2['new_data'].iloc[0])
plt.title("FP2: first signal (scaled, resampled)")
plt.show()

In [None]:
# Load the tab-separated MW recording into a DataFrame.
# header=0: the file's first line is consumed as a header row and its names
# are replaced by the ones given below.
# NOTE(review): if MW.txt has no header line, its first record is dropped
# here -- confirm against the file.
df2 = pd.read_csv(
    "data/MW.txt",
    sep="\t",
    header=0,
    names=["id", "event", "device", "channel", "code", "size", "data"],
)
df2.head()


In [None]:
# Parse each comma-separated sample string into a numeric float array rather
# than an array of strings, so later numeric steps do not depend on implicit
# string-to-float conversion. Values that are already arrays are only cast,
# which makes this cell safe to re-run.
df2['data'] = df2['data'].apply(
    lambda x: np.asarray(x, dtype=float)
    if isinstance(x, np.ndarray)
    else np.array(x.split(','), dtype=float)
)

In [None]:
# Scale and resample the MW signals: the same steps as for the MU channels,
# applied to the whole frame.
median = df2['size'].median()

# Min-max scale each signal on its own (fit_transform refits the shared scaler
# per row) and flatten the (n, 1) result into a list of values.
df2['data_scaled'] = df2['data'].apply(
    lambda x: list(scaler.fit_transform(x.reshape(-1, 1)).ravel())
)

# Resample every signal to the median length (truncated to int). The source
# column is addressed by name instead of by position 7, and the result is
# built with apply instead of growing an empty, dtype-less Series row by row;
# both of the old forms are deprecated in pandas.
df2['new_data'] = df2['data_scaled'].apply(
    lambda signal: interpolate_row(signal, int(median))
)
    

In [None]:
# Plot the resampled signal stored under row label 0 of the MW frame.
first_mw_signal = df2.loc[0, 'new_data']
plt.plot(first_mw_signal)