In [1]:
import pandas as pd
import numpy as np
from pprint import pprint
from sklearn.impute import SimpleImputer
from tsfresh import extract_features, select_features,feature_selection
from tsfresh.utilities.dataframe_functions import impute
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from joblib import load, dump
import warnings
warnings.filterwarnings('ignore')
import logging
logging.getLogger('tsfresh').setLevel(logging.ERROR)
logging.getLogger('sklearn').setLevel(logging.ERROR)
from sklearn.cluster import KMeans

In [2]:
def _read_csv_skipping_bad_lines(path):
    """Read a header-less CSV, skipping malformed rows, on old and new pandas."""
    try:
        # Spelling used by pandas >= 1.3; `error_bad_lines` was removed in 2.0.
        return pd.read_csv(path, header=None, on_bad_lines='skip')
    except TypeError:
        # Older pandas does not accept `on_bad_lines`; use the legacy keyword.
        return pd.read_csv(path, header=None, error_bad_lines=False)


def train():
    """Build the feature table from the five meal files and fit scaler + KMeans.

    Steps:
      1. Read ``data/mealData<k>.csv`` and ``data/mealAmountData<k>.csv`` for
         k = 1..5 and attach the first ``len(meal rows)`` amounts (column 0) to
         the meal rows as ``carbo``.
      2. Mean-impute the combined table via ``impute_data`` and rename its last
         column to ``target``.
      3. Extract features via ``feature_extract`` (which writes the feature CSV),
         then split 67/33 with ``random_state=42``.
      4. Fit a ``MinMaxScaler`` and a 10-cluster ``KMeans`` on the training part.

    Files written: ``features_file.csv``, ``train_data.csv``, ``scaler.save``
    and ``kmeans.bin``. Prints the shape of the scaled test matrix.

    Raises:
        ValueError: if an amount file has fewer rows than its meal file.
    """
    # One path for both writing and reading the features. The original wrote to
    # 'data/features_file.csv' but read 'features_file.csv', so training could
    # silently use a stale file (or fail when none existed).
    features_path = 'features_file.csv'

    frames = []
    for file_no in range(1, 6):
        meals = _read_csv_skipping_bad_lines('data/mealData' + str(file_no) + '.csv')
        amounts = _read_csv_skipping_bad_lines('data/mealAmountData' + str(file_no) + '.csv')
        if len(amounts) < len(meals):
            raise ValueError(
                'mealAmountData' + str(file_no) + '.csv has fewer rows ('
                + str(len(amounts)) + ') than mealData' + str(file_no)
                + '.csv (' + str(len(meals)) + ')'
            )
        # Amounts are paired with meal rows purely by position.
        # NOTE(review): rows skipped as malformed in the meal file shift the
        # remaining rows up, so this pairing may be off after a skip -- confirm.
        meals['carbo'] = amounts[0].iloc[:len(meals)].to_numpy()
        frames.append(meals)

    # Concatenate once instead of growing a DataFrame inside the loop.
    result = pd.concat(frames, ignore_index=True)
    # Keep 'carbo' as the last column even when the files differ in width,
    # because the step below treats the last column as the target.
    result = result[[col for col in result.columns if col != 'carbo'] + ['carbo']]

    # impute_data returns a plain array, so rebuild a frame and name the target.
    result = pd.DataFrame(impute_data(result))
    columns = list(result.columns)
    columns[-1] = 'target'
    result.columns = columns

    feature_extract(result, features_path)
    # Read the features back from disk so training uses exactly the values that
    # later cells load from the same file.
    data = pd.read_csv(features_path)
    X_train, X_test, y_train, y_test = train_test_split(
        data.iloc[:, :-1], data['y'], test_size=0.33, random_state=42
    )
    X_train.to_csv('train_data.csv')

    scaler = MinMaxScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    print(X_test.shape)
    dump(scaler, "scaler.save")

    kmeans = KMeans(n_clusters=10, random_state=0).fit(X_train)
    dump(kmeans, "kmeans.bin")

In [3]:
def impute_data(result):
    """Replace NaN entries with the mean of their column.

    Args:
        result: 2-D numeric data (DataFrame or array-like) that may contain NaN.

    Returns:
        The array returned by ``SimpleImputer.transform`` -- column labels and
        index of a DataFrame input are not preserved.
    """
    if isinstance(result, pd.DataFrame):
        # scikit-learn >= 1.2 rejects frames whose column labels mix str and
        # non-str types (e.g. 0..n plus 'carbo'). Stringify on a copy so the
        # caller's frame is left untouched; the returned values are unaffected.
        result = result.rename(columns=str)
    imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
    imp_mean.fit(result)
    return imp_mean.transform(result)

In [4]:
def feature_extract(result, filename):
    """Extract five tsfresh features per row and save them with the target.

    Each row of ``result`` (excluding ``target``) is treated as one time series:
    the frame is stacked into long form with the row label as ``id`` and the
    column label as ``time``, then passed to ``tsfresh.extract_features``.

    Args:
        result: DataFrame with a ``target`` column; every other column is one
            time step. The frame is not modified.
        filename: Path of the CSV to write (no index column).

    Returns:
        DataFrame holding the five selected feature columns plus ``y``
        (the ``target`` values, aligned on the row index).
    """
    selected_features = [
        '0__spkt_welch_density__coeff_2',
        '0__fft_coefficient__coeff_1__attr_"abs"',
        '0__partial_autocorrelation__lag_1',
        '0__autocorrelation__lag_1',
        '0__autocorrelation__lag_2',
    ]

    y = result['target']
    # Drop on a copy: the original removed 'target' from the caller's frame in
    # place, which made a second call on the same frame fail.
    series_wide = result.drop('target', axis=1)

    d = series_wide.stack()
    d.index.names = ['id', 'time']
    d = d.reset_index()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        f = extract_features(d, column_id="id", column_sort="time")
    impute(f)  # tsfresh's impute fills NaN/inf in place
    assert f.isnull().sum().sum() == 0

    # Explicit copy so that adding 'y' does not write into a slice of `f`.
    f = f[selected_features].copy()
    f['y'] = y
    f.to_csv(filename, index=False)
    return f

In [5]:
# Run the whole training pipeline when executed as the main module
# (running this cell in a notebook kernel also satisfies the guard).
if __name__=="__main__":
    train()

Feature Extraction: 100%|██████████| 20/20 [00:04<00:00,  4.27it/s]


In [7]:
# Load the model stored in "kmeans.bin" (the file train() dumps its KMeans to).
kmeans_pro = load("kmeans.bin")

In [14]:
# Rebuild the same train/test split used during training (same file, same
# test_size and random_state) and scale both parts with the saved scaler.
data = pd.read_csv("features_file.csv")
X_train, X_test, y_train, y_test = train_test_split(
    data.iloc[:, :-1],
    data['y'],
    test_size=0.33,
    random_state=42,
)
scaler = load("scaler.save")
X_train, X_test = (scaler.transform(part) for part in (X_train, X_test))

In [15]:
# Wrap the held-out targets in a DataFrame so further columns can be added.
y_test_df = pd.DataFrame(y_test)

In [16]:
# Store the cluster id that the loaded KMeans model assigns to each test row.
y_test_df['Predicted'] = kmeans_pro.predict(X_test)

In [17]:
# Display the test targets next to their predicted cluster ids.
y_test_df

Unnamed: 0,y,Predicted
190,40.0,2
6,60.0,1
79,40.0,7
205,42.0,0
117,0.0,9
185,20.0,0
201,9.0,0
167,20.0,9
9,65.0,1
30,40.0,4


In [18]:
# Order the rows by target value (ascending); sort_values returns a new frame.
y_final = y_test_df.sort_values(by=["y"])

1 --> <10
2 --> 10-20
3 --> 40-50
4 --> 50-60
5 --> (TODO: range not yet filled in)

In [19]:
# Save the sorted targets and predictions to 'fin.csv' (index included).
y_final.to_csv('fin.csv')

In [20]:
# Range label looked up by predicted cluster id (list position == cluster id).
# The order is deliberately not ascending: position 4 is "60-70", position 5 is
# "50-60" and "0-10" sits last.
# NOTE(review): presumably assigned by hand after inspecting the clusters --
# confirm the mapping still holds whenever the KMeans model is refitted.
carb_matrix = ["10-20","20-30","30-40","40-50","60-70","50-60","70-80","80-90","90-100","0-10"]

In [21]:
# Translate each predicted cluster id into its range label, in row order.
final = [carb_matrix[int(cluster_id)] for cluster_id in y_final['Predicted']]

In [22]:
# Attach the range labels; `final` is in the same row order as y_final.
y_final['carb_values'] = final

In [23]:
# Display targets, predicted cluster ids and the mapped range labels.
y_final

Unnamed: 0,y,Predicted,carb_values
56,0.0,4,60-70
75,0.0,4,60-70
137,0.0,9,0-10
67,0.0,8,90-100
38,0.0,1,20-30
219,0.0,2,30-40
104,0.0,7,80-90
152,0.0,9,0-10
120,0.0,9,0-10
139,0.0,9,0-10
