In [1]:
%load_ext autoreload
%autoreload 2

if __name__ == '__main__' and __package__ is None:
    # Import sys directly: `from os import sys` only works because the os
    # module happens to expose its own `sys` import as an attribute.
    import sys
    from os import path

    # "__file__" is a plain string literal here (not the module attribute), so
    # abspath() resolves it against the current working directory; the two
    # dirname() calls then yield the parent of the working directory.
    parent_dir = path.dirname(path.dirname(path.abspath("__file__")))
    # Only append when missing, so re-running this cell does not keep adding
    # the same entry to sys.path.
    if parent_dir not in sys.path:
        sys.path.append(parent_dir)

In [2]:
import numpy as np
import pandas as pd
import xgboost as xgb

In [3]:
# Build a Booster with no arguments, then load the saved model file into it.
model_path = '../models/basic_XGB_model_1.model'
model = xgb.Booster()
model.load_model(model_path)

#### Loading the data

In [4]:
from src.features import main_preprocess

In [5]:
# Read the raw test set and copy `subject` into a `target` column.
df_raw = pd.read_csv('../data/raw/test_data.csv').assign(
    target=lambda frame: frame['subject']
)
df_raw.head()

Unnamed: 0,index,subject,timestamp,x,y,target
0,765,276fba62d85e,1572007000.0,1740,420,276fba62d85e
1,668,276fba62d85e,1572007000.0,1743,419,276fba62d85e
2,316,276fba62d85e,1572007000.0,1744,418,276fba62d85e
3,1334,276fba62d85e,1572007000.0,1745,417,276fba62d85e
4,900,276fba62d85e,1572007000.0,1746,416,276fba62d85e


In [6]:
# Number of distinct subjects in the raw data.
df_raw['subject'].nunique()

716

In [7]:
# Run main_preprocess.process_files once per subject and collect the results
# in a list (concatenated in a later cell).
df_subjects = []
# groupby(sort=False) yields every subject once, in order of first appearance
# (the same order as .unique()), in a single pass instead of re-scanning the
# whole frame with a boolean mask for each subject. Rows whose subject is NaN
# are skipped by groupby.
for subject, df_group in df_raw.groupby('subject', sort=False):
    # NOTE(review): reset_index() without drop=True keeps the previous index as
    # an extra column -- confirm process_files expects that column.
    df_sub = df_group.reset_index()
    # Re-base timestamps so each subject starts at 0.
    df_sub['timestamp'] = df_sub['timestamp'] - df_sub['timestamp'].iloc[0]
    # The meaning of the third argument (1) is defined by process_files, which
    # is not visible here.
    df_subjects.append(main_preprocess.process_files(df_sub, subject, 1))


In [8]:
# Stack the per-subject results into a single frame. `df_subjects` is rebound
# from the list to the resulting DataFrame, so re-running this cell on its own
# would hand pd.concat a DataFrame instead of a list.
df_subjects = pd.concat(df_subjects, axis=0)
df_subjects.head()

Unnamed: 0,traveled_distance_pixel,elapsed_time,direction_of_movement,straightness,num_points,sum_of_angles,mean_curv,sd_curv,max_curv,min_curv,...,max_a,min_a,mean_jerk,sd_jerk,max_jerk,min_jerk,a_beg_time,n_from,n_to,target
0,101.405679,1.672205,7,0.54058,49,-15.23383,0.037044,0.260066,0.785398,-0.463648,...,49910.007121,-31351.548232,154871.682837,2748253.0,7117331.0,-6654395.0,0.016481,2,1,276fba62d85e
1,226.686449,0.999709,3,0.740074,39,67.235246,-0.027324,0.289079,0.891712,-1.666081,...,81278.763774,-119938.827671,-136869.179527,6699607.0,10955440.0,-16771880.0,0.00785,2,1,276fba62d85e
2,116.964248,1.264075,7,0.647122,47,-3.902605,0.009193,0.413744,1.570796,-1.338973,...,48877.263943,-38149.601577,160784.276635,3922517.0,6649594.0,-7099774.0,0.072148,2,1,276fba62d85e
3,12.0,1.680359,2,1.0,9,10.995574,0.187,0.298028,0.785398,0.523599,...,46957.340379,-16022.767518,-312076.605277,4044690.0,5874821.0,-7707365.0,0.007993,2,1,276fba62d85e
4,49.589294,0.999678,0,0.961417,24,3.715897,-0.019986,0.184179,0.227512,-0.785398,...,90710.089313,-20109.435365,644042.397199,3717340.0,10306010.0,-9538733.0,0.799685,2,1,276fba62d85e


In [9]:
# Number of distinct targets that remain after preprocessing.
df_subjects['target'].nunique()

713

In [10]:
# Cast the count-like / categorical feature columns to int in one call.
integer_columns = [
    'direction_of_movement',
    'num_points',
    'num_critical_points',
    'n_from',
    'n_to',
]
df_subjects = df_subjects.astype({column: int for column in integer_columns})


### Predicting

In [11]:
# For every subject: predict a class for each of its rows, then take the most
# frequent class as the subject-level label.
predictions = {}
# groupby(sort=False) visits each target once, in order of first appearance
# (same order as .unique()), without re-filtering the whole frame per subject.
for i, (subject, df_group) in enumerate(df_subjects.groupby('target', sort=False)):
    df_sub = df_group.reset_index(drop=True)
    # iloc[:, :-1] drops the last column, which is expected to be `target`.
    d_matrix = xgb.DMatrix(data=df_sub.iloc[:, :-1])
    preds = model.predict(d_matrix)
    # assumes preds is 2-D with one row of per-class scores per sample -- TODO confirm
    y_pred = np.argmax(preds, axis=1)
    # Votes per class; np.argmax below resolves ties to the lowest class index.
    count = np.bincount(y_pred)
    predictions[i] = {
        'id': subject,
        'predictions': y_pred,
        'label': np.argmax(count),
        'count': count,
    }

In [12]:
# Preview the sample submission file.
sample_submission_path = '../data/raw/sample_submission.csv'
df_final = pd.read_csv(sample_submission_path)
df_final.head()

Unnamed: 0,id,label
0,86e6f7fc2e7b,1
1,2a3e7156e716,1
2,9c4a194c95bf,3
3,6c72198b837c,3
4,5155dc4adbfe,3


In [26]:
# Re-read the submission template so this cell gives the same result on re-run.
df_final = pd.read_csv('../data/raw/sample_submission.csv')
# NOTE(review): the documented spelling of this option is 'index'.
df_pred = pd.DataFrame.from_dict(predictions, orient='Index')

# One vectorised assignment replaces the per-id loop, which ran two boolean
# scans per id. Ids in df_pred are expected to be unique.
label_by_id = dict(zip(df_pred['id'], df_pred['label']))
has_prediction = df_final['id'].isin(df_pred['id'])
# Rows whose id has no prediction are left untouched, i.e. they keep the label
# that was read from sample_submission.csv.
df_final.loc[has_prediction, 'label'] = df_final.loc[has_prediction, 'id'].map(label_by_id)

In [27]:
# First five rows of the per-subject prediction table.
df_pred.head(n=5)

Unnamed: 0,id,predictions,label,count
0,276fba62d85e,"[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...",3,"[0, 2, 0, 38]"
1,f8c8eb72f644,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,[11]
2,0febae6ce749,"[0, 0, 0, 0, 0]",0,[5]
3,dae029b817e4,"[0, 0, 0]",0,[3]
4,dada21bc7f87,"[1, 1, 1, 3, 3, 3, 1, 3, 3, 3, 3, 0, 3, 1, 1, ...",1,"[1, 14, 0, 8]"


In [28]:
# Print the (rows, columns) shape, then display the first rows.
final_shape = df_final.shape
print(final_shape)
df_final.head()

(731, 2)


Unnamed: 0,id,label
0,86e6f7fc2e7b,1
1,2a3e7156e716,1
2,9c4a194c95bf,3
3,6c72198b837c,1
4,5155dc4adbfe,3


In [29]:
# Write the submission file without the DataFrame index column.
submission_path = '../Submissions/sub_1.csv'
df_final.to_csv(submission_path, index=False)