## Predicting Freezing of Gait episodes in Parkinson's disease

This notebook was created to train a machine learning model for the Kaggle competition "Parkinson's Freezing of Gait Prediction"
(event detection from wearable sensor data), launched by The Michael J. Fox Foundation in March 2023.

A detailed description of the competition and its data, as well as the data itself, can be found at
https://www.kaggle.com/competitions/tlvmc-parkinsons-freezing-gait-prediction


The goal was to predict the probability of 3 FOG (freezing of gait) types: Turn, Walking and StartHesitation, based on values from 3 motion sensors worn by patients with Parkinson's disease on their back. In addition to the motion sensor recordings, there were also datasets with metadata about the patients, such as age, sex, medication status and many others.

This notebook achieved a public score of 0.187187 (computed on 1/3 of the test set) and a private score of 0.286148 (computed on the remaining 2/3 of the test data).
In this notebook several approaches were used:
- window of size 20 for sensor measurements data
- XGBoost classifier was chosen for model algorithm 
- sensor time series data was combined with metadata params: years_since_dx, age, sex, nfogq, meds, subject, is_home, task

In [1]:
import os
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, Float, Boolean, Numeric
import gc

# Display the float16 limits (resolution, min, max); the lagged sensor columns
# are cast to np.float16 through the `types` mapping defined further down.
np.finfo(np.float16)

finfo(resolution=0.001, min=-6.55040e+04, max=6.55040e+04, dtype=float16)

In [2]:
# Start from a clean slate: remove any database file left by a previous run.
filePath = 'freezing_gate.db'
if os.path.exists(filePath):
    os.remove(filePath)

engine = create_engine('sqlite:///freezing_gate.db')
meta = MetaData()

# Sensor columns: for every slot 0..20 one column per channel, in the order
# accv<slot>, accml<slot>, accap<slot>. Slot 0 holds the current sample, the
# higher slots hold values copied from the lag buffer by the import cells.
# NOTE(review): Numeric(1, 5) declares precision 1 with scale 5, which looks
# inverted - confirm the intended precision/scale.
sensor_columns = [
    Column('{0}{1}'.format(channel, slot), Numeric(1, 5))
    for slot in range(21)
    for channel in ('accv', 'accml', 'accap')
]

table_tdcsfog = Table(
    'tdcsfog', meta,
    Column('id', String, primary_key=True),
    Column('time', Integer),
    *sensor_columns,
    # Label and metadata columns.
    Column('result', Integer),
    Column('years_since_dx', Float),
    Column('age', Integer),
    Column('sex', Integer),
    Column('nfogq', Integer),
    Column('meds', Integer),
    Column('subject', String),
    Column('is_home', Integer),
    Column('task', Integer),
)

meta.create_all(engine)


In [3]:
import os
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, Float, Boolean
from sqlalchemy.sql import text as sa_text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import scoped_session, sessionmaker
# Engine for the SQLite file plus the declarative base used by the ORM
# mapping declared right below.
engine = create_engine('sqlite:///freezing_gate.db')
Base = declarative_base()

# Scoped session registry bound to the engine. Autoflush is disabled so the
# import loops decide when pending rows are written; objects are not expired
# on commit.
DBSession = scoped_session(
    sessionmaker(bind=engine, autoflush=False, expire_on_commit=False)
)

class Tdcsfog(Base):
    """ORM mapping for one row of the ``tdcsfog`` table.

    Only the columns written through ``DBSession`` by the import cells are
    mapped here: the row id, the time index, the current sample (slot 0), the
    lag slots 1..20 for each of the three channels, the label (``result``) and
    the ``is_home`` / ``task`` flags. The remaining table columns
    (``years_since_dx``, ``age``, ``sex``, ``nfogq``, ``meds``, ``subject``)
    are not mapped; later cells fill them with Core UPDATE statements.
    """
    __tablename__ = "tdcsfog"
    # Row id; the import cells build it as "<recording id>_<time>".
    id=Column(String, primary_key=True)
    time=Column(Integer)
    # Slot 0: the current sample.
    accv0=Column(Numeric(1, 5))
    accml0=Column(Numeric(1, 5))
    accap0=Column(Numeric(1, 5))
    # Slots 1..20: values copied from the lag buffer (setAccValuesFromBuffer).
    accv1=Column(Numeric(1, 5))
    accml1=Column(Numeric(1, 5))
    accap1=Column(Numeric(1, 5))
    accv2=Column(Numeric(1, 5))
    accml2=Column(Numeric(1, 5))
    accap2=Column(Numeric(1, 5))
    accv3=Column(Numeric(1, 5))
    accml3=Column(Numeric(1, 5))
    accap3=Column(Numeric(1, 5))
    accv4=Column(Numeric(1, 5))
    accml4=Column(Numeric(1, 5))
    accap4=Column(Numeric(1, 5))
    accv5=Column(Numeric(1, 5))
    accml5=Column(Numeric(1, 5))
    accap5=Column(Numeric(1, 5))
    accv6=Column(Numeric(1, 5))
    accml6=Column(Numeric(1, 5))
    accap6=Column(Numeric(1, 5))
    accv7=Column(Numeric(1, 5))
    accml7=Column(Numeric(1, 5))
    accap7=Column(Numeric(1, 5))
    accv8=Column(Numeric(1, 5))
    accml8=Column(Numeric(1, 5))
    accap8=Column(Numeric(1, 5))
    accv9=Column(Numeric(1, 5))
    accml9=Column(Numeric(1, 5))
    accap9=Column(Numeric(1, 5))
    accv10=Column(Numeric(1, 5))
    accml10=Column(Numeric(1, 5))
    accap10=Column(Numeric(1, 5))
    accv11=Column(Numeric(1, 5))
    accml11=Column(Numeric(1, 5))
    accap11=Column(Numeric(1, 5))
    accv12=Column(Numeric(1, 5))
    accml12=Column(Numeric(1, 5))
    accap12=Column(Numeric(1, 5))
    accv13=Column(Numeric(1, 5))
    accml13=Column(Numeric(1, 5))
    accap13=Column(Numeric(1, 5))
    accv14=Column(Numeric(1, 5))
    accml14=Column(Numeric(1, 5))
    accap14=Column(Numeric(1, 5))
    accv15=Column(Numeric(1, 5))
    accml15=Column(Numeric(1, 5))
    accap15=Column(Numeric(1, 5))
    accv16=Column(Numeric(1, 5))
    accml16=Column(Numeric(1, 5))
    accap16=Column(Numeric(1, 5))
    accv17=Column(Numeric(1, 5))
    accml17=Column(Numeric(1, 5))
    accap17=Column(Numeric(1, 5))
    accv18=Column(Numeric(1, 5))
    accml18=Column(Numeric(1, 5))
    accap18=Column(Numeric(1, 5))
    accv19=Column(Numeric(1, 5))
    accml19=Column(Numeric(1, 5))
    accap19=Column(Numeric(1, 5))
    accv20=Column(Numeric(1, 5))
    accml20=Column(Numeric(1, 5))
    accap20=Column(Numeric(1, 5))
    # Label code: 0 = none, 1 = Walking, 2 = Turn, 3 = StartHesitation
    # (encoding used by the import cells).
    result=Column(Integer)
    is_home=Column(Integer)
    # Task code; stays 0 unless a later cell updates it.
    task=Column(Integer, default=0)

# Module-level counter, initialised to 0 (nothing in the visible cells
# increments it).
t = 0

# dtype mapping: every lag slot 1..20 of the three channels is float16, the
# label / flag columns are int8. Slot 0 (accv0/accml0/accap0) is deliberately
# not part of the mapping, exactly as before.
types = {
    '{0}{1}'.format(channel, slot): np.float16
    for slot in range(1, 21)
    for channel in ('accv', 'accml', 'accap')
}
types.update({
    'result': np.int8,
    'is_home': np.int8,
    'task': np.int8,
})

  # This is added back by InteractiveShellApp.init_path()


## Import tdcsfog

In [4]:
# Template for the per-recording lag buffer: one zero-initialised entry per
# channel and lag slot (1..20), in the order accv<slot>, accml<slot>,
# accap<slot>. Import loops take a copy of it for every file.
bufferTpl = {
    '{0}{1}'.format(channel, slot): 0
    for slot in range(1, 21)
    for channel in ('accv', 'accml', 'accap')
}

def setAccValuesFromBuffer(tdcsfog, buffer):
    """Copy all lagged readings from ``buffer`` onto ``tdcsfog``.

    For every slot 1..20 and every channel (accv, accml, accap) the value
    stored under ``'<channel><slot>'`` in ``buffer`` is assigned to the
    attribute of the same name on ``tdcsfog``.

    Args:
        tdcsfog: object receiving the 60 attributes.
        buffer: mapping that contains all 60 ``'<channel><slot>'`` keys.

    Raises:
        KeyError: if one of the expected keys is missing from ``buffer``.
    """
    for slot in range(1, 21):
        for channel in ('accv', 'accml', 'accap'):
            field = '{0}{1}'.format(channel, slot)
            setattr(tdcsfog, field, buffer[field])

directory = '../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog'
k = 0
is_home = 0        # flag stored on every row imported by this cell
files = []         # ids of all imported recordings (later import cells append to it)
tdcsSessions = []  # ids imported from this directory only; later cells use it to pick the time factor

# Import every other directory entry, scanning at most 800 entries.
# NOTE(review): os.listdir() returns entries in arbitrary order, so the subset
# selected here is not guaranteed to be the same on every machine.
for filename in os.listdir(directory):
    k += 1
    if k % 2 != 1:
        continue
    if k > 800:
        break
    f = os.path.join(directory, filename)
    print(f)
    if not os.path.isfile(f):
        continue

    # Recording id = file name without its extension. Derived from the file
    # name itself so it no longer depends on how deep `directory` is nested.
    subject = os.path.splitext(filename)[0]
    files.append(subject)
    tdcsSessions.append(subject)
    df = pd.read_csv(f, dtype=types)
    z = 0  # number of rows seen so far in this file
    buffer = bufferTpl.copy()
    for index, row in df.iterrows():
        z += 1
        idx = subject + '_' + str(int(row['Time']))

        # Label code: first matching flag wins; 0 means no flag is set.
        result = 0
        if row['Walking']:
            result = 1
        elif row['Turn']:
            result = 2
        elif row['StartHesitation']:
            result = 3

        tdcsfog = Tdcsfog()
        tdcsfog.id = idx
        tdcsfog.time = int(row['Time'])
        tdcsfog.accv0 = row['AccV']
        tdcsfog.accml0 = row['AccML']
        tdcsfog.accap0 = row['AccAP']
        setAccValuesFromBuffer(tdcsfog, buffer)
        tdcsfog.result = result
        tdcsfog.is_home = is_home

        # The first 20 rows only warm up the lag buffer and are not stored.
        if z > 20:
            DBSession.add(tdcsfog)
        # Flush in batches of 500 rows. The previous check used a counter that
        # was never incremented, which forced a flush after every single row.
        if z % 500 == 0:
            DBSession.flush()

        # Shift the lag buffer by one slot and store the current sample.
        # NOTE(review): the shift stops at slot 3 and the newest sample goes to
        # slot 2, so slot 1 (accv1/accml1/accap1) always keeps the template
        # value 0. Left unchanged because the feature layout has to match the
        # inference code - confirm and fix both together.
        for i in range(20, 2, -1):
            for channel in ('accv', 'accml', 'accap'):
                buffer['{0}{1}'.format(channel, i)] = buffer['{0}{1}'.format(channel, i - 1)]

        buffer['accv2'] = row['AccV']
        buffer['accml2'] = row['AccML']
        buffer['accap2'] = row['AccAP']

    # One commit per file (also flushes whatever is still pending).
    DBSession.commit()

../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/a171e61840.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/0f985a8440.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/ae8c67086b.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/e18fcafee8.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/feba449e1a.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/d9bf33ecd9.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/5ff654941a.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/0ee65f094c.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/ea3885ab66.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/9d161e0027.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/09575b5dae.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/d30bf7abc2.csv
../input/tlvmc-parkinsons-freezing-gait-

## Import defog

In [5]:
directory = '../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog'
k = 0
is_home = 1  # flag stored on every row imported by this cell

# Import the recordings in this directory, scanning at most 500 entries.
for filename in os.listdir(directory):
    k += 1
    if k > 500:
        break
    f = os.path.join(directory, filename)
    print(f)
    if not os.path.isfile(f):
        continue

    # Recording id = file name without its extension. Derived from the file
    # name itself so it no longer depends on how deep `directory` is nested.
    subject = os.path.splitext(filename)[0]
    files.append(subject)
    df = pd.read_csv(f, dtype=types)
    z = 0  # number of valid rows seen so far in this file
    buffer = bufferTpl.copy()
    for index, row in df.iterrows():
        # Rows flagged as not valid are skipped entirely: they are neither
        # stored nor pushed into the lag buffer.
        if row['Valid'] == False: #or row['Task'] == True
            continue
        z += 1
        idx = subject + '_' + str(int(row['Time']))

        # Label code: first matching flag wins; 0 means no flag is set.
        result = 0
        if row['Walking']:
            result = 1
        elif row['Turn']:
            result = 2
        elif row['StartHesitation']:
            result = 3

        tdcsfog = Tdcsfog()
        tdcsfog.id = idx
        tdcsfog.time = int(row['Time'])
        tdcsfog.accv0 = row['AccV']
        tdcsfog.accml0 = row['AccML']
        tdcsfog.accap0 = row['AccAP']
        setAccValuesFromBuffer(tdcsfog, buffer)
        tdcsfog.result = result
        tdcsfog.is_home = is_home

        # The first 20 valid rows only warm up the lag buffer and are not stored.
        if z > 20:
            DBSession.add(tdcsfog)
        # Flush in batches of 500 rows. The previous check used a counter that
        # was never incremented, which forced a flush after every single row.
        if z % 500 == 0:
            DBSession.flush()

        # Shift the lag buffer by one slot and store the current sample.
        # NOTE(review): the shift stops at slot 3 and the newest sample goes to
        # slot 2, so slot 1 (accv1/accml1/accap1) always keeps the template
        # value 0. Left unchanged because the feature layout has to match the
        # inference code - confirm and fix both together.
        for i in range(20, 2, -1):
            for channel in ('accv', 'accml', 'accap'):
                buffer['{0}{1}'.format(channel, i)] = buffer['{0}{1}'.format(channel, i - 1)]

        buffer['accv2'] = row['AccV']
        buffer['accml2'] = row['AccML']
        buffer['accap2'] = row['AccAP']

    # One commit per file (also flushes whatever is still pending).
    DBSession.commit()

../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/be9d33541d.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/4c3aa8ea6e.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/18e7abc37e.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/6a20935af5.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/e642d9ea5f.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/3f3b08f78d.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/68e7e02a47.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/f17eacf7d8.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/3f970065e5.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/7030643376.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/bdda73c9be.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/8282009100.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/defog/0

In [6]:
directory = '../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype'
k = 0
is_home = 1  # flag stored on every row imported by this cell

# Import the recordings in this directory, scanning at most 500 entries.
for filename in os.listdir(directory):
    k += 1
    if k > 500:
        break
    f = os.path.join(directory, filename)
    print(f)
    if not os.path.isfile(f):
        continue

    # Recording id = file name without its extension. Derived from the file
    # name itself so it no longer depends on how deep `directory` is nested.
    subject = os.path.splitext(filename)[0]
    files.append(subject)
    df = pd.read_csv(f, dtype=types)
    z = 0  # number of valid rows seen so far in this file
    buffer = bufferTpl.copy()
    for index, row in df.iterrows():
        # Rows flagged as not valid are skipped entirely: they are neither
        # stored nor pushed into the lag buffer.
        if row['Valid'] == False: #or row['Task'] == True
            continue
        z += 1
        idx = subject + '_' + str(int(row['Time']))
        # These files carry a single `Event` column instead of per-type flags;
        # its value is stored as the label unchanged.
        # NOTE(review): presumably Event is a 0/1 (or boolean) flag, in which
        # case events end up with code 1, the same code the other import cells
        # use for Walking - confirm this is intended.
        result = row['Event']

        tdcsfog = Tdcsfog()
        tdcsfog.id = idx
        tdcsfog.time = int(row['Time'])
        tdcsfog.accv0 = row['AccV']
        tdcsfog.accml0 = row['AccML']
        tdcsfog.accap0 = row['AccAP']
        setAccValuesFromBuffer(tdcsfog, buffer)
        tdcsfog.result = result
        tdcsfog.is_home = is_home

        # The first 20 valid rows only warm up the lag buffer and are not stored.
        if z > 20:
            DBSession.add(tdcsfog)
        # Flush in batches of 500 rows. The previous check used a counter that
        # was never incremented, which forced a flush after every single row.
        if z % 500 == 0:
            DBSession.flush()

        # Shift the lag buffer by one slot and store the current sample.
        # NOTE(review): the shift stops at slot 3 and the newest sample goes to
        # slot 2, so slot 1 (accv1/accml1/accap1) always keeps the template
        # value 0. Left unchanged because the feature layout has to match the
        # inference code - confirm and fix both together.
        for i in range(20, 2, -1):
            for channel in ('accv', 'accml', 'accap'):
                buffer['{0}{1}'.format(channel, i)] = buffer['{0}{1}'.format(channel, i - 1)]

        buffer['accv2'] = row['AccV']
        buffer['accml2'] = row['AccML']
        buffer['accap2'] = row['AccAP']

    # One commit per file (also flushes whatever is still pending).
    DBSession.commit()

../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/1e8d55d48d.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/89e9ed32d1.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/e5a0e226fe.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/1b3bc93401.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/34b979fc28.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/9cd837fd53.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/60f28aa837.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/02ab235146.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/6a886a3bb8.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/339c0cc15f.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/af02b83cbf.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/train/notype/71dd8ce20d.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/t

In [7]:
import pandas as pd
from sqlalchemy import bindparam

events = pd.read_csv('../input/tlvmc-parkinsons-freezing-gait-prediction/events.csv')

engine = create_engine('sqlite:///freezing_gate.db')
# Reflect the existing schema. MetaData(bind=...) and Engine.execute() are
# gone in SQLAlchemy 2.0, so the engine is passed explicitly and statements
# run on a connection inside a transaction.
meta = MetaData()
meta.reflect(bind=engine)
table_tdcsfog = meta.tables['tdcsfog']

# Label codes written to `result`; same encoding as in the import cells.
event_codes = {'Walking': 1, 'Turn': 2, 'StartHesitation': 3}

ptr = 0      # number of row updates queued so far
params = []  # pending executemany() parameter sets
stmt = (
    table_tdcsfog.update()
    .where(table_tdcsfog.c.id == bindparam('ident'))
    .values({'result': bindparam('res')})
)

# A single transaction for the whole pass instead of one autocommit per batch.
with engine.begin() as conn:
    for index, row in events.iterrows():
        if row['Id'] not in files:
            continue

        # Events without a recognised type (including NaN) are skipped up
        # front. Previously an unknown type silently reused the code of the
        # preceding event (or raised NameError on the very first event).
        res = event_codes.get(row['Type'])
        if res is None:
            continue

        # Event times are scaled to the integer Time index used in the row
        # ids: factor 128 for recordings imported from the tdcsfog directory,
        # 100 otherwise (presumably the sampling rates in Hz - confirm).
        if row['Id'] in tdcsSessions:
            factor = 128
        else:
            factor = 100
        start = int(row['Init'] * factor)
        end = int(row['Completion'] * factor)

        for item in range(start, end):
            ptr += 1
            params.append({'ident': row['Id'] + '_' + str(item), 'res': res})
            if len(params) > 500:
                conn.execute(stmt, params)
                params = []

        # Send whatever is left for this event before moving on.
        if len(params):
            conn.execute(stmt, params)
            params = []

In [8]:
import pandas as pd

events = pd.read_csv('../input/tlvmc-parkinsons-freezing-gait-prediction/tasks.csv')

# Integer code stored in the `task` column for every known task name.
tasks = {
    '4MW': 1,
    '4MW-C': 2,
    'Hotspot1': 3,
    'Hotspot1-C': 4,
    'Hotspot2': 5,
    'Hotspot2-C': 6,
    'MB1': 7,
    'MB10': 8,
    'MB11': 9,
    'MB12': 10,
    'MB13': 11,
    'MB2a': 12,
    'MB2b': 13,
    'MB3-L': 14,
    'MB3-R': 15,
    'MB4': 16,
    'MB5': 17,
    'MB6': 18,
    'MB6-L': 19,
    'MB6-R': 20,
    'MB7': 21,
    'MB8': 22,
    'MB9': 23,
    'Rest1': 24,
    'Rest2': 25,
    'TUG-C': 26,
    'TUG-DT': 27,
    'TUG-ST': 28,
    'Turning-C': 29,
    'Turning-DT': 30,
    'Turning-ST': 31
}

ptr = 0      # number of row updates queued so far
params = []  # pending executemany() parameter sets
stmt = (
    table_tdcsfog.update()
    .where(table_tdcsfog.c.id == bindparam('ident'))
    .values({'task': bindparam('res')})
)

# One transaction for the whole pass; Engine.execute() (autocommit per call)
# no longer exists in SQLAlchemy 2.0.
with engine.begin() as conn:
    for index, row in events.iterrows():
        if row['Id'] not in files:
            continue

        # Task times are scaled to the integer Time index used in the row ids:
        # factor 128 for recordings imported from the tdcsfog directory, 100
        # otherwise (presumably the sampling rates in Hz - confirm).
        if row['Id'] in tdcsSessions:
            factor = 128
        else:
            factor = 100
        start = int(row['Begin'] * factor)
        end = int(row['End'] * factor)
        ran = range(start, end)
        if not ran:
            continue

        # The task code is the same for every sample of the interval, so it is
        # looked up once. An unknown task name still raises KeyError.
        res = tasks[row['Task']]

        for item in ran:
            ptr += 1
            params.append({'ident': row['Id'] + '_' + str(item), 'res': res})
            if len(params) > 500:
                conn.execute(stmt, params)
                params = []

        # Send whatever is left for this interval before moving on.
        if len(params):
            conn.execute(stmt, params)
            params = []

In [9]:
import pandas as pd

engine = create_engine('sqlite:///freezing_gate.db')
# MetaData(bind=...) and Engine.execute() are gone in SQLAlchemy 2.0: reflect
# with an explicit engine and run the statements on a connection.
meta = MetaData()
meta.reflect(bind=engine)
table_tdcsfog = meta.tables['tdcsfog']

metadata = pd.read_csv('../input/tlvmc-parkinsons-freezing-gait-prediction/tdcsfog_metadata.csv')

# One transaction per metadata file instead of one autocommit per recording.
with engine.begin() as conn:
    for index, row in metadata.iterrows():
        if row['Id'] not in files:
            continue
        # meds = 1 when the Medication column reads 'on', otherwise 0.
        med = int(row['Medication'] == 'on')
        print(med, row['Subject'])
        # Row ids start with the recording id, so a prefix match selects every
        # row of the recording.
        stmt = (
            table_tdcsfog.update()
            .where(table_tdcsfog.c.id.like(row['Id'] + '%'))
            .values(meds=med, subject=row['Subject'])
        )
        conn.execute(stmt)


metadata = pd.read_csv('../input/tlvmc-parkinsons-freezing-gait-prediction/defog_metadata.csv')

with engine.begin() as conn:
    for index, row in metadata.iterrows():
        if row['Id'] not in files:
            continue
        # NOTE(review): meds is hard-coded to 0 for every recording listed in
        # this file; if defog_metadata.csv also has a Medication column this
        # discards it - confirm against the data and the inference code.
        med = 0
        stmt = (
            table_tdcsfog.update()
            .where(table_tdcsfog.c.id.like(row['Id'] + '%'))
            .values(meds=med, subject=row['Subject'])
        )
        conn.execute(stmt)


1 231c3b
0 231c3b
1 fa8764
1 02bc69
1 220a17
0 54ee6e
1 242a3e
1 e9fc55
1 7fcee9
0 4ca9b3
0 3b2b7a
1 93f49f
1 a03db7
1 a80ae4
0 364459
1 bc3908
0 251738
1 a03db7
1 4ba1d3
1 364459
0 2a39f8
0 4b39ac
1 66341b
0 31d269
1 301ada
1 87174c
1 69cc45
0 4b39ac
0 31d269
1 2d57c2
1 31d269
0 3b2403
1 31d269
0 d8836b
1 07285e
1 eeaff0
1 07285e
0 f2c8aa
1 a03db7
1 87174c
0 c8e721
0 4dc2f8
1 7eb666
1 3b2403
0 2d57c2
0 4ca9b3
1 251738
0 220a17
0 c8e721
1 4b39ac
0 8db7dd
1 4bb5d0
1 516a67
0 4b39ac
0 93f49f
0 364459
1 4bb5d0
1 3b2b7a
0 516a67
1 2c98f7
1 220a17
0 f2c8aa
1 082f01
1 242a3e
1 4b39ac
1 02bc69
1 31d269
0 4b39ac
0 31d269
1 4dc2f8
1 59f492
1 b19f77
1 eeaff0
1 c8e721
1 b19f77
0 66341b
1 d9312a
0 d8836b
0 48fd62
1 f2c8aa
0 231c3b
0 66341b
1 251738
0 c8e721
0 3b2403
1 f2c8aa
1 8db7dd
1 31d269
1 5c0b8a
0 7688c1
0 3b2403
1 364459
0 54ee6e
0 251738
1 07285e
1 6a3e93
1 87174c
1 4dc2f8
0 3b2403
1 5c0b8a
1 4ca9b3
1 f2c8aa
0 d8836b
1 4bb5d0
0 2d57c2
1 f686f0
1 a80ae4
1 4ba1d3
0 66341b
1 364459
0 2d57c2
1

In [10]:
# Thin out the rows without a label (result == 0): first drop those with an
# odd time index, then those whose time index is 1 modulo 3. With consecutive
# time indices this keeps roughly one third of the unlabelled rows; labelled
# rows are never deleted.
# Both deletes run in one transaction on an explicit connection, because
# Engine.execute() no longer exists in SQLAlchemy 2.0.
with engine.begin() as conn:
    for modulus in (2, 3):
        stmt = (
            table_tdcsfog.delete()
            .where(table_tdcsfog.c.time % modulus == 1)
            .where(table_tdcsfog.c.result == 0)
        )
        conn.execute(stmt)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f7f2f857790>

In [11]:
import pandas as pd

subjects = pd.read_csv('../input/tlvmc-parkinsons-freezing-gait-prediction/subjects.csv')
print(subjects.head(10))

engine = create_engine('sqlite:///freezing_gate.db')
# MetaData(bind=...) and Engine.execute() are gone in SQLAlchemy 2.0: reflect
# with an explicit engine and run the statements on a connection.
meta = MetaData()
meta.reflect(bind=engine)
print(meta.tables)
table_tdcsfog = meta.tables['tdcsfog']

# Copy the per-subject attributes onto every row of that subject, all inside
# one transaction instead of one autocommit per subject row.
# NOTE(review): subjects.csv can list the same subject more than once (one row
# per visit, with different NFOGQ values); the updates are applied in file
# order, so the last row of a subject wins.
with engine.begin() as conn:
    for index, row in subjects.iterrows():
        s = row['Sex'] == 'F'  # stored in the integer column as 1 for 'F', 0 otherwise
        stmt = (
            table_tdcsfog.update()
            .where(table_tdcsfog.c.subject == row['Subject'])
            .values(
                age=row['Age'],
                sex=s,
                years_since_dx=row['YearsSinceDx'],
                nfogq=row['NFOGQ'],
            )
        )
        conn.execute(stmt)
      

  Subject  Visit  Age Sex  YearsSinceDx  UPDRSIII_On  UPDRSIII_Off  NFOGQ
0  00f674    2.0   63   M          27.0         43.0          49.0     24
1  00f674    1.0   63   M          27.0         31.0          30.0     26
2  02bc69    NaN   69   M           4.0         21.0           NaN     22
3  040587    2.0   75   M          26.0         52.0          69.0     21
4  040587    1.0   75   M          26.0         47.0          75.0     24
5  056372    2.0   69   M          13.0         44.0          50.0     22
6  07285e    NaN   58   M           1.0         18.0          26.0     10
7  082f01    NaN   64   M          17.0         45.0          57.0     24
8  08de77    1.0   71   F          16.0         47.0          54.0     17
9  08de77    2.0   71   F          16.0         38.0          49.0     22
FacadeDict({'tdcsfog': Table('tdcsfog', MetaData(bind=Engine(sqlite:///freezing_gate.db)), Column('id', VARCHAR(), table=<tdcsfog>, primary_key=True, nullable=False), Column('time', INTE

## Analyze data distribution and correlation

In [12]:
import seaborn as sns
import matplotlib.pyplot as plt

# Load the whole training table. The stray, body-less
# `for chunk in pd.read_sql_table('tdcsfog_test', ...)` header that used to
# precede this line made the cell a syntax error (and referred to a table that
# does not exist in this database), so it was removed.
dfAll = pd.read_sql_table('tdcsfog', engine).astype(types)

# Histograms of the label and the patient-level attributes.
dfAll[['result', 'age', 'meds', 'years_since_dx', 'sex']].hist()
print(dfAll.subject.nunique())

# Separate figure for the box plot so it is not drawn onto one of the
# histogram axes created above.
plt.figure()
dfAll.age.plot.box()

plt.figure(figsize=(16, 6))
heatmap = sns.heatmap(dfAll[['result', 'age', 'meds', 'years_since_dx', 'sex', 'time']].corr(), vmin=-1, vmax=1, annot=True, cmap='BrBG')
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize':18}, pad=12);

print(dfAll.nfogq.unique())

## train model

In [13]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm, ensemble
import pandas as pd
import pickle

model_name = 'lr_v1_0_0'

# Pull the complete training table and apply the compact dtypes.
dfAll = pd.read_sql_table('tdcsfog', engine).astype(types)

print(dfAll.head(10))

# Target vector and feature matrix; identifiers and the time index are not
# used as features.
y = dfAll['result'].to_numpy()
X = dfAll.drop(columns=['result', 'time', 'id', 'subject']).to_numpy()

# Force the first four labels to 3, 2, 1, 0 - presumably so that every class
# is present at least once.
# NOTE(review): this overwrites up to four real labels.
for position, forced_label in enumerate((3, 2, 1, 0)):
    y[position] = forced_label

# 80/20 random split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=12345
)


def train_model(classifier, feature_vector_train, label, feature_vector_valid, label_valid=None):
    """Fit ``classifier`` and print a short evaluation on the validation set.

    Args:
        classifier: estimator exposing ``fit`` and ``predict``.
        feature_vector_train: training feature matrix.
        label: training labels.
        feature_vector_valid: validation feature matrix.
        label_valid: true labels for ``feature_vector_valid``. When omitted,
            the module-level ``y_test`` is used, which is what this function
            always did before the parameter existed.

    Returns:
        The fitted ``classifier``.

    Prints precision, recall and F-score of the first label (class 0 here)
    followed by the overall accuracy.
    """
    if label_valid is None:
        # Backward-compatible fallback for callers that pass four arguments.
        label_valid = y_test

    # Train the model.
    classifier.fit(feature_vector_train, label)

    # Generate the model's predictions for the validation set.
    predictions = classifier.predict(feature_vector_valid)

    # Evaluate against the true labels. scikit-learn metrics expect
    # (y_true, y_pred); the arguments used to be passed the other way round,
    # which reported precision and recall interchanged.
    precision, recall, fscore, _support = metrics.precision_recall_fscore_support(
        label_valid, predictions
    )
    score_vals = [
        precision[0],
        recall[0],
        fscore[0],
        metrics.accuracy_score(label_valid, predictions),
    ]
    print("LR: ", score_vals)
    return classifier

classifier = train_model(
    XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
    X_train, y_train, X_test, y_test,
)

              id  time     accv0    accml0    accap0  accv1  accml1  accap1  \
0  a171e61840_20    20 -9.680701  0.046148  0.170617    0.0     0.0     0.0   
1  a171e61840_24    24 -9.682190  0.035471  0.207058    0.0     0.0     0.0   
2  a171e61840_26    26 -9.680018  0.039914  0.222862    0.0     0.0     0.0   
3  a171e61840_30    30 -9.668988  0.030376  0.206861    0.0     0.0     0.0   
4  a171e61840_32    32 -9.675831  0.034978  0.211378    0.0     0.0     0.0   
5  a171e61840_36    36 -9.677156  0.029044  0.193703    0.0     0.0     0.0   
6  a171e61840_38    38 -9.673158  0.035341  0.191242    0.0     0.0     0.0   
7  a171e61840_42    42 -9.679507  0.035850  0.171112    0.0     0.0     0.0   
8  a171e61840_44    44 -9.686703  0.037850  0.152852    0.0     0.0     0.0   
9  a171e61840_48    48 -9.680708  0.020887  0.187325    0.0     0.0     0.0   

      accv2    accml2  ...   accap20  result  years_since_dx  age  sex  nfogq  \
0 -9.679688  0.046570  ...  0.184692       0     

## test model manually

In [14]:
# import pickle
# with open('../input/model-v1-0-0/lr_v1_0_0.pickle', 'rb') as pickle_file:
#     classifier = pickle.load(pickle_file)

In [15]:
# y_test = [1,1,1,1,1]

# results = classifier.predict_proba([
#   [-10.5640649631707,-5.7344755236052,-2.25347106210746,-8.57909712460222,-3.95192171002669,3.25418216825264,-8.1906375293986,-1.28812870739416,9.02153967397922,-7.9056709964904,1.04496165829512,10.3074736317969,-7.73251891636128,2.04009811842352,7.86433971443364,-7.7072161651829,2.39761025667793,4.8994163622527,5, 0, 67, 24, 1],         
#   [-13.9070678095068,-4.64674086128608,-3.78920256120196,-10.5640649631707,-5.7344755236052,-2.25347106210746,-8.57909712460222,-3.95192171002669,3.25418216825264,-8.1906375293986,-1.28812870739416,9.02153967397922,-7.9056709964904,1.04496165829512,10.3074736317969,-7.73251891636128,2.04009811842352,7.86433971443364,5, 0, 67, 24, 1],     
#   [-15.9099643466356,-1.81603914985874,-2.26002107143409,-13.9070678095068,-4.64674086128608,-3.78920256120196,-10.5640649631707,-5.7344755236052,-2.25347106210746,-8.57909712460222,-3.95192171002669,3.25418216825264,-8.1906375293986,-1.28812870739416,9.02153967397922,-7.9056709964904,1.04496165829512,10.3074736317969,5, 0, 67, 24, 1],   
#   [-14.0965277938019,0.0716618144836338,0.54618846642608,-15.9099643466356,-1.81603914985874,-2.26002107143409,-13.9070678095068,-4.64674086128608,-3.78920256120196,-10.5640649631707,-5.7344755236052,-2.25347106210746,-8.57909712460222,-3.95192171002669,3.25418216825264,-8.1906375293986,-1.28812870739416,9.02153967397922,5, 0, 67, 24, 1],
#   [-10.6959713312204,1.41937557139802,3.40185461789888,-14.0965277938019,0.0716618144836338,0.54618846642608,-15.9099643466356,-1.81603914985874,-2.26002107143409,-13.9070678095068,-4.64674086128608,-3.78920256120196,-10.5640649631707,-5.7344755236052,-2.25347106210746,-8.57909712460222,-3.95192171002669,3.25418216825264,5, 0, 67, 24, 1] 
# ])
# for idx,result in enumerate(results):
#     print(f'expected {y_test[idx]}, predicted {result}')

In [16]:
import os
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, Float, Boolean
from sqlalchemy.sql import text as sa_text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import scoped_session, sessionmaker

# Start from an empty SQLite file: a database left behind by a previous run of
# this cell is deleted before the engine is created.
filePath = 'freezing_gate_test.db'
if os.path.exists(filePath):
    os.remove(filePath)

engine = create_engine('sqlite:///freezing_gate_test.db')
meta = MetaData()

# Core definition of the test feature table.  Column order is:
#   id, time,
#   accv<k>, accml<k>, accap<k> for k = 0..20 (k-major, i.e. accv0, accml0,
#   accap0, accv1, ...),
#   then the metadata columns that are filled in by later UPDATE statements.
table_tdcsfog = Table(
    'tdcsfog_test', meta,
    Column('id', String, primary_key=True),
    Column('time', Integer),
    *(
        Column('{0}{1}'.format(axis, lag), Numeric(1, 5))
        for lag in range(21)
        for axis in ('accv', 'accml', 'accap')
    ),
#     Column('result', Integer),
    Column('years_since_dx', Float),
    Column('age', Integer),
    Column('sex', Integer),
    Column('nfogq', Integer),
    Column('meds', Integer),
    Column('subject', String),
    Column('is_home', Integer),
    Column('task', Integer),
)

# Create every table registered on `meta` inside the SQLite file.
meta.create_all(engine)

# Declarative base for the ORM class used to insert rows.
Base = declarative_base()

# Scoped session bound to the test database.  Autoflush and expire-on-commit
# are switched off; the loading loop flushes and commits explicitly.
DBSession = scoped_session(
    sessionmaker(bind=engine, autoflush=False, expire_on_commit=False)
)
DBSession.remove()

# Directories that are scanned for test recordings (CSV files).
directories = [
    '../input/tlvmc-parkinsons-freezing-gait-prediction/test/tdcsfog',
    '../input/tlvmc-parkinsons-freezing-gait-prediction/test/defog',
]

class Tdcsfog(Base):
    """ORM mapping used to insert feature rows into the ``tdcsfog_test`` table.

    One instance describes one time step of one recording: the row id, the
    time step, 21 acceleration triples (``accv<k>``, ``accml<k>``,
    ``accap<k>`` for k = 0..20) and the ``is_home`` / ``task`` fields.

    The other columns of the table created above (``years_since_dx``, ``age``,
    ``sex``, ``nfogq``, ``meds``, ``subject``) are not mapped on this class;
    later cells fill them with UPDATE statements.
    """
    __tablename__ = "tdcsfog_test"
    # Row key; the loading loop builds it as '<recording file name>_<Time>'.
    id=Column(String, primary_key=True)
    # Value of the CSV 'Time' column converted to int.
    time=Column(Integer)
    # Suffix 0: set directly from the CSV row being processed (AccV, AccML, AccAP).
    accv0 = Column(Numeric(1, 5))
    accml0 = Column(Numeric(1, 5))
    accap0 = Column(Numeric(1, 5))
    # Suffixes 1..20: not assigned directly by the loading loop, which instead
    # calls setAccValuesFromBuffer(row_object, buffer) for each row.
    # NOTE(review): presumably that helper copies the buffered earlier samples
    # into these attributes - confirm against its definition.
    accv1 = Column(Numeric(1, 5))
    accml1 = Column(Numeric(1, 5))
    accap1 = Column(Numeric(1, 5))
    accv2 = Column(Numeric(1, 5))
    accml2 = Column(Numeric(1, 5))
    accap2 = Column(Numeric(1, 5))
    accv3 = Column(Numeric(1, 5))
    accml3 = Column(Numeric(1, 5))
    accap3 = Column(Numeric(1, 5))
    accv4 = Column(Numeric(1, 5))
    accml4 = Column(Numeric(1, 5))
    accap4 = Column(Numeric(1, 5))
    accv5 = Column(Numeric(1, 5))
    accml5 = Column(Numeric(1, 5))
    accap5 = Column(Numeric(1, 5))
    accv6 = Column(Numeric(1, 5))
    accml6 = Column(Numeric(1, 5))
    accap6 = Column(Numeric(1, 5))
    accv7 = Column(Numeric(1, 5))
    accml7 = Column(Numeric(1, 5))
    accap7 = Column(Numeric(1, 5))
    accv8 = Column(Numeric(1, 5))
    accml8 = Column(Numeric(1, 5))
    accap8 = Column(Numeric(1, 5))
    accv9 = Column(Numeric(1, 5))
    accml9 = Column(Numeric(1, 5))
    accap9 = Column(Numeric(1, 5))
    accv10 = Column(Numeric(1, 5))
    accml10 = Column(Numeric(1, 5))
    accap10 = Column(Numeric(1, 5))
    accv11 = Column(Numeric(1, 5))
    accml11 = Column(Numeric(1, 5))
    accap11 = Column(Numeric(1, 5))
    accv12 = Column(Numeric(1, 5))
    accml12 = Column(Numeric(1, 5))
    accap12 = Column(Numeric(1, 5))
    accv13 = Column(Numeric(1, 5))
    accml13 = Column(Numeric(1, 5))
    accap13 = Column(Numeric(1, 5))
    accv14 = Column(Numeric(1, 5))
    accml14 = Column(Numeric(1, 5))
    accap14 = Column(Numeric(1, 5))
    accv15 = Column(Numeric(1, 5))
    accml15 = Column(Numeric(1, 5))
    accap15 = Column(Numeric(1, 5))
    accv16 = Column(Numeric(1, 5))
    accml16 = Column(Numeric(1, 5))
    accap16 = Column(Numeric(1, 5))
    accv17 = Column(Numeric(1, 5))
    accml17 = Column(Numeric(1, 5))
    accap17 = Column(Numeric(1, 5))
    accv18 = Column(Numeric(1, 5))
    accml18 = Column(Numeric(1, 5))
    accap18 = Column(Numeric(1, 5))
    accv19 = Column(Numeric(1, 5))
    accml19 = Column(Numeric(1, 5))
    accap19 = Column(Numeric(1, 5))
    accv20 = Column(Numeric(1, 5))
    accml20 = Column(Numeric(1, 5))
    accap20 = Column(Numeric(1, 5))
    # Not set by the loading loop; the metadata cells write 0 for tdcsfog
    # recordings and 1 for defog recordings.
    is_home=Column(Integer)
    # Inserted as 0 unless a later UPDATE assigns a task code.
    task=Column(Integer, default=0)

# Build one feature row per sample of every test recording and add it to the
# `tdcsfog_test` table through the ORM session.
#
# Names left behind for the following cells:
#   session_ids  - set with the id of every recording file that was read
#   tdcsSessions - ids of the recordings found under the tdcsfog directory
t = 0  # running row counter; drives the periodic flush below
session_ids = set()
tdcsSessions = []
for directory in directories:
    # iterate over files in that directory
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        # checking if it is a file
        if not os.path.isfile(f):
            continue

        # The recording id is the file name without its extension.  Deriving
        # it from `filename` keeps it independent of how deeply `directory`
        # is nested.
        subject = os.path.splitext(filename)[0]
        session_ids.add(subject)
        if 'tdcsfog' in f:
            # Store the recording id itself (not the per-row id) so that
            # membership tests against recording ids can succeed.
            tdcsSessions.append(subject)

        df = pd.read_csv(f, dtype=types)
        # Every recording starts from a fresh copy of the template window.
        buffer = bufferTpl.copy()
        # iterrows is kept on purpose: it defines the scalar types that are
        # handed to the database driver.
        for index, row in df.iterrows():
            t += 1
            time_step = int(row['Time'])

            tdcsfog = Tdcsfog()
            tdcsfog.id = subject + '_' + str(time_step)
            tdcsfog.time = time_step
            tdcsfog.accv0 = row['AccV']
            tdcsfog.accml0 = row['AccML']
            tdcsfog.accap0 = row['AccAP']
            setAccValuesFromBuffer(tdcsfog, buffer)

            DBSession.add(tdcsfog)
            if t % 1000 == 0:
                DBSession.flush()

            # Age the window by one step.  Slots are shifted from the highest
            # suffix downwards so no value is overwritten before it has been
            # copied; the current sample then goes into slot 2.
            # NOTE(review): slot 1 of `buffer` is never written in this loop,
            # so it keeps whatever `bufferTpl` provides - confirm this matches
            # how the training features were built before changing it.
            for i in range(20, 2, -1):
                buffer['accv{0}'.format(i)] = buffer['accv{0}'.format(i - 1)]
                buffer['accml{0}'.format(i)] = buffer['accml{0}'.format(i - 1)]
                buffer['accap{0}'.format(i)] = buffer['accap{0}'.format(i - 1)]

            buffer['accv2'] = row['AccV']
            buffer['accml2'] = row['AccML']
            buffer['accap2'] = row['AccAP']

        # One commit per recording file.
        DBSession.commit()

print(session_ids)

../input/tlvmc-parkinsons-freezing-gait-prediction/test/tdcsfog/003f117e14.csv
../input/tlvmc-parkinsons-freezing-gait-prediction/test/defog/02ab235146.csv
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
60000
65000
70000
75000
80000
85000
90000
95000
100000
105000
110000
115000
120000
125000
130000
135000
140000
145000
150000
155000
160000
165000
170000
175000
180000
185000
190000
195000
200000
205000
210000
215000
220000
225000
230000
235000
240000
245000
250000
255000
260000
265000
270000
275000
280000
285000
{'003f117e14', '02ab235146'}


In [17]:
import pandas as pd

# Attach per-recording metadata (medication flag, subject id, is_home = 0) to
# the rows of the tdcsfog recordings stored in `tdcsfog_test`.
metadata = pd.read_csv('../input/tlvmc-parkinsons-freezing-gait-prediction/tdcsfog_metadata.csv')

# Reflect the table from the database.  The engine is passed to reflect()
# explicitly instead of binding it to the MetaData object, because bound
# metadata is a legacy feature that newer SQLAlchemy releases no longer accept.
meta = MetaData()
meta.reflect(bind=engine)
table_tdcsfog = meta.tables['tdcsfog_test']

# Subjects that occur in the loaded recordings; extended by the defog cell
# and used afterwards to select the relevant subject rows.
subject_ids = set()

# Run all updates in a single transaction that is committed when the block
# exits, instead of one implicit commit per statement.
with engine.begin() as connection:
    for index, row in metadata.iterrows():
        if row['Id'] not in session_ids:
            continue
        subject_ids.add(row['Subject'])
        med = int(row['Medication'] == 'on')
        # Row ids start with the recording id, so a prefix match selects
        # every row that belongs to this recording.
        stmt = (
            table_tdcsfog.update()
            .where(table_tdcsfog.c.id.like(row['Id'] + '%'))
            .values(meds=med, subject=row['Subject'], is_home=0)
        )
        connection.execute(stmt)
    

In [18]:
import pandas as pd

# Attach per-recording metadata (medication flag, subject id, is_home = 1) to
# the rows of the defog recordings, then copy the per-subject attributes onto
# every row of the subjects seen so far.
metadata = pd.read_csv('../input/tlvmc-parkinsons-freezing-gait-prediction/defog_metadata.csv')

# Reflect the table from the database.  The engine is passed to reflect()
# explicitly instead of binding it to the MetaData object, because bound
# metadata is a legacy feature that newer SQLAlchemy releases no longer accept.
meta = MetaData()
meta.reflect(bind=engine)
table_tdcsfog = meta.tables['tdcsfog_test']

# Both update passes share one transaction (committed when the block exits),
# so the second pass sees the `subject` values written by the first one.
with engine.begin() as connection:
    for index, row in metadata.iterrows():
        if row['Id'] not in session_ids:
            continue
        subject_ids.add(row['Subject'])
        med = int(row['Medication'] == 'on')
        # Row ids start with the recording id, so a prefix match selects
        # every row that belongs to this recording.
        stmt = (
            table_tdcsfog.update()
            .where(table_tdcsfog.c.id.like(row['Id'] + '%'))
            .values(meds=med, subject=row['Subject'], is_home=1)
        )
        connection.execute(stmt)

    print(subject_ids)

    for index, row in subjects.iterrows():
        if row['Subject'] not in subject_ids:
            continue
        # Stored as an explicit 0/1 integer, the same way `meds` is encoded.
        s = int(row['Sex'] == 'F')
        stmt = (
            table_tdcsfog.update()
            .where(table_tdcsfog.c.subject == row['Subject'])
            .values(
                age=row['Age'],
                sex=s,
                years_since_dx=row['YearsSinceDx'],
                nfogq=row['NFOGQ'],
            )
        )
        connection.execute(stmt)

{'e1f62e', '4dc2f8'}


In [19]:
# Write the task code of every annotated interval in tasks.csv onto the
# matching rows of `tdcsfog_test`.
events = pd.read_csv('../input/tlvmc-parkinsons-freezing-gait-prediction/tasks.csv')

# `tdcsSessions` may hold full row ids of the form '<recording>_<time>';
# reduce every entry to its recording id so that it can be compared with the
# 'Id' column of tasks.csv.  Entries that already are plain recording ids are
# left unchanged by the split.
tdcs_session_ids = {str(entry).split('_')[0] for entry in tdcsSessions}

#use database
params = []
stmt = table_tdcsfog.update().where(table_tdcsfog.c.id == bindparam('ident')).values({'task': bindparam('task')})

# All batches run in one transaction that is committed when the block exits.
with engine.begin() as connection:
    for index, row in events.iterrows():
        if row['Id'] not in session_ids:
            continue

        # Begin/End are multiplied by this factor to obtain the integer time
        # steps used in the row ids.
        # NOTE(review): 128 / 100 are presumably the sampling rates (Hz) of
        # the tdcsfog / defog recordings - confirm against the data description.
        if row['Id'] in tdcs_session_ids:
            factor = 128
        else:
            factor = 100
        start = int(row['Begin'] * factor)
        end = int(row['End'] * factor)

        # The task code is the same for the whole interval, so it is looked
        # up once.  Unknown or falsy entries fall back to 0 instead of
        # raising on a missing key.
        res = tasks.get(row['Task']) or 0

        for item in range(start, end):
            params.append({'ident': row['Id'] + '_' + str(item), 'task': res})
            # Send the collected updates in batches to bound memory use.
            if len(params) > 500:
                connection.execute(stmt, params)
                params = []

        # Flush what is left of this interval.
        if params:
            connection.execute(stmt, params)
            params = []


## Generate submission

In [20]:
# Score the test table chunk by chunk and write the predictions to
# submission.csv with the columns Id, StartHesitation, Turn, Walking.
header = True  # True until the first chunk has been written
for chunk in pd.read_sql_table('tdcsfog_test', engine, chunksize=1000):
    # Model input: every column except the identifiers, in table column order.
    features = chunk.drop(['time', 'id', 'subject'], axis=1)
    result = classifier.predict_proba(features.to_numpy())

    # Columns 1, 2 and 3 of predict_proba are exported as Walking, Turn and
    # StartHesitation; column 0 is not exported.  The values are attached by
    # position, so the result does not depend on the index of `chunk`.
    X = pd.DataFrame({
        'Id': chunk['id'].to_numpy(),
        'StartHesitation': result[:, 3],
        'Turn': result[:, 2],
        'Walking': result[:, 1],
    })

    # The first chunk overwrites any submission.csv left by a previous run
    # (appending to it would duplicate rows and repeat the header); later
    # chunks are appended without a header.
    X.to_csv('submission.csv', index=False, mode='w' if header else 'a', header=header)
    header = False


# Preview of the last chunk that was written.
print(X.head(10))

                  Id  StartHesitation      Turn   Walking
0  02ab235146_281318         0.000089  0.000161  0.000328
1  02ab235146_281319         0.000089  0.000161  0.000328
2  02ab235146_281320         0.000078  0.000161  0.000328
3  02ab235146_281321         0.000089  0.000161  0.000328
4  02ab235146_281322         0.000089  0.000161  0.000328
5  02ab235146_281323         0.000089  0.000161  0.000328
6  02ab235146_281324         0.000089  0.000151  0.000328
7  02ab235146_281325         0.000077  0.000151  0.000328
8  02ab235146_281326         0.000077  0.000151  0.000328
9  02ab235146_281327         0.000077  0.000161  0.000328
