In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc
from pandas.plotting import scatter_matrix
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, RidgeCV, Lasso, LassoCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from scipy.optimize import minimize
from statsmodels.tsa.seasonal import seasonal_decompose
import re
import plotly.express as px
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

"""for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))"""

"for dirname, _, filenames in os.walk('/kaggle/input'):\n    for filename in filenames:\n        print(os.path.join(dirname, filename))"

In [2]:
## !pip install 'polars[numpy,pandas,pyarrow]' --target=/kaggle/working/mysitepackages

!pip download polars[numpy,pandas,pyarrow] -d /kaggle/working/mysitepackages

Collecting polars[numpy,pandas,pyarrow]
  Downloading polars-0.17.9-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.8/17.8 MB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting typing_extensions>=4.0.1
  Downloading typing_extensions-4.5.0-py3-none-any.whl (27 kB)
Collecting numpy>=1.16.0
  Downloading numpy-1.21.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m56.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pyarrow>=7.0.0
  Downloading pyarrow-11.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.1/35.1 MB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pandas
  Downloading pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2

#### Create the zip file with the code below, then download it to your local machine

In [3]:
from zipfile import ZipFile

dirName = "/kaggle/working/mysitepackages"
zipName = "packages.zip"

# Bundle every file found under dirName into a single archive (written to the
# current working directory). Each entry is stored under its full path, and a
# file that has the same name as the archive itself is left out.
with ZipFile(zipName, mode='w') as zipObj:
    for folderName, subfolders, filenames in os.walk(dirName):
        for filename in filenames:
            if filename == zipName:
                continue
            filePath = os.path.join(folderName, filename)
            zipObj.write(filePath)

In [5]:
# List the contents of the uploaded offline-packages input directory.
! ls /kaggle/input/packages-to-install-polars-offline/kaggle/working/mysitepackages

numpy-1.21.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl
pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
polars-0.17.9-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
pyarrow-11.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
python_dateutil-2.8.2-py2.py3-none-any.whl
pytz-2023.3-py2.py3-none-any.whl
six-1.16.0-py2.py3-none-any.whl
typing_extensions-4.5.0-py3-none-any.whl


#### Upload the downloaded zip file from your local machine, then run the command below

In [6]:
!pip install polars[numpy,pandas,pyarrow] --no-index --find-links=file:///kaggle/input/packages-to-install-polars-offline/kaggle/working/mysitepackages/ 

Looking in links: file:///kaggle/input/packages-to-install-polars-offline/kaggle/working/mysitepackages/
[0m

In [7]:
import polars as pl

In [5]:
# Load every DeFOG training recording into one frame.
#
# For each CSV under the defog training directory:
#   * keep only rows where both the "Valid" and "Task" flags are true,
#   * add an "Id" column holding the file name without its extension.
#
# The per-file frames are gathered in a list and concatenated once at the end.
# Calling pl.concat inside the loop re-copies all rows accumulated so far on
# every iteration (quadratic in the number of files) and relies on
# concatenating onto a zero-column frame.
defog_frames = []

for dirname, _, filenames in os.walk('/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/defog'):
    # Sorted so the row order of the result does not depend on directory listing order.
    for filename in sorted(filenames):
        recording_id, extension = os.path.splitext(filename)
        if extension.lower() != '.csv':
            # Ignore anything that is not a recording file.
            continue
        defog_frames.append(
            pl.scan_csv(os.path.join(dirname, filename), try_parse_dates=True)
            .filter((pl.col("Valid")) & (pl.col("Task")))
            .with_columns(pl.lit(recording_id).alias('Id'))
            .collect()
        )

# pl.concat rejects an empty list, so fall back to an empty frame when no
# recordings were found (same result the original loop produced in that case).
defog_train_data = pl.concat(defog_frames, how="vertical") if defog_frames else pl.DataFrame()
del defog_frames

In [6]:
# Quick sanity check of the combined training frame:
# number of distinct recording Ids, estimated size in megabytes, and (rows, columns).
for overview_value in (
    defog_train_data.select('Id').n_unique(),
    defog_train_data.estimated_size("mb"),
    defog_train_data.shape,
):
    print(overview_value)
del overview_value

91
291.12402725219727
(4111322, 10)


In [7]:
# Summary statistics for every column of the combined DeFOG training frame.
defog_train_data.describe()

describe,Time,AccV,AccML,AccAP,StartHesitation,Turn,Walking,Valid,Task,Id
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,str
"""count""",4111322.0,4111322.0,4111322.0,4111322.0,4111322.0,4111322.0,4111322.0,4111322.0,4111322.0,"""4111322"""
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""0"""
"""mean""",91135.731763,-0.934887,-0.001301,-0.226019,2.1e-05,0.10079,0.017153,1.0,1.0,
"""std""",70209.818382,0.106942,0.139912,0.2442,0.004626,0.30105,0.129841,0.0,0.0,
"""min""",1000.0,-6.024701,-2.115008,-5.11865,0.0,0.0,0.0,1.0,1.0,"""02ea782681"""
"""max""",414387.0,4.458365,4.524038,4.388132,1.0,1.0,1.0,1.0,1.0,"""f9fc61ce85"""
"""median""",76864.0,-0.939016,0.0,-0.25,0.0,0.0,0.0,1.0,1.0,


In [8]:
# Read the DeFOG metadata CSV (one row per recording Id) into memory.
defog_metadata = (
    pl.scan_csv(
        "/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/defog_metadata.csv",
        try_parse_dates=True,
    )
    .collect()
)

# Table dimensions, then the number of distinct subjects and distinct recording Ids.
print(defog_metadata.shape)
print(defog_metadata.select('Subject').n_unique())
print(defog_metadata.select('Id').n_unique())

# Preview the first ten rows.
defog_metadata.head(10)

(137, 4)
45
137


Id,Subject,Visit,Medication
str,str,i64,str
"""02ab235146""","""ab54e1""",2,"""on"""
"""02ea782681""","""bf608b""",2,"""on"""
"""06414383cf""","""c0b71e""",2,"""off"""
"""092b4c1819""","""b6a627""",1,"""off"""
"""0a900ed8a2""","""b7bd52""",2,"""on"""
"""0c55be4384""","""39f9c0""",2,"""off"""
"""0d7ab3a9f9""","""c0b71e""",2,"""on"""
"""0eaac04f17""","""d79889""",2,"""off"""
"""0ec76d2d8e""","""d79889""",1,"""on"""
"""139f60d29b""","""49f526""",1,"""off"""


In [9]:
# Read the subjects CSV (per-subject, per-visit attributes) into memory.
subjects = (
    pl.scan_csv(
        "/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/subjects.csv",
        try_parse_dates=True,
    )
    .collect()
)

# Table dimensions, then the number of distinct subjects.
print(subjects.shape)
print(subjects.select('Subject').n_unique())

# Preview the first ten rows.
subjects.head(10)

(173, 8)
136


Subject,Visit,Age,Sex,YearsSinceDx,UPDRSIII_On,UPDRSIII_Off,NFOGQ
str,i64,i64,str,f64,i64,i64,i64
"""04fcdb""",1.0,63,"""M""",3.0,30.0,,0
"""05595e""",1.0,56,"""M""",8.0,28.0,,0
"""0967b2""",1.0,59,"""M""",10.0,38.0,48.0,19
"""0967b2""",2.0,59,"""M""",10.0,37.0,44.0,13
"""097078""",,70,"""F""",10.0,27.0,50.0,20
"""0e0908""",,67,"""M""",6.0,16.0,61.0,21
"""109122""",2.0,75,"""F""",3.0,22.0,24.0,12
"""109122""",1.0,75,"""F""",3.0,21.0,30.0,19
"""10943a""",1.0,61,"""F""",1.0,14.0,,0
"""11404e""",,67,"""F""",4.0,,18.0,29


In [10]:
# Attach the subject/visit attributes to each DeFOG recording. With an inner
# join, only (Subject, Visit) pairs present in both tables are kept.
defog_subjects_metadata = defog_metadata.join(
    subjects,
    how="inner",
    on=["Subject", "Visit"],
    suffix="_right",
)

# Drop the two source tables from the namespace now that they are merged.
del subjects, defog_metadata

# Dimensions of the merged table, followed by a ten-row preview.
print(defog_subjects_metadata.shape)
defog_subjects_metadata.head(10)

(137, 10)


Id,Subject,Visit,Medication,Age,Sex,YearsSinceDx,UPDRSIII_On,UPDRSIII_Off,NFOGQ
str,str,i64,str,i64,str,f64,i64,i64,i64
"""28209b9006""","""0967b2""",1,"""off""",59,"""M""",10.0,38,48,19
"""6214414fff""","""0967b2""",1,"""on""",59,"""M""",10.0,38,48,19
"""296c84448e""","""0967b2""",2,"""on""",59,"""M""",10.0,37,44,13
"""71dd8ce20d""","""0967b2""",2,"""off""",59,"""M""",10.0,37,44,13
"""54c6a21be6""","""109122""",2,"""on""",75,"""F""",3.0,22,24,12
"""97e44fa8c3""","""109122""",2,"""off""",75,"""F""",3.0,22,24,12
"""9cd837fd53""","""109122""",1,"""on""",75,"""F""",3.0,21,30,19
"""f0b8335d50""","""109122""",1,"""off""",75,"""F""",3.0,21,30,19
"""3f970065e5""","""21e523""",2,"""off""",69,"""M""",13.0,44,50,22
"""d59b65430b""","""21e523""",2,"""on""",69,"""M""",13.0,44,50,22
