In [3]:
!pip install plotly
!pip install cufflinks
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import plotly.express as px
import cufflinks as cf
from plotly.offline import download_plotlyjs,init_notebook_mode,iplot
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.linear_model import LogisticRegression
from catboost import Pool, CatBoostClassifier, cv
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, cross_validate, StratifiedKFold, RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
from sklearn.metrics import average_precision_score,classification_report
from sklearn.ensemble import RandomForestClassifier
import optuna
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline



In [None]:
# Combine every tdcsfog training '.csv' file into one DataFrame.
tdcsfog_path= '/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog'
tdcsfog_list= []

# os.listdir returns entries in arbitrary order. Sorting fixes the row order of the
# concatenated frame, so the positional (np.where / iloc) selections made from it
# further down pick the same rows on every run.
for file_name in sorted(os.listdir(tdcsfog_path)):
    if file_name.endswith('.csv'):
        file_path= os.path.join(tdcsfog_path,file_name)
        df= pd.read_csv(file_path)
        # Divide Time by (row count - 1); the max(..., 1) guard avoids a division by
        # zero for a one-row file.
        # NOTE(review): presumably Time is a 0-based sample counter, which would map
        # it onto [0, 1] per recording -- confirm against the data.
        df['Time']= df['Time']/max(len(df)-1, 1)
        tdcsfog_list.append(df)

# Row-wise stack; original per-file index labels are kept (no ignore_index).
tdcsfog = pd.concat(tdcsfog_list,axis= 0)
tdcsfog

In [None]:
# Combine every defog training '.csv' file into one DataFrame.
defog_path = '/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/defog'

defog_list = []

# os.listdir returns entries in arbitrary order. Sorting fixes the row order of the
# concatenated frame, so later positional selections are reproducible across runs.
for file_name in sorted(os.listdir(defog_path)):
    if file_name.endswith('.csv'):
        file_path = os.path.join(defog_path, file_name)
        file = pd.read_csv(file_path)
        # Divide Time by (row count - 1); the max(..., 1) guard avoids a division by
        # zero for a one-row file.
        file['Time'] = file['Time'] / max(len(file) - 1, 1)
        defog_list.append(file)

# Row-wise stack; original per-file index labels are kept (no ignore_index).
defog = pd.concat(defog_list, axis = 0)

defog.head()

In [None]:
#reducing memory usage of dataset

def reduce_memory_usage(df):
    """Downcast the columns of ``df`` to smaller dtypes and print the saving.

    The frame is modified in place and the same object is returned.

    Handling per column, decided by the dtype name:

    * ``object`` columns are converted to ``category``.
    * Signed integer columns (``int8`` .. ``int64``) are cast to the narrowest
      of int8 / int16 / int32 whose range (bounds included) holds the column's
      min and max; they are left unchanged if none does.
    * ``float64`` columns are cast to ``float32`` when their min and max lie
      strictly inside the float32 range. This reduces precision.
    * Every other dtype (bool, unsigned ints, float16/float32, datetimes,
      categories, ...) is left untouched, so the function never widens a
      column or changes its kind.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink; mutated in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    init_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(init_mem))

    for col in df.columns:
        col_type = df[col].dtype.name

        if col_type == 'object':
            df[col] = df[col].astype('category')

        elif col_type[:3] == 'int':
            c_min = df[col].min()
            c_max = df[col].max()
            # Try the candidates from narrowest to widest and stop at the first fit.
            # For an empty column min/max are NaN, every comparison is False and the
            # column keeps its dtype.
            for int_type in (np.int8, np.int16, np.int32):
                limits = np.iinfo(int_type)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(int_type)
                    break

        elif col_type == 'float64':
            c_min = df[col].min()
            c_max = df[col].max()
            # float16 is deliberately not used. An all-NaN column fails both
            # comparisons and is left as float64.
            if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                df[col] = df[col].astype(np.float32)

    mem_usg = df.memory_usage().sum() / 1024**2
    print("Memory usage became: ",mem_usg," MB")

    return df

In [None]:
# Downcast both frames. Each result is bound back to the name it came from:
# assigning the tdcsfog result to `defog` would discard the loaded defog data.
tdcsfog= reduce_memory_usage(tdcsfog)
defog= reduce_memory_usage(defog)

In [None]:
# Keep only the defog rows where Valid == 1 and Task == 1, then drop rows that
# contain missing values. dropna() returns a new frame, so its result has to be
# assigned for the rows to actually be removed from `defog`.
defog= defog[(defog['Valid']==1) & (defog['Task']==1)].dropna()
defog

In [None]:
# Positional column trim: keep the first seven columns of defog.
# NOTE(review): presumably this drops the trailing 'Valid' / 'Task' columns so the
# layout matches tdcsfog -- confirm the column order of the CSV files.
defog = defog.iloc[:, slice(7)]

In [None]:
# Stack tdcsfog on top of defog (row-wise). Index labels are carried over
# unchanged, so downstream code selects rows by position.
merged = pd.concat((tdcsfog, defog), axis='index')
merged

In [None]:
# Feature matrices: the first four columns (selected by position) of each frame.
X_merged = merged.iloc[:, :4]
X = tdcsfog.iloc[:, :4]

# Targets. StartHesitation and Turn are read from the merged frame,
# Walking from tdcsfog alone.
y1, y2 = merged['StartHesitation'], merged['Turn']
y3 = tdcsfog['Walking']

X_merged.head()

In [None]:
# Balance StartHesitation by undersampling: keep every row labelled 1 and draw
# the same number of rows labelled 0, without replacement.
y1_ones = np.flatnonzero(y1.to_numpy() == 1)  # row positions of the ones
n1_ones = (y1 == 1).sum()                     # how many ones there are

np.random.seed(10)  # fixed seed so the same zero rows are drawn each run
y1_zeros = np.random.choice(
    np.flatnonzero(y1.to_numpy() == 0),
    size=n1_ones,
    replace=False,
)

# Merge both position sets and put them back into ascending row order.
y1_balanced_idx = np.concatenate((y1_zeros, y1_ones))
y1_balanced_idx.sort()
y1_balanced_idx

In [None]:
# Select the balanced rows (by position) from the merged features and the
# StartHesitation target.
y1_balanced = y1.iloc[y1_balanced_idx]
X1_balanced = X_merged.iloc[y1_balanced_idx]
X1_balanced.shape

In [None]:
# Balance Turn by undersampling: keep every row labelled 1 and draw the same
# number of rows labelled 0, without replacement.
y2_ones = np.flatnonzero(y2.to_numpy() == 1)  # row positions of the ones
n2_ones = (y2 == 1).sum()                     # how many ones there are

np.random.seed(10)  # fixed seed so the same zero rows are drawn each run
y2_zeros = np.random.choice(
    np.flatnonzero(y2.to_numpy() == 0),
    size=n2_ones,
    replace=False,
)

# Merge both position sets and put them back into ascending row order.
y2_balanced_idx = np.concatenate((y2_zeros, y2_ones))
y2_balanced_idx.sort()

# Positional selection of the balanced rows from the merged features and target.
X2_balanced = X_merged.iloc[y2_balanced_idx]
y2_balanced = y2.iloc[y2_balanced_idx]

In [None]:
# Balance Walking by undersampling: keep every row labelled 1 and draw the same
# number of rows labelled 0, without replacement.
y3_ones = np.flatnonzero(y3.to_numpy() == 1)  # row positions of the ones
n3_ones = (y3 == 1).sum()                     # how many ones there are

np.random.seed(10)  # fixed seed so the same zero rows are drawn each run
y3_zeros = np.random.choice(
    np.flatnonzero(y3.to_numpy() == 0),
    size=n3_ones,
    replace=False,
)

# Merge both position sets and put them back into ascending row order.
y3_balanced_idx = np.concatenate((y3_zeros, y3_ones))
y3_balanced_idx.sort()

# Walking uses the tdcsfog-only feature frame X, not X_merged.
X3_balanced = X.iloc[y3_balanced_idx]
y3_balanced = y3.iloc[y3_balanced_idx]

In [None]:
# 80/20 train/test split of each balanced data set, with the same fixed seed for
# all three targets.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1_balanced,
    y1_balanced,
    test_size=0.2,
    random_state=42,
)
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2_balanced,
    y2_balanced,
    test_size=0.2,
    random_state=42,
)
X3_train, X3_test, y3_train, y3_test = train_test_split(
    X3_balanced,
    y3_balanced,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Min-max scale the features of each task. Every scaler is fitted on the training
# split only and then applied to both splits, so no test-set statistics leak into
# the fit. Note that min-max scaling rescales the value range; it does not remove
# or dampen outliers.
scaler1 = MinMaxScaler().fit(X1_train)
X1_train = scaler1.transform(X1_train)
X1_test = scaler1.transform(X1_test)

scaler2 = MinMaxScaler().fit(X2_train)
X2_train = scaler2.transform(X2_train)
X2_test = scaler2.transform(X2_test)

scaler3 = MinMaxScaler().fit(X3_train)
X3_train = scaler3.transform(X3_train)
X3_test = scaler3.transform(X3_test)

In [None]:
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score, accuracy_score
import os
from keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout

from keras.layers import Dense, SimpleRNN
from keras import regularizers
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
# Author's note: X2_train and X2_test are assumed to have shapes (3624977, 4) and
# (906245, 4) respectively.

# Append a trailing axis of length 1, turning (rows, columns) into
# (rows, columns, 1): each row's feature columns become a sequence of
# one-dimensional steps for the recurrent layer.
X2_train_reshaped = X2_train[:, :, np.newaxis]
X2_test_reshaped = X2_test[:, :, np.newaxis]


# Build the model. Despite the `model_bnn` name, this is a bidirectional LSTM
# followed by dropout and two dense layers; the single sigmoid unit outputs the
# probability for the binary Turn target.
model_bnn = Sequential([
    Bidirectional(
        LSTM(64, activation='relu'),
        input_shape=X2_train_reshaped.shape[1:],
    ),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy loss with Adam; accuracy is tracked as the metric.
model_bnn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for at most 10 epochs, holding out 20% of the training data for
# validation and stopping early after 3 epochs with no improvement.
model_bnn.fit(
    X2_train_reshaped,
    y2_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
    callbacks=[EarlyStopping(patience=3)],
)

print(X2_train_reshaped)