In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import numpy as np

# Load the dataset
df = pd.read_csv('/work/Data/full_df_all.csv')

# Create a binary target variable: 1 if tremor is present, 0 otherwise
df['binary'] = (df['trem_score'] > 0).astype(int)

# Separate features and target variable for binary classification
# ('trem_score' and 'binary' are targets; 'task_code' and 'sens_pos' are excluded from the features)
X_binary = df.drop(['trem_score', 'task_code', 'sens_pos', 'binary'], axis=1)
y_binary = df['binary']

# Split the data into training and testing sets for binary classification
X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
    X_binary, y_binary, test_size=0.3, random_state=30
)

# Scale features for binary classification (scaler statistics come from the training split only)
scaler_binary = StandardScaler()
X_train_scaled_binary = scaler_binary.fit_transform(X_train_binary)
X_test_scaled_binary = scaler_binary.transform(X_test_binary)

# Seed for SMOTE; the tremor score stage further down in this cell reuses it
smote_random_state = 4

# SMOTE must run inside cross-validation, not before it. Oversampling the whole
# training set first puts synthetic points (interpolated from rows that end up in
# the training folds) into the validation folds, which inflates best_score_.
# imblearn's Pipeline applies the sampler only while fitting, so every validation
# fold and the test set are scored on real, un-resampled rows.
from imblearn.pipeline import Pipeline as ImbPipeline

smote_binary = SMOTE(random_state=smote_random_state)
dt_classifier_binary = DecisionTreeClassifier()
pipeline_binary = ImbPipeline([
    ('smote', smote_binary),
    ('clf', dt_classifier_binary),
])

# Define the parameter grid for GridSearchCV for binary classification
# Keys carry the 'clf__' prefix because they address the classifier step of the pipeline.
# Each list holds a single value, so exactly one configuration is fitted;
# add more values per hyperparameter to run an actual search
param_grid_binary = {
    'clf__max_depth': [15],
    'clf__min_samples_split': [2],
    'clf__min_samples_leaf': [10],
    'clf__max_features': [None],
    'clf__random_state': [30]
}

# Create GridSearchCV with 5-fold cross-validation for binary classification
grid_search_binary = GridSearchCV(
    pipeline_binary, param_grid_binary, cv=5, n_jobs=-1, verbose=1, scoring='accuracy'
)

# Fit on the original (scaled, not yet resampled) training data; SMOTE is applied
# per training fold, and once more on the full training set for the final refit
grid_search_binary.fit(X_train_scaled_binary, y_train_binary)

# Print corresponding mean cross-validation accuracy for binary classification
print("Corresponding Mean CV accuracy:", grid_search_binary.best_score_)

# Best pipeline (SMOTE + decision tree) refit on the whole training set.
# SMOTE is skipped at prediction time, so it can be used like a plain classifier
optimal_model_binary = grid_search_binary.best_estimator_

# Make predictions on the test set for binary classification
y_pred_binary = optimal_model_binary.predict(X_test_scaled_binary)

# Evaluate the binary model
accuracy_binary = accuracy_score(y_test_binary, y_pred_binary)
print(f'Binary Test set accuracy: {accuracy_binary:.2f}')

# Display classification report for binary classification
print("Binary Classification Report:")
print(classification_report(y_test_binary, y_pred_binary))


# Select the test rows that the binary model predicted as tremor (class 1).
# The selection looks only at the prediction, not at the true label, so despite
# the "accurate" in the variable names it also contains false positives.
# These three variables are not read again by the rest of this cell.
accurate_ones_indices_binary = np.flatnonzero(y_pred_binary == 1)
X_accurate_ones_binary = X_test_scaled_binary[accurate_ones_indices_binary]
y_accurate_ones_binary = y_test_binary.iloc[accurate_ones_indices_binary]

# Tremor score classification only uses the rows where tremor is present (trem_score > 0).
# It is built from the full dataframe, independently of the binary model's predictions above
df_filtered_trem_score = df[df['trem_score'] > 0]

# Separate features and target variable for tremor score classification
X_trem_score = df_filtered_trem_score.drop(['trem_score', 'task_code', 'sens_pos', 'binary'], axis=1)
y_trem_score = df_filtered_trem_score['trem_score']

# Split the data into training and testing sets for tremor score classification
X_train_trem_score, X_test_trem_score, y_train_trem_score, y_test_trem_score = train_test_split(
    X_trem_score, y_trem_score, test_size=0.3, random_state=30
)

# Scale features for tremor score classification (scaler statistics come from the training split only)
scaler_trem_score = StandardScaler()
X_train_scaled_trem_score = scaler_trem_score.fit_transform(X_train_trem_score)
X_test_scaled_trem_score = scaler_trem_score.transform(X_test_trem_score)

# Define the parameter grid for GridSearchCV for tremor score classification
# Each list holds a single value, so exactly one configuration is fitted;
# add more values per hyperparameter to run an actual search
param_grid_trem_score = {
    'max_depth': [20],
    'min_samples_split': [2],
    'min_samples_leaf': [1],
    'max_features': [None],
    'random_state': [30]
}

# Create a Decision Tree model for tremor score classification
dt_classifier_trem_score = DecisionTreeClassifier()

# Create GridSearchCV with 6-fold cross-validation for tremor score classification
grid_search_trem_score = GridSearchCV(
    dt_classifier_trem_score, param_grid_trem_score, cv=6, n_jobs=-1, verbose=1, scoring='accuracy'
)

# Apply SMOTE to balance the classes for tremor score classification
# (smote_random_state is defined in the binary stage earlier in this cell)
# NOTE(review): resampling happens before cross-validation, so the validation folds
# contain synthetic samples and best_score_ is optimistic; trust the test-set
# metrics below instead. Moving SMOTE into an imblearn Pipeline would fix the CV
# estimate, but the rarest trem_score class looks too small for SMOTE's default
# k_neighbors once a fold is held out; confirm the class counts before changing this.
smote_trem_score = SMOTE(random_state=smote_random_state)
X_resampled_trem_score, y_resampled_trem_score = smote_trem_score.fit_resample(
    X_train_scaled_trem_score, y_train_trem_score
)

# Fit the grid search to the resampled data for tremor score classification
grid_search_trem_score.fit(X_resampled_trem_score, y_resampled_trem_score)

# Print corresponding mean cross-validation accuracy for tremor score classification
print("Corresponding Mean CV accuracy:", grid_search_trem_score.best_score_)

# Best Decision Tree found by the search, refit on the whole resampled training set
optimal_model_trem_score = grid_search_trem_score.best_estimator_

# Make predictions on the test set for tremor score classification
y_pred_trem_score = optimal_model_trem_score.predict(X_test_scaled_trem_score)

# Evaluate the tremor score model
accuracy_trem_score = accuracy_score(y_test_trem_score, y_pred_trem_score)
print(f'Tremor Score Test set accuracy: {accuracy_trem_score:.2f}')

# Display classification report for tremor score classification
print("Tremor Score Classification Report:")
print(classification_report(y_test_trem_score, y_pred_trem_score))


Fitting 5 folds for each of 1 candidates, totalling 5 fits
Corresponding Mean CV accuracy: 0.7886740331491713
Binary Test set accuracy: 0.74
Binary Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.79      0.78       730
           1       0.69      0.68      0.69       515

    accuracy                           0.74      1245
   macro avg       0.74      0.74      0.74      1245
weighted avg       0.74      0.74      0.74      1245

Fitting 6 folds for each of 1 candidates, totalling 6 fits
Corresponding Mean CV accuracy: 0.8630514705882354
Tremor Score Test set accuracy: 0.73
Tremor Score Classification Report:
              precision    recall  f1-score   support

           1       0.86      0.83      0.84       356
           2       0.49      0.49      0.49       112
           3       0.21      0.23      0.22        13
           4       0.00      0.00      0.00         2

    accuracy                           0.73    

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import numpy as np

# Load the dataset
df = pd.read_csv('/work/Data/full_df_all.csv')

# Create a binary target variable: 1 if tremor is present, 0 otherwise
df['binary'] = (df['trem_score'] > 0).astype(int)

# Separate features and target variable for binary classification
# ('trem_score' and 'binary' are targets; 'task_code' and 'sens_pos' are excluded from the features)
X_binary = df.drop(['trem_score', 'task_code', 'sens_pos', 'binary'], axis=1)
y_binary = df['binary']

# Split the data into training and testing sets for binary classification
X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
    X_binary, y_binary, test_size=0.3, random_state=30
)

# Scale features for binary classification (scaler statistics come from the training split only)
scaler_binary = StandardScaler()
X_train_scaled_binary = scaler_binary.fit_transform(X_train_binary)
X_test_scaled_binary = scaler_binary.transform(X_test_binary)

# SMOTE must run inside cross-validation, not before it. Oversampling the whole
# training set first puts synthetic points (interpolated from rows that end up in
# the training folds) into the validation folds, which inflates best_score_.
# imblearn's Pipeline applies the sampler only while fitting, so every validation
# fold and the test set are scored on real, un-resampled rows.
from imblearn.pipeline import Pipeline as ImbPipeline

smote_binary = SMOTE(random_state=4)
rf_classifier_binary = RandomForestClassifier()
pipeline_binary = ImbPipeline([
    ('smote', smote_binary),
    ('clf', rf_classifier_binary),
])

# Define the parameter grid for GridSearchCV for binary classification
# Keys carry the 'clf__' prefix because they address the classifier step of the pipeline.
# Each list holds a single value, so exactly one configuration is fitted;
# add more values per hyperparameter to run an actual search
param_grid_binary = {
    'clf__n_estimators': [100],
    'clf__max_depth': [20],
    'clf__min_samples_split': [2],
    'clf__min_samples_leaf': [5],
    'clf__max_features': [None],
    'clf__random_state': [30]
}

# Create GridSearchCV with 5-fold cross-validation for binary classification
grid_search_binary = GridSearchCV(
    pipeline_binary, param_grid_binary, cv=5, n_jobs=-1, verbose=1, scoring='accuracy'
)

# Fit on the original (scaled, not yet resampled) training data; SMOTE is applied
# per training fold, and once more on the full training set for the final refit
grid_search_binary.fit(X_train_scaled_binary, y_train_binary)

# Print corresponding mean cross-validation accuracy for binary classification
print("Corresponding Mean CV accuracy:", grid_search_binary.best_score_)
# Print optimal hyperparameters for binary classification (keys are 'clf__'-prefixed)
print("Optimal Hyperparameters for Binary Classification:", grid_search_binary.best_params_)

# Best pipeline (SMOTE + random forest) refit on the whole training set.
# SMOTE is skipped at prediction time, so it can be used like a plain classifier
optimal_model_binary = grid_search_binary.best_estimator_

# Make predictions on the test set for binary classification
y_pred_binary = optimal_model_binary.predict(X_test_scaled_binary)

# Evaluate the binary model
accuracy_binary = accuracy_score(y_test_binary, y_pred_binary)
print(f'Binary Test set accuracy: {accuracy_binary:.2f}')

# Display classification report for binary classification
print("Binary Classification Report:")
print(classification_report(y_test_binary, y_pred_binary))

# Select the test rows that the binary model predicted as tremor (class 1).
# The selection looks only at the prediction, not at the true label, so despite
# the "accurate" in the variable names it also contains false positives.
# These three variables are not read again by the rest of this cell.
accurate_ones_indices_binary = np.flatnonzero(y_pred_binary == 1)
X_accurate_ones_binary = X_test_scaled_binary[accurate_ones_indices_binary]
y_accurate_ones_binary = y_test_binary.iloc[accurate_ones_indices_binary]


# Tremor score stage: work only with the rows where tremor is present (trem_score > 0).
# The rows come from the full dataframe, not from the binary model's predictions.
tremor_rows = df[df['trem_score'] > 0]
X_trem_score = tremor_rows.drop(columns=['trem_score', 'task_code', 'sens_pos', 'binary'])
y_trem_score = tremor_rows['trem_score']

# 70/30 train/test split with a fixed seed
X_train_trem_score, X_test_trem_score, y_train_trem_score, y_test_trem_score = train_test_split(
    X_trem_score,
    y_trem_score,
    test_size=0.3,
    random_state=30,
)

# Standardise the features; the scaler is fitted on the training split only
scaler_trem_score = StandardScaler()
X_train_scaled_trem_score = scaler_trem_score.fit_transform(X_train_trem_score)
X_test_scaled_trem_score = scaler_trem_score.transform(X_test_trem_score)

# Oversample the minority tremor scores in the training split with SMOTE
# NOTE(review): resampling happens before cross-validation, so the validation folds
# contain synthetic samples and best_score_ is optimistic; trust the test-set
# metrics below instead. Moving SMOTE into an imblearn Pipeline would fix the CV
# estimate, but the rarest trem_score class looks too small for SMOTE's default
# k_neighbors once a fold is held out; confirm the class counts before changing this.
smote_trem_score = SMOTE(random_state=4)
X_resampled_trem_score, y_resampled_trem_score = smote_trem_score.fit_resample(
    X_train_scaled_trem_score, y_train_trem_score
)

# Hyperparameter grid for the Random Forest. Every list holds one value, so a
# single configuration is fitted; extend the lists to run an actual search
param_grid_trem_score = dict(
    n_estimators=[50],
    max_depth=[10],
    min_samples_split=[2],
    min_samples_leaf=[1],
    max_features=[None],
    random_state=[30],
)

# Random Forest wrapped in a grid search with 6-fold cross-validation
rf_classifier_trem_score = RandomForestClassifier()
grid_search_trem_score = GridSearchCV(
    estimator=rf_classifier_trem_score,
    param_grid=param_grid_trem_score,
    scoring='accuracy',
    cv=6,
    n_jobs=-1,
    verbose=1,
)
grid_search_trem_score.fit(X_resampled_trem_score, y_resampled_trem_score)

# Report the mean cross-validation accuracy and the selected hyperparameters
print("Corresponding Mean CV accuracy:", grid_search_trem_score.best_score_)
print("Optimal Hyperparameters for Tremor Score Classification:", grid_search_trem_score.best_params_)

# Best Random Forest found by the search, refit on the whole resampled training set
optimal_model_trem_score = grid_search_trem_score.best_estimator_

# Predict and score on the held-out (real, un-resampled) test split
y_pred_trem_score = optimal_model_trem_score.predict(X_test_scaled_trem_score)
accuracy_trem_score = accuracy_score(y_test_trem_score, y_pred_trem_score)
print(f'Tremor Score Test set accuracy: {accuracy_trem_score:.2f}')

# Per-class precision / recall / F1 for the tremor scores
print("Tremor Score Classification Report:")
print(classification_report(y_test_trem_score, y_pred_trem_score))

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Corresponding Mean CV accuracy: 0.7947513812154696
Optimal Hyperparameters for Binary Classification: {'max_depth': 20, 'max_features': None, 'min_samples_leaf': 5, 'min_samples_split': 2, 'n_estimators': 100, 'random_state': 30}
Binary Test set accuracy: 0.77
Binary Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.77      0.80       730
           1       0.70      0.77      0.74       515

    accuracy                           0.77      1245
   macro avg       0.77      0.77      0.77      1245
weighted avg       0.78      0.77      0.77      1245

Fitting 6 folds for each of 1 candidates, totalling 6 fits
Corresponding Mean CV accuracy: 0.8802083333333334
Optimal Hyperparameters for Tremor Score Classification: {'max_depth': 10, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50, 'random_state': 30}
Tremor Score Test set accurac

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=95af3c37-43e2-4ca4-b289-de5e336bd5a8' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>