In [30]:
import pandas as pd
df = pd.read_csv('parkinsons_updrs.csv')

df.head()
X = df.drop(columns = ['subject#', 'motor_UPDRS', 'total_UPDRS'])
y = df['motor_UPDRS']

In [31]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [32]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [28]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# AdaBoost 

In [50]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error


adaboost_model = AdaBoostRegressor(base_estimator = DecisionTreeRegressor(max_depth = 10), n_estimators = 100, random_state = 42)


adaboost_model.fit(X_train_scaled, y_train)

y_train_pred = adaboost_model.predict(X_train_scaled)
y_test_pred =  adaboost_model.predict(X_test_scaled)

train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

R2_score_train = r2_score(y_train,  y_train_pred)
R2_score_test = r2_score(y_test, y_test_pred)

train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_train, y_train_pred)

print("Train MSE adaboost:", train_mse)
print("Test MSE adaboost:",  test_mse)

print("R2 Score Train adaboost:", R2_score_train)
print("R2 Score Test adaboost:", R2_score_test)

print("Train MAE adaboost:", train_mae)
print("Test MAE adaboost:",  test_mae)




Train MSE: 0.15577547636826256
Test MSE: 0.7538382417419265
R2 Score Train: 0.9976620304077846
R2 Score Test: 0.9881897636798131
Train MAE: 0.2403665238584856
Test MAE: 0.2403665238584856


# GradientBoost

In [51]:
from sklearn.ensemble import GradientBoostingRegressor

gradient_boost_model = GradientBoostingRegressor(n_estimators = 100, learning_rate = 0.1, max_depth = 7, random_state = 42)

gradient_boost_model.fit(X_train_scaled, y_train)


y_train_pred = gradient_boost_model.predict(X_train_scaled)
y_test_pred = gradient_boost_model.predict(X_test_scaled)


train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

R2_score_train = r2_score(y_train,  y_train_pred)
R2_score_test = r2_score(y_test, y_test_pred)

train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_train, y_train_pred)

print("Train MSE GradientBoost:", train_mse)
print("Test MSE GradientBoost:",  test_mse)

print("R2 Score Train GradientBoost:", R2_score_train)
print("R2 Score Test GradientBoost:", R2_score_test)

print("Train MAE GradientBoost:", train_mae)
print("Test MAE GradientBoost:",  test_mae)

Train MSE: 0.3678907403736349
Test MSE: 1.8467804622491724
R2 Score Train: 0.9944784802826228
R2 Score Test: 0.971066851636145
Train MAE: 0.4184999384429958
Test MAE: 0.4184999384429958


# Decision Tree

In [52]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error


param_grid = {
    'max_depth': [3, 5, 7, 10, 15],
    'min_samples_split': [2, 5, 10, 15, 20],
    'min_samples_leaf': [1, 2, 4, 6, 8]
}

dt_regressor = DecisionTreeRegressor(random_state=42)
grid_search = GridSearchCV(estimator = dt_regressor, param_grid = param_grid, cv = 5, scoring = 'neg_mean_squared_error')


grid_search.fit(X_train_scaled, y_train)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_


y_train_pred = best_estimator.predict(X_train_scaled)
y_test_pred = best_estimator.predict(X_test_scaled)

train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

R2_score_train = r2_score(y_train,  y_train_pred)
R2_score_test = r2_score(y_test, y_test_pred)

train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_train, y_train_pred)

print("Train MSE DecisionTree:", train_mse)
print("Test MSE DecisionTree:",  test_mse)

print("R2 Score Train DecisionTree:", R2_score_train)
print("R2 Score Test DecisionTree:", R2_score_test)

print("Train MAE DecisionTree:", train_mae)
print("Test MAE DecisionTree:",  test_mae)

print("Best Parameters:", best_params)

Train MSE: 0.9887422520610514
Test MSE: 5.306450593560369
R2 Score Train: 0.9851603771418262
R2 Score Test: 0.9168648762279177
Train MAE: 0.3996614746801549
Test MAE: 0.3996614746801549
Best Parameters: {'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 10}


# SVM

In [53]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error


param_grid = {
    'kernel': ['rbf'],
    'gamma': [ 0.3, 0.4],  
    'C': [5.0, 10.0,25]       
}


svm_regressor = SVR()

grid_search = GridSearchCV(estimator = svm_regressor, param_grid=param_grid, cv = 5, scoring='neg_mean_squared_error')


grid_search.fit(X_train, y_train)


best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

print("Best Parameters:", best_params)


y_train_pred = best_estimator.predict(X_train)
y_test_pred = best_estimator.predict(X_test)


train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

R2_score_train = r2_score(y_train,  y_train_pred)
R2_score_test = r2_score(y_test, y_test_pred)

train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_train, y_train_pred)

print("Train MSE SVM:", train_mse)
print("Test MSE SVM:",  test_mse)

print("R2 Score Train SVM:", R2_score_train)
print("R2 Score Test SVM:", R2_score_test)

print("Train MAE SVM:", train_mae)
print("Test MAE SVM:",  test_mae)

Best Parameters: {'C': 25, 'gamma': 0.3, 'kernel': 'rbf'}
Train MSE: 0.7984704300104782
Test MSE: 13.288623266939107
R2 Score Train: 0.9880160880956995
R2 Score Test: 0.7918097378691843
Train MAE: 0.22024089728979943
Test MAE: 0.22024089728979943


# Linear Regression

In [55]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()

model.fit(X_train_scaled, y_train)

y_test_pred = model.predict(X_test_scaled)
y_train_pred = model.predict(X_train_scaled)


train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

R2_score_train = r2_score(y_train,  y_train_pred)
R2_score_test = r2_score(y_test, y_test_pred)

train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_train, y_train_pred)

print("Train MSE LinearRegression:", train_mse)
print("Test MSE LinearRegression:",  test_mse)

print("R2 Score Train LinearRegression:", R2_score_train)
print("R2 Score Test LinearRegression:", R2_score_test)

print("Train MAE LinearRegression:", train_mae)
print("Test MAE LinearRegression:",  test_mae)

Train MSE: 55.732264308118886
Test MSE: 56.01419722157733
R2 Score Train: 0.1635375330218255
R2 Score Test: 0.12243652571441144
Train MAE: 6.306060271125649
Test MAE: 6.306060271125649


# With - LDA

In [56]:
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error



pca = PCA(n_components=0.95) 
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)


model = LinearRegression()
model.fit(X_train_pca, y_train)


predictions = model.predict(X_test_pca)


mse = mean_squared_error(y_test, predictions)
print("Mean Squared Error (MSE) with PCA:", mse)


Mean Squared Error (MSE) with PCA: 57.62782199056316
