In [2]:
import pandas as pd
df = pd.read_csv('parkinsons_updrs.csv')

df.head()
X = df.drop(columns = ['subject#', 'motor_UPDRS', 'total_UPDRS'])
y = df['motor_UPDRS']

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# 1. Import the data
# Assuming you've imported your dataset into X and y

# 2. Preprocessing
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4. Feature Engineering (if needed)

# 5. PCA
pca = PCA(n_components=0.95)  # Keep 95% of the variance
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# 6. Model Selection
model = LinearRegression()
model.fit(X_train_pca, y_train)

# 7. Model Evaluation
train_mse = mean_squared_error(y_train, model.predict(X_train_pca))
test_mse = mean_squared_error(y_test, model.predict(X_test_pca))

print("Train MSE:", train_mse)
print("Test MSE:", test_mse)


Train MSE: 57.65922330300517
Test MSE: 57.62782199056316


In [5]:
from sklearn.tree import DecisionTreeRegressor

# Create and train the decision tree model
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train_pca, y_train)

# Evaluate the model
dt_train_mse = mean_squared_error(y_train, dt_model.predict(X_train_pca))
dt_test_mse = mean_squared_error(y_test, dt_model.predict(X_test_pca))

print("Decision Tree Train MSE:", dt_train_mse)
print("Decision Tree Test MSE:", dt_test_mse)

Decision Tree Train MSE: 1.0758720005248268e-31
Decision Tree Test MSE: 55.9844632303234


In [6]:
from sklearn.ensemble import RandomForestRegressor

# Create and train the random forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_pca, y_train)

# Evaluate the model
rf_train_mse = mean_squared_error(y_train, rf_model.predict(X_train_pca))
rf_test_mse = mean_squared_error(y_test, rf_model.predict(X_test_pca))

print("Random Forest Train MSE:", rf_train_mse)
print("Random Forest Test MSE:", rf_test_mse)


Random Forest Train MSE: 3.5002903724339496
Random Forest Test MSE: 24.841065495715316


In [26]:
from sklearn.ensemble import GradientBoostingRegressor

# Create and train the gradient boosting model
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_model.fit(X_train_pca, y_train)

# Evaluate the model
gb_train_mse = mean_squared_error(y_train, gb_model.predict(X_train_pca))
gb_test_mse = mean_squared_error(y_test, gb_model.predict(X_test_pca))

print("Gradient Boosting Train MSE:", gb_train_mse)
print("Gradient Boosting Test MSE:", gb_test_mse)


Gradient Boosting Train MSE: 43.55462763963456
Gradient Boosting Test MSE: 48.721595835228


In [17]:
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline

# Define the steps for the pipeline including scaling, PCA, and SVM
svm_pipeline = Pipeline([
    ('scaling', StandardScaler()),  # Step 1: Scaling
    ('pca', PCA(n_components = 0.95)),  # Step 2: PCA
    ('svm', SVR(kernel='poly'))  # Step 3: SVM with radial basis function (RBF) kernel
])

# Train the SVM model
svm_pipeline.fit(X_train, y_train)

# Evaluate the model
svm_train_mse = mean_squared_error(y_train, svm_pipeline.predict(X_train))
svm_test_mse = mean_squared_error(y_test, svm_pipeline.predict(X_test))

print("SVM Train MSE:", svm_train_mse)
print("SVM Test MSE:", svm_test_mse)


SVM Train MSE: 61.169282419383784
SVM Test MSE: 60.90017403701152


In [7]:
import pandas as pd
df = pd.read_csv('parkinsons_updrs.csv')

df.shape

(5875, 22)

In [9]:
X = df.drop(columns = ['motor_UPDRS', 'total_UPDRS','subject#'])
y_motor = df['motor_UPDRS']
y_total = df['total_UPDRS']

In [10]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train, y_test = train_test_split(X, y_motor, test_size = 0.2, random_state = 42)

In [11]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
from sklearn.tree import DecisionTreeRegressor


dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train_scaled, y_train)


dt_train_mse = mean_squared_error(y_train, dt_model.predict(X_train))
dt_test_mse = mean_squared_error(y_test, dt_model.predict(X_test))

print("Decision Tree Train MSE:", dt_train_mse)
print("Decision Tree Test MSE:", dt_test_mse)

Decision Tree Train MSE: 104.49565952852977
Decision Tree Test MSE: 99.53411412204254


