# In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, KFold

# Step 1: Load and Preprocess Data
# The 'status' column is the target; it and the 'name' column are excluded
# from the feature matrix.
parkinsons_data = pd.read_csv('parkinsons.csv')
X = parkinsons_data.drop(columns=['status','name'])  # Features
y = parkinsons_data['status']  # Target variable

# Step 2: Feature Selection using SelectKBest
# Keep the k features with the highest ANOVA F-score (f_classif) against y.
# NOTE(review): the selector is fitted on the full dataset, i.e. before the
# train/test split, so held-out rows influence which features are kept.
selector = SelectKBest(score_func=f_classif, k=9)
X_selected = selector.fit_transform(X, y)

selected_indices = selector.get_support(indices=True)
# Get the names of the selected features
selected_feature_names = X.columns[selected_indices]

# Report the real number of selected features; the heading used to be
# hard-coded to "six" although k=9 features are selected.
print(f"Top {len(selected_feature_names)} features:")
for feature in selected_feature_names:
    print(feature)

# Step 3: Split Data
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_selected,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 4: Data Standardization
# The scaling statistics are learned from the training rows only and then
# applied unchanged to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Step 5: Model Selection and Hyperparameter Tuning using GridSearchCV
# One sub-grid per kernel: the linear kernel ignores `gamma`, so crossing it
# with every gamma value would only cross-validate identical models several
# times. Only the RBF kernel is searched over gamma.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10, 100]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001]},
]

svm_classifier = SVC()
# 5-fold cross-validated search on the training data, ranked by accuracy.
grid_search = GridSearchCV(estimator=svm_classifier, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)
# Parameter dict of the best-ranked candidate; it only contains 'gamma' when
# the RBF kernel wins.
best_params = grid_search.best_params_

# Step 6: Evaluate Model
# Train a fresh SVM with the tuned hyperparameters, then score it on both the
# training and the held-out partition.
best_svm_classifier = SVC(**best_params)
best_svm_classifier.fit(X_train, y_train)

y_train_pred = best_svm_classifier.predict(X_train)
y_test_pred = best_svm_classifier.predict(X_test)

# Accuracy
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Sensitivity (Recall)
train_sensitivity = recall_score(y_train, y_train_pred)
test_sensitivity = recall_score(y_test, y_test_pred)

# Precision
train_precision = precision_score(y_train, y_train_pred)
test_precision = precision_score(y_test, y_test_pred)

# F1-score
train_f1_score = f1_score(y_train, y_train_pred)
test_f1_score = f1_score(y_test, y_test_pred)

# Report each metric as a training/testing pair.
for _label, _value in (
    ("Training Accuracy:", train_accuracy),
    ("Testing Accuracy:", test_accuracy),
    ("Training Sensitivity:", train_sensitivity),
    ("Testing Sensitivity:", test_sensitivity),
    ("Training Precision:", train_precision),
    ("Testing Precision:", test_precision),
    ("Training F1-score:", train_f1_score),
    ("Testing F1-score:", test_f1_score),
):
    print(_label, _value)

# Cross-Validation
# Local import: only this step needs the pipeline helper.
from sklearn.pipeline import make_pipeline

# The SVM hyperparameters were tuned on standardized features, so the
# cross-validated model must standardize too. Wrapping the scaler and the
# classifier in a pipeline makes every fold fit its own scaler on that fold's
# training part; scoring the bare classifier on unscaled X_selected would
# evaluate a different model than the one trained above.
# cross_val_score clones the estimator, so best_svm_classifier keeps its fit.
# NOTE(review): X_selected was chosen using all rows, so feature selection is
# still not repeated per fold.
k_fold = KFold(n_splits=5, shuffle=True, random_state=42)
cv_model = make_pipeline(StandardScaler(), best_svm_classifier)
cv_scores = cross_val_score(cv_model, X_selected, y, cv=k_fold, scoring='accuracy')
print("Cross-Validation Mean Accuracy:", np.mean(cv_scores))

import pickle

# Persist the trained classifier and read it back.
# NOTE(review): only the SVM is saved; the fitted `scaler` is not, yet the
# model was trained on standardized features, so consumers of this file have
# to apply the same scaling themselves.
filename = 'parkinsons_model.sav'

# Context managers close the file handles deterministically; the bare
# open(...) calls used before were never closed.
with open(filename, 'wb') as model_file:
    pickle.dump(best_svm_classifier, model_file)

# Unpickling executes arbitrary code, so only load files this script wrote.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Output: ModuleNotFoundError: No module named 'pandas'