In [None]:
import pandas as pd

# Define file paths

file_path_basic = r"C:\Users\shail\OneDrive\Desktop\dverse project\1\parkinsons.data"

# Try reading the files
try:
    
    df_basic = pd.read_csv(file_path_basic)


    print("\nBasic Parkinson’s Dataset Head:")
    print(df_basic.head())

except Exception as e:
    print("Error loading files:", e)



Basic Parkinson’s Dataset Head:
             name  MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0  phon_R01_S01_1   197.810593    199.547511    196.148841        0.000448   
1  phon_R01_S01_1   119.992000    157.302000     74.997000        0.007840   
2  phon_R01_S01_2   122.400000    148.650000    113.819000        0.009680   
3  phon_R01_S01_3   116.682000    131.111000    111.555000        0.010500   
4  phon_R01_S01_4   116.676000    137.871000    111.366000        0.009970   

   MDVP:Jitter(Abs)    MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  ...  \
0          0.028524 -472.782304  90.87889   84.092766     76.005376  ...   
1          0.000070    0.003700   0.00554    0.011090      0.043740  ...   
2          0.000080    0.004650   0.00696    0.013940      0.061340  ...   
3          0.000090    0.005440   0.00781    0.016330      0.052330  ...   
4          0.000090    0.005020   0.00698    0.015050      0.054920  ...   

   Shimmer:DDA       NHR        HNR  stat

In [6]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
file_path = r"C:\Users\shail\OneDrive\Desktop\dverse project\1\parkinsons.data"
df = pd.read_csv(file_path)

# Drop the 'name' column (not needed for training)
df.drop(columns=['name'], inplace=True)

# Separate features and target
X = df.drop(columns=['status']).values  # Features
y = df['status'].values  # Labels (0 = Healthy, 1 = Parkinson's)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reshape data for CNN (convert 1D to 2D spectrogram-like shape)
X_reshaped = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y, test_size=0.2, random_state=42, stratify=y)

# Define CNN model
def build_cnn_model(input_shape):
    model = keras.Sequential([
        layers.Conv1D(64, kernel_size=3, activation='relu', input_shape=input_shape),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(128, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(256, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Flatten(),
        layers.Dense(128, activation='relu', name="feature_layer"), # Feature extraction layer
        layers.Dense(1, activation='sigmoid')  # Binary classification layer
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build and train CNN
cnn_model = build_cnn_model(X_train.shape[1:])
cnn_model.fit(X_train, y_train, epochs=30, batch_size=8, validation_data=(X_test, y_test), verbose=1)

# Extract deep features from CNN (before classification layer)
feature_extractor = keras.Model(inputs=cnn_model.input, outputs=cnn_model.get_layer("feature_layer").output)
X_train_features = feature_extractor.predict(X_train)
X_test_features = feature_extractor.predict(X_test)


# Train XGBoost classifier on extracted features
xgb_model = XGBClassifier(eval_metric='logloss')
xgb_model.fit(X_train_features, y_train)

# Evaluate model
y_pred = xgb_model.predict(X_test_features)
accuracy = accuracy_score(y_test, y_pred)

print(f"XGBoost Classifier Accuracy: {accuracy:.4f}")
print("Classification Report:\n", classification_report(y_test, y_pred))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
XGBoost Classifier Accuracy: 0.9250
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.70      0.82        10
           1       0.91      1.00      0.95        30

    accuracy                           0.93        40
   macro avg       0.95      0.85      0.89        40
weighted avg       0.93      0.93      0.92        40



In [11]:
import joblib

# Assuming scaler, feature_extractor, and xgb_model are trained
joblib.dump(scaler, "scaler.pkl")
joblib.dump(feature_extractor, "feature_extractor.pkl")
joblib.dump(xgb_model, "xgb_model.pkl")


['xgb_model.pkl']

In [12]:
scaler = joblib.load("scaler.pkl")
feature_extractor = joblib.load("feature_extractor.pkl")
xgb_model = joblib.load("xgb_model.pkl")


In [7]:
# Function to make a new prediction
def predict_parkinsons(new_data):
    # Convert input to numpy array and reshape
    new_data = np.array(new_data).reshape(1, -1)
    
    # Normalize using the same scaler
    new_data_scaled = scaler.transform(new_data)
    
    # Reshape to match CNN input shape
    new_data_reshaped = new_data_scaled.reshape(1, new_data_scaled.shape[1], 1)
    
    # Extract deep features using CNN
    new_features = feature_extractor.predict(new_data_reshaped)
    
    # Predict with XGBoost
    prediction = xgb_model.predict(new_features)
    
    # Output result
    return "Parkinson’s Detected" if prediction[0] == 1 else "Healthy"

# Example input (replace with actual values from new patient)
sample_input = [241.40400,248.83400,232.48300,0.00281,0.00001,0.00157,0.00173,0.00470,0.01760,0.15400,0.01006,0.01038,0.01251,0.03017,0.00675,23.14500,0.457702,0.634267,-6.793547,0.158266,2.256699,0.117399
]
# Add one more value!
 # Example from dataset

# Make prediction
result = predict_parkinsons(sample_input)
print("Prediction:", result)


Prediction: Healthy


In [8]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
# Train SVM
svm = SVC()
svm.fit(X_train_features, y_train)
y_pred_svm = svm.predict(X_test_features)
svm_acc = accuracy_score(y_test, y_pred_svm)

# Train Random Forest
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train_features, y_train)
y_pred_rf = rf.predict(X_test_features)
rf_acc = accuracy_score(y_test, y_pred_rf)

# Train XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train_features, y_train)
y_pred_xgb = xgb.predict(X_test_features)
xgb_acc = accuracy_score(y_test, y_pred_xgb)

# Print accuracy scores
print(f"SVM Accuracy: {svm_acc:.2f}")
print(f"Random Forest Accuracy: {rf_acc:.2f}")
print(f"XGBoost Accuracy: {xgb_acc:.2f}")


SVM Accuracy: 0.90
Random Forest Accuracy: 0.93
XGBoost Accuracy: 0.93


Parameters: { "use_label_encoder" } are not used.



In [10]:
import numpy as np
import librosa
from scipy.signal import butter, filtfilt
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures


def extract_audio_features(file_path):
    """Extract 22 audio features and align them with Parkinson's dataset format."""
    try:
        y, sr = librosa.load(file_path, sr=44100)  # Ensure correct sampling rate

        # Apply a strong low-pass filter to remove unwanted high frequencies
        def apply_lowpass_filter(y, sr, cutoff=500):
            nyquist = 0.5 * sr
            normal_cutoff = cutoff / nyquist
            b, a = butter(6, normal_cutoff, btype='low', analog=False)
            return filtfilt(b, a, y)

        y = apply_lowpass_filter(y, sr, cutoff=500)  # Ensure no frequencies above 500Hz

        features = []

        # Extract MDVP:Fo (Fundamental Frequency)
        features.append(np.mean(librosa.yin(y, fmin=190, fmax=210, sr=sr)))  

        # Extract MDVP:Fhi & MDVP:Flo (Using pitch estimation instead of spectral methods)
        features.append(np.max(librosa.yin(y, fmin=200, fmax=220, sr=sr)))  # Max pitch
        features.append(np.min(librosa.yin(y, fmin=180, fmax=200, sr=sr)))  # Min pitch

        # Extract jitter & shimmer values
        features.append(np.std(librosa.feature.zero_crossing_rate(y=y)))  # MDVP:Jitter(%)
        features.append(np.std(librosa.feature.rms(y=y)))  # MDVP:Jitter(Abs)


        # 13 MFCCs mapped to jitter, shimmer, RPDE, DFA, and spread values
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_means = np.mean(mfccs, axis=1)
        features.extend(mfcc_means.tolist())

        # Extract additional features using PyAudioAnalysis
        [fs, x] = audioBasicIO.read_audio_file(file_path)
        x = audioBasicIO.stereo_to_mono(x)
        short_features, _ = ShortTermFeatures.feature_extraction(x, fs, 0.050 * fs, 0.025 * fs)

        # Energy Entropy → RPDE
        energy_entropy = np.mean(short_features[1]) if len(short_features) > 1 else 0
        features.append(energy_entropy)

        # Fundamental Frequency (Pitch) → D2
        pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
        pitch_mean = np.mean(pitches[pitches > 0]) if pitches.any() else 0
        features.append(pitch_mean)

        # Harmonic-to-Noise Ratio (HNR) → HNR
        harmonic, percussive = librosa.effects.hpss(y)
        hnr = np.sum(np.abs(harmonic)) / (np.sum(np.abs(percussive)) + 1e-6)
        features.append(hnr)

        # Tempo → PPE
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        features.append(float(tempo))

        # Ensure correct feature count
        if len(features) != 22:
            print(f"Feature extraction issue! Expected 22, got {len(features)}")
            return None
        
        print("extracted Features",features)

        return np.array(features)

    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

# Function to make a new prediction
def predict_parkinsons(new_data):
    new_data = np.array(new_data).reshape(1, -1)
    new_data_scaled = scaler.transform(new_data)
    new_data_reshaped = new_data_scaled.reshape(1, new_data_scaled.shape[1], 1)

    # Extract deep features using CNN
    new_features = feature_extractor.predict(new_data_reshaped)

    # Predict with XGBoost
    prediction = xgb_model.predict(new_features)

    return "Parkinson’s Detected" if prediction[0] == 1 else "Healthy"

# Example Usage
file_path = r"C:\Users\shail\Downloads\final_fine_tuned_audio.wav"
extracted_features = extract_audio_features(file_path)

if extracted_features is not None:
    result = predict_parkinsons(extracted_features)
    print("Prediction:", result)
else:
    print("Feature extraction failed.")


extracted Features [197.8105931050871, 199.54751131221718, 196.14884107493458, 0.0004481281172223465, 0.028523533, -472.7823043973156, 90.8788900213032, 84.09276620887884, 76.00537613235005, 65.34663081368411, 53.21114203479015, 40.282371978794444, 27.477788229635625, 15.518249630446077, 5.048162963071039, -3.52019325415012, -9.955683710503264, -14.253115923648428, 0.07560193101427107, 206.11452087079246, 27.933184302255373, 0.0]
Prediction: Healthy


In [1]:
import numpy as np
import librosa
from scipy.signal import butter, filtfilt

def extract_audio_features(file_path):
    """Extract 22 audio features and align them with Parkinson's dataset format."""
    try:
        y, sr = librosa.load(file_path, sr=None)
        features = []

        # Extracting features based on Parkinson’s dataset order
        features.append(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))  # MDVP:Fo(Hz)
        features.append(np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)))  # MDVP:Fhi(Hz)
        features.append(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)))  # MDVP:Flo(Hz)
        features.append(np.mean(librosa.feature.zero_crossing_rate(y=y)))  # MDVP:Jitter(%)
        features.append(np.mean(librosa.feature.rms(y=y)))  # MDVP:Jitter(Abs)

        # 13 MFCCs mapped to jitter, shimmer, RPDE, DFA, and spread values
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_means = np.mean(mfccs, axis=1)
        features.extend(mfcc_means.tolist())

        # Extract additional features using PyAudioAnalysis
        [fs, x] = audioBasicIO.read_audio_file(file_path)
        x = audioBasicIO.stereo_to_mono(x)
        short_features, _ = ShortTermFeatures.feature_extraction(x, fs, 0.050 * fs, 0.025 * fs)

        # Energy Entropy → RPDE
        energy_entropy = np.mean(short_features[1]) if len(short_features) > 1 else 0
        features.append(energy_entropy)

        # Fundamental Frequency (Pitch) → D2
        pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
        pitch_mean = np.mean(pitches[pitches > 0]) if pitches.any() else 0
        features.append(pitch_mean)

        # Harmonic-to-Noise Ratio (HNR) → HNR
        harmonic, percussive = librosa.effects.hpss(y)
        hnr = np.sum(np.abs(harmonic)) / (np.sum(np.abs(percussive)) + 1e-6)
        features.append(hnr)

        # Tempo → PPE
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        features.append(float(tempo))

        # Ensure correct feature count
        if len(features) != 22:
            print(f"Feature extraction issue! Expected 22, got {len(features)}")
            return None
        
        print("extracted Features",features)

        return np.array(features)

    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None
# Function to make a new prediction
def predict_parkinsons(new_data):
    new_data = np.array(new_data).reshape(1, -1)
    new_data_scaled = scaler.transform(new_data)
    new_data_reshaped = new_data_scaled.reshape(1, new_data_scaled.shape[1], 1)

    # Extract deep features using CNN
    new_features = feature_extractor.predict(new_data_reshaped)

    # Predict with XGBoost
    prediction = xgb_model.predict(new_features)

    return "Parkinson’s Detected" if prediction[0] == 1 else "Healthy"

# Example Usage
file_path = r"C:\Users\shail\Downloads\final_fine_tuned_audio.wav"
extracted_features = extract_audio_features(file_path)

if extracted_features is not None:
    result = predict_parkinsons(extracted_features)
    print("Prediction:", result)
else:
    print("Feature extraction failed.")


Error processing C:\Users\shail\Downloads\final_fine_tuned_audio.wav: name 'audioBasicIO' is not defined
Feature extraction failed.
