In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_validate,cross_val_score,train_test_split
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, f1_score

import joblib
import parselmouth
from parselmouth.praat import call
from scipy.io import wavfile
from pyrpde import  rpde
import re
import numpy as np
import IPython.display as ipd
import librosa
import noisereduce as nr

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
dataset = pd.read_csv("./parkinsons_updrs.data")
# df = dataset.drop(['name','status'], axis=1)
# df = dataset.drop(["name","RPDE","DFA","spread1","spread2","D2","PPE"], axis=1)
colums = ["Jitter(%)","Jitter(Abs)","Jitter:RAP","Jitter:PPQ5","Jitter:DDP","Shimmer","Shimmer(dB)","Shimmer:APQ3","Shimmer:APQ5","Shimmer:APQ11","Shimmer:DDA","NHR","HNR"]
df = dataset[colums]
dataset.count()

subject#         5875
age              5875
sex              5875
test_time        5875
motor_UPDRS      5875
total_UPDRS      5875
Jitter(%)        5875
Jitter(Abs)      5875
Jitter:RAP       5875
Jitter:PPQ5      5875
Jitter:DDP       5875
Shimmer          5875
Shimmer(dB)      5875
Shimmer:APQ3     5875
Shimmer:APQ5     5875
Shimmer:APQ11    5875
Shimmer:DDA      5875
NHR              5875
HNR              5875
RPDE             5875
DFA              5875
PPE              5875
dtype: int64

In [190]:
#MDVP:Fo(Hz)","MDVP:Fhi(Hz)","MDVP:Flo(Hz)","MDVP:Jitter(%)","MDVP:Jitter(Abs)","MDVP:RAP","MDVP:PPQ","Jitter:DDP","MDVP:Shimmer","MDVP:Shimmer(dB)","Shimmer:APQ3","Shimmer:APQ5","MDVP:APQ","Shimmer:DDA","NHR","HNR","status
dataset2 = pd.read_csv("./data/dataset-kaggle.csv")
columns = ["Median Pitch","Maximum pitch","Minimum pitch","Jitter(local)","Jitter(local, absolute)","Jitter (rap)","Jitter (ppq5)","Jitter (ddp)","Shimmer (local)","Shimmer (local, db)","Shimmer (apq3)","Shimmer (apq5)","Shimmer (apq11)","Shimmer (dda)","NDH","HTM","status"]
df2 = dataset2[columns]
df2 = df2.rename(columns={
    "Median Pitch":"MDVP:Fo(Hz)","Maximum pitch":"MDVP:Fhi(Hz)","Minimum pitch":"MDVP:Flo(Hz)","Jitter(local)":"MDVP:Jitter(%)","Jitter(local, absolute)":"MDVP:Jitter(Abs)","Jitter (rap)":"MDVP:RAP","Jitter (ppq5)":"MDVP:PPQ","Jitter (ddp)":"Jitter:DDP","Shimmer (local)":"MDVP:Shimmer","Shimmer (local, db)":"MDVP:Shimmer(dB)","Shimmer (apq3)":"Shimmer:APQ3","Shimmer (apq5)":"Shimmer:APQ5","Shimmer (apq11)":"MDVP:APQ","Shimmer (dda)":"Shimmer:DDA","NDH":"NHR","HTM":"HNR"
})
df3 =  pd.read_csv("./data/dataset.csv")
df3["status"] = 0
df_result = pd.concat([df,df2,df3],axis=0, ignore_index=True)
df_result.count()


MDVP:Fo(Hz)         287
MDVP:Fhi(Hz)        287
MDVP:Flo(Hz)        287
MDVP:Jitter(%)      287
MDVP:Jitter(Abs)    287
MDVP:RAP            287
MDVP:PPQ            287
Jitter:DDP          287
MDVP:Shimmer        287
MDVP:Shimmer(dB)    287
Shimmer:APQ3        287
Shimmer:APQ5        287
MDVP:APQ            287
Shimmer:DDA         287
NHR                 287
HNR                 287
status              287
dtype: int64

In [191]:
X = df_result.iloc[:,:-1]
y = df_result['status']
sm = SMOTE(random_state=300)
X, y = sm.fit_resample(X,y)
scaler = MinMaxScaler((-1,1))
X_features = scaler.fit_transform(X)
Y_labels = y


In [223]:
validation_size = 0.2

seed = 20

X_train, X_validation, Y_train, Y_validation = train_test_split(X_features,Y_labels, test_size=validation_size, random_state=seed)
num_instances = len(X_train)

scoring = 'accuracy'
results = []
rfc = RandomForestClassifier()
rfc.fit(X_train, Y_train)

svmc = SVC(kernel='linear')
svmc.fit(X_train, Y_train)

knnc = KNeighborsClassifier(n_neighbors=5)
knnc.fit(X_train, Y_train)


eclf = VotingClassifier(estimators=[('rf', rfc),('knn',knnc),('svc', svmc)], voting='soft')
eclf.fit(X_train, Y_train)
predictions = rfc.predict(X_validation)
cm = confusion_matrix(Y_validation, predictions)

TN, FP, FN, TP = cm.ravel()
report = classification_report(Y_validation, predictions, output_dict=True)

exactitud = accuracy_score(Y_validation, predictions)
sensibilidad = TP / (TP + FN)
especificidad = TN / (TN + FP)
precision = precision_score(Y_validation, predictions)
f1 = f1_score(Y_validation, predictions)

print(f"Precisión: {precision:.2f}")
print(f"Sensibilidad: {sensibilidad:.2f}")
print(f"Puntuación F1: {f1:.2f}")
print(f"Exactitud: {exactitud:.2f}")
print(f"Especificidad: {especificidad:.2f}")

joblib.dump(rfc, './export/modelo.pkl')
joblib.dump(scaler, './export/scaler.pkl')

Precisión: 1.00
Sensibilidad: 0.96
Puntuación F1: 0.98
Exactitud: 0.98
Especificidad: 1.00


['./export/scaler.pkl']

In [224]:
def measurePitch(sound, unit, startTime, endTime,f0min,f0max):

    pitch = call(sound, "To Pitch", float(startTime), 75, 300) 

    meanF0 = call(pitch, "Get mean", startTime, endTime, unit) 

    maxf0 = call(pitch, "Get maximum", startTime, endTime, unit, "Parabolic")

    minf0 = call(pitch, "Get minimum", startTime, endTime, unit, "Parabolic")

    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)

    localJitter = call(pointProcess, "Get jitter (local)", startTime, endTime, 0.0001, 0.02, 1.3)

    pulses = call([sound, pitch], "To PointProcess (cc)")
    voice_report = call([sound, pitch, pulses], "Voice report", startTime, endTime, f0min, f0max, 1.3, 1.6, 0.03, 0.45)
    voice_report_array=re.findall(r'-?\d+\.?\d*',voice_report)
    hnr = voice_report_array[-1]
    nhr = voice_report_array[-2]
    localabsoluteJitter = call(pointProcess, "Get jitter (local, absolute)", startTime, endTime, 0.0001, 0.02, 1.3)

    rapJitter = call(pointProcess, "Get jitter (rap)", startTime, endTime, 0.0001, 0.02, 1.3)

    ppq5Jitter = call(pointProcess, "Get jitter (ppq5)", startTime, endTime, 0.0001, 0.02, 1.3)

    ddpJitter = call(pointProcess, "Get jitter (ddp)", startTime, endTime, 0.0001, 0.02, 1.3)

    localShimmer =  call([sound, pointProcess], "Get shimmer (local)", startTime, endTime, 0.0001, 0.02, 1.3, 1.6)

    localdbShimmer = call([sound, pointProcess], "Get shimmer (local_dB)", startTime, endTime, 0.0001, 0.02, 1.3, 1.6)

    apq3Shimmer = call([sound, pointProcess], "Get shimmer (apq3)", startTime, endTime, 0.0001, 0.02, 1.3, 1.6)

    apq5Shimmer = call([sound, pointProcess], "Get shimmer (apq5)", startTime, endTime, 0.0001, 0.02, 1.3, 1.6)

    apq11Shimmer =  call([sound, pointProcess], "Get shimmer (apq11)", startTime, endTime, 0.0001, 0.02, 1.3, 1.6)

    ddaShimmer = call([sound, pointProcess], "Get shimmer (dda)", startTime, endTime, 0.0001, 0.02, 1.3, 1.6)

    return meanF0,maxf0,minf0,localJitter,localabsoluteJitter, rapJitter, ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer, apq5Shimmer, apq11Shimmer,ddaShimmer, nhr, hnr

In [225]:
def noiseReduce (sound_file):
    y, sr = librosa.load(sound_file, sr=None)
    reduced_noise = nr.reduce_noise(y=y, sr=sr, prop_decrease=0.2)
    sound = parselmouth.Sound(reduced_noise)
    return sound

In [230]:
sound_file = "./uploaded_files/EvaMachado.wav"

sound = parselmouth.Sound(sound_file)
# sound = noiseReduce(sound_file)
f0min = 75
f0max = 300
startTime = 0
endTime = 0
unit = "Hertz"


(meanF0,maxf0,minf0,localJitter,localabsoluteJitter, rapJitter, ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer, apq5Shimmer, apq11Shimmer,ddaShimmer, nhr, hnr) = measurePitch(
    sound, unit, startTime, endTime, f0min, f0max)


In [231]:
dataframe = pd.DataFrame(np.column_stack([meanF0,maxf0,minf0,localJitter,localabsoluteJitter, rapJitter, ppq5Jitter,ddpJitter,localShimmer,localdbShimmer,apq3Shimmer, apq5Shimmer, apq11Shimmer,ddaShimmer, nhr, hnr]), 
                  columns=["MDVP:Fo(Hz)","MDVP:Fhi(Hz)","MDVP:Flo(Hz)","MDVP:Jitter(%)","MDVP:Jitter(Abs)","MDVP:RAP","MDVP:PPQ","Jitter:DDP","MDVP:Shimmer","MDVP:Shimmer(dB)","Shimmer:APQ3","Shimmer:APQ5","MDVP:APQ","Shimmer:DDA","NHR","HNR"])
# dataframe = dataframe.apply(pd.to_numeric, errors="coerce")
dataframe.head()
# dataframe.to_csv('./data/dataset2.csv', index=False)
model = joblib.load("./export/modelo.pkl")
scaler = joblib.load("./export/scaler.pkl")
#No Parkinson Data
parkinson_data = np.array([[119.99200,157.30200,74.99700,0.00784,0.00007,0.00370,0.00554,0.01109,0.04374,0.42600,0.02182,0.03130,0.02971,0.06545,0.02211,21.03300]])
#Parkinson Data
no_parkinson_data = np.array([[202.26600,211.60400,197.07900,0.00180,0.000009,0.00093,0.00107,0.00278,0.00954,0.08500,0.00469,0.00606,0.00719,0.01407,0.00072,32.684001]])

scaled_new_data = scaler.transform(dataframe.values)
prediction = model.predict(scaled_new_data)
if (prediction[0] == 1):
    print("Parkinson")
else:
    print("No Parkinson")

No Parkinson


