In [1]:
#ref from https://link.springer.com/article/10.1007/s11042-023-14932-x#Sec3
import pandas as pd
voice2 = pd.read_csv('Voice2.csv')

#normalise 
from sklearn import preprocessing
import numpy as np

columns_to_normalize = ['Jitter (local, absolute)', 'Jitter (rap)', 'Jitter (ppq5)',
       'Jitter (ddp)', 'Shimmer (local)', 'Shimmer (local,dB)',
       'Shimmer (apq3)', 'Shimmer (apq5)', 'Shimmer (apq11)', 'Shimmer (dda)',
       'Noise to harmonics', 'Harmonics to noise']

scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(voice2[columns_to_normalize])
Y = voice2['UPDRS']

print(X)
print(Y)


[[0.07116434 0.06432442 0.03963547 ... 0.07926653 0.01872258 0.55171747]
 [0.02988677 0.01590631 0.01301895 ... 0.01552217 0.01196833 0.72531478]
 [0.04161592 0.02447125 0.01764791 ... 0.02444224 0.04078217 0.68980561]
 ...
 [0.05063834 0.02080056 0.02083032 ... 0.07508589 0.02328289 0.64628893]
 [0.04071367 0.01660549 0.01562274 ... 0.03298978 0.0102918  0.62853435]
 [0.04251816 0.01782905 0.01779256 ... 0.0487396  0.01054315 0.59642147]]
0       34.398
1       34.398
2       34.398
3       34.398
4       34.399
         ...  
5870    33.485
5871    32.988
5872    32.495
5873    32.007
5874    31.513
Name: UPDRS, Length: 5875, dtype: float64


In [2]:
# autoencoder first:
# - feature reduction using unsupervised autoencoder to eliminate non relevant info 
# - 1 encoder, dense layer of 200 neurons btw input layer and latent space, decoder symmetrical with encoder 
# - latent space 10 neurons so initial data is reduced to 10 features 

# MLP (can be used alone):
# - 80% training 20% testing 
# - 1000 epochs, batch size 20
# - MLP 1 input layer, 1 output layer, 4 dense intermediate layers with 100, 200, 300, 100 neurons respectively + 2 dropout layers to avoid overfitting
# - if binary classification - 1 output neuron w sigmoid activation function 
# - if regression - relu activation function




In [6]:
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader

train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size = 0.2)
train_x = torch.Tensor(train_x)
train_y = torch.Tensor(train_y.to_numpy())
test_x = torch.Tensor(test_x)
test_y = torch.Tensor(test_y.to_numpy())
print("train x: ")
print(train_x)
print("train y: ")
print(train_y)

train_dataset = TensorDataset(train_x, train_y)
test_dataset = TensorDataset(test_x, test_y)

train_loader = DataLoader(train_dataset)
test_loader = DataLoader(test_dataset)


train x: 
tensor([[0.1345, 0.0615, 0.0567,  ..., 0.1165, 0.0325, 0.4745],
        [0.0842, 0.0699, 0.0433,  ..., 0.0313, 0.0940, 0.6500],
        [0.0694, 0.0393, 0.0305,  ..., 0.1045, 0.0139, 0.5273],
        ...,
        [0.0321, 0.0149, 0.0113,  ..., 0.0646, 0.0044, 0.7125],
        [0.0256, 0.0215, 0.0165,  ..., 0.0441, 0.0072, 0.6500],
        [0.1023, 0.0451, 0.0383,  ..., 0.0903, 0.0373, 0.5270]])
train y: 
tensor([14.1800, 10.8580, 16.9840,  ..., 31.8800, 36.9840, 37.8910])


In [7]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

mlp = MLPRegressor(hidden_layer_sizes=(100, 200, 300, 100), activation='relu', tol=1e-6, max_iter=1000)
mlp.fit(train_x, train_y)

train_y_pred = mlp.predict(train_x)
train_loss = mean_squared_error(train_y, train_y_pred)
train_r2 = r2_score(train_y, train_y_pred)

# Evaluate the model on the testing set
test_y_pred = mlp.predict(test_x)
test_loss = mean_squared_error(test_y, test_y_pred)
test_r2 = r2_score(test_y, test_y_pred)

print(f"Training Loss: {train_loss:.4f}")
print(f"Training R-squared: {train_r2:.4f}")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test R-squared: {test_r2:.4f}")



Training Loss: 83.0876
Training R-squared: 0.2779
Test Loss: 90.4429
Test R-squared: 0.1919


In [8]:
tolerance = 0.5
total = len(test_y)

accuracy_test = (sum(abs(test_y - test_y_pred) <= tolerance)/total)*100.0
print(f"Accuracy on test data: {accuracy_test:.2f}%")
accuracy_train = (sum(abs(train_y - train_y_pred) <= tolerance)/total)*100.0
print(f"Accuracy on train data: {accuracy_train:.2f}%")

rmse = np.sqrt(test_loss)

print(f"Root Mean Square Error (RMSE) on test data: {rmse:.4f}")


Accuracy on test data: 4.09%
Accuracy on train data: 20.26%
Root Mean Square Error (RMSE) on test data: 9.5101
