# Regression algorithms

In [1]:
import numpy as np
import pandas as pd


In [2]:
from sklearn.preprocessing import LabelEncoder

In [3]:

le = LabelEncoder()
computer_hardware = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/cpu-performance/machine.data', header=None, names=['vendor', 'model', 'MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'label'])
computer_hardware.head()

Unnamed: 0,vendor,model,MYCT,MMIN,MMAX,CACH,CHMIN,CHMAX,PRP,label
0,adviser,32/60,125,256,6000,256,16,128,198,199
1,amdahl,470v/7,29,8000,32000,32,8,32,269,253
2,amdahl,470v/7a,29,8000,32000,32,8,32,220,253
3,amdahl,470v/7b,29,8000,32000,32,8,32,172,253
4,amdahl,470v/7c,29,8000,16000,32,8,16,132,132


In [6]:
computer_hardware['vendor']=le.fit_transform(computer_hardware['vendor'])
computer_hardware['model'] = le.fit_transform(computer_hardware['model'])
computer_hardware.head()

Unnamed: 0,vendor,model,MYCT,MMIN,MMAX,CACH,CHMIN,CHMAX,PRP,label
0,0,29,125,256,6000,256,16,128,198,199
1,1,62,29,8000,32000,32,8,32,269,253
2,1,63,29,8000,32000,32,8,32,220,253
3,1,64,29,8000,32000,32,8,32,172,253
4,1,65,29,8000,16000,32,8,16,132,132


In [7]:
parkinsons = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/telemonitoring/parkinsons_updrs.data')
parkinsons.head()

Unnamed: 0,subject#,age,sex,test_time,motor_UPDRS,total_UPDRS,Jitter(%),Jitter(Abs),Jitter:RAP,Jitter:PPQ5,...,Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,Shimmer:APQ11,Shimmer:DDA,NHR,HNR,RPDE,DFA,PPE
0,1,72,0,5.6431,28.199,34.398,0.00662,3.4e-05,0.00401,0.00317,...,0.23,0.01438,0.01309,0.01662,0.04314,0.01429,21.64,0.41888,0.54842,0.16006
1,1,72,0,12.666,28.447,34.894,0.003,1.7e-05,0.00132,0.0015,...,0.179,0.00994,0.01072,0.01689,0.02982,0.011112,27.183,0.43493,0.56477,0.1081
2,1,72,0,19.681,28.695,35.389,0.00481,2.5e-05,0.00205,0.00208,...,0.181,0.00734,0.00844,0.01458,0.02202,0.02022,23.047,0.46222,0.54405,0.21014
3,1,72,0,25.647,28.905,35.81,0.00528,2.7e-05,0.00191,0.00264,...,0.327,0.01106,0.01265,0.01963,0.03317,0.027837,24.445,0.4873,0.57794,0.33277
4,1,72,0,33.642,29.187,36.375,0.00335,2e-05,0.00093,0.0013,...,0.176,0.00679,0.00929,0.01819,0.02036,0.011625,26.126,0.47188,0.56122,0.19361


In [9]:
parkinsons.rename(columns={'motor_UPDRS':'label'},inplace=True)
parkinsons.head()

Unnamed: 0,subject#,age,sex,test_time,label,total_UPDRS,Jitter(%),Jitter(Abs),Jitter:RAP,Jitter:PPQ5,...,Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,Shimmer:APQ11,Shimmer:DDA,NHR,HNR,RPDE,DFA,PPE
0,1,72,0,5.6431,28.199,34.398,0.00662,3.4e-05,0.00401,0.00317,...,0.23,0.01438,0.01309,0.01662,0.04314,0.01429,21.64,0.41888,0.54842,0.16006
1,1,72,0,12.666,28.447,34.894,0.003,1.7e-05,0.00132,0.0015,...,0.179,0.00994,0.01072,0.01689,0.02982,0.011112,27.183,0.43493,0.56477,0.1081
2,1,72,0,19.681,28.695,35.389,0.00481,2.5e-05,0.00205,0.00208,...,0.181,0.00734,0.00844,0.01458,0.02202,0.02022,23.047,0.46222,0.54405,0.21014
3,1,72,0,25.647,28.905,35.81,0.00528,2.7e-05,0.00191,0.00264,...,0.327,0.01106,0.01265,0.01963,0.03317,0.027837,24.445,0.4873,0.57794,0.33277
4,1,72,0,33.642,29.187,36.375,0.00335,2e-05,0.00093,0.0013,...,0.176,0.00679,0.00929,0.01819,0.02036,0.011625,26.126,0.47188,0.56122,0.19361


# comparison of diff algorith with diff dataset

In [10]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import LinearSVR
from sklearn.tree import DecisionTreeRegressor

models = [
    ('LR', LinearRegression()),
    ('L', Lasso()),
    ('R', Ridge()),
    ('SVR', LinearSVR()),
    ('DT', DecisionTreeRegressor()),
]

In [14]:
from sklearn.model_selection import train_test_split       
for dataset_name,dataset in [('computer_hardware',computer_hardware),('parkinsons',parkinsons)]:
    X=np.array(dataset.drop(['label'],axis=1))
    y=np.array(dataset['label'])
    x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
    for name,model in models:
        clf=model
        clf.fit(x_train,y_train)
        accuracy=clf.score(x_test,y_test)
        print(dataset_name,name,accuracy)     

computer_hardware LR 0.9438611618299353
computer_hardware L 0.9439215152431056
computer_hardware R 0.9438612466623207
computer_hardware SVR 0.6730979966143437
computer_hardware DT 0.8753025237169103
parkinsons LR 0.9068547520909814
parkinsons L 0.8983382060322663
parkinsons R 0.9049789120180208




parkinsons SVR 0.5024533088512617
parkinsons DT 0.9988988507417164


