In [1]:
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import sklearn
# ^^^ pyforest auto-imports - don't write above this line
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
# Load the raw measurements; the path is relative to the notebook's working directory.
data = pd.read_csv('pmsm_temperature_data.csv')

In [3]:
# (rows, columns) of the loaded frame.
data.shape

(998070, 13)

In [4]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
data.describe()

Unnamed: 0,ambient,coolant,u_d,u_q,motor_speed,torque,i_d,i_q,pm,stator_yoke,stator_tooth,stator_winding,profile_id
count,998070.0,998070.0,998070.0,998070.0,998070.0,998070.0,998070.0,998070.0,998070.0,998070.0,998070.0,998070.0,998070.0
mean,-0.003905,0.004723,0.00478,-0.00569,-0.006336,-0.003333,0.006043,-0.003194,-0.004396,0.000609,-0.002208,-0.003935,50.732001
std,0.993127,1.002423,0.997878,1.00233,1.001229,0.997907,0.998994,0.997912,0.995686,1.001049,0.999597,0.998343,22.073125
min,-8.573954,-1.429349,-1.655373,-1.861463,-1.371529,-3.345953,-3.245874,-3.341639,-2.631991,-1.834688,-2.066143,-2.019973,4.0
25%,-0.599385,-1.037925,-0.826359,-0.92739,-0.951892,-0.266917,-0.756296,-0.257269,-0.672308,-0.747265,-0.761951,-0.725622,32.0
50%,0.266157,-0.177187,0.267542,-0.099818,-0.140246,-0.187246,0.213935,-0.190076,0.094367,-0.057226,0.005085,0.006536,56.0
75%,0.686675,0.650709,0.358491,0.852625,0.853584,0.547171,1.013975,0.49926,0.680691,0.697344,0.772239,0.72566,68.0
max,2.967117,2.649032,2.274734,1.793498,2.024164,3.016971,1.060937,2.914185,2.917456,2.449158,2.326668,2.653781,81.0


In [5]:
# Preview the first five rows.
data.head()

Unnamed: 0,ambient,coolant,u_d,u_q,motor_speed,torque,i_d,i_q,pm,stator_yoke,stator_tooth,stator_winding,profile_id
0,-0.752143,-1.118446,0.327935,-1.297858,-1.222428,-0.250182,1.029572,-0.24586,-2.522071,-1.831422,-2.066143,-2.018033,4
1,-0.771263,-1.117021,0.329665,-1.297686,-1.222429,-0.249133,1.029509,-0.245832,-2.522418,-1.830969,-2.064859,-2.017631,4
2,-0.782892,-1.116681,0.332771,-1.301822,-1.222428,-0.249431,1.029448,-0.245818,-2.522673,-1.8304,-2.064073,-2.017343,4
3,-0.780935,-1.116764,0.3337,-1.301852,-1.22243,-0.248636,1.032845,-0.246955,-2.521639,-1.830333,-2.063137,-2.017632,4
4,-0.774043,-1.116775,0.335206,-1.303118,-1.222429,-0.248701,1.031807,-0.24661,-2.5219,-1.830498,-2.062795,-2.018145,4


## Process

In [59]:
# Number of missing entries in each column.
data.isna().sum()

ambient           0
coolant           0
u_d               0
u_q               0
motor_speed       0
torque            0
i_d               0
i_q               0
pm                0
stator_yoke       0
stator_tooth      0
stator_winding    0
profile_id        0
dtype: int64

#### The data is anonymized and appears to be standardized: every measured column has mean ≈ 0 and standard deviation ≈ 1.

In [61]:
# Copy of the frame without the profile_id column.
# NOTE(review): data_model is not referenced by any later cell visible here.
data_model = data.drop('profile_id', axis=1)

## EDA

In [4]:
import warnings
# Silence every warning for the rest of the session; this also hides
# deprecation notices raised by the plotting and modelling libraries.
warnings.filterwarnings('ignore')

In [None]:
# Distribution of every column except the last one (profile_id) on a 3x4 grid,
# with the skewness in the legend and a red vertical line at the column mean.
# NOTE(review): `sns` is not imported in any cell visible here - presumably
# supplied by pyforest's lazy imports; confirm.
plt.figure(figsize=(20, 12))
for position, column in enumerate(data.columns[:-1], start=1):
    ax = plt.subplot(3, 4, position)
    values = data[column]

    skew_ = np.round(values.skew(), 2)
    sns.distplot(values, label='skewness = ' + str(skew_), ax=ax)

    ax.vlines(values.mean(), ymin=0, ymax=1, color='r')
    ax.legend()
plt.show()

![dist](./img/distplots.png)

* Our target (pm) looks approximately normally distributed.
* Most of the attributes look multi-modal; this may be due to profile_id.
* Most of the attributes have skewness below 0.5, except ambient temperature, coolant and i_d.
    > i_d (current along the d-axis): this may be directly related to the current through the field windings, but the motor used in testing is a PM (permanent magnet) motor, so the field flux is constant; i_d may still depend on voltage and load, which in turn depend on speed.
    
    > Ambient and coolant temperature may depend directly on the testing scenario.
* The stator_tooth and stator_winding distributions look similar.

In [None]:
# Number of rows recorded for each profile_id, shown as a bar chart.
rows_per_profile = data['profile_id'].value_counts()

fig = px.bar(
    x=rows_per_profile.index,
    y=rows_per_profile.values,
    labels={'x': 'Profile_id', 'y': 'Counts'},
    range_x=(0, 82),
    template='plotly_dark',
)
fig.show()

![id](./img/id_counts.png)

#### Is profile_id a random number, or was the data for some profile ids below 40 removed, with the ids being continuous afterwards?

### ***Profile Id*** does not make sense to use as a model feature, because each test case is simply assigned a profile id, we don't know which parameters vary between test cases, and in a real situation the profile_id is not known (although we still could use it by treating the task as a classification problem (profile_id) first and a regression problem (pm) afterwards).
#### But we can still check how the attributes change within each profile id

In [19]:
# data['profile_id'].value_counts()

In [None]:
# One pm distribution panel per profile_id, laid out on an 11x5 grid.
plt.figure(figsize=(25, 40))
for panel, pid in enumerate(data['profile_id'].unique(), start=1):
    profile_rows = data.loc[data['profile_id'] == pid]

    ax = plt.subplot(11, 5, panel)
    sns.distplot(profile_rows['pm'], label='profile_id = ' + str(pid), ax=ax)
    ax.legend()
plt.show()

![pm](./img/pm_dist.png)

> I thought the distribution of pm is slightly normal, but for each test case it is a multi-modal distribution.

In [None]:
# Ambient vs. coolant density curves (no histogram), one panel per profile_id.
plt.figure(figsize=(25, 50))
for panel, pid in enumerate(data['profile_id'].unique(), start=1):
    profile_rows = data.loc[data['profile_id'] == pid]

    ax = plt.subplot(11, 5, panel)
    for column in ('ambient', 'coolant'):
        sns.distplot(profile_rows[column], hist=False, label=column, ax=ax)
    ax.set_title('profile_id = ' + str(pid))
    ax.legend()

plt.show()

![am](./img/ambient_coolant.png)

> The coolant temperature can rise when the motor produces more heat (e.g. continuous operation, critical tests, high eddy currents) and also when the ambient temperature around the motor increases.

> As the data is anonymized, we can hypothesize that when most of the ambient density lies above zero, most of the coolant density is likely to lie above zero as well.
But only a few cases follow this hypothesis.

In [None]:
# Motor speed vs. torque density curves (no histogram), one panel per profile_id.
plt.figure(figsize=(25, 55))
for panel, pid in enumerate(data['profile_id'].unique(), start=1):
    profile_rows = data.loc[data['profile_id'] == pid]

    ax = plt.subplot(11, 5, panel)
    for column in ('motor_speed', 'torque'):
        sns.distplot(profile_rows[column], hist=False, label=column, ax=ax)
    ax.set_title('profile_id = ' + str(pid))
    ax.legend()

plt.show()

![st](./img/speed_torque.png)

> Torque and speed are inversely proportional.

> But the density plots are slightly overlapping, which shouldn't be the case; this may be because the axes are normalized.  

> The test cases from 46 to 59 follow some kind of pattern and the remaining ones do not.

In [None]:
# Density curves of the three stator temperatures, one panel per profile_id.
plt.figure(figsize=(25, 59))
stator_curves = (
    ('stator_winding', 's_winding'),
    ('stator_yoke', 's_yoke'),
    ('stator_tooth', 's_tooth'),
)
for panel, pid in enumerate(data['profile_id'].unique(), start=1):
    profile_rows = data.loc[data['profile_id'] == pid]

    ax = plt.subplot(11, 5, panel)
    for column, legend_name in stator_curves:
        sns.distplot(profile_rows[column], hist=False, label=legend_name, ax=ax)
    ax.set_title('profile_id = ' + str(pid))
    ax.legend()

plt.show()

![stator](./img/stator.png)

> We can clearly observe that the stator winding and stator tooth distributions overlap more with each other than with the yoke in most of the cases. This is obvious because the winding sits in the tooth. 

> We may have multicollinearity if we use both winding and tooth. 

#### lets Check attributes w.r.t pm in different test cases.

In [63]:
# Pairwise correlation (DataFrame.corr default) of every column with pm.
data.corr()['pm']

ambient           0.501496
coolant           0.430548
u_d              -0.082564
u_q               0.101236
motor_speed       0.332419
torque           -0.072905
i_d              -0.299227
i_q              -0.086486
pm                1.000000
stator_yoke       0.695014
stator_tooth      0.768352
stator_winding    0.729561
profile_id        0.156735
Name: pm, dtype: float64

In [None]:
# pm against stator_yoke over all rows of the frame.
fig = px.scatter(data_frame=data, x='stator_yoke', y='pm', template='plotly_dark')
fig.show()

![yoke](./img/stator_yoke.png)

In [None]:
# Scatter plus fitted regression line of pm on stator_yoke, one panel per profile_id.
plt.figure(figsize=(25, 40))
for panel, pid in enumerate(data['profile_id'].unique(), start=1):
    profile_rows = data.loc[data['profile_id'] == pid]

    ax = plt.subplot(11, 5, panel)
    # Same fixed tick positions on every panel, set before drawing.
    ax.set_xticks([-2.5, -1.5, 0, 1.5, 2.5])
    ax.set_yticks([-3, -2, -1, 0, 1, 2, 3])
    sns.regplot(x=profile_rows['stator_yoke'], y=profile_rows['pm'],
                label='profile_id = ' + str(pid), ax=ax)
    ax.legend()
plt.show()

![st_pm](./img/stator_y_pm.png)

> Looking at the above graphs, we can clearly say the test cases are very distinct: some are strongly positively correlated, some moderately, and some show no correlation (52, 59).

In [None]:
# pm against stator_tooth over all rows of the frame.
fig = px.scatter(data_frame=data, x='stator_tooth', y='pm', template='plotly_dark')
fig.show()

![tooth](./img/tooth.png)

In [None]:
# Scatter plus fitted regression line of pm on stator_tooth, one panel per profile_id.
plt.figure(figsize=(25, 40))
for panel, pid in enumerate(data['profile_id'].unique(), start=1):
    profile_rows = data.loc[data['profile_id'] == pid]

    ax = plt.subplot(11, 5, panel)
    # Same fixed tick positions on every panel, set before drawing.
    ax.set_xticks([-2.5, -1.5, 0, 1.5, 2.5])
    ax.set_yticks([-3, -2, -1, 0, 1, 2, 3])
    sns.regplot(x=profile_rows['stator_tooth'], y=profile_rows['pm'],
                label='profile_id = ' + str(pid), ax=ax)
    ax.legend()
plt.show()

![tooth](./img/tooth_pm.png)

In [None]:
# pm against ambient over all rows of the frame.
fig = px.scatter(data_frame=data, x='ambient', y='pm', template='plotly_dark')
fig.show()

![ambinet](./img/ambient.png)

In [None]:
# Scatter plus fitted regression line of pm on ambient, one panel per profile_id.
plt.figure(figsize=(25, 40))
for panel, pid in enumerate(data['profile_id'].unique(), start=1):
    profile_rows = data.loc[data['profile_id'] == pid]

    ax = plt.subplot(11, 5, panel)
    # Same fixed tick positions on every panel, set before drawing.
    ax.set_yticks([-3, -2, -1, 0, 1, 2, 3])
    ax.set_xticks([-10, -5, 0, 2.5, 5])
    sns.regplot(x=profile_rows['ambient'], y=profile_rows['pm'],
                label='profile_id = ' + str(pid), ax=ax)
    ax.legend()
plt.show()

![ambinet](./img/ambient_pm.png)

> From the above graphs we can observe that ambient temperature is also one of the test parameters, because in some test cases it is positively correlated with pm and in others it is negatively correlated.

In [None]:
# pm against coolant over all rows of the frame.
fig = px.scatter(data_frame=data, x='coolant', y='pm', template='plotly_dark')
fig.show()

![coolant](./img/coolant.png)

In [None]:
# Scatter plus fitted regression line of pm on coolant, one panel per profile_id.
plt.figure(figsize=(25, 40))
for panel, pid in enumerate(data['profile_id'].unique(), start=1):
    profile_rows = data.loc[data['profile_id'] == pid]

    ax = plt.subplot(11, 5, panel)
    # Same fixed tick positions on every panel, set before drawing.
    ax.set_yticks([-3, -2, -1, 0, 1, 2, 3])
    ax.set_xticks([-2, -1, 0, 1, 2, 3])
    sns.regplot(x=profile_rows['coolant'], y=profile_rows['pm'],
                label='profile_id = ' + str(pid), ax=ax)
    ax.legend()
plt.show()

![coolant](./img/coolant_pm.png)

> We can find some pattern between pm temperature and coolant temperature in most of the test cases.

> Some of the interesting test cases to dig deeper into are 51, 53, 62, 69, 78.

##### If we had access to the other parameters used in the test cases (profile_id), we could group similar types of tests and perform a better EDA.

## Feature-Selection

> If we isolate some test_cases(profile_id) we may get better results while predicting pm, but for now lets consider all test cases.

In [5]:
# Split into target (pm) and predictors (every other column).
# X still contains profile_id here; it is dropped later, before the reduced OLS fit.
Y = data['pm']
X = data.drop(columns='pm')

In [19]:
# Pairwise correlation (DataFrame.corr default) of every column with pm.
data.corr()['pm']

ambient           0.501496
coolant           0.430548
u_d              -0.082564
u_q               0.101236
motor_speed       0.332419
torque           -0.072905
i_d              -0.299227
i_q              -0.086486
pm                1.000000
stator_yoke       0.695014
stator_tooth      0.768352
stator_winding    0.729561
profile_id        0.156735
Name: pm, dtype: float64

In [3]:
# Hand-picked candidate features.
# NOTE(review): selected_cols is not referenced by any later cell visible here.
selected_cols = ['stator_tooth','stator_winding','stator_yoke','ambient']

In [6]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_regression,f_regression,f_oneway

In [11]:
# Score every column of X against Y with the univariate F-test (f_regression).
# k='all' keeps all columns, so this step only produces scores, it selects nothing.
# NOTE(review): X still contains profile_id at this point.
fs = SelectKBest(score_func=f_regression, k='all')
fs.fit(X, Y)

SelectKBest(k='all', score_func=<function f_regression at 0x00000270AD920DC8>)

In [None]:
# Bar chart of the F-scores computed by SelectKBest, one bar per column of X.
feature_scores = pd.Series(fs.scores_, index=X.columns)
fig = px.bar(x=feature_scores.index, y=feature_scores.values, template='plotly_dark')
fig.show()

![best](./img/k_best.png)

In [None]:
# Default size (width, height) for the matplotlib figures created after this cell.
plt.rcParams['figure.figsize'] = (15,8)

In [12]:
def check_mutlicolinearity(data_x):
    """Report how many feature pairs are strongly correlated.

    Parameters
    ----------
    data_x : pandas.DataFrame
        Feature frame; pairwise correlations come from ``data_x.corr()``.

    Returns
    -------
    pandas.DataFrame
        The correlation matrix with only the strict lower triangle kept, i.e.
        one entry per unordered pair of columns. The diagonal and the upper
        triangle are NaN.

    Side effects
    ------------
    Prints, for each threshold in 0.5, 0.6, 0.7, 0.8, 0.9, the fraction of
    column pairs whose absolute correlation exceeds that threshold.
    """
    full_corr = data_x.corr()

    # Keep the strict lower triangle with a boolean mask. Masking (instead of
    # zero-filling and then replacing zeros by NaN) preserves pairs whose
    # correlation is exactly 0 and avoids the np.NAN alias removed in NumPy 2.0.
    lower_triangle = np.tril(np.ones(full_corr.shape, dtype=bool), k=-1)
    corr = full_corr.where(lower_triangle)

    count_of_total_correlation_values = corr.count().sum()

    for i in [0.5, 0.6, 0.7, 0.8, 0.9]:
        count_greater_than_thresh = (corr.abs() > i).sum().sum()
        # The printed value is a fraction in [0, 1] despite the "Percent" label;
        # with fewer than two columns there are no pairs, so report NaN.
        if count_of_total_correlation_values:
            fraction = count_greater_than_thresh / count_of_total_correlation_values
        else:
            fraction = float('nan')
        print(f'Percent Values Greater than {i} co-relation : {fraction}')
    return corr

def plot_corr(threshold, corr):
    """Show an annotated heatmap of the correlations stronger than ``threshold``.

    Cells of ``corr`` whose absolute value does not exceed ``threshold`` are
    replaced by NaN before plotting, so only the strong pairs are drawn.
    """
    strong_only = corr.where(corr.abs() > threshold)
    sns.heatmap(strong_only, annot=True, cmap="YlGnBu", center=0)
    plt.show()

In [53]:
# Print the share of feature pairs above each correlation threshold and keep
# the lower-triangular correlation matrix for plotting.
corr = check_mutlicolinearity(X)

Percent Values Greater than 0.5 co-relation : 0.18181818181818182
Percent Values Greater than 0.6 co-relation : 0.15151515151515152
Percent Values Greater than 0.7 co-relation : 0.13636363636363635
Percent Values Greater than 0.8 co-relation : 0.09090909090909091
Percent Values Greater than 0.9 co-relation : 0.045454545454545456


In [None]:
# Heatmap of the feature pairs whose absolute correlation exceeds 0.7.
plot_corr(0.7, corr)

![corr](./img/corr.png)

> As torque depends directly on the current along the quadrature (q) axis, r ≈ 1 between torque and i_q.

In [64]:
# Pairwise correlation (DataFrame.corr default) of every column with pm.
data.corr()['pm']

ambient           0.501496
coolant           0.430548
u_d              -0.082564
u_q               0.101236
motor_speed       0.332419
torque           -0.072905
i_d              -0.299227
i_q              -0.086486
pm                1.000000
stator_yoke       0.695014
stator_tooth      0.768352
stator_winding    0.729561
profile_id        0.156735
Name: pm, dtype: float64

In [48]:
from scipy.stats import bartlett

H0 : variance_1 = variance_2

H1 : variance_1 != variance_2

If the p-value is less than 0.05, we reject the null hypothesis and can say that the variance of attribute_1 is not equal to the variance of attribute_2.

If the p-value is higher than 0.05, we fail to reject the null hypothesis.                      
So we ***do not have enough evidence*** to show that the variance of attribute_1 is not equal to the variance of attribute_2.

Note: Bartlett's test compares variances only. Every column here is standardized (std ≈ 1), so a high p-value is expected and does not by itself show that two features are redundant; the correlation matrix above is the evidence for that.

In [49]:
# Bartlett's equal-variance test for i_q vs. torque (compares variances only, not correlation).
bartlett(data['i_q'],data['torque'])  # author's decision: one of the two can be removed

BartlettResult(statistic=3.123727655647656e-05, pvalue=0.9955406210266459)

In [57]:
# Bartlett's equal-variance test for stator_winding vs. stator_tooth (compares variances only, not correlation).
bartlett(data['stator_winding'],data['stator_tooth'])  # author's decision: one can be removed, drop stator_tooth

BartlettResult(statistic=1.5746331070108353, pvalue=0.2095354479572652)

In [56]:
# Bartlett's equal-variance test for stator_yoke vs. stator_tooth (compares variances only, not correlation).
bartlett(data['stator_yoke'],data['stator_tooth'])  # author's decision: one can be removed, drop stator_tooth

BartlettResult(statistic=2.1019162222902836, pvalue=0.14711466679808885)

In [52]:
# Bartlett's equal-variance test for stator_yoke vs. coolant (compares variances only, not correlation).
bartlett(data['stator_yoke'],data['coolant'])  # author's decision: one could be removed, but both are kept

BartlettResult(statistic=1.8783254296993863, pvalue=0.17052459568439993)

In [59]:
# Bartlett's equal-variance test for torque vs. u_d (compares variances only, not correlation).
bartlett(data['torque'],data['u_d'])  # author's decision: one can be removed, drop torque

BartlettResult(statistic=0.0008074491427044544, pvalue=0.9773306426583245)

In [62]:
# Bartlett's equal-variance test for i_d vs. motor_speed (compares variances only, not correlation).
bartlett(data['i_d'],data['motor_speed'])  # author's decision: keep both

BartlettResult(statistic=4.984344546843803, pvalue=0.025577672900163786)

In [63]:
# Bartlett's equal-variance test for u_d vs. motor_speed (compares variances only, not correlation).
bartlett(data['u_d'],data['motor_speed'])  # author's decision: keep both

BartlettResult(statistic=11.218278606857885, pvalue=0.0008099558901449078)

## Modelling

In [73]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor,AdaBoostRegressor,GradientBoostingRegressor,RandomForestRegressor,VotingRegressor
from sklearn.model_selection import cross_val_score,GridSearchCV,KFold

In [72]:
from sklearn.metrics import mean_squared_error

In [65]:
import statsmodels.api as sm
from statsmodels.api import add_constant

In [80]:
# Baseline OLS of pm on every column of X, with an explicit intercept column.
X_new_c = add_constant(X)

model2 = sm.OLS(endog=Y, exog=X_new_c).fit()
model2.summary()

0,1,2,3
Dep. Variable:,pm,R-squared:,0.774
Model:,OLS,Adj. R-squared:,0.774
Method:,Least Squares,F-statistic:,284900.0
Date:,"Tue, 05 Jan 2021",Prob (F-statistic):,0.0
Time:,23:57:30,Log-Likelihood:,-669670.0
No. Observations:,998070,AIC:,1339000.0
Df Residuals:,998057,BIC:,1340000.0
Df Model:,12,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.1210,0.001,84.511,0.000,0.118,0.124
ambient,0.2289,0.001,392.974,0.000,0.228,0.230
coolant,-0.2439,0.003,-80.105,0.000,-0.250,-0.238
u_d,-0.0233,0.001,-21.653,0.000,-0.025,-0.021
u_q,-0.3465,0.001,-326.880,0.000,-0.349,-0.344
motor_speed,0.3280,0.002,189.177,0.000,0.325,0.331
torque,0.0540,0.008,7.004,0.000,0.039,0.069
i_d,0.1768,0.001,125.514,0.000,0.174,0.180
i_q,-0.0345,0.007,-4.771,0.000,-0.049,-0.020

0,1,2,3
Omnibus:,45721.267,Durbin-Watson:,0.002
Prob(Omnibus):,0.0,Jarque-Bera (JB):,100733.018
Skew:,0.304,Prob(JB):,0.0
Kurtosis:,4.433,Cond. No.,1840.0


> The Durbin-Watson statistic (0.002) is far below 2, which indicates strong positive autocorrelation in the residuals (values above 2 would indicate negative autocorrelation).

In [82]:
# Refit OLS without profile_id and torque; X_selected is reused by the
# cross-validation and grid-search cells below.
X_selected = X.drop(['profile_id', 'torque'], axis=1)
X_new = add_constant(X_selected)

model = sm.OLS(endog=Y, exog=X_new).fit()
model.summary()

0,1,2,3
Dep. Variable:,pm,R-squared:,0.772
Model:,OLS,Adj. R-squared:,0.772
Method:,Least Squares,F-statistic:,338300.0
Date:,"Tue, 05 Jan 2021",Prob (F-statistic):,0.0
Time:,23:57:57,Log-Likelihood:,-673730.0
No. Observations:,998070,AIC:,1347000.0
Df Residuals:,998059,BIC:,1348000.0
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0010,0.000,-2.027,0.043,-0.002,-3.2e-05
ambient,0.2174,0.001,381.188,0.000,0.216,0.219
coolant,-0.2661,0.003,-87.333,0.000,-0.272,-0.260
u_d,-0.0363,0.001,-39.392,0.000,-0.038,-0.034
u_q,-0.3437,0.001,-323.263,0.000,-0.346,-0.342
motor_speed,0.3330,0.002,191.996,0.000,0.330,0.336
i_d,0.1795,0.001,129.021,0.000,0.177,0.182
i_q,0.0163,0.001,17.536,0.000,0.015,0.018
stator_yoke,-1.5581,0.009,-164.973,0.000,-1.577,-1.540

0,1,2,3
Omnibus:,45736.748,Durbin-Watson:,0.002
Prob(Omnibus):,0.0,Jarque-Bera (JB):,106942.02
Skew:,0.284,Prob(JB):,0.0
Kurtosis:,4.5,Cond. No.,65.9


In [84]:
# MSE of the reduced OLS fit on the same rows it was fitted on (training error,
# not a held-out estimate).
# NOTE(review): `model` is rebound by the cross-validation loop and by the grid
# search in later cells, so this cell must run right after the OLS cell.
train_error = mean_squared_error(Y, model.predict(X_new))
train_error

0.2258648737732453

Evaluating the OLS model on the same data it was trained on (target in a ***range of roughly -3 to +3***), we get ***mse ≈ 0.226***

In [6]:
# GB_bias=[]
# GB_ve=[]
# for n in np.arange(90,100):
#     GB=GradientBoostingRegressor(n_estimators=n,random_state=0)
#     scores=cross_val_score(GB,X_selected,Y,cv=3,scoring='neg_mean_squared_error', n_jobs = 3)
#     rmse=np.sqrt(np.abs(scores))
#     GB_bias.append(np.mean(rmse))
#     GB_ve.append((np.std(rmse,ddof=1)))

# # x_axis=np.arange(len(GB_bias))
# # plt.plot(x_axis,GB_bias)

# np.argmin(GB_bias)

In [7]:
# bias=[]
# ve=[]
# LR=LinearRegression()

# for n in np.arange(40,60):
#     mod=AdaBoostRegressor(base_estimator=LR,n_estimators=n,random_state=0)
#     scores=cross_val_score(mod,X_selected,Y,cv=3,scoring='neg_mean_squared_error', n_jobs = 3)
#     bias.append(np.mean(rmse))
#     ve.append((np.std(rmse,ddof=1)))

# # x_axis=np.arange(len(bias))
# # plt.plot(x_axis,bias)

# np.argmin(bias)

In [8]:
# bias=[]
# ve=[]
# for n in np.arange(40,60):
#     mod=AdaBoostRegressor(n_estimators=n,random_state=0)
#     scores=cross_val_score(mod,X_selected,Y,cv=3,scoring='neg_mean_squared_error', n_jobs = 3)
#     bias.append(np.mean(rmse))
#     ve.append((np.std(rmse,ddof=1)))

# # x_axis=np.arange(len(bias))
# # plt.plot(x_axis,bias)

# np.argmin(bias)

In [87]:
# Candidate regressors for the cross-validated comparison.
LR = LinearRegression()
# The base learner is passed positionally: its keyword was `base_estimator` in
# older scikit-learn and is `estimator` in newer releases (the old keyword has
# been removed), while the first positional slot works with both.
LR_AB = AdaBoostRegressor(LR, n_estimators=100, random_state=0)
# AdaBoost with its default base learner.
DT_AB = AdaBoostRegressor(n_estimators=50, random_state=0)
# Gradient boosting; despite the LR_ prefix no linear model is involved.
LR_GB = GradientBoostingRegressor(n_estimators=100, random_state=0)
# Squared error is the default split criterion, so the explicit criterion='mse'
# (a spelling newer scikit-learn no longer accepts) is dropped.
RF = RandomForestRegressor(random_state=0)

In [88]:
# (display name, estimator) pairs, in the order they are evaluated.
models = [
    ('LinearRegression', LR),
    ('Adaboost', LR_AB),
    ('DT_boost', DT_AB),
    ('GBoost', LR_GB),
    ('RF', RF),
]

In [89]:
# Evaluate every candidate on the same shuffled 3-fold split and report the
# per-model RMSE as "mean (sample std across folds)".
results = []
names = []
kfold = KFold(shuffle=True, n_splits=3, random_state=0)
for name, model in models:
    cv_results = cross_val_score(model, X_selected, Y, cv=kfold,
                                 scoring='neg_mean_squared_error', n_jobs=3)
    # Scores are negated MSE values; convert each fold to RMSE once.
    fold_rmse = np.sqrt(np.abs(cv_results))
    results.append(fold_rmse)
    names.append(name)

    print("%s: %f (%f)" % (name, np.mean(fold_rmse), np.std(fold_rmse, ddof=1)))


LinearRegression: 0.475260 (0.000616)
Adaboost: 0.500461 (0.010645)
DT_boost: 0.552619 (0.000956)
GBoost: 0.398417 (0.000631)
RF: 0.033642 (0.001383)


In [90]:
from sklearn import neighbors

# Grid search over k in {2, 3, 4} and both weighting schemes for a k-NN
# regressor, scored by negated MSE on a shuffled 3-fold split.
knn = neighbors.KNeighborsRegressor()

param_grid = {
    'n_neighbors': np.arange(2, 5),
    'weights': ['uniform', 'distance'],
}

kfold = KFold(n_splits=3, shuffle=True, random_state=1)
model = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=kfold,
    refit=True,
    verbose=5,
    n_jobs=3,
).fit(X_selected, Y)

# best_score_ is the mean negated MSE of the best candidate, hence the minus
# sign in the printed value.
print('Best Scorer{}'.format(model.best_score_), end='\n\n')
print('Best Parameters{}'.format(model.best_params_))

Fitting 3 folds for each of 6 candidates, totalling 18 fits


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done  12 tasks      | elapsed:  1.7min
[Parallel(n_jobs=3)]: Done  18 out of  18 | elapsed:  2.6min finished


Best Scorer-0.003927226150670194

Best Parameters{'n_neighbors': 2, 'weights': 'distance'}


In [91]:
# Grid-search results as a table; show the three best-ranked candidates.
res = pd.DataFrame(model.cv_results_)
res.sort_values(by='rank_test_score').iloc[:3]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_neighbors,param_weights,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
1,8.791827,0.217084,12.223989,0.217746,2,distance,"{'n_neighbors': 2, 'weights': 'distance'}",-0.003973,-0.003831,-0.003977,-0.003927,6.8e-05,1
0,7.421773,0.20829,11.907636,0.086642,2,uniform,"{'n_neighbors': 2, 'weights': 'uniform'}",-0.004324,-0.00416,-0.00432,-0.004268,7.7e-05,2
3,13.049443,0.422002,21.197002,0.135826,3,distance,"{'n_neighbors': 3, 'weights': 'distance'}",-0.004357,-0.004292,-0.004391,-0.004347,4.1e-05,3


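### KNeighborsRegressor reaches a cross-validated MSE of 0.003927 (RMSE ≈ 0.063) with a standard deviation of 0.000068 across folds. The scores of the earlier models are RMSE values, so on the same scale RandomForest (RMSE ≈ 0.034) still has the lowest error and KNeighborsRegressor is second.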