In [13]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

boston=pd.read_csv("boston.csv")

#define the X and y (target) features
X=boston.drop("MEDV",axis=1)
y=boston['MEDV']

scaler=MinMaxScaler(feature_range=(0,1))
X_=scaler.fit_transform(X)



In [14]:
from sklearn.metrics import r2_score
# Train test split
test_size = 0.33
seed = 7
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)

# Create model
model = LinearRegression()

# Fit model
model.fit(X_train, y_train)

# Evaluate model
predictions = model.predict(X_test)
r2_score(y_test, predictions)

0.6590081405512086

In [15]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

#create 5 folds

seed=7
kFold=KFold(n_splits=5,shuffle=True,random_state=seed)

model=LinearRegression()
results=cross_val_score(model,X,y,cv=kFold,scoring='r2')
print(results)

print("Mean:" , results.mean())
print("std:" , results.std())

[0.57790144 0.76990344 0.64138006 0.73139225 0.80395154]
Mean: 0.7049057438479578
std: 0.08354868173255997


In [16]:
cross_val_score(model,X,y,cv=kFold)

array([0.57790144, 0.76990344, 0.64138006, 0.73139225, 0.80395154])

In [17]:
from sklearn.metrics import SCORERS
sorted(SCORERS.keys())

['accuracy',
 'adjusted_mutual_info_score',
 'adjusted_rand_score',
 'average_precision',
 'balanced_accuracy',
 'completeness_score',
 'explained_variance',
 'f1',
 'f1_macro',
 'f1_micro',
 'f1_samples',
 'f1_weighted',
 'fowlkes_mallows_score',
 'homogeneity_score',
 'jaccard',
 'jaccard_macro',
 'jaccard_micro',
 'jaccard_samples',
 'jaccard_weighted',
 'max_error',
 'mutual_info_score',
 'neg_brier_score',
 'neg_log_loss',
 'neg_mean_absolute_error',
 'neg_mean_absolute_percentage_error',
 'neg_mean_gamma_deviance',
 'neg_mean_poisson_deviance',
 'neg_mean_squared_error',
 'neg_mean_squared_log_error',
 'neg_median_absolute_error',
 'neg_root_mean_squared_error',
 'normalized_mutual_info_score',
 'precision',
 'precision_macro',
 'precision_micro',
 'precision_samples',
 'precision_weighted',
 'r2',
 'rand_score',
 'recall',
 'recall_macro',
 'recall_micro',
 'recall_samples',
 'recall_weighted',
 'roc_auc',
 'roc_auc_ovo',
 'roc_auc_ovo_weighted',
 'roc_auc_ovr',
 'roc_auc_ovr_we

In [18]:
model.fit(X,y)
model.coef_

array([-1.21388618e-01,  4.69634633e-02,  1.34676947e-02,  2.83999338e+00,
       -1.87580220e+01,  3.65811904e+00,  3.61071055e-03, -1.49075365e+00,
        2.89404521e-01, -1.26819813e-02, -9.37532900e-01, -5.52019101e-01])

In [20]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
algorithm=KNeighborsRegressor()
seed=13
kFold=KFold(n_splits=3,shuffle=True,random_state=seed)

hp_candidates=[{'n_neighbors':[2,3,4,5,6],'weights':['uniform','distance']}]
grid=GridSearchCV(estimator=algorithm,param_grid=hp_candidates,cv=kFold,scoring='r2')
grid.fit(X,y)
print(grid.best_score_)
print(grid.best_estimator_)
print(grid.best_params_)
print(grid.cv_results_)

0.6255708116560106
KNeighborsRegressor(n_neighbors=6, weights='distance')
{'n_neighbors': 6, 'weights': 'distance'}
{'mean_fit_time': array([0.00317947, 0.00200089, 0.00266743, 0.00265026, 0.00264653,
       0.00266671, 0.00267625, 0.00267323, 0.00232633, 0.00232482]), 'std_fit_time': array([6.11252614e-04, 2.14791485e-05, 4.71037202e-04, 4.76155151e-04,
       4.80822453e-04, 4.72381918e-04, 4.78417995e-04, 4.81778070e-04,
       4.59562406e-04, 4.81451713e-04]), 'mean_score_time': array([0.00285355, 0.00366457, 0.00333261, 0.00201456, 0.00234294,
       0.00232283, 0.00233444, 0.00266767, 0.00299891, 0.00266608]), 'std_score_time': array([2.30592088e-04, 9.63294706e-04, 4.71707569e-04, 1.47400196e-05,
       4.86340514e-04, 4.76952711e-04, 4.70137311e-04, 4.92216797e-04,
       2.29099731e-05, 4.73112514e-04]), 'param_n_neighbors': masked_array(data=[2, 2, 3, 3, 4, 4, 5, 5, 6, 6],
             mask=[False, False, False, False, False, False, False, False,
                   False, Fal