<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [18]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

#preprocessing and model selection
from sklearn.compose import TransformedTargetRegressor
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, RepeatedKFold, train_test_split, cross_val_score

#regression model metrics
from sklearn.metrics import mean_squared_error, r2_score

#regression models
from sklearn.svm import LinearSVR


In [99]:
# Remote CSV holding the housing data; only two predictors and the target are used.
path = 'https://simplonline-v3-prod.s3.eu-west-3.amazonaws.com/media/file/csv/bdfc59ed-c3c7-48ac-a3d3-9e54663f6c1d.csv'
target = 'SalePrice'
feature_columns = ["OverallQual", "GrLivArea"]

# Read the file once and keep the predictors followed by the target,
# in the same column order as before.
df = pd.read_csv(path)[feature_columns + [target]]

# Design matrix (everything except the target) and response vector.
X = df.drop(columns=[target])
y = df[target]

# Last expression of the cell, so the notebook actually renders the preview.
df.head()

In [100]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.preprocessing import PolynomialFeatures

# Preprocessing shared by all predictors: fill missing values with the column
# median, then standardise.
numeric_transformer = Pipeline(
    steps=[('imputer', SimpleImputer(strategy='median')),
           ('scaler', StandardScaler())])

# Names of the numeric predictor columns. Taken from X so the target is
# excluded, and reduced to column labels rather than a full DataFrame.
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns

# Complete model: preprocessing followed by a linear support vector regressor.
# The step name 'regressor' is the prefix used by the keys of paramsLinearSVR.
# random_state is fixed so repeated runs give the same fit.
lsvr = Pipeline(steps=[('preprocessor', numeric_transformer),
                       ('regressor', LinearSVR(random_state=1))])

# Hold out 25% of the rows for the final evaluation; seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=1)

In [101]:
# Candidate hyperparameters for the 'regressor' step of the pipeline.
# Any LinearSVR option not listed here keeps its library default.
paramsLinearSVR = {
    # Regularisation strength: 100 values from 0.1 to 10.0 in steps of 0.1.
    'regressor__C': [0.1 * i + 0.1 for i in range(100)],
    # Iteration budget: 1000, 2000, ..., 10000. Must be integers -- LinearSVR
    # does not accept a float for max_iter.
    'regressor__max_iter': [1000 * (i + 1) for i in range(10)],
    # Single fixed stopping tolerance.
    'regressor__tol': [0.01],
}



In [102]:
cv1 = RepeatedKFold(n_splits=10, n_repeats=4, random_state=1)

Grid = RandomizedSearchCV(LinearSVR(), paramsLinearSVR, n_iter = 20, scoring='r2', cv=cv1)
Grid.fit(X, y)
best_model = Grid.best_estimator_
#show_classification_model_metrics(best_model, X_test, y_test)

In [103]:
print(f"R squarred :{best_model.score(X_test, y_test)}")
print(f"\n OverallQual coef : {best_model.coef_[0]}\n GrLivArea coef : {best_model.coef_[1]}\n intercept : {best_model.intercept_}")


R squarred :0.7756982110186206

 OverallQual coef : 0.5079479265274066
 GrLivArea coef : 0.35414715971618776
 intercept : [0.00288055]
