In [22]:
import numpy as np
import pandas as pd
from scipy.stats import reciprocal, uniform
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVR
from sklearn.svm import SVR

In [10]:
# Load the California housing dataset and pull out the feature matrix (X)
# and the regression target (y).
housing = fetch_california_housing()
X, y = housing.data, housing.target

In [31]:
# Wrap the raw feature array in a DataFrame with named columns so the
# features can be inspected as a table.
data = fetch_california_housing()
calf_hous_df = pd.DataFrame(data.data, columns=data.feature_names)
calf_hous_df.head()  # first five rows (the default for head)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [35]:
# Hold out 20% of the samples for final evaluation; the fixed seed makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [40]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so the test data never influences the
# preprocessing statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

[[-0.326196    0.34849025 -0.17491646 ...  0.05137609 -1.3728112
   1.27258656]
 [-0.03584338  1.61811813 -0.40283542 ... -0.11736222 -0.87669601
   0.70916212]
 [ 0.14470145 -1.95271028  0.08821601 ... -0.03227969 -0.46014647
  -0.44760309]
 ...
 [-0.49697313  0.58654547 -0.60675918 ...  0.02030568 -0.75500738
   0.59946887]
 [ 0.96545045 -1.07984112  0.40217517 ...  0.00707608  0.90651045
  -1.18553953]
 [-0.68544764  1.85617335 -0.85144571 ... -0.08535429  0.99543676
  -1.41489815]]


In [13]:
# Baseline model: a linear support vector regressor trained on the scaled
# training features. fit() returns the estimator itself, so the chained call
# leaves lin_svr bound to the fitted model.
lin_svr = LinearSVR(random_state=42).fit(X_train_scaled, y_train)
lin_svr



LinearSVR(random_state=42)

In [14]:
# Mean squared error of the linear baseline on the training split
# (an in-sample figure, not a generalisation estimate).
y_pred = lin_svr.predict(X_train_scaled)
mse = mean_squared_error(y_true=y_train, y_pred=y_pred)
mse

0.9641780189948642

In [15]:
# RMSE: square root of the current `mse`, expressed in the same units as the target.
np.sqrt(mse)

0.9819256687727764

In [16]:
# Randomised hyper-parameter search for an SVR (default kernel).
#
# Search space:
#   gamma ~ reciprocal(0.001, 0.1)  -> log-uniform, so each order of magnitude
#                                      in [0.001, 0.1] is sampled equally often.
#   C     ~ uniform(1, 10)          -> scipy's uniform takes (loc, scale), so C
#                                      is drawn from [1, 11], not [1, 10].
#
# Ten candidates are each evaluated with 3-fold cross-validation (30 fits);
# random_state pins the sampled candidates so the search is reproducible.
# SVR, RandomizedSearchCV, reciprocal and uniform are imported in the
# notebook's top import cell.
param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(
    SVR(),
    param_distributions,
    n_iter=10,
    cv=3,
    verbose=2,
    random_state=42,
)
rnd_search_cv.fit(X_train_scaled, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=  13.0s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=  13.0s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=  13.1s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=  12.8s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=  12.9s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=  13.2s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=  12.6s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=  12.8s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=  12.8s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=  12.4s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=  12.7s
[CV] END ....C=1.5808361216819946, gamma=0.05399

RandomizedSearchCV(cv=3, estimator=SVR(),
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000026A0B577190>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000026A0B74E7F0>},
                   random_state=42, verbose=2)

In [17]:
# Show the SVR configured with the best hyper-parameters found by the search.
rnd_search_cv.best_estimator_


SVR(C=4.745401188473625, gamma=0.07969454818643928)

In [18]:
# RMSE of the tuned SVR on the training split (in-sample error).
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
mse = mean_squared_error(y_true=y_train, y_pred=y_pred)
np.sqrt(mse)

0.572752477078536

In [19]:
# RMSE of the tuned SVR on the held-out test split (generalisation estimate).
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
np.sqrt(mse)

0.5929168385528742