In [1]:
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import ElasticNet
from scipy.stats import uniform, randint

Wczytanie przygotowanych zbiorów

In [2]:
# Read the four prepared regression datasets from ../Dane, one DataFrame
# per CSV file, in the order the names appear on the left-hand side.
abalone, autompg, insurance, concrete = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Dane/abalone.csv',
        '../Dane/autompg.csv',
        '../Dane/insurance.csv',
        '../Dane/concrete.csv',
    )
)

Ustalenie siatek parametrów 

In [3]:
# Hyperparameter distributions sampled by the randomized searches below.
# scipy conventions used here:
#   randint(low, high)  -> integers from low up to high - 1 (high is exclusive)
#   uniform(loc, scale) -> floats from the interval [loc, loc + scale]

# Random forest regressor.
rfr_param = dict(
    n_estimators=randint(1, 1000),
    max_depth=randint(1, 100),
    min_samples_split=randint(2, 10),
    min_samples_leaf=randint(1, 5),
)

# Gradient boosting regressor: learning_rate spans [0.01, 0.30] and
# subsample spans [0.1, 1.0].
gbr_param = dict(
    n_estimators=randint(1, 1000),
    learning_rate=uniform(0.01, 0.29),
    subsample=uniform(0.1, 0.9),
    min_samples_split=randint(2, 10),
    min_samples_leaf=randint(1, 5),
    max_depth=randint(1, 100),
)

# Elastic net: both the penalty strength and the L1/L2 mix span [0, 1].
en_param = dict(
    alpha=uniform(0, 1),
    l1_ratio=uniform(0, 1),
)

Podział zbiorów na zmienną objaśnianą i zmienne objaśniające

In [4]:
# Split every dataset into its target column (y_*) and the remaining
# columns used as features (X_*). drop() returns a new frame, so the
# loaded DataFrames are left unchanged.
y_abalone = abalone['rings']
X_abalone = abalone.drop(columns='rings')

y_autompg = autompg['mpg']
X_autompg = autompg.drop(columns='mpg')

y_insurance = insurance['charges']
X_insurance = insurance.drop(columns='charges')

y_concrete = concrete['strength']
X_concrete = concrete.drop(columns='strength')

#### Random Forest Regressor

Abalone

In [5]:
# Random forest on Abalone: randomized search drawing 200 candidates from
# rfr_param, evaluated with 3-fold CV on R^2, using all available cores.
rfr_abalone = RandomForestRegressor(random_state=123)
rs_rfr_abalone = RandomizedSearchCV(
    estimator=rfr_abalone,
    param_distributions=rfr_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [6]:
# Run the search on the Abalone features and target (200 candidates x 3 folds).
rs_rfr_abalone.fit(X=X_abalone, y=y_abalone)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [7]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_rfr_abalone.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_abalone_rfr.csv')
)

Autompg

In [8]:
# Random forest on Auto MPG: randomized search drawing 200 candidates from
# rfr_param, evaluated with 3-fold CV on R^2, using all available cores.
rfr_autompg = RandomForestRegressor(random_state=123)
rs_rfr_autompg = RandomizedSearchCV(
    estimator=rfr_autompg,
    param_distributions=rfr_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [9]:
# Run the search on the Auto MPG features and target (200 candidates x 3 folds).
rs_rfr_autompg.fit(X=X_autompg, y=y_autompg)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [10]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_rfr_autompg.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_autompg_rfr.csv')
)

Insurance

In [11]:
# Random forest on Insurance: randomized search drawing 200 candidates from
# rfr_param, evaluated with 3-fold CV on R^2, using all available cores.
rfr_insurance = RandomForestRegressor(random_state=123)
rs_rfr_insurance = RandomizedSearchCV(
    estimator=rfr_insurance,
    param_distributions=rfr_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [12]:
# Run the search on the Insurance features and target (200 candidates x 3 folds).
rs_rfr_insurance.fit(X=X_insurance, y=y_insurance)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [13]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_rfr_insurance.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_insurance_rfr.csv')
)

Concrete

In [14]:
# Random forest on Concrete: randomized search drawing 200 candidates from
# rfr_param, evaluated with 3-fold CV on R^2, using all available cores.
rfr_concrete = RandomForestRegressor(random_state=123)
rs_rfr_concrete = RandomizedSearchCV(
    estimator=rfr_concrete,
    param_distributions=rfr_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [15]:
# Run the search on the Concrete features and target (200 candidates x 3 folds).
rs_rfr_concrete.fit(X=X_concrete, y=y_concrete)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [16]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_rfr_concrete.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_concrete_rfr.csv')
)

#### Gradient Boosting Regressor

Abalone

In [17]:
# Gradient boosting on Abalone: randomized search drawing 200 candidates
# from gbr_param, evaluated with 3-fold CV on R^2, using all available cores.
gbr_abalone = GradientBoostingRegressor(random_state=123)
rs_gbr_abalone = RandomizedSearchCV(
    estimator=gbr_abalone,
    param_distributions=gbr_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [18]:
# Run the search on the Abalone features and target (200 candidates x 3 folds).
rs_gbr_abalone.fit(X=X_abalone, y=y_abalone)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [19]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_gbr_abalone.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_abalone_gbr.csv')
)

Autompg

In [20]:
# Gradient boosting on Auto MPG: randomized search drawing 200 candidates
# from gbr_param, evaluated with 3-fold CV on R^2, using all available cores.
gbr_autompg = GradientBoostingRegressor(random_state=123)
rs_gbr_autompg = RandomizedSearchCV(
    estimator=gbr_autompg,
    param_distributions=gbr_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [21]:
# Run the search on the Auto MPG features and target (200 candidates x 3 folds).
rs_gbr_autompg.fit(X=X_autompg, y=y_autompg)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [22]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_gbr_autompg.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_autompg_gbr.csv')
)

Insurance

In [23]:
# Gradient boosting on Insurance: randomized search drawing 200 candidates
# from gbr_param, evaluated with 3-fold CV on R^2, using all available cores.
gbr_insurance = GradientBoostingRegressor(random_state=123)
rs_gbr_insurance = RandomizedSearchCV(
    estimator=gbr_insurance,
    param_distributions=gbr_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [24]:
# Run the search on the Insurance features and target (200 candidates x 3 folds).
rs_gbr_insurance.fit(X=X_insurance, y=y_insurance)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [25]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_gbr_insurance.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_insurance_gbr.csv')
)

Concrete

In [26]:
# Gradient boosting on Concrete: randomized search drawing 200 candidates
# from gbr_param, evaluated with 3-fold CV on R^2, using all available cores.
gbr_concrete = GradientBoostingRegressor(random_state=123)
rs_gbr_concrete = RandomizedSearchCV(
    estimator=gbr_concrete,
    param_distributions=gbr_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [27]:
# Run the search on the Concrete features and target (200 candidates x 3 folds).
rs_gbr_concrete.fit(X=X_concrete, y=y_concrete)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [28]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_gbr_concrete.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_concrete_gbr.csv')
)

#### Elastic Net

In [29]:
from sklearn.preprocessing import StandardScaler

Skalowanie danych

In [30]:
# Standardize the features of every dataset with its own StandardScaler,
# fitted on all rows of that dataset. The X_* names are rebound to the
# scaled output (a NumPy array under scikit-learn's default output
# settings), so the unscaled frames are no longer reachable via X_*.
# NOTE(review): each scaler sees every row before the 3-fold CV split used
# by the Elastic Net searches, so validation folds influence the scaling;
# consider moving the scaler into a Pipeline passed to RandomizedSearchCV.
X_abalone, X_autompg, X_insurance, X_concrete = (
    StandardScaler().fit_transform(features)
    for features in (X_abalone, X_autompg, X_insurance, X_concrete)
)

Abalone

In [31]:
# Elastic net on Abalone: randomized search drawing 200 candidates from
# en_param, evaluated with 3-fold CV on R^2, using all available cores.
en_abalone = ElasticNet(random_state=123)
rs_en_abalone = RandomizedSearchCV(
    estimator=en_abalone,
    param_distributions=en_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [32]:
# Run the search on the Abalone features and target (200 candidates x 3 folds).
rs_en_abalone.fit(X=X_abalone, y=y_abalone)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [33]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_en_abalone.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_abalone_en.csv')
)

Autompg

In [34]:
# Elastic net on Auto MPG: randomized search drawing 200 candidates from
# en_param, evaluated with 3-fold CV on R^2, using all available cores.
en_autompg = ElasticNet(random_state=123)
rs_en_autompg = RandomizedSearchCV(
    estimator=en_autompg,
    param_distributions=en_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [35]:
# Run the search on the Auto MPG features and target (200 candidates x 3 folds).
rs_en_autompg.fit(X=X_autompg, y=y_autompg)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [36]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_en_autompg.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_autompg_en.csv')
)

Insurance

In [37]:
# Elastic net on Insurance: randomized search drawing 200 candidates from
# en_param, evaluated with 3-fold CV on R^2, using all available cores.
en_insurance = ElasticNet(random_state=123)
rs_en_insurance = RandomizedSearchCV(
    estimator=en_insurance,
    param_distributions=en_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [38]:
# Run the search on the Insurance features and target (200 candidates x 3 folds).
rs_en_insurance.fit(X=X_insurance, y=y_insurance)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [39]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_en_insurance.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_insurance_en.csv')
)

Concrete

In [40]:
# Elastic net on Concrete: randomized search drawing 200 candidates from
# en_param, evaluated with 3-fold CV on R^2, using all available cores.
en_concrete = ElasticNet(random_state=123)
rs_en_concrete = RandomizedSearchCV(
    estimator=en_concrete,
    param_distributions=en_param,
    n_iter=200,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=10,
    random_state=123,
)

In [41]:
# Run the search on the Concrete features and target (200 candidates x 3 folds).
rs_en_concrete.fit(X=X_concrete, y=y_concrete)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


In [42]:
# Save the search's cv_results_ as a CSV table; the DataFrame index is
# written as the first column (to_csv default).
(
    pd.DataFrame(data=rs_en_concrete.cv_results_)
    .to_csv(path_or_buf='../Wyniki/rs_concrete_en.csv')
)