In [9]:
import sklearn
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

In [2]:
scikit_learn_version = sklearn.__version__
scikit_learn_version

'0.20.3'

In [4]:
automobile_train = pd.read_csv('datasets/automobiles_file1.csv')
automobile_train.shape

(82, 52)

In [5]:
automobile_test = pd.read_csv('datasets/automobiles_test.csv')
automobile_test.shape

(41, 52)

In [6]:
x_train = automobile_train.drop('price',axis=1)
y_train = automobile_train['price']

x_test = automobile_test.drop('price',axis=1)
y_test = automobile_test['price']

In [10]:
regressor_model = RandomForestRegressor(n_estimators=5, warm_start=True)

#warm_start will allowus to reuse model params. the model will be reloaded and then we can continue training

rfr_model = regressor_model.fit(x_train, y_train)
rfr_model

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=5, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=True)

In [11]:
training_score = rfr_model.score(x_train, y_train)
training_score

0.9787709282988989

In [12]:
y_pred = rfr_model.predict(x_test)

In [13]:
testing_score = r2_score(y_test, y_pred)
testing_score

0.8557997993941615

In [14]:
rfr_model_param = {}
rfr_model_param['model'] = rfr_model
rfr_model_param['sklearn_version'] = scikit_learn_version
rfr_model_param['r2_score'] = testing_score

In [15]:
rfr_model_param

{'model': RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
            max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=5, n_jobs=None,
            oob_score=False, random_state=None, verbose=0, warm_start=True),
 'sklearn_version': '0.20.3',
 'r2_score': 0.8557997993941615}

In [16]:
import joblib

In [17]:
filename = 'models/rfr_model_checkpoint.joblib'
joblib.dump(rfr_model_param, filename)

['models/rfr_model_checkpoint.joblib']

In [18]:
joblib_model = joblib.load(filename)

In [19]:
joblib_model['model']

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=5, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=True)

In [20]:
#change n_estimators and continue training
joblib_model['model'].n_estimators = 15
joblib_model['model']

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=15, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=True)

In [21]:
auto_retrain = pd.read_csv('datasets/automobiles_file2.csv')
auto_retrain.shape

(82, 52)

In [22]:
x_train = auto_retrain.drop('price',axis=1)
y_train = auto_retrain['price']

In [23]:
rfr_retrained_model = joblib_model['model'].fit(x_train,y_train)

In [25]:
retrained_score = rfr_retrained_model.score(x_train, y_train)
retrained_score

0.9763955054988658

In [27]:
y_pred = rfr_retrained_model.predict(x_test)

retrained_testing_score = r2_score(y_test, y_pred)
retrained_score

0.9763955054988658

In [28]:
testing_score

0.8557997993941615