In [28]:
############## imports
# general
import statistics
import datetime

# data manipulation and exploration
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

## machine learning stuff
# preprocessing
from sklearn import preprocessing
# train/testing
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, cross_val_score  
# error calculations
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# models
from sklearn import linear_model  # linear regression
from sklearn.linear_model import BayesianRidge #bayesisan ridge regression
from sklearn.svm import SVR  # support vector machines regression
from sklearn.gaussian_process import GaussianProcessRegressor # import GaussianProcessRegressor
from sklearn.neighbors import KNeighborsRegressor # k-nearest neightbors for regression
from sklearn.neural_network import MLPRegressor # neural network for regression
from sklearn.ensemble import RandomForestRegressor  # random forest regression
from sklearn.ensemble import AdaBoostRegressor # adaboost for regression
# saving models
from sklearn.externals import joblib

# import the AP
APILoc = r"C:\Users\chris\Documents\Thesis\code\scripts\API"

import sys
sys.path.insert(0, APILoc)

from API import *

In [2]:
# get aggregate data
aggDf = pd.read_csv(r'C:\Users\chris\Documents\Thesis\data\aggregate\aggregateData.csv')
aggDf = aggDf.drop("Unnamed: 0",axis=1)

# filter out the dataframe so that it only has the relevant values
# this option only includes the data that has valid percent cover data
aggDf = aggDf.loc[(aggDf["Percent Cover (%)"] >= 0.0) & (aggDf["Harvested in Sown Year"] == 0.0) 
                  & (aggDf["First Date of Season"] == 0.0)]

# this option only includes the data that has valid percent cover data and only contains the kentucky data
#aggDf = aggDf.loc[(aggDf["Percent Cover (%)"] >= 0.0) & (aggDf["Harvested in Sown Year"] == 0.0) 
#                  & (aggDf["First Date of Season"] == 0.0) & (aggDf["State"] == "Kentucky")]

# this option only includes the data that has valid percent cover data and only contains the georgia data
#aggDf = aggDf.loc[(aggDf["Percent Cover (%)"] >= 0.0) & (aggDf["Harvested in Sown Year"] == 0.0) 
#                  & (aggDf["First Date of Season"] == 0.0) & (aggDf["State"] == "Georgia")]

# this option includes the data points that has invalid values for the column "Percent Cover (%)"
#aggDf= aggDf.loc[(aggDf["Harvested in Sown Year"] == 0.0) & (aggDf["First Date of Season"] == 0.0)]

# this filter keeps only the kentuck data
#aggDf = aggDf.loc[(aggDf["State"] == "Kentucky")]

# filter out the values that will not be used by the machine learning models
xColumnsToKeep = ["Julian Day", "Time Since Sown (Days)", "Time Since Last Harvest (Days)", "Total Radiation (MJ/m^2)",
                "Total Rainfall (mm)", "Avg Air Temp (C)", "Avg Soil Moisture (%)", "Percent Cover (%)", "Day Length (hrs)"]
yColumnsToKeep = ["Yield (tons/acre)"]

# get a dataframe containing the features and the targets
xDf = aggDf[xColumnsToKeep]
yDf = aggDf[yColumnsToKeep]

# reset the index
xDf = xDf.reset_index(drop=True)
yDf = yDf.reset_index(drop=True)

pd.set_option('display.max_rows', 2500)
pd.set_option('display.max_columns', 500)

xCols = list(xDf)

In [3]:
avgYield = ((aggDf["Yield (tons/acre)"].mean()) * 2000.0)
print("The average yield of a variety on a paricular cut date is: " + str(avgYield) + " lbs/acre")

The average yield of a variety on a paricular cut date is: 2019.454545454542 lbs/acre


In [4]:
aggDf.head()

Unnamed: 0,State,City,Date Sown,Variety,Date of Cut,Julian Day,Yield (tons/acre),Time Since Sown (Days),Time Since Last Harvest (Days),Total Radiation (MJ/m^2),Total Rainfall (mm),Avg Air Temp (C),Avg Min Temp (C),Avg Max Temp (C),Avg Soil Moisture (%),Harvested in Sown Year,First Date of Season,Percent Cover (%),Day Length (hrs)
3,Georgia,Athens,2007-10-10,TS 4010,2008-12-05,340,0.76,422,99,622.84,95.254,9.7902,3.9436,16.041,0.13408,0.0,0.0,90.915344,10.0
7,Georgia,Athens,2007-10-10,BaraWet 501,2008-12-05,340,0.79,422,99,622.84,95.254,9.7902,3.9436,16.041,0.13408,0.0,0.0,90.968254,10.0
11,Georgia,Athens,2007-10-10,GA-505,2008-12-05,340,0.75,422,99,622.84,95.254,9.7902,3.9436,16.041,0.13408,0.0,0.0,87.925926,10.0
15,Georgia,Athens,2007-10-10,Bulldog 805,2008-12-05,340,0.7,422,99,622.84,95.254,9.7902,3.9436,16.041,0.13408,0.0,0.0,88.883598,10.0
19,Georgia,Athens,2007-10-10,Phoenix,2008-12-05,340,0.69,422,99,622.84,95.254,9.7902,3.9436,16.041,0.13408,0.0,0.0,86.883598,10.0


In [5]:
len(aggDf)

770

In [6]:
xDf.head()

Unnamed: 0,Julian Day,Time Since Sown (Days),Time Since Last Harvest (Days),Total Radiation (MJ/m^2),Total Rainfall (mm),Avg Air Temp (C),Avg Soil Moisture (%),Percent Cover (%),Day Length (hrs)
0,340,422,99,622.84,95.254,9.7902,0.13408,90.915344,10.0
1,340,422,99,622.84,95.254,9.7902,0.13408,90.968254,10.0
2,340,422,99,622.84,95.254,9.7902,0.13408,87.925926,10.0
3,340,422,99,622.84,95.254,9.7902,0.13408,88.883598,10.0
4,340,422,99,622.84,95.254,9.7902,0.13408,86.883598,10.0


In [7]:
yDf.head()

Unnamed: 0,Yield (tons/acre)
0,0.76
1,0.79
2,0.75
3,0.7
4,0.69


In [8]:
# lets normalize each column in the dataframe. MinMax Scaling is used here.

xVals = xDf.to_numpy() #returns a numpy array
yVals = yDf.to_numpy() #returns a numpy array

# for normalization
#minMaxScaler = preprocessing.MinMaxScaler()
#adjustedXVals = minMaxScaler.fit_transform(xVals)

# for standardization
standardScaler = preprocessing.StandardScaler()
adjustedXVals = standardScaler.fit_transform(xVals)

# get the normalized or standardized xVals 
xDf = pd.DataFrame(adjustedXVals)

In [9]:
# lets make some training/testing sets

testSize = 0.1

xTrain, xTest, yTrain, yTest = makeTrainTestData(xDf, yDf, testSize)

In [15]:
# lets make a linear regression model and see how it turns out

# make model
linearModel = linear_model.LinearRegression()

# fit model
linearModel.fit(xTrain, yTrain)

# get predictions
linearPred = linearModel.predict(xTest)
trainLinearPred = linearModel.predict(xTrain)

# find errors
meanAbsoluteError = mean_absolute_error(yTest, linearPred)
trainMeanAbsoluteError = mean_absolute_error(yTrain, trainLinearPred)
meanSquaredError = mean_squared_error(yTest, linearPred)

print("mean absolute error of data from testing set: " + str(round(meanAbsoluteError*2000, 3)) + " lbs/acre")
print("mean absolute error of data from training set: " + str(round(trainMeanAbsoluteError*2000, 3)) + " lbs/acre")
print("mean absolute error: " + str(meanAbsoluteError) + " tons/acre")
#print("mean squared error: ", meanSquaredError)
print('Variance score of testing set: ' + str(r2_score(yTest, linearPred)))
print('Variance score of training set: ' + str(r2_score(yTrain, trainLinearPred)))

coefficients = linearModel.coef_
print("Coefficients: ")
for i in range(len(coefficients)):
    col = xCols[i] # xCols was defined in the cell where xDf was defined
    coef = coefficients[i]
    print("    " + col + ": " + str(coef))
print(" ")
print(linearModel.get_params())

mean absolute error of data from testing set: 352.42 lbs/acre
mean absolute error of data from training set: 355.445 lbs/acre
mean absolute error: 0.17620994866615441 tons/acre
Variance score of testing set: 0.6328061530162579
Variance score of training set: 0.6717008783120788
Coefficients: 
    Julian Day: -0.6621200972780095
    Time Since Sown (Days): -0.05903467080527067
    Time Since Last Harvest (Days): 0.10238656460218852
    Total Radiation (MJ/m^2): -0.03232727828090301
    Total Rainfall (mm): 0.11144492974749833
    Avg Air Temp (C): 0.06151484470177444
    Avg Soil Moisture (%): -0.027517603834548033
    Percent Cover (%): 0.07347962779540351
    Day Length (hrs): -0.3244588149198849
 
{'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'normalize': False}


In [13]:
# support vector machines for regression
numFolds = 5

svrModel = GridSearchCV(
    estimator=SVR(),
    param_grid={
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'C': [0.1, 1.0, 10],
        'gamma': ["scale", "auto"]
    },
    cv=KFold(n_splits=numFolds, shuffle=True),
    scoring='neg_mean_absolute_error',
    return_train_score=False,
)

# fit model
svrModel.fit(xTrain, yTrain)

# get predictions
svrPred = svrModel.predict(xTest)
trainSvrPred = svrModel.predict(xTrain)

# find errors
meanAbsoluteError = mean_absolute_error(yTest, svrPred)
trainMeanAbsoluteError = mean_absolute_error(yTrain, trainSvrPred)
meanSquaredError = mean_squared_error(yTest, svrPred)

# print the errors
# convert the absolute errors from tons/acre to lbs/acre
print("mean absolute error of data from testing set: " + str(round(meanAbsoluteError*2000, 3)) + " lbs/acre")
print("mean absolute error of data from training set: " + str(round(trainMeanAbsoluteError*2000, 3)) + " lbs/acre")
#print("mean squared error:", meanSquaredError)

print('Variance score of testing set: ' + str(r2_score(yTest, svrPred)))
print('Variance score of training set: ' + str(r2_score(yTrain, trainSvrPred)))

print(" ")
print(svrModel.get_params())

mean absolute error of data from testing set: 198.415 lbs/acre
mean absolute error of data from training set: 187.425 lbs/acre
Variance score of testing set: 0.881709366607075
Variance score of training set: 0.9214057337131799
 
{'cv': KFold(n_splits=5, random_state=None, shuffle=True), 'error_score': 'raise-deprecating', 'estimator__C': 1.0, 'estimator__cache_size': 200, 'estimator__coef0': 0.0, 'estimator__degree': 3, 'estimator__epsilon': 0.1, 'estimator__gamma': 'auto_deprecated', 'estimator__kernel': 'rbf', 'estimator__max_iter': -1, 'estimator__shrinking': True, 'estimator__tol': 0.001, 'estimator__verbose': False, 'estimator': SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False), 'fit_params': None, 'iid': 'warn', 'n_jobs': None, 'param_grid': {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'C': [0.1, 1.0, 10], 'gamma': ['scale', 'auto']}, 'pre_dispatch': '2*n_jobs', 'refit



In [14]:
# random forest for regression
# make model and find best hyperparameters with mean absolute error as metric
numFolds = 5

rfModel = GridSearchCV(
    estimator=RandomForestRegressor(),
    param_grid={
        'n_estimators': [5, 10, 25, 50],
        'max_depth': [5, 10, 50, 100] # Maximum depth of the tree.
    },
    cv=KFold(n_splits=numFolds, shuffle=True),
    scoring='neg_mean_absolute_error',
    return_train_score=False
)

# fit the model
rfModel.fit(xTrain, yTrain)

# get predictions
rfPred = rfModel.predict(xTest)
trainRfPred = rfModel.predict(xTrain)

# find errors
meanAbsoluteError = mean_absolute_error(yTest, rfPred)
trainMeanAbsoluteError = mean_absolute_error(yTrain, trainRfPred)
meanSquaredError = mean_squared_error(yTest, rfPred)


print("mean absolute error of the data in the testing set: " + str(round(meanAbsoluteError*2000, 3)) + " lbs/acre")
print("mean absolute error of the data in the training set: " + str(round(trainMeanAbsoluteError*2000, 3)) + " lbs/acre")
#print("mean squared error:", meanSquaredError)

print('Variance score of testing set: ' + str(r2_score(yTest, rfPred)))
print('Variance score of training set: ' + str(r2_score(yTrain, trainRfPred)))

mean absolute error of the data in the testing set: 183.575 lbs/acre
mean absolute error of the data in the training set: 119.5 lbs/acre
Variance score of testing set: 0.8981185619884312
Variance score of training set: 0.9642732942124886


In [20]:
# adaboosted decicions trees for regression
# make model and find best hyperparameters with mean absolute error as metric
# NOTE: I can make the base_estimator different. the default is a decision tree.
numFolds = 5

abModel = GridSearchCV(
    estimator=AdaBoostRegressor(),
    param_grid={
        'n_estimators': [5, 10, 25, 50],
        'learning_rate': [0.2, 0.6, 1.0],
        'loss': ['linear', 'square', 'exponential']
    },
    cv=KFold(n_splits=numFolds, shuffle=True),
    scoring='neg_mean_absolute_error',
    return_train_score=False
)

# fit the model
abModel.fit(xTrain, yTrain)

# get predictions
abPred = abModel.predict(xTest)
trainAbPred = abModel.predict(xTrain)

# find errors
meanAbsoluteError = mean_absolute_error(yTest, abPred)
trainMeanAbsoluteError = mean_absolute_error(yTrain, trainAbPred)
meanSquaredError = mean_squared_error(yTest, abPred)


print("mean absolute error of the data in the testing set: " + str(round(meanAbsoluteError*2000, 3)) + " lbs/acre")
print("mean absolute error of the data in the training set: " + str(round(trainMeanAbsoluteError*2000, 3)) + " lbs/acre")
#print("mean squared error:", meanSquaredError)

print('Variance score of testing set: ' + str(r2_score(yTest, abPred)))
print('Variance score of training set: ' + str(r2_score(yTrain, trainAbPred)))

mean absolute error of the data in the testing set: 236.386 lbs/acre
mean absolute error of the data in the training set: 271.121 lbs/acre
Variance score of testing set: 0.8486321752741205
Variance score of training set: 0.8549193451867898


In [23]:
# gaussian processor for regression
# make model and find best hyperparameters with mean absolute error as metric
numFolds = 5

gaussModel = GridSearchCV(
    estimator=GaussianProcessRegressor(),
    param_grid={
    },
    cv=KFold(n_splits=numFolds, shuffle=True),
    scoring='neg_mean_absolute_error',
    return_train_score=False
)

# fit the model
gaussModel.fit(xTrain, yTrain)

# get predictions
gaussPred = gaussModel.predict(xTest)
trainGaussPred = gaussModel.predict(xTrain)

# find errors
meanAbsoluteError = mean_absolute_error(yTest, gaussPred)
trainMeanAbsoluteError = mean_absolute_error(yTrain, trainGaussPred)
meanSquaredError = mean_squared_error(yTest, gaussPred)


print("mean absolute error of the data in the testing set: " + str(round(meanAbsoluteError*2000, 3)) + " lbs/acre")
print("mean absolute error of the data in the training set: " + str(round(trainMeanAbsoluteError*2000, 3)) + " lbs/acre")
#print("mean squared error:", meanSquaredError)

print('Variance score of testing set: ' + str(r2_score(yTest, gaussPred)))
print('Variance score of training set: ' + str(r2_score(yTrain, trainGaussPred)))

mean absolute error of the data in the testing set: 3891.74 lbs/acre
mean absolute error of the data in the training set: 102.452 lbs/acre
Variance score of testing set: -420.68273096003963
Variance score of training set: 0.9659613923245743




In [25]:
# k-nearest neighbors for regression
# make model and find best hyperparameters with mean absolute error as metric
numFolds = 5

knnModel = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid={
        'n_neighbors':[2,5,10],
        'weights': ['uniform', 'distance'],
        'leaf_size': [5, 10, 30, 50],
        
    },
    cv=KFold(n_splits=numFolds, shuffle=True),
    scoring='neg_mean_absolute_error',
    return_train_score=False
)

# fit the model
knnModel.fit(xTrain, yTrain)

# get predictions
knnPred = knnModel.predict(xTest)
trainKnnPred = knnModel.predict(xTrain)

# find errors
meanAbsoluteError = mean_absolute_error(yTest, knnPred)
trainMeanAbsoluteError = mean_absolute_error(yTrain, trainKnnPred)
meanSquaredError = mean_squared_error(yTest, knnPred)


print("mean absolute error of the data in the testing set: " + str(round(meanAbsoluteError*2000, 3)) + " lbs/acre")
print("mean absolute error of the data in the training set: " + str(round(trainMeanAbsoluteError*2000, 3)) + " lbs/acre")
#print("mean squared error:", meanSquaredError)

print('Variance score of testing set: ' + str(r2_score(yTest, knnPred)))
print('Variance score of training set: ' + str(r2_score(yTrain, trainKnnPred)))

mean absolute error of the data in the testing set: 191.481 lbs/acre
mean absolute error of the data in the training set: 173.737 lbs/acre
Variance score of testing set: 0.8810947003860168
Variance score of training set: 0.9289473531106566


In [27]:
# bayesian ridge regression
# make model and find best hyperparameters with mean absolute error as metric
numFolds = 5

bayesModel = GridSearchCV(
    estimator=BayesianRidge(),
    param_grid={
        'n_iter':[100,300,500]
        
    },
    cv=KFold(n_splits=numFolds, shuffle=True),
    scoring='neg_mean_absolute_error',
    return_train_score=False
)

# fit the model
bayesModel.fit(xTrain, yTrain)

# get predictions
bayesPred = bayesModel.predict(xTest)
trainBayesPred = bayesModel.predict(xTrain)

# find errors
meanAbsoluteError = mean_absolute_error(yTest, bayesPred)
trainMeanAbsoluteError = mean_absolute_error(yTrain, trainBayesPred)
meanSquaredError = mean_squared_error(yTest, bayesPred)


print("mean absolute error of the data in the testing set: " + str(round(meanAbsoluteError*2000, 3)) + " lbs/acre")
print("mean absolute error of the data in the training set: " + str(round(trainMeanAbsoluteError*2000, 3)) + " lbs/acre")
#print("mean squared error:", meanSquaredError)

print('Variance score of testing set: ' + str(r2_score(yTest, bayesPred)))
print('Variance score of training set: ' + str(r2_score(yTrain, trainBayesPred)))

mean absolute error of the data in the testing set: 348.454 lbs/acre
mean absolute error of the data in the training set: 354.52 lbs/acre
Variance score of testing set: 0.6368532800018523
Variance score of training set: 0.6713165307372615


In [29]:
# neural network for regression
# make model and find best hyperparameters with mean absolute error as metric
numFolds = 5

nnModel = GridSearchCV(
    estimator=MLPRegressor(),
    param_grid={
        'hidden_layer_sizes':[(3), (5), (10), (3,3), (5,5), (10,10)],
        'solver': ['sgd', 'adam'],
        'learning_rate' : ['constant', 'invscaling', 'adaptive'],
        'learning_rate_init': [0.1, 0.01, 0.001]
        
    },
    cv=KFold(n_splits=numFolds, shuffle=True),
    scoring='neg_mean_absolute_error',
    return_train_score=False
)

# fit the model
nnModel.fit(xTrain, yTrain)

# get predictions
nnPred = nnModel.predict(xTest)
trainNnPred = nnModel.predict(xTrain)

# find errors
meanAbsoluteError = mean_absolute_error(yTest, nnPred)
trainMeanAbsoluteError = mean_absolute_error(yTrain, trainNnPred)
meanSquaredError = mean_squared_error(yTest, nnPred)


print("mean absolute error of the data in the testing set: " + str(round(meanAbsoluteError*2000, 3)) + " lbs/acre")
print("mean absolute error of the data in the training set: " + str(round(trainMeanAbsoluteError*2000, 3)) + " lbs/acre")
#print("mean squared error:", meanSquaredError)

print('Variance score of testing set: ' + str(r2_score(yTest, nnPred)))
print('Variance score of training set: ' + str(r2_score(yTrain, trainNnPred)))











mean absolute error of the data in the testing set: 202.931 lbs/acre
mean absolute error of the data in the training set: 196.782 lbs/acre
Variance score of testing set: 0.8797998991269141
Variance score of training set: 0.9085651704490021




In [13]:
"""
# lets compare each model with cross validation
# number of folds
numFolds = 5
# CV for linear regression
linearKFoldErrors = cross_val_score(linearModel, xVals, yVals, cv=numFolds, scoring='neg_mean_absolute_error')
aSum = 0
for error in linearKFoldErrors:
    aSum += error
linearAvgError = aSum/len(linearKFoldErrors)

print("linear regression errors: ", linearKFoldErrors)
print("average svr error over folds: ", linearAvgError)
print(" ")
"""

'\n# lets compare each model with cross validation\n# number of folds\nnumFolds = 5\n# CV for linear regression\nlinearKFoldErrors = cross_val_score(linearModel, xVals, yVals, cv=numFolds, scoring=\'neg_mean_absolute_error\')\naSum = 0\nfor error in linearKFoldErrors:\n    aSum += error\nlinearAvgError = aSum/len(linearKFoldErrors)\n\nprint("linear regression errors: ", linearKFoldErrors)\nprint("average svr error over folds: ", linearAvgError)\nprint(" ")\n'

In [14]:
"""
# CV for SVR
numFolds = 5

#yValsNew = yVals.reshape(len(yVals), )
#print(yVals)
#print(" ")
#print(np.ravel(yVals))
svrModel2=SVR(gamma="scale")
svrKFoldErrors = cross_val_score(svrModel2, xVals, np.ravel(yVals), cv=numFolds, scoring='neg_mean_absolute_error', n_jobs=1)
aSum = 0
for error in svrKFoldErrors:
    aSum += error
svrAvgError = aSum/len(svrKFoldErrors)

print("svr errors: ", svrKFoldErrors)
print("average svr error over folds: ", svrAvgError)
print(" ")
"""

'\n# CV for SVR\nnumFolds = 5\n\n#yValsNew = yVals.reshape(len(yVals), )\n#print(yVals)\n#print(" ")\n#print(np.ravel(yVals))\nsvrModel2=SVR(gamma="scale")\nsvrKFoldErrors = cross_val_score(svrModel2, xVals, np.ravel(yVals), cv=numFolds, scoring=\'neg_mean_absolute_error\', n_jobs=1)\naSum = 0\nfor error in svrKFoldErrors:\n    aSum += error\nsvrAvgError = aSum/len(svrKFoldErrors)\n\nprint("svr errors: ", svrKFoldErrors)\nprint("average svr error over folds: ", svrAvgError)\nprint(" ")\n'

In [15]:
"""
# CV for random forest
numFolds = 5

rfModel2=RandomForestRegressor()
rfKFoldErrors = cross_val_score(rfModel2, xVals, np.ravel(yVals), cv=numFolds, scoring='neg_mean_absolute_error')
aSum = 0
for error in rfKFoldErrors:
    aSum += error
rfAvgError = aSum/len(rfKFoldErrors)

print("random forest errors: ", rfKFoldErrors)
print("average random forest error over folds: ", rfAvgError)
"""

'\n# CV for random forest\nnumFolds = 5\n\nrfModel2=RandomForestRegressor()\nrfKFoldErrors = cross_val_score(rfModel2, xVals, np.ravel(yVals), cv=numFolds, scoring=\'neg_mean_absolute_error\')\naSum = 0\nfor error in rfKFoldErrors:\n    aSum += error\nrfAvgError = aSum/len(rfKFoldErrors)\n\nprint("random forest errors: ", rfKFoldErrors)\nprint("average random forest error over folds: ", rfAvgError)\n'

In [16]:
# save the model
"""
joblib.dump(linearModel, 'linModel.pkl') 
joblib.dump(svrModel, 'svrModel.pkl') 
joblib.dump(rfModel, 'rfModel.pkl') 
"""

"\njoblib.dump(linearModel, 'linModel.pkl') \njoblib.dump(svrModel, 'svrModel.pkl') \njoblib.dump(rfModel, 'rfModel.pkl') \n"