**1. Importing the libraries**

In [64]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

**2. Importing the dataset**

In [65]:
data = pd.read_csv('model_data.csv')
data = data.drop(['Unnamed: 0'], axis = 1)
data.head(10)

Unnamed: 0,production,sales,demand,population,gdp,disbursement,interest_rate,months,years
0,347.0,322.0,346.0,122.4,182277.0,60314.0,10.25,1,2010
1,306.0,285.0,338.0,122.5,181018.0,61213.92,10.33,2,2010
2,236.0,245.0,276.0,122.6,179759.0,62113.83,10.42,3,2010
3,234.0,212.0,245.0,122.8,178500.0,63013.75,10.5,4,2010
4,296.0,289.0,312.0,122.9,177354.0,63913.67,10.58,5,2010
5,224.0,212.0,215.0,123.1,176208.0,64813.58,10.67,6,2010
6,184.0,174.0,181.0,123.2,175062.0,65713.5,10.75,7,2010
7,193.0,200.0,216.0,123.3,177418.0,66613.42,10.83,8,2010
8,169.0,152.0,178.0,123.5,179774.0,67513.33,10.92,9,2010
9,200.0,214.0,215.0,123.6,182130.0,68413.25,11.0,10,2010


**3. Dividing the data into dependent and independent variables**

In [66]:
y = data['demand']
X = data.drop(['demand'], axis = 1)

**4. Splitting the data into training and testing sets**

In [67]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = .2, random_state = 0)

**5. Training a Linear Regression model**

In [68]:
from sklearn.linear_model import LinearRegression
linearregressor = LinearRegression()
linearregressor.fit(X_train, y_train)

**6. Perforance of the model**

In [69]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
y_pred = linearregressor.predict(X_test)
print('R2 score: ', r2_score(y_test, y_pred))
print('Mean Squared Error: ', mean_squared_error(y_test, y_pred))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))

R2 score:  0.8457257156294531
Mean Squared Error:  3593.6476496885475
Mean Absolute Error:  32.7421149802673


**7. Training a Random Forest Regressor**

In [70]:
from sklearn.ensemble import RandomForestRegressor
rfregressor = RandomForestRegressor(n_estimators = 100, random_state = 0)
rfregressor.fit(X_train, y_train)

**8. Performance of the model**

In [71]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
y_pred = rfregressor.predict(X_test)
print('R2 score: ', r2_score(y_test, y_pred))
print('Mean Squared Error: ', mean_squared_error(y_test, y_pred))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))

R2 score:  0.8432229131772384
Mean Squared Error:  3651.9476456129028
Mean Absolute Error:  32.69387096774194


**9. Training a Decision Tree Regressor**

In [72]:
from sklearn.tree import DecisionTreeRegressor
dtregressor = DecisionTreeRegressor(random_state = 0)
dtregressor.fit(X_train, y_train)

**10. Performance of the model**

In [73]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
y_pred = dtregressor.predict(X_test)
print('R2 score: ', r2_score(y_test, y_pred))
print('Mean Squared Error: ', mean_squared_error(y_test, y_pred))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))

R2 score:  0.8350654453517452
Mean Squared Error:  3841.9667741935477
Mean Absolute Error:  38.37096774193548


**11. Training a Support Vector Regressor**

In [74]:
from sklearn.svm import SVR
svregressor = SVR(kernel = 'rbf')
svregressor.fit(X_train, y_train)

**12. Performance of the model**

In [75]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
y_pred = svregressor.predict(X_test)
print('R2 score: ', r2_score(y_test, y_pred))
print('Mean Squared Error: ', mean_squared_error(y_test, y_pred))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))

R2 score:  -0.04500831873500455
Mean Squared Error:  24342.305030610692
Mean Absolute Error:  116.3379023274345


**13. Training a K-Nearest Neighbors Regressor**

In [76]:
from sklearn.neighbors import KNeighborsRegressor
knnregressor = KNeighborsRegressor(n_neighbors = 5)
knnregressor.fit(X_train, y_train)

**14. Performance of the model**

In [77]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
y_pred = knnregressor.predict(X_test)
print('R2 score: ', r2_score(y_test, y_pred))
print('Mean Squared Error: ', mean_squared_error(y_test, y_pred))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))

R2 score:  0.5096529870811297
Mean Squared Error:  11422.0876
Mean Absolute Error:  80.76322580645162


**15. Training a Gradient Boosting Regressor**

In [78]:
from sklearn.ensemble import GradientBoostingRegressor
gbregressor = GradientBoostingRegressor(n_estimators = 100, random_state = 0)
gbregressor.fit(X_train, y_train)

**16. Performance of the model**

In [79]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
y_pred = gbregressor.predict(X_test)
print('R2 score: ', r2_score(y_test, y_pred))
print('Mean Squared Error: ', mean_squared_error(y_test, y_pred))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))

R2 score:  0.8588686412840091
Mean Squared Error:  3287.497832943434
Mean Absolute Error:  33.437433578855824


**17. Training a XGBoost Regressor**

In [80]:
from xgboost import XGBRegressor
xgbregressor = XGBRegressor(n_estimators = 100, random_state = 0)
xgbregressor.fit(X_train, y_train)

In [81]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
y_pred = xgbregressor.predict(X_test)
print('R2 score: ', r2_score(y_test, y_pred))
print('Mean Squared Error: ', mean_squared_error(y_test, y_pred))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))

R2 score:  0.8183919169185863
Mean Squared Error:  4230.358050875274
Mean Absolute Error:  35.578340788810486


**Coclusion**
The best performing model is the Gradient Boosting Regressor with an R2 score of 0.85, Mean Squared Error of  3287.4 and Mean Absolute Error of  33.4.

**18. Saving the model**

In [82]:
import pickle
pickle.dump(gbregressor, open('model.pkl', 'wb'))

In [1]:
data.columns

NameError: name 'data' is not defined