# Meat Predictor

Show overview of chicken prices visualizing averages based on month and year

Use a linear regression model from XGBoost to predict historical meat prices

Next analyze the model for different statistical metrics

Finally create a dataframe with future data for 1, 5 and 10 years and make predictions for each month

### Create Dataframe of Chicken Prices, SMA short and Long finally visualizing chicken prices based on month

In [78]:
# Import dependencies
import math
from datetime import timedelta
import pandas as pd
from pandas.tseries.offsets import DateOffset
import numpy as np
from pathlib import Path
import xgboost as xgb
import pickle
import os
import hvplot.pandas
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, explained_variance_score, mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import RepeatedKFold
from imblearn.over_sampling import RandomOverSampler
import prophet

%matplotlib inline

In [79]:
# Read meat prices into dataframe
csv_path = Path('../resources/historic_meat_prices.csv')

historic_meat_prices_df = pd.read_csv(csv_path)
historic_meat_prices_df.head()

Unnamed: 0,Month,Chicken Price,Chicken Price Change,Lamb Price,Lamb Price Change,Pork Price,Pork Price Change,Beef Price,Beef Price Change
0,Sep 1992,1.08,-,2.8,-,53.85,-,2.44,-
1,Oct 1992,1.06,-1.85%,2.52,-10.00%,53.76,-0.17%,2.5,2.46%
2,Nov 1992,1.09,2.83%,2.55,1.19%,56.06,4.28%,2.54,1.60%
3,Dec 1992,1.09,0.00%,2.72,6.67%,53.17,-5.16%,2.54,0.00%
4,Jan 1993,1.11,1.83%,2.69,-1.10%,59.14,11.23%,2.35,-7.48%


In [80]:
# slice DataFrame for only chicken prices
chicken_df = historic_meat_prices_df.loc[:, ['Month', 'Chicken Price']]
chicken_df

Unnamed: 0,Month,Chicken Price
0,Sep 1992,1.08
1,Oct 1992,1.06
2,Nov 1992,1.09
3,Dec 1992,1.09
4,Jan 1993,1.11
...,...,...
356,May 2022,3.70
357,Jun 2022,3.67
358,Jul 2022,3.54
359,Aug 2022,3.32


In [81]:
# Get dytpes
chicken_df.dtypes

Month             object
Chicken Price    float64
dtype: object

In [82]:
# change Month into datetime object
chicken_df['Month'] = pd.to_datetime(chicken_df['Month'], format='%b %Y')
chicken_df.head()

Unnamed: 0,Month,Chicken Price
0,1992-09-01,1.08
1,1992-10-01,1.06
2,1992-11-01,1.09
3,1992-12-01,1.09
4,1993-01-01,1.11


In [83]:
# Check dtypes
chicken_df.dtypes

Month            datetime64[ns]
Chicken Price           float64
dtype: object

In [84]:
# Rename Month to Date
chicken_df = chicken_df.rename(columns={'Month': 'Date'})
chicken_df.head()

Unnamed: 0,Date,Chicken Price
0,1992-09-01,1.08
1,1992-10-01,1.06
2,1992-11-01,1.09
3,1992-12-01,1.09
4,1993-01-01,1.11


In [85]:
# Set index as Month
chicken_df = chicken_df.set_index(['Date'])

chicken_df

Unnamed: 0_level_0,Chicken Price
Date,Unnamed: 1_level_1
1992-09-01,1.08
1992-10-01,1.06
1992-11-01,1.09
1992-12-01,1.09
1993-01-01,1.11
...,...
2022-05-01,3.70
2022-06-01,3.67
2022-07-01,3.54
2022-08-01,3.32


In [86]:
# Create daily percent change
chicken_df['pct_change'] = chicken_df['Chicken Price'].pct_change()

# drop nan values
chicken_df = chicken_df.dropna()

# Review Data Frame
chicken_df.head()

Unnamed: 0_level_0,Chicken Price,pct_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1992-10-01,1.06,-0.018519
1992-11-01,1.09,0.028302
1992-12-01,1.09,0.0
1993-01-01,1.11,0.018349
1993-02-01,1.13,0.018018


In [87]:
# Plot Price Data
chicken_df_plot = chicken_df.hvplot.line(
    x='Date',
    y='Chicken Price',
    label='Cost of Chicken per Pound since 1992',
    shared_axes=True
)
chicken_df_plot

In [88]:
# Plot monthly pct change data
chicken_df.hvplot.line(
    x='Date',
    y='pct_change',
    label='Volatility in Chicken Price since 1992'
)

In [89]:
# Create extra features for prediction
# Use the finta library to create different features for chicken prices
from finta import TA

In [90]:
# Try creating a simple moving average
short_window = 5
long_window = 20

# Create fast and slow sma
chicken_df['sma_fast'] = chicken_df['Chicken Price'].rolling(window=short_window).mean()
chicken_df['sma_long'] = chicken_df['Chicken Price'].rolling(window=long_window).mean()

# Drop NaN values
chicken_df = chicken_df.dropna()

# Review Data Frame
chicken_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0_level_0,Chicken Price,pct_change,sma_fast,sma_long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-05-01,1.3,0.056911,1.218,1.1895
1994-06-01,1.31,0.007692,1.248,1.202
1994-07-01,1.28,-0.022901,1.268,1.2115
1994-08-01,1.23,-0.039062,1.27,1.2185
1994-09-01,1.24,0.00813,1.272,1.225


In [91]:
chicken_df.dtypes

Chicken Price    float64
pct_change       float64
sma_fast         float64
sma_long         float64
dtype: object

In [92]:
# visualize monthly trends for chicken price volatility
# Declare group level to be the month of the year
group_level = chicken_df.index.month

# plot mean of percent changes according to month 
chicken_df[['Chicken Price', 'sma_fast', 'sma_long']].groupby(group_level).mean().hvplot(title='Average Chicken Price and Moving Averages Based on Month',
                                                                                        ylabel='Price per Pound ($)')

In [93]:
# Create a Heatmap
chicken_df.hvplot.heatmap(x='index.month', y='index.year', C='pct_change', cmap='reds', label='Heatmap Volatility in Chicken Prices').aggregate(function=np.mean)

## Start Predictive Modeling

In [94]:
# Organize Data Frame for predictive modeling


In [95]:
# Split Date into month and year
chicken_df['Month'] = chicken_df.index.month
chicken_df['Year'] = chicken_df.index.year
chicken_df.head()

Unnamed: 0_level_0,Chicken Price,pct_change,sma_fast,sma_long,Month,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1994-05-01,1.3,0.056911,1.218,1.1895,5,1994
1994-06-01,1.31,0.007692,1.248,1.202,6,1994
1994-07-01,1.28,-0.022901,1.268,1.2115,7,1994
1994-08-01,1.23,-0.039062,1.27,1.2185,8,1994
1994-09-01,1.24,0.00813,1.272,1.225,9,1994


In [96]:
# Create dataframe for use in machine learning model
learning_df = chicken_df.reset_index().drop(columns=['pct_change', 'Date'])
learning_df

Unnamed: 0,Chicken Price,sma_fast,sma_long,Month,Year
0,1.30,1.218,1.1895,5,1994
1,1.31,1.248,1.2020,6,1994
2,1.28,1.268,1.2115,7,1994
3,1.23,1.270,1.2185,8,1994
4,1.24,1.272,1.2250,9,1994
...,...,...,...,...,...
336,3.70,3.284,2.4250,5,2022
337,3.67,3.442,2.5335,6,2022
338,3.54,3.584,2.6255,7,2022
339,3.32,3.580,2.7010,8,2022


In [97]:
# Split data into features and target

X = learning_df.drop(columns=['Chicken Price', 'sma_fast', 'sma_long'])
y = learning_df['Chicken Price']
display(X.head())
display(y.head())

Unnamed: 0,Month,Year
0,5,1994
1,6,1994
2,7,1994
3,8,1994
4,9,1994


0    1.30
1    1.31
2    1.28
3    1.23
4    1.24
Name: Chicken Price, dtype: float64

In [98]:
# Split data into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
y_test

102    1.36
125    1.68
11     1.13
248    2.22
238    2.26
       ... 
9      1.13
127    1.62
16     1.34
0      1.30
284    2.07
Name: Chicken Price, Length: 86, dtype: float64

### Try fitting with scaled data

In [99]:
# Scale the X data
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [100]:
# set up XGBoost Regression Model
model = xgb.XGBRegressor(n_estimators=500)

In [101]:
# Fit training data to model
model.fit(X_train_scaled, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=500, n_jobs=16,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [102]:
# predict data
y_pred = model.predict(X_test_scaled)
y_pred

array([1.3847843, 1.6828423, 1.1375706, 2.0558658, 2.356015 , 3.7060807,
       1.529502 , 1.5440714, 1.4454033, 1.9291391, 1.6019086, 2.243597 ,
       1.7069492, 1.7547431, 1.2246354, 2.2062695, 2.3632705, 2.1435854,
       2.1228535, 2.0760524, 1.850354 , 1.6134506, 2.1662443, 1.1399384,
       3.3262293, 1.7377231, 2.583255 , 2.120039 , 1.5175556, 1.7858806,
       1.8927542, 1.8682045, 2.4031177, 1.4042159, 1.2921969, 1.5332053,
       1.3028666, 1.6245114, 2.049815 , 1.7052474, 1.8975205, 1.7846007,
       1.7843257, 1.9430116, 2.2349734, 1.3743337, 1.3555328, 1.8676109,
       3.335833 , 1.338491 , 1.6562855, 1.3281239, 2.0593643, 2.1304595,
       1.1439363, 1.8603476, 1.4472201, 2.0326793, 3.3448367, 1.9812485,
       1.4481263, 1.4152349, 2.0597928, 1.3675833, 1.3258307, 1.6079024,
       1.4387065, 2.1877139, 1.5574712, 1.4049184, 1.3150978, 2.205972 ,
       1.8521771, 1.7539606, 2.3787227, 1.8119415, 1.2852364, 2.3121486,
       1.4015503, 1.8802532, 1.5013239, 1.1391875, 

In [103]:
# Create Metrics Dataframe
explained_variance = explained_variance_score(y_test, y_pred)
mean_squared = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mean_absolute = mean_absolute_error(y_test, y_pred)

print(f'Explained Variance is : {explained_variance:.2f}')
print(f'Mean Squared Error is : {mean_squared:.2f}')
print(f'Mean Absolute Erros is : {mean_absolute:.2f}')
print(f'R Squared is : {r2:.2f}')

Explained Variance is : 0.96
Mean Squared Error is : 0.01
Mean Absolute Erros is : 0.05
R Squared is : 0.96


### Try fitting with unscaled data

In [104]:
# set up XGBoost Regression Model
model_unscaled = xgb.XGBRegressor(n_estimators=500)

In [105]:
# Fit training data to model
model_unscaled.fit(X_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=500, n_jobs=16,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [106]:
# predict data
y_pred_unscaled = model_unscaled.predict(X_test)
y_pred_unscaled

array([1.3847843, 1.6720982, 1.1375706, 2.0558658, 2.356015 , 3.7060807,
       1.529502 , 1.5440714, 1.4454033, 1.9266416, 1.6019086, 2.243597 ,
       1.7069492, 1.7391224, 1.2246354, 2.2062695, 2.3632705, 2.1435854,
       2.1228535, 2.0760524, 1.850354 , 1.6134506, 2.1662443, 1.1399384,
       3.3262293, 1.7377231, 2.583255 , 2.120039 , 1.5175556, 1.7858806,
       1.8921106, 1.8682045, 2.4031177, 1.4042159, 1.2921969, 1.5332053,
       1.3028666, 1.6245114, 2.049815 , 1.7052474, 1.8975205, 1.7846007,
       1.7843257, 1.9430116, 2.2332425, 1.3743337, 1.3555328, 1.8676109,
       3.335833 , 1.338491 , 1.6562855, 1.3281239, 2.0593643, 2.1304595,
       1.1439363, 1.8603476, 1.4472201, 2.0326793, 3.3448367, 1.9812485,
       1.4503986, 1.4152349, 2.0597928, 1.3675833, 1.3258307, 1.6079024,
       1.4387065, 2.1877139, 1.5574712, 1.4049184, 1.3150978, 2.205972 ,
       1.8521771, 1.7539606, 2.3418295, 1.8119415, 1.2852364, 2.3121486,
       1.4015503, 1.8802532, 1.5013239, 1.1391875, 

In [107]:
# Create Metrics Dataframe
explained_variance = explained_variance_score(y_test, y_pred_unscaled)
mean_squared = mean_squared_error(y_test, y_pred_unscaled)
r2 = r2_score(y_test, y_pred_unscaled)
mean_absolute = mean_absolute_error(y_test, y_pred_unscaled)

print(f'Explained Variance is : {explained_variance:.2f}')
print(f'Mean Squared Error is : {mean_squared:.2f}')
print(f'Mean Absolute Erros is : {mean_absolute:.2f}')
print(f'R Squared is : {r2:.2f}')

Explained Variance is : 0.96
Mean Squared Error is : 0.01
Mean Absolute Erros is : 0.05
R Squared is : 0.96


**Both of the models have the exact same evaluation metrics**

### Pickle the models for use in future predictions

In [108]:
# save model to file
pickle.dump(model, open("./models/pickeled_model.pkl", "wb"))
pickle.dump(model_unscaled, open("./models/pickeled_model_unscaled.pkl", "wb"))

### Generate Future DataFrame for 10 years

In [109]:
# slice DataFrame for only chicken prices
future_df = historic_meat_prices_df.loc[:, ['Month']]
future_df

Unnamed: 0,Month
0,Sep 1992
1,Oct 1992
2,Nov 1992
3,Dec 1992
4,Jan 1993
...,...
356,May 2022
357,Jun 2022
358,Jul 2022
359,Aug 2022


In [110]:
# change Month into datetime object
future_df['Month'] = pd.to_datetime(future_df['Month'], format='%b %Y')
future_df

Unnamed: 0,Month
0,1992-09-01
1,1992-10-01
2,1992-11-01
3,1992-12-01
4,1993-01-01
...,...
356,2022-05-01
357,2022-06-01
358,2022-07-01
359,2022-08-01


In [111]:
# Create future data frame
to_merge_future_dates = pd.DataFrame()
to_merge_future_dates['Month'] = pd.date_range(start='2022-09-01', periods=(120), freq='M')
to_merge_future_dates['Month'] = to_merge_future_dates['Month'] + pd.Timedelta(days=1)
to_merge_future_dates 

Unnamed: 0,Month
0,2022-10-01
1,2022-11-01
2,2022-12-01
3,2023-01-01
4,2023-02-01
...,...
115,2032-05-01
116,2032-06-01
117,2032-07-01
118,2032-08-01


In [112]:
# Concatenate to create a full dataframe with past dates and future dates
full_future_df = pd.concat([future_df, to_merge_future_dates], ignore_index=True)

# Rename columns to date 
full_future_df = full_future_df.rename(columns={'Month':'Date'})

# Set index as date
full_future_df = full_future_df.set_index('Date')

In [113]:
# Check dtypes
full_future_df.dtypes

Series([], dtype: object)

In [114]:
# split the month into a format for our model to predict
# Split Date into month and year
full_future_df['Month'] = full_future_df.index.month
full_future_df['Year'] = full_future_df.index.year
full_future_df.head()

Unnamed: 0_level_0,Month,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1992-09-01,9,1992
1992-10-01,10,1992
1992-11-01,11,1992
1992-12-01,12,1992
1993-01-01,1,1993


In [115]:
# Finalizing dataframe for predicting
predicting_df = full_future_df.loc[:, ['Month', 'Year']].reset_index().drop(columns=['Date'])
predicting_df

Unnamed: 0,Month,Year
0,9,1992
1,10,1992
2,11,1992
3,12,1992
4,1,1993
...,...,...
476,5,2032
477,6,2032
478,7,2032
479,8,2032


In [116]:
# load our models from pickled files for scaled data
pickled_model = pickle.load(open('./models/pickeled_model.pkl', 'rb'))

In [117]:
# Create X features then scale the data
X_future = predicting_df
X_future

Unnamed: 0,Month,Year
0,9,1992
1,10,1992
2,11,1992
3,12,1992
4,1,1993
...,...,...
476,5,2032
477,6,2032
478,7,2032
479,8,2032


In [118]:
# Scale the X future date
X_future_scaled = X_scaler.transform(X_future)
X_future_scaled[:10]

array([[ 0.70826165, -1.96756134],
       [ 0.99631064, -1.96756134],
       [ 1.28435964, -1.96756134],
       [ 1.57240864, -1.96756134],
       [-1.59613032, -1.84370843],
       [-1.30808132, -1.84370843],
       [-1.02003233, -1.84370843],
       [-0.73198333, -1.84370843],
       [-0.44393433, -1.84370843],
       [-0.15588534, -1.84370843]])

In [119]:
# predict for the future and past
future_pred = pickled_model.predict(X_future_scaled)
future_pred[:10]

array([1.2246354, 1.1990901, 1.1399384, 1.1323035, 1.2086565, 1.2099007,
       1.209633 , 1.2082839, 1.2793741, 1.3068103], dtype=float32)

In [120]:
# turn predictions to data frame
future_pred_df = pd.DataFrame(future_pred)
future_pred_df = future_pred_df.rename(columns={0:'Predicted Chicken Price'})

In [121]:
# review future pred df
future_pred_df = future_pred_df.set_index(full_future_df.index)
future_pred_df

Unnamed: 0_level_0,Predicted Chicken Price
Date,Unnamed: 1_level_1
1992-09-01,1.224635
1992-10-01,1.199090
1992-11-01,1.139938
1992-12-01,1.132303
1993-01-01,1.208657
...,...
2032-05-01,3.697761
2032-06-01,3.706081
2032-07-01,3.326229
2032-08-01,3.319972


In [122]:
# Plot Predicted Data 
future_chicken_plot = future_pred_df.hvplot(label='Predicted Chicken Prices',
                     shared_axes=True)
future_chicken_plot

In [123]:
# Create composite plot of actual vs predicted
compositie_plot = chicken_df_plot * future_chicken_plot
compositie_plot

### Create Plots

1. Predictions based on actual data
2. Composite Plots for 