In [1]:
#Dependencies
import os
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Base Predictions for 2019
This baseline follows a logic pattern similar to the one the business uses to determine future production amounts. The business further refines these numbers by taking into account the orders it has on the books for the upcoming cycle, but this baseline gives it a starting point. Note also that these are aggregated to monthly amounts.
### Business Formula for Total Monthly Allocations:
1.  Calculate the percentage (%) shipped per month by block
2.  Apply the new allocation for the upcoming year (dependent on the resources available) by multiplying it by the % shipped last year
-  We use the total amount shipped for 2019 as the 'new allocation' amount
 
#### All of these calculations can be found in the base_forecast2019.xlsx for our sample block 4" ANG

## Importing Datasets

In [2]:
# Folder holding the input workbook, relative to the notebook's working directory.
# Kept as a plain string ending in '/' because file names are appended with `+`.
path = './datasets/'
# Echo the working directory so the relative folder above can be resolved by eye.
print(os.getcwd())

/Users/jacosta3/OneDrive - University of South Florida/Gerdau Projects/pft/submission


In [3]:
# Load both worksheets with a single read of the workbook: passing a list of
# sheet names makes read_excel return a dict of DataFrames keyed by sheet name,
# so the file is opened once and its name is spelled out in one place only.
sheets = pd.read_excel(
    path + 'base_forecast2019.xlsx',
    sheet_name=['forecast_2019', 'actuals_2019'],
)
base_predictions = sheets['forecast_2019']  # contents of the 'forecast_2019' sheet
actuals = sheets['actuals_2019']            # contents of the 'actuals_2019' sheet

In [4]:
# Round the forecast column to the nearest whole number and store it as integers.
# Bracket indexing is used for the assignment so it is unambiguous that an
# existing column is being overwritten.
base_predictions['shipment_predictions'] = (
    base_predictions['shipment_predictions']
    .round()
    .astype(int)
)

### Looking at dataframes

In [5]:
# Print the column dtypes of the forecast frame as plain text, then preview its
# first two rows; the bare last expression is what the cell renders as output.
print(base_predictions.dtypes)
base_predictions.head(2)

calendar_day            datetime64[ns]
year                             int64
month                            int64
block                           object
shipment_tons                    int64
future_date             datetime64[ns]
percentage                     float64
total_alloc_2019                 int64
shipment_predictions             int64
dtype: object


Unnamed: 0,calendar_day,year,month,block,shipment_tons,future_date,percentage,total_alloc_2019,shipment_predictions
0,2018-01-07,2018,1,"4"" ANG",36,2019-01-06,0.00177,22431,40
1,2018-01-14,2018,1,"4"" ANG",42,2019-01-13,0.002065,22431,46


In [6]:
# Print the column dtypes of the actuals frame as plain text, then preview its
# first two rows; the bare last expression is what the cell renders as output.
print(actuals.dtypes)
actuals.head(2)

calendar_day     datetime64[ns]
year                      int64
month                     int64
block                    object
shipment_tons             int64
dtype: object


Unnamed: 0,calendar_day,year,month,block,shipment_tons
0,2019-01-06,2019,1,"4"" ANG",53
1,2019-01-13,2019,1,"4"" ANG",120


## Evaluation of Results

In [7]:
# Series used by the evaluation below: forecast values and observed values.
# NOTE(review): the two series come from different frames and are paired by row
# position — presumably both sheets share the same row order; verify.
pred_y = base_predictions['shipment_predictions']
test_y = actuals['shipment_tons']

In [8]:
# Tests to be applied across models.
r2 = r2_score(test_y, pred_y)
mae = mean_absolute_error(test_y, pred_y)
# RMSE is taken as sqrt(MSE) instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, where it raises a
# TypeError. The square-root form gives the same value on every version.
rmse = np.sqrt(mean_squared_error(test_y, pred_y))

# Everything from the fifth row onward (positional); the first four of these
# rows are printed below as a quick side-by-side spot check.
four_pred = pred_y.iloc[4:]
four_test = test_y.iloc[4:]

print("Test Scores:")
print("  Mean Absolute Error(MAE):", mae)
print("  Root Mean Squared Error(RMSE):", rmse)
print("  Coefficient of Determination(R2):", r2)
print("Take 4 Sample Comparison:")
print("  Predictions:", four_pred.iloc[0:4].to_numpy())
print("      Actuals:", four_test.iloc[0:4].to_numpy())

Test Scores:
  Mean Absolute Error(MAE): 278.7692307692308
  Root Mean Squared Error(RMSE): 379.13266401495525
  Coefficient of Determination(R2): -0.5521468665048599
Take 4 Sample Comparison:
  Predictions: [1319 1575 1295  997]
      Actuals: [875 736 617 490]
