# Table of Contents
1. [Average Case](#predicting-coal-for-the-average-case)
2. [Best Case](#predicting-coal-for-the-best-case)
3. [Worst Case](#predicting-coal-for-the-worst-case)

# Predicting Coal for the Average Case

In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.ensemble import RandomForestRegressor
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
data = pd.read_csv("C:/Users/msteinme/Documents/bdifreightdollarpredictcoalavg.csv")
coal = data[['Date','BDI','Freight_MLR','Freight_RDF','Dollar_All','Coal']][215:1006]
coal_fdnew = np.array(coal)
coal_fdnew = pd.DataFrame(coal_fdnew)
coal_fdnew.columns = ['Date','BDI','Freight_MLR','Freight_RDF','Dollar_All','Coal']
print(coal_fdnew.dtypes)
coal_fdnew.tail()

Date           object
BDI            object
Freight_MLR    object
Freight_RDF    object
Dollar_All     object
Coal           object
dtype: object


Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal
786,3/13/2015,562,5.7,5.7,100.33,64.95
787,3/20/2015,591,5.7,5.7,97.909,64.05
788,3/27/2015,596,5.7,5.7,97.291,63.3
789,4/3/2015,588,5.65,5.65,96.545,55.6
790,4/10/2015,580,6.85,6.85,99.338,54.65


In [48]:
coal.describe()

Unnamed: 0,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal
count,791.0,791.0,791.0,791.0,791.0
mean,2722.970923,15.471719,15.471719,88.876618,66.853034
std,2211.163051,10.855792,10.855792,12.761327,34.354293
min,513.0,4.0,4.0,71.657,20.9
25%,1121.0,8.775,8.775,80.1775,34.8
50%,1888.0,11.95,11.95,84.4,62.9
75%,3696.5,18.275,18.275,93.385,89.95
max,11612.0,73.35,73.35,119.9,192.5


In [49]:
from datetime import datetime
coal_fdnew['Date'] = pd.to_datetime(coal_fdnew['Date'])
date = coal_fdnew['Date']
coal_fdnew.dtypes

Date           datetime64[ns]
BDI                    object
Freight_MLR            object
Freight_RDF            object
Dollar_All             object
Coal                   object
dtype: object

In [24]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from statsmodels.sandbox.regression.predstd import wls_prediction_std
X = coal[['BDI','Freight_MLR','Dollar_All']]
Y = coal[['Coal']]
X= sm.add_constant(X)
est= sm.OLS(Y,X).fit()
est.summary()

0,1,2,3
Dep. Variable:,Coal,R-squared:,0.641
Model:,OLS,Adj. R-squared:,0.64
Method:,Least Squares,F-statistic:,468.4
Date:,"Fri, 01 Apr 2016",Prob (F-statistic):,1.51e-174
Time:,10:44:15,Log-Likelihood:,-3514.3
No. Observations:,791,AIC:,7037.0
Df Residuals:,787,BIC:,7055.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
const,214.4230,7.709,27.815,0.000,199.290 229.556
BDI,-0.0119,0.001,-9.780,0.000,-0.014 -0.010
Freight_MLR,2.5352,0.269,9.433,0.000,2.008 3.063
Dollar_All,-1.7375,0.076,-22.972,0.000,-1.886 -1.589

0,1,2,3
Omnibus:,101.237,Durbin-Watson:,0.092
Prob(Omnibus):,0.0,Jarque-Bera (JB):,147.612
Skew:,0.9,Prob(JB):,8.84e-33
Kurtosis:,4.113,Cond. No.,36900.0


In [39]:
X = coal_fdnew[['BDI','Freight_MLR','Dollar_All']].values
y = coal_fdnew['Coal'].values
X_train, X_test, y_train, y_test= train_test_split(X,y,test_size=0.3,random_state=1)
forest=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=10,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=150, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)
forest.fit(X_train, y_train)
y_train_pred= forest.predict(X_train)
y_test_pred= forest.predict(X_test)
print('MSE train: %.3f, test: %.3f' % (mean_squared_error(y_train, y_train_pred),mean_squared_error(y_test, y_test_pred)))
print('R^2 train: %.3f, test: %.3f' % (r2_score(y_train, y_train_pred),r2_score(y_test,y_test_pred)))
print(forest.feature_importances_)

MSE train: 24.280, test: 112.556
R^2 train: 0.979, test: 0.906
[ 0.1348884   0.06212429  0.80298731]


In [51]:
coal_predicted_fd = forest.predict(X)
coal_predicted_fd = pd.DataFrame(coal_predicted_fd)
coal_fdnew['Predicted_Coal_RDF(MLR)'] = coal_predicted_fd
rdf_error = (abs(coal_fdnew['Predicted_Coal_RDF(MLR)'] - coal_fdnew['Coal'])/coal_fdnew['Coal'])*100
coal_fdnew['RDF_Error(MLR)'] = rdf_error

coal_predicted_fd2 = forest.predict(X)
coal_predicted_fd2 = pd.DataFrame(coal_predicted_fd2)
coal_fdnew['Predicted_Coal_RDF(RDF)'] = coal_predicted_fd2
rdf_error2 = (abs(coal_fdnew['Predicted_Coal_RDF(RDF)'] - coal_fdnew['Coal'])/coal_fdnew['Coal'])*100
coal_fdnew['RDF_Error(RDF)'] = rdf_error2
print("Average RDF error for mlr values is: %.3f" % (rdf_error.mean())) 
print("Average RDF error for rdf values is: %.3f" % (rdf_error2.mean())) 
coal_fdnew.head()

Average RDF error for mlr values is: 5.684
Average RDF error for rdf values is: 5.684


Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal,Predicted_Coal_RDF(MLR),RDF_Error(MLR),Predicted_Coal_RDF(RDF),RDF_Error(RDF)
0,2000-02-18,1454,7.5,7.5,104.63,21.3,23.448197,10.0854,23.448197,10.0854
1,2000-02-25,1511,7.5,7.5,105.26,21.3,22.730244,6.71476,22.730244,6.71476
2,2000-03-03,1578,7.75,7.75,106.0,20.9,22.25887,6.50177,22.25887,6.50177
3,2000-03-10,1645,7.85,7.85,105.71,21.0,21.585929,2.79014,21.585929,2.79014
4,2000-03-17,1630,7.85,7.85,105.2,21.0,21.667909,3.18052,21.667909,3.18052


In [52]:
newpred2 = data [['Date','BDI','Freight_MLR','Freight_RDF','Dollar_All']][1006:]
newpred = data [['BDI','Freight_MLR','Dollar_All']][1006:]
newpred3 = data[['BDI','Freight_RDF','Dollar_All']][1006:]

In [53]:
#new predictions
newpred2 = np.array(newpred2)
newpred2 = pd.DataFrame(newpred2)
newpred2.columns = ['Date', 'BDI','Freight_MLR','Freight_RDF','Dollar_All']
newpred_predicted = forest.predict(newpred)
df_new_pred = pd.DataFrame(newpred_predicted)
newpred2['Predicted_Coal_RDF(MLR)'] = df_new_pred

newpred_predicted2 = forest.predict(newpred3)
df_new_pred2 = pd.DataFrame(newpred_predicted2)
newpred2['Predicted_Coal_RDF(RDF)'] = df_new_pred2

newpred2['Date'] = pd.to_datetime(newpred2['Date'])
newpred2

Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Predicted_Coal_RDF(MLR),Predicted_Coal_RDF(RDF)
0,2015-04-17,597,4.21337,5.23284,97.52,60.966000,60.966000
1,2015-04-24,600,4.32159,5.25428,96.922,61.545333,61.545333
2,2015-05-01,587,4.52261,5.44168,95.297,59.648333,59.648333
3,2015-05-08,574,4.54533,5.44788,94.794,60.575000,60.575000
4,2015-05-15,634,5.07295,5.51438,93.135,62.349208,62.429208
5,2015-05-22,586,4.40428,5.40733,96.014,58.775667,58.775667
6,2015-05-29,589,4.27558,5.25428,96.907,58.779333,58.779333
7,2015-06-05,610,4.46348,5.24554,96.306,61.487667,61.487667
8,2015-06-12,642,4.81625,5.44168,94.972,62.311333,62.311333
9,2015-06-19,779,5.55999,5.32679,94.085,47.837293,48.025627


In [54]:
frames = [coal_fdnew,newpred2]
combined = pd.concat(frames)
combined

Unnamed: 0,BDI,Coal,Date,Dollar_All,Freight_MLR,Freight_RDF,Predicted_Coal_RDF(MLR),Predicted_Coal_RDF(RDF),RDF_Error(MLR),RDF_Error(RDF)
0,1454,21.3,2000-02-18,104.63,7.5,7.5,23.448197,23.448197,10.0854,10.0854
1,1511,21.3,2000-02-25,105.26,7.5,7.5,22.730244,22.730244,6.71476,6.71476
2,1578,20.9,2000-03-03,106,7.75,7.75,22.258870,22.258870,6.50177,6.50177
3,1645,21,2000-03-10,105.71,7.85,7.85,21.585929,21.585929,2.79014,2.79014
4,1630,21,2000-03-17,105.2,7.85,7.85,21.667909,21.667909,3.18052,3.18052
5,1618,21,2000-03-24,104.77,7.85,7.85,21.982143,21.982143,4.67687,4.67687
6,1660,21,2000-03-31,105.44,7.85,7.85,21.644483,21.644483,3.06897,3.06897
7,1684,21.2,2000-04-07,105.97,7.95,7.95,21.722302,21.722302,2.46369,2.46369
8,1676,21.4,2000-04-14,105.47,8.1,8.1,21.841188,21.841188,2.06162,2.06162
9,1656,21.5,2000-04-21,107.45,8.1,8.1,22.049740,22.049740,2.55693,2.55693


In [55]:
combined.to_csv("C:/Users/msteinme/Documents/coalavg.csv")

# Predicting Coal for the Best Case
[Top](#Table-of-contents)

In [57]:
data = pd.read_csv("C:/Users/msteinme/Documents/bdifreightdollarpredictcoalbest.csv")
coal = data[['Date','BDI','Freight_MLR','Freight_RDF','Dollar_All','Coal']][215:1006]
coal_fdnew = np.array(coal)
coal_fdnew = pd.DataFrame(coal_fdnew)
coal_fdnew.columns = ['Date','BDI','Freight_MLR','Freight_RDF','Dollar_All','Coal']
print(coal_fdnew.dtypes)
coal_fdnew.head()

Date           object
BDI            object
Freight_MLR    object
Freight_RDF    object
Dollar_All     object
Coal           object
dtype: object


Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal
0,2/18/2000,1454,7.5,7.5,104.63,21.3
1,2/25/2000,1511,7.5,7.5,105.26,21.3
2,3/3/2000,1578,7.75,7.75,106.0,20.9
3,3/10/2000,1645,7.85,7.85,105.71,21.0
4,3/17/2000,1630,7.85,7.85,105.2,21.0


In [58]:
coal.describe() #make sure same as in avg model

Unnamed: 0,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal
count,791.0,791.0,791.0,791.0,791.0
mean,2722.970923,15.471719,15.471719,88.876618,66.853034
std,2211.163051,10.855792,10.855792,12.761327,34.354293
min,513.0,4.0,4.0,71.657,20.9
25%,1121.0,8.775,8.775,80.1775,34.8
50%,1888.0,11.95,11.95,84.4,62.9
75%,3696.5,18.275,18.275,93.385,89.95
max,11612.0,73.35,73.35,119.9,192.5


In [59]:
from datetime import datetime
coal_fdnew['Date'] = pd.to_datetime(coal_fdnew['Date'])
date = coal_fdnew['Date']
coal_fdnew.dtypes

Date           datetime64[ns]
BDI                    object
Freight_MLR            object
Freight_RDF            object
Dollar_All             object
Coal                   object
dtype: object

In [87]:
#freight_mlr and freight_rdf same at this point but after in predictions then they are different
X = coal_fdnew[['BDI','Freight_MLR','Dollar_All']].values
y = coal_fdnew['Coal'].values
X_train, X_test, y_train, y_test= train_test_split(X,y,test_size=0.3,random_state=1)
forest=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=10,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=150, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)
forest.fit(X_train, y_train)
y_train_pred= forest.predict(X_train)
y_test_pred= forest.predict(X_test)
print('MSE train: %.3f, test: %.3f' % (mean_squared_error(y_train, y_train_pred),mean_squared_error(y_test, y_test_pred)))
print('R^2 train: %.3f, test: %.3f' % (r2_score(y_train, y_train_pred),r2_score(y_test,y_test_pred)))
print(forest.feature_importances_)

MSE train: 26.007, test: 114.288
R^2 train: 0.978, test: 0.904
[ 0.13582288  0.06166584  0.80251127]


In [88]:
coal_predicted_fd = forest.predict(X)
coal_predicted_fd = pd.DataFrame(coal_predicted_fd)
coal_fdnew['Predicted_Coal_RDF(MLR)'] = coal_predicted_fd
rdf_error = (abs(coal_fdnew['Predicted_Coal_RDF(MLR)'] - coal_fdnew['Coal'])/coal_fdnew['Coal'])*100
coal_fdnew['RDF_Error(MLR)'] = rdf_error

coal_predicted_fd2 = forest.predict(X)
coal_predicted_fd2 = pd.DataFrame(coal_predicted_fd2)
coal_fdnew['Predicted_Coal_RDF(RDF)'] = coal_predicted_fd2
rdf_error2 = (abs(coal_fdnew['Predicted_Coal_RDF(RDF)'] - coal_fdnew['Coal'])/coal_fdnew['Coal'])*100
coal_fdnew['RDF_Error(RDF)'] = rdf_error2
print("Average RDF error for mlr values is: %.3f" % (rdf_error.mean())) 
print("Average RDF error for rdf values is: %.3f" % (rdf_error2.mean())) 
coal_fdnew.head()

Average RDF error for mlr values is: 5.676
Average RDF error for rdf values is: 5.676


Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal,Predicted_Coal_RDF(MLR),RDF_Error(MLR),Predicted_Coal_RDF(RDF),RDF_Error(RDF)
0,2000-02-18,1454,7.5,7.5,104.63,21.3,23.400458,9.8613,23.400458,9.8613
1,2000-02-25,1511,7.5,7.5,105.26,21.3,22.652825,6.35129,22.652825,6.35129
2,2000-03-03,1578,7.75,7.75,106.0,20.9,22.348198,6.92918,22.348198,6.92918
3,2000-03-10,1645,7.85,7.85,105.71,21.0,21.398488,1.89756,21.398488,1.89756
4,2000-03-17,1630,7.85,7.85,105.2,21.0,21.571243,2.72021,21.571243,2.72021


In [89]:
newpred2 = data [['Date','BDI','Freight_MLR','Freight_RDF','Dollar_All']][1006:]
newpred = data [['BDI','Freight_MLR','Dollar_All']][1006:]
newpred3 = data[['BDI','Freight_RDF','Dollar_All']][1006:]
newpred2.tail()

Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All
2793,12/27/2020,3880,20.644691,19.439625,85.020822
2794,12/28/2020,3884,20.663119,19.439625,85.015616
2795,12/29/2020,3888,20.681546,19.426144,85.010411
2796,12/30/2020,3892,20.699973,19.426144,85.005205
2797,12/31/2020,3897,20.7228,19.426144,85.0


In [90]:
#new predictions
newpred2 = np.array(newpred2)
newpred2 = pd.DataFrame(newpred2)
newpred2.columns = ['Date', 'BDI','Freight_MLR','Freight_RDF','Dollar_All']
newpred_predicted = forest.predict(newpred)
df_new_pred = pd.DataFrame(newpred_predicted)
newpred2['Predicted_Coal_RDF(MLR)'] = df_new_pred

newpred_predicted2 = forest.predict(newpred3)
df_new_pred2 = pd.DataFrame(newpred_predicted2)
newpred2['Predicted_Coal_RDF(RDF)'] = df_new_pred2

newpred2['Date'] = pd.to_datetime(newpred2['Date'])
newpred2

Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Predicted_Coal_RDF(MLR),Predicted_Coal_RDF(RDF)
0,2015-04-17,597,4.21337,5.37363,97.52,60.611556,60.458222
1,2015-04-24,600,4.32159,5.39548,96.922,61.088248,60.934914
2,2015-05-01,587,4.52261,5.53049,95.297,59.798222,59.732222
3,2015-05-08,574,4.54533,5.53464,94.794,60.433222,60.361889
4,2015-05-15,634,5.07295,5.61216,93.135,62.027914,61.571581
5,2015-05-22,586,4.40428,5.51584,96.014,58.897222,58.775889
6,2015-05-29,589,4.27558,5.39548,96.907,58.940222,58.786889
7,2015-06-05,610,4.46348,5.38857,96.306,60.989914,60.836581
8,2015-06-12,642,4.81625,5.53049,94.972,61.924248,61.906248
9,2015-06-19,779,5.55999,5.45098,94.085,48.285956,48.418622


In [91]:
frames = [coal_fdnew,newpred2]
combined = pd.concat(frames)
combined

Unnamed: 0,BDI,Coal,Date,Dollar_All,Freight_MLR,Freight_RDF,Predicted_Coal_RDF(MLR),Predicted_Coal_RDF(RDF),RDF_Error(MLR),RDF_Error(RDF)
0,1454,21.3,2000-02-18,104.63,7.5,7.5,23.400458,23.400458,9.8613,9.8613
1,1511,21.3,2000-02-25,105.26,7.5,7.5,22.652825,22.652825,6.35129,6.35129
2,1578,20.9,2000-03-03,106,7.75,7.75,22.348198,22.348198,6.92918,6.92918
3,1645,21,2000-03-10,105.71,7.85,7.85,21.398488,21.398488,1.89756,1.89756
4,1630,21,2000-03-17,105.2,7.85,7.85,21.571243,21.571243,2.72021,2.72021
5,1618,21,2000-03-24,104.77,7.85,7.85,22.123515,22.123515,5.35007,5.35007
6,1660,21,2000-03-31,105.44,7.85,7.85,21.317419,21.317419,1.51152,1.51152
7,1684,21.2,2000-04-07,105.97,7.95,7.95,21.506748,21.506748,1.44693,1.44693
8,1676,21.4,2000-04-14,105.47,8.1,8.1,21.720470,21.720470,1.49752,1.49752
9,1656,21.5,2000-04-21,107.45,8.1,8.1,22.198581,22.198581,3.24922,3.24922


In [92]:
combined.to_csv("C:/Users/msteinme/Documents/coalbest.csv")

# Predicting Coal for the Worst Case 
[Top](#Table-of-Contents)

In [106]:
data = pd.read_csv("C:/Users/msteinme/Documents/bdifreightdollarpredictcoalworst.csv")
coal = data[['Date','BDI','Freight_MLR','Freight_RDF','Dollar_All','Coal']][215:1006]
coal_fdnew = np.array(coal)
coal_fdnew = pd.DataFrame(coal_fdnew)
coal_fdnew.columns = ['Date','BDI','Freight_MLR','Freight_RDF','Dollar_All','Coal']
print(coal_fdnew.dtypes)
coal_fdnew.tail()

Date           object
BDI            object
Freight_MLR    object
Freight_RDF    object
Dollar_All     object
Coal           object
dtype: object


Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal
786,3/13/2015,562,5.7,5.7,100.33,64.95
787,3/20/2015,591,5.7,5.7,97.909,64.05
788,3/27/2015,596,5.7,5.7,97.291,63.3
789,4/3/2015,588,5.65,5.65,96.545,55.6
790,4/10/2015,580,6.85,6.85,99.338,54.65


In [107]:
coal.describe()

Unnamed: 0,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal
count,791.0,791.0,791.0,791.0,791.0
mean,2722.970923,15.471719,15.471719,88.876618,66.853034
std,2211.163051,10.855792,10.855792,12.761327,34.354293
min,513.0,4.0,4.0,71.657,20.9
25%,1121.0,8.775,8.775,80.1775,34.8
50%,1888.0,11.95,11.95,84.4,62.9
75%,3696.5,18.275,18.275,93.385,89.95
max,11612.0,73.35,73.35,119.9,192.5


In [108]:
from datetime import datetime
coal_fdnew['Date'] = pd.to_datetime(coal_fdnew['Date'])
date = coal_fdnew['Date']
coal_fdnew.dtypes

Date           datetime64[ns]
BDI                    object
Freight_MLR            object
Freight_RDF            object
Dollar_All             object
Coal                   object
dtype: object

In [100]:
#freight_mlr and freight_rdf same at this point but after in predictions then they are different
X = coal_fdnew[['BDI','Freight_MLR','Dollar_All']].values
y = coal_fdnew['Coal'].values
X_train, X_test, y_train, y_test= train_test_split(X,y,test_size=0.3,random_state=1)
forest=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=10,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=150, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)
forest.fit(X_train, y_train)
y_train_pred= forest.predict(X_train)
y_test_pred= forest.predict(X_test)
print('MSE train: %.3f, test: %.3f' % (mean_squared_error(y_train, y_train_pred),mean_squared_error(y_test, y_test_pred)))
print('R^2 train: %.3f, test: %.3f' % (r2_score(y_train, y_train_pred),r2_score(y_test,y_test_pred)))
print(forest.feature_importances_)

MSE train: 26.374, test: 113.836
R^2 train: 0.978, test: 0.904
[ 0.13270538  0.06169961  0.80559502]


In [109]:
coal_predicted_fd = forest.predict(X)
coal_predicted_fd = pd.DataFrame(coal_predicted_fd)
coal_fdnew['Predicted_Coal_RDF(MLR)'] = coal_predicted_fd
rdf_error = (abs(coal_fdnew['Predicted_Coal_RDF(MLR)'] - coal_fdnew['Coal'])/coal_fdnew['Coal'])*100
coal_fdnew['RDF_Error(MLR)'] = rdf_error

coal_predicted_fd2 = forest.predict(X)
coal_predicted_fd2 = pd.DataFrame(coal_predicted_fd2)
coal_fdnew['Predicted_Coal_RDF(RDF)'] = coal_predicted_fd2
rdf_error2 = (abs(coal_fdnew['Predicted_Coal_RDF(RDF)'] - coal_fdnew['Coal'])/coal_fdnew['Coal'])*100
coal_fdnew['RDF_Error(RDF)'] = rdf_error2
print("Average RDF error for mlr values is: %.3f" % (rdf_error.mean())) 
print("Average RDF error for rdf values is: %.3f" % (rdf_error2.mean())) 
coal_fdnew.head()

Average RDF error for mlr values is: 5.719
Average RDF error for rdf values is: 5.719


Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal,Predicted_Coal_RDF(MLR),RDF_Error(MLR),Predicted_Coal_RDF(RDF),RDF_Error(RDF)
0,2000-02-18,1454,7.5,7.5,104.63,21.3,23.217999,9.00469,23.217999,9.00469
1,2000-02-25,1511,7.5,7.5,105.26,21.3,22.563799,5.93333,22.563799,5.93333
2,2000-03-03,1578,7.75,7.75,106.0,20.9,22.505681,7.68268,22.505681,7.68268
3,2000-03-10,1645,7.85,7.85,105.71,21.0,21.749928,3.57109,21.749928,3.57109
4,2000-03-17,1630,7.85,7.85,105.2,21.0,21.698444,3.32592,21.698444,3.32592


In [111]:
newpred2 = data [['Date','BDI','Freight_MLR','Freight_RDF','Dollar_All','Coal']][1006:]
newpred = data [['BDI','Freight_MLR','Dollar_All']][1006:]
newpred3 = data[['BDI','Freight_RDF','Dollar_All']][1006:]
newpred2.head()

Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal
1006,4/17/2015,597,4.213372,5.290366,97.52,57.9
1007,4/24/2015,600,4.321594,5.312064,96.922,58.55
1008,5/1/2015,587,4.522607,5.435922,95.297,62.2
1009,5/8/2015,574,4.545333,5.440822,94.794,61.85
1010,5/15/2015,634,5.072948,5.646518,93.135,61.3


In [112]:
#new predictions
newpred2 = np.array(newpred2)
newpred2 = pd.DataFrame(newpred2)
newpred2.columns = ['Date', 'BDI','Freight_MLR','Freight_RDF','Dollar_All','Coal']
newpred_predicted = forest.predict(newpred)
df_new_pred = pd.DataFrame(newpred_predicted)
newpred2['Predicted_Coal_RDF(MLR)'] = df_new_pred

newpred_predicted2 = forest.predict(newpred3)
df_new_pred2 = pd.DataFrame(newpred_predicted2)
newpred2['Predicted_Coal_RDF(RDF)'] = df_new_pred2

newpred2['Date'] = pd.to_datetime(newpred2['Date'])
newpred2

Unnamed: 0,Date,BDI,Freight_MLR,Freight_RDF,Dollar_All,Coal,Predicted_Coal_RDF(MLR),Predicted_Coal_RDF(RDF)
0,2015-04-17,597,4.21337,5.29037,97.52,57.9,61.779000,61.779000
1,2015-04-24,600,4.32159,5.31206,96.922,58.55,61.901333,61.901333
2,2015-05-01,587,4.52261,5.43592,95.297,62.2,59.918667,59.918667
3,2015-05-08,574,4.54533,5.44082,94.794,61.85,60.489667,60.489667
4,2015-05-15,634,5.07295,5.64652,93.135,61.3,62.450000,62.074958
5,2015-05-22,586,4.40428,5.40987,96.014,60,58.976333,58.976333
6,2015-05-29,589,4.27558,5.31693,96.907,60.05,59.111333,59.111333
7,2015-06-05,610,4.46348,5.30497,96.306,58.1,61.671000,61.671000
8,2015-06-12,642,4.81625,5.42106,94.972,60,62.449667,62.449667
9,2015-06-19,779,5.55999,5.38479,94.085,60.2,48.352550,48.445925


In [105]:
frames = [coal_fdnew,newpred2]
combined = pd.concat(frames)
combined

Unnamed: 0,BDI,Coal,Date,Dollar_All,Freight_MLR,Freight_RDF,Predicted_Coal_RDF(MLR),Predicted_Coal_RDF(RDF),RDF_Error(MLR),RDF_Error(RDF)
0,1454,21.3,2000-02-18,104.63,7.5,7.5,23.217999,23.217999,9.00469,9.00469
1,1511,21.3,2000-02-25,105.26,7.5,7.5,22.563799,22.563799,5.93333,5.93333
2,1578,20.9,2000-03-03,106,7.75,7.75,22.505681,22.505681,7.68268,7.68268
3,1645,21,2000-03-10,105.71,7.85,7.85,21.749928,21.749928,3.57109,3.57109
4,1630,21,2000-03-17,105.2,7.85,7.85,21.698444,21.698444,3.32592,3.32592
5,1618,21,2000-03-24,104.77,7.85,7.85,22.073943,22.073943,5.11402,5.11402
6,1660,21,2000-03-31,105.44,7.85,7.85,21.672015,21.672015,3.20007,3.20007
7,1684,21.2,2000-04-07,105.97,7.95,7.95,21.746927,21.746927,2.57985,2.57985
8,1676,21.4,2000-04-14,105.47,8.1,8.1,21.861793,21.861793,2.15791,2.15791
9,1656,21.5,2000-04-21,107.45,8.1,8.1,22.140257,22.140257,2.97794,2.97794


In [113]:
combined.to_csv("C:/Users/msteinme/Documents/coalworst.csv")