In [32]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 

In [33]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

In [34]:
# Read the bond dataset; the first CSV column becomes the index and is parsed as dates.
df = pd.read_csv(
    r"Bond-VN.csv",
    index_col=0,
    parse_dates=True,
)

# Predictor columns used by the models in this notebook.
x = df[
    [
        'GGVF1Y', 'GGVF3Y', 'CPI', 'FER', 'Baserate',
        'FIR', 'Stockreturn', 'Fiscal', 'Pubdebt', 'CurrentAcc',
    ]
]
# Regression target: the GGVF5Y column.
y = df['GGVF5Y']

In [35]:
# Seed reused for the split and for every estimator so runs are repeatable.
n_state = 50

# 70 % / 30 % train/test partition of the features and the target.
# NOTE(review): train_test_split shuffles rows by default, while the data is
# indexed by date -- confirm that a random (non-chronological) hold-out is intended.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    test_size=0.3,
    random_state=n_state,
)

# Decision Tree Regressor

In [36]:
# Train a single regression tree on the training split. `fit` returns the
# estimator itself, so construction and fitting are chained into one assignment
# (which also leaves the cell without any displayed output).
decision_tree = DecisionTreeRegressor(random_state=n_state).fit(x_train, y_train)

In [37]:
# Predict the target for the held-out rows.
pred_decision_tree = decision_tree.predict(x_test)

# Pair each true value with its prediction. The frame takes its index from
# y_test; sort_index() returns a new frame ordered by that index instead of
# mutating an existing one in place, so the cell stays a single expression chain.
result_decision_tree = pd.DataFrame(
    {'Real': y_test, 'Prediction': pred_decision_tree}
).sort_index()

# Preview the first seven rows.
result_decision_tree.head(7)

Unnamed: 0_level_0,Real,Prediction
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-10-01,0.08388,0.0856
2007-07-01,0.07559,0.07904
2007-09-01,0.0815,0.08292
2007-11-01,0.08617,0.08711
2008-03-01,0.0873,0.08644
2008-04-01,0.099,0.1542
2008-06-01,0.20333,0.1994


In [38]:
# Disabled plot: uncomment the lines below to draw the real values (dashed)
# against the decision-tree predictions from result_decision_tree.

# plt.figure(figsize=(30,10))

# plt.plot(result_decision_tree.Real,'--')
# plt.plot(result_decision_tree.Prediction)

# plt.legend(['Real','Predict'])

# plt.ylabel('Bond yield')

# plt.show()

In [39]:
# Hold-out error metrics for the decision tree.
# RMSE is the square root of the mean squared error; MAPE is scaled to percent.
rmse = np.sqrt(
    mean_squared_error(
        result_decision_tree['Real'], result_decision_tree['Prediction']
    )
)
mae = mean_absolute_error(
    result_decision_tree['Real'], result_decision_tree['Prediction']
)
mape = 100 * mean_absolute_percentage_error(
    result_decision_tree['Real'], result_decision_tree['Prediction']
)

In [40]:
# Print rmse, mae and mape one per line; MAPE is rounded to two decimals.
print(
    f'RMSE = {rmse}',
    f'MAE  = {mae}',
    f'MAPE = {round(mape,2)}%',
    sep='\n',
)


RMSE = 0.008322832039832674
MAE  = 0.0030798979591836733
MAPE = 4.73%


# Random Forest Regressor

In [41]:
# Train the random forest on the training split, seeded with n_state.
# `fit` returns the estimator itself, so it can be chained onto the constructor.
random_forest = RandomForestRegressor(random_state=n_state).fit(x_train, y_train)

# Predict the target for the held-out rows.
random_forest_pred = random_forest.predict(x_test)

In [42]:
# Pair each true value with the random-forest prediction. The frame takes its
# index from y_test; sort_index() returns a new frame ordered by that index
# instead of mutating an existing one in place.
result_random_forest = pd.DataFrame(
    {'Real': y_test, 'Prediction': random_forest_pred}
).sort_index()

# Preview the first seven rows.
result_random_forest.head(7)

Unnamed: 0_level_0,Real,Prediction
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-10-01,0.08388,0.085983
2007-07-01,0.07559,0.076449
2007-09-01,0.0815,0.084108
2007-11-01,0.08617,0.086763
2008-03-01,0.0873,0.086309
2008-04-01,0.099,0.101445
2008-06-01,0.20333,0.176409


In [43]:
# Disabled plot: uncomment the lines below to draw the real values (dashed)
# against the random-forest predictions from result_random_forest.

# plt.figure(figsize=(30,15))

# plt.plot(result_random_forest.Real,'--')
# plt.plot(result_random_forest.Prediction)

# plt.legend(['Real','Predict'])

# plt.xlabel('Year')
# plt.ylabel('Bond yield')
# plt.show()

In [44]:
# Hold-out error metrics for the random forest.
# RMSE is the square root of the mean squared error; MAPE is scaled to percent.
# Note: these assignments rebind rmse, mae and mape.
rmse = np.sqrt(
    mean_squared_error(
        result_random_forest['Real'], result_random_forest['Prediction']
    )
)
mae = mean_absolute_error(
    result_random_forest['Real'], result_random_forest['Prediction']
)
mape = 100 * mean_absolute_percentage_error(
    result_random_forest['Real'], result_random_forest['Prediction']
)

In [45]:
# Print rmse, mae and mape one per line; MAPE is rounded to two decimals.
print(
    f'RMSE = {rmse}',
    f'MAE  = {mae}',
    f'MAPE = {round(mape,2)}%',
    sep='\n',
)


RMSE = 0.004510184008034078
MAE  = 0.0021322877551020365
MAPE = 3.47%
