In [72]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  
from sklearn.ensemble import RandomForestRegressor
import ipywidgets as widgets
from ipywidgets import interact
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
import statsmodels.api as sm

import datetime

# Semicolon-delimited price history. The "Date" column is parsed below as
# day.month.year strings; the remaining columns are used as price series.
combi = pd.read_table('Historical_combi2.csv', delimiter=';')

# NaN-ignoring mean of every column after the first one. Computed before the
# parsed date column is appended so that column is not part of the means.
# NOTE(review): assumes column 0 is the only non-numeric column - confirm.
data_mean = combi.iloc[:, 1:].apply(lambda x: np.nanmean(x))

# Parse the date strings once. strptime already yields datetime objects, so
# the resulting frame column is datetime-typed without any further conversion.
datetimes = [datetime.datetime.strptime(d, '%d.%m.%Y') for d in combi["Date"]]
df = pd.DataFrame(datetimes, columns=['date'])

# Attach the parsed dates positionally (via .values) as column "df", which is
# the x-axis the plotting code reads as combi["df"].
combi = combi.assign(df=df['date'].values)

def regressionlinear(X, Y, verbose=False):
    """Fit a least-squares linear model of Y on X and return its parameters.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Explanatory variable(s); passed unchanged to ``LinearRegression.fit``.
    Y : array-like
        Target values; passed unchanged to ``LinearRegression.fit``.
    verbose : bool, default False
        When True, the same regression is additionally fitted with
        statsmodels (with a constant term added to X) and its summary table
        is printed. The returned values are unaffected.

    Returns
    -------
    tuple
        ``(intercept_, coef_)`` as exposed by the fitted scikit-learn
        ``LinearRegression``; their shapes follow the shape of ``Y``.
    """
    regr = linear_model.LinearRegression()
    regr.fit(X, Y)

    if verbose:
        # The statsmodels fit is only useful for its diagnostic report, so it
        # is skipped unless explicitly requested instead of running (and being
        # thrown away) on every call.
        print(sm.OLS(Y, sm.add_constant(X)).fit().summary())

    return regr.intercept_, regr.coef_



def randomforest(x_train, y_train, x_test, y_test, random_state=None):
    """Fit a random-forest regressor, plot its fit, and return its predictor.

    Side effects: sets the global matplotlib figure dpi to 100, shows a
    scatter plot of train / test / predicted points, prints the mean squared
    error on the training rows and the per-feature importances, and draws a
    bar chart of those importances.

    Parameters
    ----------
    x_train : pandas.DataFrame
        Training features. Column names are used as feature labels. The
        scatter plot assumes a single feature column.
    y_train : array-like
        Training targets.
    x_test : pandas.DataFrame
        Features of the points to predict and plot.
    y_test : array-like
        Observed targets for ``x_test`` (only plotted, never scored).
    random_state : int, RandomState instance or None, default None
        Forwarded to ``RandomForestRegressor``. ``None`` keeps the unseeded
        behaviour; pass an int for reproducible forests.

    Returns
    -------
    callable
        The bound ``predict`` method of the fitted regressor.
    """
    import seaborn as sns

    plt.rcParams['figure.dpi'] = 100

    regressor = RandomForestRegressor(
        n_estimators=200, max_depth=5, random_state=random_state
    )
    regressor.fit(x_train, y_train)

    y_pred = pd.DataFrame(regressor.predict(x_test))

    plt_train = plt.scatter(x_train, y_train, color='grey')
    plt_test = plt.scatter(x_test, y_test, color='green')
    plt_pred = plt.scatter(x_test, y_pred, color='black')

    plt.xlabel("Dated Brent")
    plt.ylabel("Bonny light")
    plt.legend((plt_train, plt_test, plt_pred), ("train data", "test data", "prediction"))
    plt.show()

    # This is the in-sample (training) error, not a hold-out score.
    print("Mean squared error: %.2f" % np.mean((regressor.predict(x_train) - y_train) ** 2))

    importances = regressor.feature_importances_
    indices = list(x_train)  # column labels of the training frame

    print("Feature ranking:")
    for feature_name, importance in zip(indices, importances):
        print("Feature %s (%f)" % (feature_name, importance))

    # Draw the importance chart once, after the listing. Building it inside
    # the loop produced one identical figure per feature and rebound the loop
    # variable to the figure handle.
    fig, ax1 = plt.subplots(1, 1, figsize=(6, 4), sharex=True)
    # x/y must be passed by keyword: recent seaborn releases reject positional
    # data vectors.
    sns.barplot(x=indices, y=importances, palette="BrBG", ax=ax1)
    ax1.set_ylabel("Importance")
    ax1.set_xticklabels(
        ax1.get_xticklabels(),
        rotation=45,
        horizontalalignment='right'
    )

    return regressor.predict

@interact(DB=(0,200),remuneration=(0,100),EI_consumption_switch=(0,100),model=['Bonny_light', 'Urals_NWE','Tapis'])
def NPV_Calc(DB,remuneration,EI_consumption_switch,model):
    """Interactive price estimate for a crude grade from a Dated Brent value.

    Registered with ``ipywidgets.interact``, so it is re-run whenever one of
    the widgets changes. For ``model == 'Bonny_light'`` it plots the Bonny
    light history with 60-period rolling and exponential means, then prints
    four estimates for the given ``DB``: a linear regression on Dated Brent,
    a fixed quadratic formula, a random-forest prediction, and the historical
    mean. Any other ``model`` value only prints a placeholder message.

    Parameters
    ----------
    DB : int
        Dated Brent value to evaluate the models at (slider 0..200).
    remuneration : int
        Slider value 0..100; currently not used by the calculation.
    EI_consumption_switch : int
        Slider value 0..100; currently not used by the calculation.
    model : str
        One of 'Bonny_light', 'Urals_NWE', 'Tapis'.

    Returns
    -------
    None
    """
    if model != 'Bonny_light':
        # Only the Bonny light model is implemented so far.
        print('choose something')
        return

    plt.rcParams['figure.dpi'] = 100

    # Price history with its 60-period simple and exponential moving averages.
    rma60 = combi["Bonny light"].rolling(window=60).mean()
    ema60 = combi["Bonny light"].ewm(span=60, adjust=False).mean()
    plt.plot(combi["df"], combi["Bonny light"])
    plt.plot(combi["df"], rma60)
    plt.plot(combi["df"], ema60)
    plt.legend(("Monthly", "rma", "ema"))
    plt.show()

    # Non-linear model: fixed quadratic in DB.
    # NOTE(review): the origin of these coefficients is not visible here.
    nonlinear_BL = 0.0164924882990988*(DB) + 4.43302177278368e-5*np.power(DB,2) - 0.157317431833725

    # Keep only rows where both series are present. Dropping NaNs jointly
    # guarantees X and Y have the same length and index even when there are
    # gaps inside either series or the frame's index is not 0..n-1.
    pairs = combi[['Dated Brent', 'Bonny light']].dropna()
    X = pairs[['Dated Brent']]
    Y = pairs[['Bonny light']]

    # Linear model. Y is kept as a one-column frame so the fitted parameters
    # (and therefore the printed estimate) stay array-shaped.
    intercept_, coef_ = regressionlinear(X, Y)
    linear_BL = intercept_ + coef_[0]*DB

    # Random forest.
    # NOTE(review): the "test" rows are the last 100 rows of the very frame
    # used for training, so they are not an independent hold-out.
    train = pairs
    test = pairs.iloc[-100:, :]

    x_train = train[["Dated Brent"]]
    y_train = train["Bonny light"]
    x_test = test[["Dated Brent"]]
    y_test = test[["Bonny light"]]
    F = randomforest(x_train, y_train, x_test, y_test)

    # Single-row frame with the training column name, as the forest expects.
    invar_df = pd.DataFrame({'Dated Brent': [DB]})
    y_pred = F(invar_df)

    print("Linear predicted price =", linear_BL)
    print("Non.linear predicted price =[", nonlinear_BL,"]")
    print("Random forest price =",y_pred)
    print("Historical mean price =[", data_mean['Bonny light'],"]")
    





interactive(children=(IntSlider(value=100, description='DB', max=200), IntSlider(value=50, description='remune…

In [53]:
# Scratch check of the train/test split and of the single-value model input.
#
# Works on the `combi` frame loaded by the first cell. Re-reading the CSV here
# would rebind the global `combi` without its parsed "df" date column, which
# the interactive NPV_Calc plots depend on.

# Rows where both series are present, so X and Y always line up.
All = combi[['Dated Brent', 'Bonny light']].dropna()
X = All[['Dated Brent']]
Y = All[['Bonny light']]

train = All.iloc[:, :]
test = All.iloc[-100:, :]  # last 100 rows; these are also part of `train`

x_train = train["Dated Brent"].to_frame()
y_train = train["Bonny light"]

# One-column frame, matching how the interactive cell builds x_test.
x_test = test["Dated Brent"].to_frame()
y_test = test["Bonny light"].to_frame()

DB = 100
# NOTE: a dict of bare scalars cannot be turned into a DataFrame without an
# index; the interactive cell wraps the value in a list ([DB]) for that reason.
invar = {'Dated Brent': DB}
invar

{'Dated Brent': 100}