In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  
from sklearn.ensemble import RandomForestRegressor
import ipywidgets as widgets
from ipywidgets import interact
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
import statsmodels.api as sm
import ipwidgets import interactive

combi = pd.read_table('Historical_combi2.csv',delimiter =';')

import datetime
datetimes = [datetime.datetime.strptime(d, '%d.%m.%Y') for d in combi["Date"]]


df = pd.DataFrame(datetimes, columns=['date'])
df.loc[:, 'date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
combi = combi.assign(df=df['date'].values)

def regressionlinear(X,Y):
    regr = linear_model.LinearRegression()
    regr.fit(X, Y)
    #print('Intercept: \n', regr.intercept_)
    #print('Coefficients: \n', regr.coef_)

    X = sm.add_constant(X)
    model = sm.OLS(Y, X).fit()
    predictions = model.predict(X) 
 
    print_model = model.summary()
    #print(print_model)
    
    return regr.intercept_,regr.coef_



def randomforest(x_train,y_train,x_test,y_test):
    
    plt.rcParams['figure.dpi'] = 100

    regressor = RandomForestRegressor(n_estimators=200, max_depth=5)
    clf=regressor.fit(x_train, y_train)

    y_pred=regressor.predict(x_test)
    y_pred=pd.DataFrame(y_pred)


    plt_train=plt.scatter(x_train.iloc[:,0],y_train,   color='grey')
    plt_test=plt.scatter(x_test.iloc[:,0],y_test,   color='green')
    plt_pred=plt.scatter(x_test.iloc[:,0], y_pred,  color='black')

    plt.xlabel("Dated Brent")
    plt.ylabel("Bonny light")
    plt.legend((plt_train, plt_test,plt_pred),("train data", "test data","prediction"))
    plt.show()

    print("Mean squared error: %.2f" % np.mean((regressor.predict(x_train) - y_train) ** 2))

    import seaborn as sns
    importances=regressor.feature_importances_

    indices=list(x_train)
    print("Feature ranking:")
    
    for f in range(x_train.shape[1]):
        print("Feature %s (%f)" % (indices[f], importances[f]))

    f, (ax1) = plt.subplots(1, 1, figsize=(6, 4), sharex=True)
    sns.barplot(indices, importances, palette="BrBG", ax=ax1)
    ax1.set_ylabel("Importance")

    ax1.set_xticklabels(
    ax1.get_xticklabels(),
    rotation=45,
    horizontalalignment='right'
    );

    return regressor.predict





def NPV_Calc(DB,FO35,FO1,model):
    
    
    
    if model == 'Bonny_light':
             
        widget = interactive(NPV_Calc,DB=(0,200))
        plt.rcParams['figure.dpi'] = 100
        rma60 = combi["Bonny light"].rolling(window=60).mean()
        ema60 = combi["Bonny light"].ewm(span=60, adjust=False).mean()
        plt.plot(combi["df"],combi["Bonny light"])
        plt.plot(combi["df"],rma60)
        plt.plot(combi["df"],ema60)
        plt.legend(("Monthly","rma","ema"))
        plt.show()
        
        #Non linear model
        nonlinear_BL = 0.0164924882990988*(DB) + 4.43302177278368e-5*np.power(DB,2) - 0.157317431833725
        #print("Non.linear predicted price =[", nonlinear_BL,"]")
        
        #Linear model
        Y = combi[['Bonny light']].dropna()
        K=list(Y.index.values)
        K[0]
        X = combi[['Dated Brent']].iloc[K[0]:]
        #print(X)
        #print(Y)
        intercept_, coef_ = regressionlinear(X,Y)
        linear_BL=intercept_+ coef_[0]*DB 
        #print("Linear predicted price =", linear_BL)
        
        #Random forest
               
        All = pd.concat([X,Y],axis=1,sort=False)
        train = All.iloc[:,:]
        test =All.iloc[-100:,:]
        
        x_train=train["Dated Brent"].to_frame()
        y_train=train["Bonny light"]
        
        x_test=test["Dated Brent"].to_frame()
        y_test=test["Bonny light"].to_frame()
        F=randomforest(x_train,y_train,x_test,y_test)
        

        invar = {'Dated Brent':[DB]}
        invar_df = pd.DataFrame(invar)
        y_pred = F(invar_df)
        
                
        print("Linear predicted price =", linear_BL)
        print("Non.linear predicted price =[", nonlinear_BL,"]")
        print("Random forest price =",y_pred)
        
    elif model == 'Urals_NWE':
        
        
        plt.rcParams['figure.dpi'] = 100
        rma60 = combi["Urals NWE"].rolling(window=60).mean()
        ema60 = combi["Urals NWE"].ewm(span=60, adjust=False).mean()
        plt.plot(combi["df"],combi["Urals NWE"])
        plt.plot(combi["df"],rma60)
        plt.plot(combi["df"],ema60)
        plt.legend(("Monthly","rma","ema"))
        plt.show()
        
        #Non linear model
        nonlinear_UralN = 0.243310947652501*(FO35) + 0.0327070285007665*(DB) + 0.000931100809264595*np.power(FO1,3) + 3.01672677408283e-5*np.power(FO1,4) - 0.771156577782479 - 0.00241982760220774*(DB)*(FO1) - 0.000191940652210639*np.power(DB,2)
                
        #Linear model
        Y = combi[['Urals NWE']].dropna()
        K=list(Y.index.values)
        K[0]
        X = combi[['Dated Brent','FO 3.5%','FO 1%']].iloc[K[0]:]
               
        intercept_, coef_ = regressionlinear(X,Y)
        coef_ = coef_.reshape(-1)
        linear_UralN=intercept_+ coef_[0]*DB + coef_[1]*FO35 + coef_[2]*FO1
                
        #Random forest
               
        All = pd.concat([X,Y],axis=1,sort=False)
        train = All.iloc[-100:,:]
        test =All.iloc[:-100,:]
        
        x_train=train[["Dated Brent","FO 3.5%","FO 1%"]]
        y_train=train["Urals NWE"]
        
        x_test=test[["Dated Brent","FO 3.5%","FO 1%"]]
        y_test=test["Urals NWE"].to_frame()
        F=randomforest(x_train,y_train,x_test,y_test)
        

        invar = {'Dated Brent':[DB], 'FO 3.5%':[FO35], 'FO 1%':[FO1]}
        invar_df = pd.DataFrame(invar)
        y_pred = F(invar_df)
        
                
        print("Linear predicted price =", linear_UralN)
        print("Non.linear predicted price =[", nonlinear_UralN,"]")
        print("Random forest price =",y_pred)
        
        
    else:
        print('choose something')   

    return
    





SyntaxError: invalid syntax (<ipython-input-3-5784a3d3f2b8>, line 11)