In [5]:
import numpy as np
import pandas as pd
import pyflux as pf
from datetime import datetime
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt
from statsmodels.tsa.arima_model import ARMAResults
import matplotlib.pyplot as plt


def convert(dataframe):
    """Parse each element of ``dataframe`` into a float and return them as a list.

    Every element is stringified and split on the two-character separator
    ``".."``. A single leading ``"."`` on the first piece is dropped. When a
    second piece exists it is appended after a decimal point with its final
    character removed. Pieces after the second are ignored.

    Parameters
    ----------
    dataframe : iterable
        The values to parse; the elements are whatever iterating it yields.

    Returns
    -------
    list of float
        One parsed value per element, in iteration order.

    Raises
    ------
    IndexError
        If the text before the first ``".."`` is empty.
    ValueError
        If the cleaned-up text is not a valid float literal.
    """
    def _parse(raw):
        pieces = str(raw).split("..")
        head = pieces[0]
        # Drop one stray leading dot from the integer part.
        text = head[1:] if head[0] == "." else head
        if len(pieces) > 1:
            # The fractional part carries one trailing character to discard.
            text += "." + pieces[1][:-1]
        return float(text)

    return [_parse(item) for item in dataframe]

# Read the merged price table and reverse its row order.
# NOTE(review): absolute, machine-specific path -- the notebook only runs
# where this exact file exists.
reversed_rows = pd.read_csv("/home/nishant/Downloads/merge_oil (1).csv").iloc[::-1]

# Use the parsed "Date" column as the index and remove it from the columns.
data = reversed_rows.drop(columns=["Date"])
data.index = pd.to_datetime(reversed_rows["Date"], format="%Y-%m-%d")

# Positional split: the first 80% of rows train, the remainder tests.
split = int(len(data) * 0.8)
training_data = data.iloc[:split]
test_data = data.iloc[split:]

In [6]:


# --- ADF p-values on the undifferenced training series -------------------
print("ADF Test p value outputs for crude oil and crude oil parameters before differencing")
for crude_column in ("Dollar_eq", "Price", "US_Price"):
    print(adfuller(training_data[crude_column])[1])

# One report section per company; element [1] of adfuller's result is the p-value.
print("Following ADF Test p value outputs are in the order of Open, High, Low, Close\n")
levels_report = (
    ("P values of IOC Variables before differencing",
     ("IOC_Open", "IOC_High", "IOC_Low", "IOC_Close")),
    ("P values of TCI Variables before differencing",
     ("TCI_Open", "TCI_High", "TCI_Low", "TCI_Close")),
    ("P values of ONGC Variables before differencing",
     ("ONGC_Open", "ONGC_High", "ONGC_Low", "ONGC_Close")),
)
for heading, columns in levels_report:
    print(heading)
    for column in columns:
        print(adfuller(training_data[column])[1])
    print("\n")

# --- First differences ----------------------------------------------------
# difference the variables which have p>=0.05 from adf test; the leading
# NaN that diff() produces is dropped by skipping the first row.
diff_us_price = training_data["US_Price"].diff().iloc[1:]
diff_price = training_data["Price"].diff().iloc[1:]
diff_dollar_eq = training_data["Dollar_eq"].diff().iloc[1:]

diff_ioc_open = training_data["IOC_Open"].diff().iloc[1:]
diff_ioc_high = training_data["IOC_High"].diff().iloc[1:]
diff_ioc_low = training_data["IOC_Low"].diff().iloc[1:]
diff_ioc_close = training_data["IOC_Close"].diff().iloc[1:]

diff_ongc_open = training_data["ONGC_Open"].diff().iloc[1:]
diff_ongc_high = training_data["ONGC_High"].diff().iloc[1:]
diff_ongc_low = training_data["ONGC_Low"].diff().iloc[1:]
diff_ongc_close = training_data["ONGC_Close"].diff().iloc[1:]

diff_tci_open = training_data["TCI_Open"].diff().iloc[1:]
diff_tci_high = training_data["TCI_High"].diff().iloc[1:]
diff_tci_low = training_data["TCI_Low"].diff().iloc[1:]
diff_tci_close = training_data["TCI_Close"].diff().iloc[1:]

# --- ADF p-values on the differenced series -------------------------------
# check if the variables differenced are now stationary
differenced_report = (
    ("P values of IOC Variables after differencing",
     (diff_ioc_open, diff_ioc_high, diff_ioc_low, diff_ioc_close)),
    ("P values of ONGC Variables after differencing",
     (diff_ongc_open, diff_ongc_high, diff_ongc_low, diff_ongc_close)),
    ("P values of TCI Variables after differencing",
     (diff_tci_open, diff_tci_high, diff_tci_low, diff_tci_close)),
)
for heading, series_group in differenced_report:
    print(heading)
    for differenced in series_group:
        print(adfuller(differenced)[1])
    print("\n")

print("ADF Test p value outputs for crude oil and crude oil parameters after differencing")
for differenced in (diff_us_price, diff_price, diff_dollar_eq):
    print(adfuller(differenced)[1])








# NOTE(review): this is a bare string literal, not a comment. As the final
# expression of the cell it is echoed back as the cell's Out value.
# The p-values recorded here do not match the "before differencing" values
# this cell prints, so they presumably come from an earlier run or a
# different data split -- TODO confirm, then refresh or delete.
'''
IOC_Open
0.8002719134424383
IOC_High
0.7929649906307306
IOC_Low
0.8029500063118031
IOC_Close
0.8039473147230245
ONGC_Open
0.2742350667014256
ONGC_High
0.2469060046476632
ONGC_Low
0.28255038008874644
ONGC_Close
0.2690677416186882
TCI_Open
0.6197477341099538
TCI_High
0.5681508146555672
TCI_Low
0.6252814709902863
TCI_Close
0.6522010507098521
'''


ADF Test p value outputs for crude oil and crude oil parameters before differencing
0.44991044564964633
0.7045981675734824
0.7599836224588259
Following ADF Test p value outputs are in the order of Open, High, Low, Close

P values of IOC Variables before differencing
0.9990410289044603
0.9986904991334106
0.9986961848979752
0.9988699031482736


P values of TCI Variables before differencing
0.5614986355311959
0.5518971449077584
0.6026590896103907
0.6280993689484832


P values of ONGC Variables before differencing
0.3783545786701888
0.3495948423989612
0.3741751132910967
0.4057938385530423


P values of IOC Variables after differencing
3.0931979084324334e-15
0.0
3.9676269271623017e-22
0.0


P values of ONGC Variables after differencing
6.907525780730542e-14
0.0
0.0
0.0


P values of TCI Variables after differencing
5.6951097480746595e-18
0.0
0.0
0.0


ADF Test p value outputs for crude oil and crude oil parameters after differencing
2.8377346945600307e-23
1.052502151413111e-22
2.91800229539

'\nIOC_Open\n0.8002719134424383\nIOC_High\n0.7929649906307306\nIOC_Low\n0.8029500063118031\nIOC_Close\n0.8039473147230245\nONGC_Open\n0.2742350667014256\nONGC_High\n0.2469060046476632\nONGC_Low\n0.28255038008874644\nONGC_Close\n0.2690677416186882\nTCI_Open\n0.6197477341099538\nTCI_High\n0.5681508146555672\nTCI_Low\n0.6252814709902863\nTCI_Close\n0.6522010507098521\n'

In [7]:
# Differenced crude-oil regressors appended, in this order, to every
# company's exogenous matrix.
crude_oil_diffs = [diff_price, diff_dollar_eq, diff_us_price]

# Each matrix is: differenced Open, High, Low of the company, then the
# crude-oil regressors, joined column-wise on the date index.
ex_ongc = pd.concat([diff_ongc_open, diff_ongc_high, diff_ongc_low] + crude_oil_diffs, axis=1)
print(ex_ongc)

ex_tci = pd.concat([diff_tci_open, diff_tci_high, diff_tci_low] + crude_oil_diffs, axis=1)
print(ex_tci)

ex_ioc = pd.concat([diff_ioc_open, diff_ioc_high, diff_ioc_low] + crude_oil_diffs, axis=1)
print(ex_ioc)

            ONGC_Open  ONGC_High   ONGC_Low  Price  Dollar_eq  US_Price
Date                                                                   
2011-12-13  -3.932999  -5.033005  -1.965989  139.0      0.495      2.37
2011-12-14  -1.199997  -0.867004  -1.034011 -133.0      0.510     -5.19
2011-12-15  -3.000000  -2.165986  -2.000000 -173.0     -0.090     -1.08
2011-12-16   0.732987   3.799988  -1.232987 -133.0     -1.020     -0.34
2011-12-19  -0.532990  -6.099991  -0.932999   80.0      0.200      0.35
2011-12-20   0.733002   3.099991   2.665986  170.0      0.070      3.34
2011-12-21   3.067001   5.066010   3.267013   61.0     -0.370      1.45
2011-12-22   0.000000   1.833999   1.567001   54.0      0.165      0.86
2011-12-23   7.332993   1.132996   2.500000   32.0      0.225      0.15
2011-12-27  -2.932999  -2.000000   0.299988   74.0      0.130      1.66
2011-12-28   0.132996  -0.399994  -0.333999  -49.0     -0.030     -1.98
2011-12-29  -0.932999  -0.367004  -2.332993  -21.0      0.115   

In [9]:
#find p and q for ARMAX

#pacf for p
#plot_pacf(diff_ongc_close,lags=100) 
#plot_acf(diff_ongc_close,lags=100) 


#plot_pacf(diff_ioc_close,lags=100) 
#plot_acf(diff_ioc_close,lags=100) 

#plot_pacf(diff_tci_close,lags=100) 
#plot_acf(diff_tci_close,lags=100) 
 


#           FORECAST ONGC
# Both blocks in this section are wrapped in triple-quoted strings, so they
# are evaluated as string expressions and never executed.

# Disabled grid search: for i, j in 1..5 fits ARIMA(i, 0, j) on
# diff_ongc_close with ex_ongc as exog and prints AIC and BIC per pair.
# NOTE(review): the bare `except:` reports every failure as
# "Invalid p and q", whatever the actual cause.
'''for i in range(1,6):
    for j in range(1,6):
        try:
            model = ARIMA(diff_ongc_close,order=(i,0,j),exog=ex_ongc)
            model_fit = model.fit(disp=0)
            print(str(i)+" and "+str(j))
            print(model_fit.aic)
            print(model_fit.bic)
            print("\n")
        except:
            print("Invalid p and q")'''


# Disabled forecast: fits ARIMA(3, 0, 3) on diff_ongc_close, forecasts
# len(test_data) steps, computes an RMSE against test_data["ONGC_Close"] and
# saves an actual-vs-predicted plot to ONGC2.png.
# NOTE(review) before re-enabling:
#   - test_exog is built from undifferenced test_data columns ordered
#     (..., US_Price, Dollar_eq, Price), while ex_ongc holds first
#     differences ordered (..., Price, Dollar_eq, US_Price);
#   - the model is fit on the differenced close, but its forecast is scored
#     and plotted against the undifferenced ONGC_Close;
#   - plt.figure() is called twice, leaving one empty figure.
'''
model = ARIMA(diff_ongc_close,order=(3,0,3),exog=ex_ongc)
model_fit = model.fit(disp=0)

test_exog = pd.concat([test_data["ONGC_Open"],test_data["ONGC_High"],test_data["ONGC_Low"],test_data["US_Price"],test_data["Dollar_eq"],test_data["Price"]],axis=1)

forecast = model_fit.forecast(steps=len(test_data),exog=test_exog)[0]
f = []
for yhat in forecast:
    f.append(yhat)

    
    
#print(history)

rms = sqrt(mean_squared_error(list(test_data["ONGC_Close"]), f))
predicted_ongc = pd.DataFrame({'Predicted':f})
predicted_ongc.index = test_data.index
fig = plt.figure()
fig = plt.figure(figsize=(10,5))
ax  = fig.add_subplot(111)
plt.plot(test_data["ONGC_Close"],label='Actual')
plt.plot(predicted_ongc,label='Predicted')
plt.xlabel("Time")
plt.ylabel("ONGC Closing Price")
plt.legend(loc='upper left')
plt.title("ONGC Closing Prices with Crude Oil and its Parameters")
fig.savefig('ONGC2.png')

'''


#           FORECAST IOC
# Both blocks in this section are wrapped in triple-quoted strings, so they
# are evaluated as string expressions and never executed.

# Disabled grid search: for i, j in 1..5 fits ARIMA(i, 0, j) on
# diff_ioc_close with ex_ioc as exog and prints AIC and BIC per pair.
# NOTE(review): the bare `except:` reports every failure as
# "Invalid p and q", whatever the actual cause.
'''for i in range(1,6):
    for j in range(1,6):
        try:
            model = ARIMA(diff_ioc_close,order=(i,0,j),exog=ex_ioc)
            model_fit = model.fit(disp=0)
            print(str(i)+" and "+str(j))
            print(model_fit.aic)
            print(model_fit.bic)
            print("\n")
        except:
            print("Invalid p and q")'''
# Disabled forecast: fits ARIMA(3, 0, 3) on diff_ioc_close, forecasts
# len(test_data) steps, computes an RMSE against test_data["IOC_Close"] and
# saves an actual-vs-predicted plot to IOC2.png.
# NOTE(review) before re-enabling:
#   - test_exog is built from undifferenced test_data columns ordered
#     (..., US_Price, Dollar_eq, Price), while ex_ioc holds first
#     differences ordered (..., Price, Dollar_eq, US_Price);
#   - the model is fit on the differenced close, but its forecast is scored
#     and plotted against the undifferenced IOC_Close;
#   - plt.figure() is called twice, leaving one empty figure.
'''
model = ARIMA(diff_ioc_close,order=(3,0,3),exog=ex_ioc)
model_fit = model.fit(disp=0)

test_exog = pd.concat([test_data["IOC_Open"],test_data["IOC_High"],test_data["IOC_Low"],test_data["US_Price"],test_data["Dollar_eq"],test_data["Price"]],axis=1)

forecast = model_fit.forecast(steps=len(test_data),exog=test_exog)[0]
f = []
for yhat in forecast:
    f.append(yhat)


    
    

rms = sqrt(mean_squared_error(list(test_data["IOC_Close"]), f))
predicted_ioc = pd.DataFrame({'Predicted':f})
predicted_ioc.index = test_data.index
fig = plt.figure()
fig = plt.figure(figsize=(10,5))
ax  = fig.add_subplot(111)
plt.plot(test_data["IOC_Close"],label='Actual')
plt.plot(predicted_ioc,label='Predicted')
plt.xlabel("Time")
plt.ylabel("IOC Closing Price")
plt.legend(loc='upper left')
plt.title("IOC Closing Prices with Crude Oil and its parameters")
fig.savefig('IOC2.png')
'''

#           FORECAST TCI

# Disabled grid search (wrapped in a triple-quoted string, so never
# executed): for i, j in 1..5 fits ARIMA(i, 0, j) on diff_tci_close with
# ex_tci as exog and prints AIC and BIC per pair.
# NOTE(review): the bare `except:` reports every failure as
# "Invalid p and q", whatever the actual cause.
'''for i in range(1,6):
    for j in range(1,6):
        try:
            model = ARIMA(diff_tci_close,order=(i,0,j),exog=ex_tci)
            model_fit = model.fit(disp=0)
            print(str(i)+" and "+str(j))
            print(model_fit.aic)
            print(model_fit.bic)
            print("\n")
        except:
            print("Invalid p and q")'''

# ---- Forecast TCI closing prices (ARMAX on first differences) -----------
# Fit on the differenced training close, with the differenced regressors
# collected in ex_tci as exogenous inputs.
model = ARIMA(diff_tci_close, order=(4, 0, 4), exog=ex_tci)
model_fit = model.fit(disp=0)

# Out-of-sample regressors must look exactly like the training exog:
#   * same column order -- exog columns are matched to coefficients by
#     position, so the order is taken from ex_tci itself;
#   * same scale -- the model saw first differences, not price levels.
# Differencing the full series before slicing keeps one row per test day
# (the first test row is differenced against the last training row).
test_exog = data[list(ex_tci.columns)].diff().iloc[split:]

# Element [0] of the forecast result holds the point forecasts.
forecast = model_fit.forecast(steps=len(test_data), exog=test_exog)[0]

# The forecasts are day-over-day changes. Accumulate them on top of the last
# observed training close so they are comparable with the actual prices.
last_train_close = training_data["TCI_Close"].iloc[-1]
f = list(last_train_close + np.cumsum(forecast))

rms = sqrt(mean_squared_error(list(test_data["TCI_Close"]), f))
predicted_tci = pd.DataFrame({'Predicted': f}, index=test_data.index)

# Single figure (the original also opened an unused, empty one).
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
ax.plot(test_data["TCI_Close"], label='Actual')
ax.plot(predicted_tci["Predicted"], label='Predicted')
ax.set_xlabel("Time")
ax.set_ylabel("TCI Closing Price")
ax.legend(loc='upper left')
ax.set_title("TCI Closing Prices with Crude Oil and its parameters ")
fig.savefig('TCI2.png')


#print(list(predicted_ioc["Predicted"]))








KeyboardInterrupt: 

In [24]:
#print(list(predicted_ioc["Predicted"]))
#print(list(test_data["IOC_Close"]))
#print(list(predicted_tci["Predicted"]))
#print(list(test_data["TCI_Close"]))

#print(predicted_tci.plot())
#print(test_data["TCI_Close"].plot())
#print(list(predicted_ongc["Predicted"]))
#print(list(test_data["ONGC_Close"]))

[212.125, 215.274994, 219.475006, 213.024994, 209.625, 209.774994, 214.725006, 206.925003, 207.399994, 211.024994, 212.300003, 213.675003, 209.375, 210.024994, 211.824997, 209.875, 210.350006, 203.524994, 202.300003, 203.600006, 205.850006, 204.125, 196.75, 191.699997, 195.199997, 193.574997, 193.425003, 192.600006, 193.449997, 190.925003, 192.850006, 190.800003, 191.125, 191.725006, 193.75, 196.300003, 190.475006, 186.550003, 186.774994, 186.074997, 187.574997, 187.274994, 190.449997, 189.074997, 187.399994, 186.824997, 184.449997, 183.899994, 183.824997, 185.175003, 185.050003, 193.524994, 209.475006, 215.600006, 206.625, 207.024994, 205.149994, 205.899994, 209.725006, 211.149994, 213.149994, 213.675003, 206.600006, 211.600006, 213.425003, 214.100006, 220.350006, 217.25, 226.425003, 227.350006, 224.149994, 214.524994, 215.925003, 214.125, 216.100006, 214.574997, 214.524994, 217.449997, 207.975006, 209.850006, 207.824997, 207.824997, 209.149994, 206.975006, 206.625, 204.199997, 199.5,