<a href="https://colab.research.google.com/github/prachikane/final_year_project/blob/main/ARIMA_DAY_WISE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **ARIMA Model**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import numpy as np
from google.colab import drive
from sklearn.metrics import mean_squared_error

  import pandas.util.testing as tm


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): the CSV read further down uses the /content/gdrive mount
# point, so this mount looks redundant -- confirm before removing.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Mount Google Drive at /content/gdrive; the raw TCS.csv is read from under
# this mount point.
from google.colab import drive 
drive.mount('/content/gdrive')

## **Reading Data and Data Preprocessing**

In [None]:
# Load the raw TCS.csv from the mounted Drive folder.
df=pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/TCS.csv')

In [None]:
# Keep only the 'Close' and 'Date' columns.
df=df[['Close','Date']]

In [None]:
# Save the two-column frame to a file named 'New_TCS' (path is relative to the
# current working directory), then preview the first rows.
df.to_csv('New_TCS')
df.head()

In [None]:
# Inspect the dtype of each column.
df.dtypes

In [None]:
# Parser for 'YYYY-MM-DD' date strings, passed to read_csv as `date_parser`.
# `pd.datetime` was only a deprecated alias for the stdlib datetime class and
# no longer exists in current pandas; `pd.to_datetime` with an explicit format
# is the supported equivalent. It accepts a single string or an array of
# strings and still raises ValueError when the text does not match the format.
dateparse = lambda dates: pd.to_datetime(dates, format='%Y-%m-%d')

In [None]:
# Re-read the saved file with 'Date' parsed by `dateparse` and used as the index.
# NOTE(review): this absolute path only matches the relative 'New_TCS' written
# earlier when the working directory is /content -- confirm.
data = pd.read_csv('/content/New_TCS', parse_dates=['Date'], index_col='Date',date_parser=dateparse)

In [None]:
# Preview the date-indexed frame.
data.head()

In [None]:
# Reduce the frame to the single 'Close' column (still a DataFrame, not a
# Series) and preview it.
data=data[['Close']]
data.head()

In [None]:
# Line plot of the 'Close' column against the Date index.
plt.plot(data)

In [None]:
# Total number of elements in the frame.
data.size

### **Checking Stationarity**

In [None]:
def test_stationarity(timeseries, window=200):
    """Plot rolling statistics of a series and print a Dickey-Fuller report.

    Parameters
    ----------
    timeseries : pandas object
        Series to inspect; it must support ``.rolling()`` and ``.size`` and
        be accepted by ``dicky_fuller_test``.
    window : int, default 200
        Number of observations in the rolling mean / rolling std window. The
        same value is used as the starting offset of the slice handed to
        ``dicky_fuller_test``, as in the original hard-coded implementation.

    Returns
    -------
    None
        The function only draws a figure and prints to stdout.
    """
    # Determining rolling statistics
    rolmean = timeseries.rolling(window).mean()
    rolstd = timeseries.rolling(window).std()

    # Plot rolling statistics:
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Perform Dickey-Fuller test on the observations from `window` onwards.
    print ('Results of Dickey-Fuller Test:')
    dicky_fuller_test(timeseries, window, timeseries.size)

In [None]:
def dicky_fuller_test(timeseries,l,r):
  """Run the Augmented Dickey-Fuller test on rows l..r and print a report.

  Parameters
  ----------
  timeseries : pandas.DataFrame or pandas.Series
      A DataFrame must contain a "Close" column, which is the one tested.
      A Series is tested as-is, so inputs that are not frames (for example a
      decomposition residual, depending on the statsmodels version) no
      longer fail on the column lookup.
  l : int
      Start position (inclusive) of the slice to test.
  r : int
      End position (exclusive); values past the end of the data are clamped
      to the series length.

  Prints the ADF statistic, p-value, critical values, and a verdict: the
  series is reported as stationary when the statistic is below the 5%
  critical value.
  """
  X = timeseries["Close"] if isinstance(timeseries, pd.DataFrame) else timeseries
  # Clamp the upper bound so the printed range matches the data actually tested.
  r = min(r, X.size)
  result = adfuller(X[l:r])
  print('\n\nFor range %d,%d ' %(l,r))
  print('ADF statistic: %f' % result[0])
  print('p-value: %f' % result[1])
  print('Critical Values: ')
  for key,value in result[4].items():
    print('\t%s: %.3f' %(key,value))

  # result[0] is the test statistic, result[4] the critical-value dict.
  if result[0]<result[4]['5%']:
    print ('Time Series is Stationary')
  else:
    print ('Time Series is not Stationary')
    

In [None]:
# Rolling-statistics plot and Dickey-Fuller report for the untransformed series.
test_stationarity(data)

## **Making the Data Stationary**

### **Applying Log**

In [None]:
# First attempt at making the data stationary: take the natural log of the
# series, then plot the result.
ts_log=np.log(data)
plt.plot(ts_log)

In [None]:
# 10-observation rolling mean of the log series, drawn in red over the log
# series itself.
rolmean = ts_log.rolling(10).mean()
plt.plot(ts_log)
plt.plot(rolmean, color='red')
# Subtract the rolling mean from the log series; rows where the rolling mean
# is not yet defined come out as NaN.
diff_ts_log_rolmean=ts_log-rolmean
diff_ts_log_rolmean.head()

In [None]:
# Drop the NaN rows in place, then re-check stationarity of the
# log-minus-rolling-mean series.
diff_ts_log_rolmean.dropna(inplace=True)
test_stationarity(diff_ts_log_rolmean)

### **Applying Exponentially Weighted Moving Average (EWMA)**

In [None]:
# Try an exponentially weighted moving average (half-life of 1 observation)
# of the log series, drawn in red over the log series.
expweighted_avg=ts_log.ewm(halflife=1).mean()
plt.plot(ts_log)
plt.plot(expweighted_avg, color='red')

In [None]:
# Subtract the exponentially weighted average from the log series and preview.
diff_ts_log_exp=ts_log-expweighted_avg
diff_ts_log_exp.head()

In [None]:
# Stationarity check for the log-minus-EWMA series.
test_stationarity(diff_ts_log_exp)

### **Performing Differencing**

In [None]:
# First difference of the log series: each value minus the previous one.
# The first row has no predecessor and is therefore NaN.
ts_log_diff = ts_log - ts_log.shift()
plt.plot(ts_log_diff)

In [None]:
# Drop the NaN rows in place, then check stationarity of the differenced
# log series.
ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)

## **Removing Seasonality and Trend**

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Decompose the log series into trend, seasonal, and residual components.
decomposition = seasonal_decompose(ts_log,freq=10)

trend, seasonal, residual = (
    decomposition.trend,
    decomposition.seasonal,
    decomposition.resid,
)

# One stacked panel per component, each with its own legend.
_panels = (
    (411, ts_log, 'Original'),
    (412, trend, 'Trend'),
    (413, seasonal, 'Seasonality'),
    (414, residual, 'Residuals'),
)
for _position, _component, _label in _panels:
    plt.subplot(_position)
    plt.plot(_component, label=_label)
    plt.legend(loc='best')
plt.tight_layout()

In [None]:
# Check stationarity of the decomposition residual.
# Note: `ts_log_decompose` is an alias of `residual`, not a copy, so the
# in-place dropna below also removes the NaN rows from `residual`.
ts_log_decompose = residual
ts_log_decompose.dropna(inplace=True)
test_stationarity(ts_log_decompose)

## **Determining the Hyperparameters p, d, q**

In [None]:
# ACF and PACF values:
# autocorrelation and partial autocorrelation, used below to pick p and q.
# Both are computed on the first 500 rows of the differenced log series, for
# lags 0..50; the PACF uses the 'ols' estimation method.

from statsmodels.tsa.stattools import acf, pacf
lag_acf = acf(ts_log_diff[0:500], nlags=50)
lag_pacf = pacf(ts_log_diff[0:500], nlags=50, method='ols')

In [None]:
# Plot PACF with a zero line and +/-1.96/sqrt(N) reference lines, where N is
# the number of rows in the first-500 slice of the differenced log series.

_pacf_bound = 1.96/np.sqrt(len(ts_log_diff[0:500]))
plt.plot(lag_pacf)
for _pacf_level in (0, -_pacf_bound, _pacf_bound):
    plt.axhline(y=_pacf_level,linestyle='--',color='gray')
plt.title('Partial Autocorrelation Function')
#p=1(approx)

In [None]:
# Plot ACF with a zero line and +/-1.96/sqrt(N) reference lines, where N is
# the number of rows in the first-500 slice of the differenced log series.

_acf_bound = 1.96/np.sqrt(len(ts_log_diff[0:500]))
plt.plot(lag_acf)
for _acf_level in (0, -_acf_bound, _acf_bound):
    plt.axhline(y=_acf_level,linestyle='--',color='gray')
plt.title('Autocorrelation Function')
#q=1(approx)

## **Training ARIMA Model**

In [None]:
from statsmodels.tsa.arima_model import ARIMA

In [None]:

# Fit ARIMA(p=1, d=1, q=1) on the log series.
# d is 1 (it was 2) so the model matches the rest of the notebook: the
# stationarity check and the ACF/PACF analysis were done on the FIRST
# difference of the log series, the fitted values are overlaid on
# `ts_log_diff` below, and the later back-transform applies a single cumsum.
# NOTE(review): with this legacy statsmodels API the fitted values are
# presumably on the differenced scale -- confirm against the library docs.
model = ARIMA(ts_log, order=(1, 1, 1))
results_ARIMA = model.fit(disp=-1)
# Differenced log series with the model's fitted values in red.
plt.plot(ts_log_diff)
plt.plot(results_ARIMA.fittedvalues, color='red')
#plt.title('RSS: %.4f'% sum((results_ARIMA.fittedvalues-ts_log_diff[0:500])**2))

In [None]:
# Plot the original (untransformed) Close series again for reference.
plt.plot(data)

## **Bringing the Data Back to the Original (Non-Stationary) Scale**

In [None]:
# Rebuild price-level predictions from the model's fitted log-differences.
predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy=True)

# Running total of the fitted differences = cumulative change in log price.
# NOTE(review): a single cumsum undoes exactly one order of differencing;
# confirm this matches the `d` used when the model was fitted.
predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()

# Base level: the FIRST observed log value, repeated over the whole index.
# Using the entire observed log series as the base would add the fitted
# changes on top of the actual values, leaking the ground truth into the
# "predictions" and making any error metric computed from them meaningless.
predictions_ARIMA_log = pd.Series(ts_log.iloc[0, 0], index=ts_log.index)
# Dates with no fitted value (the start of the series) keep the base level.
predictions_ARIMA_log = predictions_ARIMA_log.add(predictions_ARIMA_diff_cumsum, fill_value=0)

# Undo the log transform to get back to price units.
predictions_ARIMA = np.exp(predictions_ARIMA_log)
plt.plot(data, color='blue')
plt.plot(predictions_ARIMA, color="orange")

In [None]:
# Root-mean-squared error between the observed Close values and the
# back-transformed predictions: square root of sklearn's mean squared error.
from math import sqrt
from sklearn.metrics import mean_squared_error

rms = sqrt(mean_squared_error(data,predictions_ARIMA))
print(rms)