In [38]:
import pandas as pd

# Assuming 'gspc.csv' is in the current working directory or you provide the correct path
df = pd.read_csv('Nikkei_225.csv')

# You can now work with the DataFrame 'df'
print(df.head())  # Print the first few rows of the DataFrame


         Date        Close         High          Low         Open  Volume
0  01-01-1985  11992.30957  11992.30957  11543.00000  11543.00000       0
1  01-02-1985  12321.91992  12321.91992  11823.42969  11946.95996       0
2  01-03-1985  12580.75977  12604.01953  12263.84961  12412.13965       0
3  01-04-1985  12426.29004  12683.25977  12052.82031  12677.15039       0
4  01-05-1985  12758.45996  12790.26953  12358.03027  12456.65039       0


In [39]:
# Calculate the index for splitting the data
split_index = int(len(df) * 0.7)

# Split the data into training and testing sets
train_data = df['Close'][:split_index]
test_data = df['Close'][split_index:]

print(f"Training data length: {len(train_data)}")
print(f"Testing data length: {len(test_data)}")


Training data length: 282
Testing data length: 122


In [40]:


from statsmodels.tsa.arima.model import ARIMA
import warnings


model = ARIMA(train_data, order=(5, 1, 0 ))  # Example order (p, d, q)
model_fit = model.fit()

# # Print model summary (optional)
print(model_fit.summary())

# # Make predictions on the training data
predictions = model_fit.predict(start=0, end=len(train_data)-1)



                               SARIMAX Results                                
Dep. Variable:                  Close   No. Observations:                  282
Model:                 ARIMA(5, 1, 0)   Log Likelihood               -2382.031
Date:                Sat, 22 Mar 2025   AIC                           4776.063
Time:                        16:03:18   BIC                           4797.893
Sample:                             0   HQIC                          4784.818
                                - 282                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0384      0.042      0.908      0.364      -0.045       0.121
ar.L2          0.0229      0.042      0.547      0.584      -0.059       0.105
ar.L3         -0.0365      0.046     -0.796      0.4

In [41]:


import numpy as np
# # Calculate RMSE
rmse = np.sqrt(np.mean((predictions - train_data)**2))
print(f"RMSE: {rmse}")


RMSE: 1362.0529761332705


In [42]:
from statsmodels.tsa.arima.model import ARIMA
import numpy as np

history = np.array(train_data).copy()  # Ensure a NumPy array
test_data = np.array(test_data)
predictions = []

for i in range(len(test_data)):
    model = ARIMA(history, order=(5, 1, 0))
    model_fit = model.fit()
    hat = model_fit.forecast(steps=1)[0] # Extract scalar value
    # print(hat)
    predictions.append(hat)

    observed = test_data[i]
    history = np.concatenate((history, np.array([observed])))  # Append new value

# Convert predictions list to NumPy array before computing RMSE
predictions = np.array(predictions)
rmse = np.sqrt(np.mean((predictions - test_data) ** 2))
print("RMSE:", rmse)




RMSE: 750.2056154842833
