In [1]:
import pandas as pd
import datetime as dt
import yfinance as yf

In [2]:
# Define the time period and stock symbol
start = dt.datetime(2021, 6, 1)
end = dt.datetime(2022, 1, 5)
symbol = 'ZOMATO.NS'

# Download stock data
stk_data = yf.download(symbol, start=start, end=end)

[*********************100%***********************]  1 of 1 completed


In [3]:
# Selecting specific columns
stk_data = stk_data[["Open", "High", "Low", "Close"]]

In [4]:
stk_data 

Price,Open,High,Low,Close
Ticker,ZOMATO.NS,ZOMATO.NS,ZOMATO.NS,ZOMATO.NS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2021-07-23,116.000000,138.899994,115.000000,126.000000
2021-07-26,126.349998,143.750000,125.300003,140.649994
2021-07-27,141.699997,147.800003,127.750000,132.899994
2021-07-28,131.000000,135.000000,123.550003,131.199997
2021-07-29,134.949997,144.000000,132.199997,141.550003
...,...,...,...,...
2021-12-29,132.699997,137.699997,131.550003,137.100006
2021-12-30,137.050003,137.050003,133.300003,133.949997
2021-12-31,133.850006,138.399994,133.399994,137.399994
2022-01-03,139.699997,142.449997,138.300003,141.350006


In [5]:
# Adjust to your column names
new_headers = ['Open','High','Low','Close'] 
stk_data.columns = new_headers

In [6]:
stk_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-07-23,116.000000,138.899994,115.000000,126.000000
2021-07-26,126.349998,143.750000,125.300003,140.649994
2021-07-27,141.699997,147.800003,127.750000,132.899994
2021-07-28,131.000000,135.000000,123.550003,131.199997
2021-07-29,134.949997,144.000000,132.199997,141.550003
...,...,...,...,...
2021-12-29,132.699997,137.699997,131.550003,137.100006
2021-12-30,137.050003,137.050003,133.300003,133.949997
2021-12-31,133.850006,138.399994,133.399994,137.399994
2022-01-03,139.699997,142.449997,138.300003,141.350006


In [7]:
stk_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 113 entries, 2021-07-23 to 2022-01-04
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    113 non-null    float64
 1   High    113 non-null    float64
 2   Low     113 non-null    float64
 3   Close   113 non-null    float64
dtypes: float64(4)
memory usage: 4.4 KB


In [8]:
from sklearn.preprocessing import MinMaxScaler
Ms = MinMaxScaler()
data1= Ms.fit_transform(stk_data)
print("Len:",data1.shape)

Len: (113, 4)


In [9]:
data1=pd.DataFrame(data1,columns=["Open Price","High Price","Low Price","Close Price"])

In [10]:
training_size = round(len(data1 ) * 0.80)
print(training_size)
X_train=data1[:training_size]
X_test=data1[training_size:]
print("X_train length:",X_train.shape)
print("X_test length:",X_test.shape)
y_train=data1[:training_size]
y_test=data1[training_size:]
print("y_train length:",y_train.shape)
print("y_test length:",y_test.shape)

90
X_train length: (90, 4)
X_test length: (23, 4)
y_train length: (90, 4)
y_test length: (23, 4)


In [11]:
import warnings
warnings.filterwarnings("ignore")

In [12]:
listt=["Close Price","High Price","Open Price","Low Price"]

In [13]:
listt

['Close Price', 'High Price', 'Open Price', 'Low Price']

In [14]:
data1

Unnamed: 0,Open Price,High Price,Low Price,Close Price
0,0.000000,0.280764,0.000000,0.048544
1,0.229236,0.396655,0.262420,0.454924
2,0.569214,0.493429,0.324841,0.239944
3,0.332226,0.187575,0.217834,0.192788
4,0.419712,0.402628,0.438216,0.479889
...,...,...,...,...
108,0.369878,0.252091,0.421656,0.356450
109,0.466224,0.236559,0.466242,0.269071
110,0.395349,0.268817,0.468790,0.364771
111,0.524917,0.365591,0.593631,0.474341


In [15]:
def combination(dataset,listt):
    print(listt)
    performance={"Model":[],"RMSE":[],"MaPe":[],"Order":[],"Test":[]}
    datasetTwo=dataset[listt]
    test_obs = 28
    train =datasetTwo[:-test_obs]
    test = datasetTwo[-test_obs:]
    best_aic = float('inf')
    from statsmodels.tsa.api import VARMAX
    
    best_order = None
    best_model = None
    
    for p in range(1, 3):  # Autoregressive part
        for q in range(1, 3):  # Moving average part
            try:
                # Initialize and fit the VARMA model
                model = VARMAX(train, order=(p, q))
                results = model.fit(disp=False)
                
                # Print the order and AIC for each model
                print(f'Order = (p={p}, q={q})')
                print('AIC: ', results.aic)
                print('BIC: ', results.bic)
                
                # Track the best model based on AIC
                if results.aic < best_aic:
                    best_aic = results.aic
                    best_order = (p, q)
                    best_model = results
            except Exception as e:
                print(f"Could not fit VARMA(p={p}, q={q}): {e}")
                continue
    
    # Use the best model to make predictions
    if best_model is not None:
        lagged_Values = train.values[-best_order[0]:]  # Use the AR part of the best order
        pforecast = results.get_forecast(steps=test_obs)
        forecasted_values = pforecast.predicted_mean
        preds = pd.DataFrame(forecasted_values, columns=listt)
    
        preds.to_csv("varma_forecasted_{}.csv".format(test_obs), index=False)
        from sklearn.metrics import mean_squared_error
        rmse= round(mean_squared_error(test,preds,squared=False))
        from sklearn.metrics import mean_absolute_percentage_error
        mape=mean_absolute_percentage_error(test,preds)
        performance["Model"].append(listt)
        performance["RMSE"].append(rmse)
        performance["MaPe"].append(mape)
        performance["Order"].append(best_order)
        performance["Test"].append(test_obs)
        perf=pd.DataFrame(performance)
    return perf,results,preds

In [16]:
#listt=["AQI_calculated","PM10","PM2.5","NOx","NO2","NO","NH3","SO2","CO",'year']

In [17]:
perf,result,pred=combination(data1,listt) # here p is aiuto regressive , q is moving average

['Close Price', 'High Price', 'Open Price', 'Low Price']


  warn('Estimation of VARMA(p,q) models is not generically robust,'


Order = (p=1, q=1)
AIC:  -862.822346199606
BIC:  -750.4603884010514


  warn('Estimation of VARMA(p,q) models is not generically robust,'


Order = (p=1, q=2)
AIC:  -852.7650284950274
BIC:  -701.3206505926278


  warn('Estimation of VARMA(p,q) models is not generically robust,'


Order = (p=2, q=1)
AIC:  2127.7875491500936
BIC:  2279.2319270524936


  warn('Estimation of VARMA(p,q) models is not generically robust,'


Order = (p=2, q=2)
AIC:  -830.5992221121172
BIC:  -640.0724241058725


In [18]:
data1

Unnamed: 0,Open Price,High Price,Low Price,Close Price
0,0.000000,0.280764,0.000000,0.048544
1,0.229236,0.396655,0.262420,0.454924
2,0.569214,0.493429,0.324841,0.239944
3,0.332226,0.187575,0.217834,0.192788
4,0.419712,0.402628,0.438216,0.479889
...,...,...,...,...
108,0.369878,0.252091,0.421656,0.356450
109,0.466224,0.236559,0.466242,0.269071
110,0.395349,0.268817,0.468790,0.364771
111,0.524917,0.365591,0.593631,0.474341


In [19]:
perf

Unnamed: 0,Model,RMSE,MaPe,Order,Test
0,"[Close Price, High Price, Open Price, Low Price]",0,0.42824,"(1, 1)",28
