In [1]:
import pandas as pd
import datetime as dt
import yfinance as yf

In [2]:
# Define the time period and stock symbol
start = dt.datetime(2021, 6, 1)
end = dt.datetime(2022, 1, 5)
symbol = 'ZOMATO.NS'

# Download stock data
stk_data = yf.download(symbol, start=start, end=end)

[*********************100%***********************]  1 of 1 completed


In [3]:
# Selecting specific columns
stk_data = stk_data[["Open", "High", "Low", "Close"]]

In [4]:
stk_data 

Price,Open,High,Low,Close
Ticker,ZOMATO.NS,ZOMATO.NS,ZOMATO.NS,ZOMATO.NS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2021-07-23,116.000000,138.899994,115.000000,126.000000
2021-07-26,126.349998,143.750000,125.300003,140.649994
2021-07-27,141.699997,147.800003,127.750000,132.899994
2021-07-28,131.000000,135.000000,123.550003,131.199997
2021-07-29,134.949997,144.000000,132.199997,141.550003
...,...,...,...,...
2021-12-29,132.699997,137.699997,131.550003,137.100006
2021-12-30,137.050003,137.050003,133.300003,133.949997
2021-12-31,133.850006,138.399994,133.399994,137.399994
2022-01-03,139.699997,142.449997,138.300003,141.350006


In [5]:
# Adjust to your column names
new_headers = ['Open','High','Low','Close'] 
stk_data.columns = new_headers

In [6]:
stk_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-07-23,116.000000,138.899994,115.000000,126.000000
2021-07-26,126.349998,143.750000,125.300003,140.649994
2021-07-27,141.699997,147.800003,127.750000,132.899994
2021-07-28,131.000000,135.000000,123.550003,131.199997
2021-07-29,134.949997,144.000000,132.199997,141.550003
...,...,...,...,...
2021-12-29,132.699997,137.699997,131.550003,137.100006
2021-12-30,137.050003,137.050003,133.300003,133.949997
2021-12-31,133.850006,138.399994,133.399994,137.399994
2022-01-03,139.699997,142.449997,138.300003,141.350006


In [7]:
stk_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 113 entries, 2021-07-23 to 2022-01-04
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    113 non-null    float64
 1   High    113 non-null    float64
 2   Low     113 non-null    float64
 3   Close   113 non-null    float64
dtypes: float64(4)
memory usage: 4.4 KB


In [8]:
from sklearn.preprocessing import MinMaxScaler
Ms = MinMaxScaler()
data1= Ms.fit_transform(stk_data)
print("Len:",data1.shape)

Len: (113, 4)


In [9]:
data1=pd.DataFrame(data1,columns=["Open Price","High Price","Low Price","Close Price"])

In [10]:
training_size = round(len(data1 ) * 0.80)
print(training_size)
X_train=data1[:training_size]
X_test=data1[training_size:]
print("X_train length:",X_train.shape)
print("X_test length:",X_test.shape)
y_train=data1[:training_size]
y_test=data1[training_size:]
print("y_train length:",y_train.shape)
print("y_test length:",y_test.shape)

90
X_train length: (90, 4)
X_test length: (23, 4)
y_train length: (90, 4)
y_test length: (23, 4)


In [11]:
import warnings
warnings.filterwarnings("ignore")

In [12]:
listt=["Close Price","High Price","Open Price","Low Price"]

In [13]:
listt

['Close Price', 'High Price', 'Open Price', 'Low Price']

In [14]:
data1

Unnamed: 0,Open Price,High Price,Low Price,Close Price
0,0.000000,0.280764,0.000000,0.048544
1,0.229236,0.396655,0.262420,0.454924
2,0.569214,0.493429,0.324841,0.239944
3,0.332226,0.187575,0.217834,0.192788
4,0.419712,0.402628,0.438216,0.479889
...,...,...,...,...
108,0.369878,0.252091,0.421656,0.356450
109,0.466224,0.236559,0.466242,0.269071
110,0.395349,0.268817,0.468790,0.364771
111,0.524917,0.365591,0.593631,0.474341


In [15]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
import pandas as pd

def combination(dataset, listt):
    print(listt)
    performance = {"Model": [], "RMSE": [], "MaPe": [], "Order": [], "Test": []}
    
    datasetTwo = dataset[listt]
    test_obs = 28
    train = datasetTwo[:-test_obs]
    test = datasetTwo[-test_obs:]
    
    best_params = None
    best_model = {}
    best_rmse = float('inf')
    
    # Constant to add to data in case of non-positive values
    constant = 1e-3  # Small constant to shift data

    # Iterate over each column in the dataset
    for column in datasetTwo.columns:
        print(f'Processing column: {column}')
        best_params_column = None
        best_model_column = None
        best_rmse_column = float('inf')
        preds_column = None

        # Check if column contains any non-positive values
        if (train[column] <= 0).any():
            print(f"Applying transformation to column {column} due to non-positive values.")
            train_transformed = train[column] + abs(train[column].min()) + constant
            test_transformed = test[column] + abs(train[column].min()) + constant
        else:
            train_transformed = train[column]
            test_transformed = test[column]
        
        # Try different combinations of trend and seasonal components
        for trend in ['add', 'mul', None]:  # Additive, multiplicative, or no trend
            for seasonal in ['add', 'mul', None]:  # Additive, multiplicative, or no seasonality
                try:
                    # Initialize and fit the HWES model for each column
                    hwes_model = ExponentialSmoothing(
                        train_transformed, 
                        trend=trend, 
                        seasonal=seasonal, 
                        seasonal_periods=12  # Assuming monthly data
                    ).fit(optimized=True)
                    
                    print(f'Trend = {trend}, Seasonal = {seasonal} for column {column}')
                    
                    # Forecasting
                    forecasted_values_transformed = hwes_model.forecast(steps=test_obs)
                    forecasted_values_transformed.to_csv("varforecastedSES_{}.csv".format(test_obs))
                    
                    # If data was transformed, inverse the transformation
                    if (train[column] <= 0).any():
                        forecasted_values = forecasted_values_transformed - (abs(train[column].min()) + constant)
                    else:
                        forecasted_values = forecasted_values_transformed
                    
                    # Calculate RMSE and MAPE for each column
                    rmse = mean_squared_error(test[column], forecasted_values, squared=False)
                    mape = mean_absolute_percentage_error(test[column], forecasted_values)
                    
                    print(f'RMSE: {rmse}')
                    print(f'MAPE: {mape}')
                    
                    # Track the best model for this column based on RMSE
                    if rmse < best_rmse_column:
                        best_rmse_column = rmse
                        best_params_column = (trend, seasonal)
                        best_model_column = hwes_model
                        preds_column = forecasted_values
                
                except Exception as e:
                    print(f"Could not fit HWES (Trend={trend}, Seasonal={seasonal}) for column {column}: {e}")
                    continue
        
        # Save the best model and predictions for each column
        if best_model_column is not None:
            best_model[column] = best_model_column
            
            # Store the results for performance metrics
            performance["Model"].append(f'{column} (HWES)')
            performance["RMSE"].append(best_rmse_column)
            performance["MaPe"].append(mape)
            performance["Order"].append(f"Trend={best_params_column[0]}, Seasonal={best_params_column[1]}")
            performance["Test"].append(test_obs)
    
    # Convert performance to DataFrame
    perf = pd.DataFrame(performance)
    
    return perf, best_model, preds_column

In [16]:
#listt=["AQI_calculated","PM10","PM2.5","NOx","NO2","NO","NH3","SO2","CO",'year']

In [17]:
perf,result,pred=combination(data1,listt) # here p is aiuto regressive , q is moving average

['Close Price', 'High Price', 'Open Price', 'Low Price']
Processing column: Close Price
Applying transformation to column Close Price due to non-positive values.
Trend = add, Seasonal = add for column Close Price
RMSE: 0.8085622030717834
MAPE: 2.786147596098671
Trend = add, Seasonal = mul for column Close Price
RMSE: 0.7952190294415434
MAPE: 2.7911705315073148
Trend = add, Seasonal = None for column Close Price
RMSE: 0.7082705236023601
MAPE: 2.4379459640501073
Trend = mul, Seasonal = add for column Close Price
RMSE: 1.566094359410695
MAPE: 5.1110192006376085
Trend = mul, Seasonal = mul for column Close Price
RMSE: 1.1133491191890186
MAPE: 3.845430241116298
Trend = mul, Seasonal = None for column Close Price
RMSE: 0.7886068259174257
MAPE: 2.6902785430548284
Trend = None, Seasonal = add for column Close Price
RMSE: 0.6441664991871239
MAPE: 2.247820787136251
Trend = None, Seasonal = mul for column Close Price
RMSE: 0.6018277608361207
MAPE: 2.136689515483081
Trend = None, Seasonal = None f



Trend = add, Seasonal = mul for column Open Price
RMSE: 0.41360481752479056
MAPE: 0.8097484607732407
Trend = add, Seasonal = None for column Open Price
RMSE: 0.5134887113368203
MAPE: 1.007225348323127
Trend = mul, Seasonal = add for column Open Price
RMSE: 0.5634392109509316
MAPE: 1.1127167838907464
Trend = mul, Seasonal = mul for column Open Price
RMSE: 0.460975745588938
MAPE: 0.9040965758397838
Trend = mul, Seasonal = None for column Open Price
RMSE: 0.41699531991831956
MAPE: 0.8215608312908225
Trend = None, Seasonal = add for column Open Price
RMSE: 0.4105924277818147
MAPE: 0.8133275886264685
Trend = None, Seasonal = mul for column Open Price
RMSE: 0.3837759869464613
MAPE: 0.7499679933476526
Trend = None, Seasonal = None for column Open Price
RMSE: 0.35442600184552303
MAPE: 0.6995171917653863
Processing column: Low Price
Applying transformation to column Low Price due to non-positive values.
Trend = add, Seasonal = add for column Low Price
RMSE: 0.7042754885916755
MAPE: 1.4149012257



Trend = None, Seasonal = mul for column Low Price
RMSE: 0.5103803754401556
MAPE: 1.0129798105405021
Trend = None, Seasonal = None for column Low Price
RMSE: 0.45360769421453895
MAPE: 0.9186578833422049


In [18]:
data1

Unnamed: 0,Open Price,High Price,Low Price,Close Price
0,0.000000,0.280764,0.000000,0.048544
1,0.229236,0.396655,0.262420,0.454924
2,0.569214,0.493429,0.324841,0.239944
3,0.332226,0.187575,0.217834,0.192788
4,0.419712,0.402628,0.438216,0.479889
...,...,...,...,...
108,0.369878,0.252091,0.421656,0.356450
109,0.466224,0.236559,0.466242,0.269071
110,0.395349,0.268817,0.468790,0.364771
111,0.524917,0.365591,0.593631,0.474341


In [19]:
perf

Unnamed: 0,Model,RMSE,MaPe,Order,Test
0,Close Price (HWES),0.548702,1.922939,"Trend=None, Seasonal=None",28
1,High Price (HWES),0.449969,1.535995,"Trend=None, Seasonal=None",28
2,Open Price (HWES),0.354426,0.699517,"Trend=None, Seasonal=None",28
3,Low Price (HWES),0.244976,0.918658,"Trend=mul, Seasonal=None",28
