In [1]:
import pandas as pd
import datetime as dt
import yfinance as yf

In [10]:
# Define the time period and stock symbol
start = dt.datetime(2021, 6, 1)
end = dt.datetime(2022, 1, 5)
symbol = 'ZOMATO.NS'

# Download stock data
stk_data = yf.download(symbol, start=start, end=end)

[*********************100%***********************]  1 of 1 completed


In [11]:
# Selecting specific columns
stk_data = stk_data[["Open", "High", "Low", "Close"]]

In [12]:
stk_data 

Price,Open,High,Low,Close
Ticker,ZOMATO.NS,ZOMATO.NS,ZOMATO.NS,ZOMATO.NS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2021-07-23,116.000000,138.899994,115.000000,126.000000
2021-07-26,126.349998,143.750000,125.300003,140.649994
2021-07-27,141.699997,147.800003,127.750000,132.899994
2021-07-28,131.000000,135.000000,123.550003,131.199997
2021-07-29,134.949997,144.000000,132.199997,141.550003
...,...,...,...,...
2021-12-29,132.699997,137.699997,131.550003,137.100006
2021-12-30,137.050003,137.050003,133.300003,133.949997
2021-12-31,133.850006,138.399994,133.399994,137.399994
2022-01-03,139.699997,142.449997,138.300003,141.350006


In [13]:
# Adjust to your column names
new_headers = ['Open','High','Low','Close'] 
stk_data.columns = new_headers

In [14]:
stk_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-07-23,116.000000,138.899994,115.000000,126.000000
2021-07-26,126.349998,143.750000,125.300003,140.649994
2021-07-27,141.699997,147.800003,127.750000,132.899994
2021-07-28,131.000000,135.000000,123.550003,131.199997
2021-07-29,134.949997,144.000000,132.199997,141.550003
...,...,...,...,...
2021-12-29,132.699997,137.699997,131.550003,137.100006
2021-12-30,137.050003,137.050003,133.300003,133.949997
2021-12-31,133.850006,138.399994,133.399994,137.399994
2022-01-03,139.699997,142.449997,138.300003,141.350006


In [15]:
stk_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 113 entries, 2021-07-23 to 2022-01-04
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    113 non-null    float64
 1   High    113 non-null    float64
 2   Low     113 non-null    float64
 3   Close   113 non-null    float64
dtypes: float64(4)
memory usage: 4.4 KB


In [16]:
from sklearn.preprocessing import MinMaxScaler
Ms = MinMaxScaler()
data1= Ms.fit_transform(stk_data)
print("Len:",data1.shape)

Len: (113, 4)


In [17]:
data1=pd.DataFrame(data1,columns=["Open Price","High Price","Low Price","Close Price"])

In [18]:
training_size = round(len(data1 ) * 0.80)
print(training_size)
X_train=data1[:training_size]
X_test=data1[training_size:]
print("X_train length:",X_train.shape)
print("X_test length:",X_test.shape)
y_train=data1[:training_size]
y_test=data1[training_size:]
print("y_train length:",y_train.shape)
print("y_test length:",y_test.shape)

90
X_train length: (90, 4)
X_test length: (23, 4)
y_train length: (90, 4)
y_test length: (23, 4)


In [19]:
import warnings
warnings.filterwarnings("ignore")

In [20]:
listt=["Close Price","High Price","Open Price","Low Price"]

In [21]:
listt

['Close Price', 'High Price', 'Open Price', 'Low Price']

In [22]:
data1

Unnamed: 0,Open Price,High Price,Low Price,Close Price
0,0.000000,0.280764,0.000000,0.048544
1,0.229236,0.396655,0.262420,0.454924
2,0.569214,0.493429,0.324841,0.239944
3,0.332226,0.187575,0.217834,0.192788
4,0.419712,0.402628,0.438216,0.479889
...,...,...,...,...
108,0.369878,0.252091,0.421656,0.356450
109,0.466224,0.236559,0.466242,0.269071
110,0.395349,0.268817,0.468790,0.364771
111,0.524917,0.365591,0.593631,0.474341


In [23]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
import pandas as pd

def combination(dataset, listt):
    print(listt)
    performance = {"Model": [], "RMSE": [], "MaPe": [], "Order": [], "Test": []}
    
    datasetTwo = dataset[listt]
    test_obs = 28
    train = datasetTwo[:-test_obs]
    test = datasetTwo[-test_obs:]
    
    best_alpha = None
    best_model = {}
    best_rmse = float('inf')
    
    # Iterate over each column in the dataset
    for column in datasetTwo.columns:
        print(f'Processing column: {column}')
        best_alpha_column = None
        best_model_column = None
        best_rmse_column = float('inf')
        preds_column = None
        
        # Try different smoothing levels (alpha) to find the best
        for alpha in [0.1, 0.2, 0.5, 0.8]:
            try:
                # Initialize and fit the SES model for each column
                ses_model = SimpleExpSmoothing(train[column]).fit(smoothing_level=alpha, optimized=False)
                print(f'Alpha = {alpha} for column {column}')
                
                # Forecasting
                forecasted_values = ses_model.forecast(steps=test_obs)
                forecasted_values.to_csv("varforecastedSES_{}.csv".format(test_obs))
                # Calculate RMSE and MAPE for each column
                rmse = mean_squared_error(test[column], forecasted_values, squared=False)
                mape = mean_absolute_percentage_error(test[column], forecasted_values)
                
                print(f'RMSE: {rmse}')
                print(f'MAPE: {mape}')
                
                # Track the best model for this column based on RMSE
                if rmse < best_rmse_column:
                    best_rmse_column = rmse
                    best_alpha_column = alpha
                    best_model_column = ses_model
                    preds_column = forecasted_values
                
            except Exception as e:
                print(f"Could not fit SES(alpha={alpha}) for column {column}: {e}")
                continue
        
        # Save the best model and predictions for each column
        if best_model_column is not None:
            best_model[column] = best_model_column
            
            # Store the results for performance metrics
            performance["Model"].append(f'{column} (SES)')
            performance["RMSE"].append(best_rmse_column)
            performance["MaPe"].append(mape)
            performance["Order"].append(f"alpha={best_alpha_column}")
            performance["Test"].append(test_obs)
    
    # Convert performance to DataFrame
    perf = pd.DataFrame(performance)
    
    return perf, best_model, preds_column


In [24]:
#listt=["AQI_calculated","PM10","PM2.5","NOx","NO2","NO","NH3","SO2","CO",'year']


In [25]:
perf,result,pred=combination(data1,listt) # here p is aiuto regressive , q is moving average

['Close Price', 'High Price', 'Open Price', 'Low Price']
Processing column: Close Price
Alpha = 0.1 for column Close Price
RMSE: 0.27363511235684185
MAPE: 1.0010892222561945
Alpha = 0.2 for column Close Price
RMSE: 0.38897369104015267
MAPE: 1.3900588828867824
Alpha = 0.5 for column Close Price
RMSE: 0.48907679253494807
MAPE: 1.7277581592757294
Alpha = 0.8 for column Close Price
RMSE: 0.547163643575856
MAPE: 1.9179411346085686
Processing column: High Price
Alpha = 0.1 for column High Price
RMSE: 0.2473331491779577
MAPE: 0.8493631740202255
Alpha = 0.2 for column High Price
RMSE: 0.3430518671632497
MAPE: 1.1748990767086394
Alpha = 0.5 for column High Price
RMSE: 0.3952822041776801
MAPE: 1.3481126503191858
Alpha = 0.8 for column High Price
RMSE: 0.4260058276961681
MAPE: 1.4543251756460216
Processing column: Open Price
Alpha = 0.1 for column Open Price
RMSE: 0.2022424804200604
MAPE: 0.38460382669745
Alpha = 0.2 for column Open Price
RMSE: 0.27959835742683337
MAPE: 0.5407619679902096
Alpha =

In [26]:
data1

Unnamed: 0,Open Price,High Price,Low Price,Close Price
0,0.000000,0.280764,0.000000,0.048544
1,0.229236,0.396655,0.262420,0.454924
2,0.569214,0.493429,0.324841,0.239944
3,0.332226,0.187575,0.217834,0.192788
4,0.419712,0.402628,0.438216,0.479889
...,...,...,...,...
108,0.369878,0.252091,0.421656,0.356450
109,0.466224,0.236559,0.466242,0.269071
110,0.395349,0.268817,0.468790,0.364771
111,0.524917,0.365591,0.593631,0.474341


In [27]:
perf

Unnamed: 0,Model,RMSE,MaPe,Order,Test
0,Close Price (SES),0.273635,1.917941,alpha=0.1,28
1,High Price (SES),0.247333,1.454325,alpha=0.1,28
2,Open Price (SES),0.202242,0.699511,alpha=0.1,28
3,Low Price (SES),0.203657,0.870444,alpha=0.1,28
