In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
def dateparse(dates):
    """Parse a ``DD-MM-YYYY`` date string into a ``datetime`` object.

    Uses the standard-library ``datetime`` class that this cell already
    imports; the ``pd.datetime`` alias was deprecated in pandas 1.0 and
    removed in pandas 2.0.

    Raises
    ------
    ValueError
        If ``dates`` does not match the ``%d-%m-%Y`` format.
    """
    return datetime.strptime(dates, '%d-%m-%Y')
from sklearn.metrics import mean_squared_error 
from statsmodels.tools.eval_measures import rmse 
from fbprophet import Prophet
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima.arima import auto_arima

In [53]:
# Load a single Walmart CSV, parsing the 'Date' column and using it as the index.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df=pd.read_csv("C:/Users/RAHAT/Downloads/Untitled Folder/Sales_f/data/Wallmart_Splitted_Train_Data/S__1/S_1__D_4.csv",parse_dates=['Date'],index_col='Date')

In [4]:
# Drop the identifier / holiday columns. errors='ignore' keeps the cell
# re-runnable: df is rebound to its own result, so on a second execution the
# columns are already gone and a plain drop would raise KeyError.
df = df.drop(columns=['Store', 'Dept', 'IsHoliday'], errors='ignore')

In [5]:
# Display the current contents of df.
df

Unnamed: 0_level_0,Weekly_Sales
Date,Unnamed: 1_level_1
2010-02-05,39954.04
2010-02-12,35351.21
2010-02-19,36826.95
2010-02-26,34660.16
2010-03-05,38086.19
...,...
2012-09-28,34647.33
2012-10-05,39311.93
2012-10-12,35446.18
2012-10-19,35549.19


In [2]:
from sklearn.preprocessing import MinMaxScaler
# Shared scaler that maps values into the [0, 1] range; passed to the preprocessing helpers.
scaler = MinMaxScaler(feature_range = (0, 1))

In [3]:
import itertools
def get_sarima_params(data, seasonal_period=2):
    """Grid-search SARIMA orders and return the combination with the lowest AIC.

    Every (p, d, q) and seasonal (P, D, Q) combination with each term in
    {0, 1} is fitted with ``SARIMAX``. Candidates whose fit raises are
    skipped. Both the winning row and the full result table are printed.

    Parameters
    ----------
    data : array-like
        Endogenous series, handed to ``SARIMAX`` unchanged.
    seasonal_period : int, default 2
        Seasonal period used for every seasonal candidate (this value was
        previously hard-coded to 2).

    Returns
    -------
    tuple
        ``(order, seasonal_order)`` of the candidate with the smallest AIC.

    Raises
    ------
    ValueError
        If no candidate produced a usable AIC.
    """
    terms = range(0, 2)
    pdq = list(itertools.product(terms, terms, terms))
    seasonal_pdq = [(P, D, Q, seasonal_period) for P, D, Q in pdq]

    # Collect plain records and build the table once: DataFrame.append was
    # removed in pandas 2.0 and grew the frame quadratically.
    records = []
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                mod = SARIMAX(data, order=param, seasonal_order=param_seasonal,
                              enforce_stationarity=True, enforce_invertibility=False)
                results = mod.fit()
            except Exception:
                # Best-effort search: skip combinations the optimiser rejects,
                # but let KeyboardInterrupt / SystemExit propagate.
                continue
            records.append({'pda': param, 'seasonal_pda': param_seasonal, 'aic': results.aic})

    result_table = pd.DataFrame(records, columns=['pda', 'seasonal_pda', 'aic'])
    optimal_params = result_table[result_table['aic'] == result_table['aic'].min()]
    if optimal_params.empty:
        raise ValueError("No SARIMA candidate could be fitted with a valid AIC")

    print(optimal_params)
    print(result_table)
    order = optimal_params['pda'].values[0]
    seasonal_order = optimal_params['seasonal_pda'].values[0]
    return (order, seasonal_order)

In [4]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (in percent).

    Both inputs are flattened to 1-D before comparison. Without this, a
    ``(n,)`` target compared with ``(n, 1)`` predictions would broadcast to
    an ``(n, n)`` matrix and yield a wrong score.

    Parameters
    ----------
    y_true : array-like
        Actual values. Zero entries make the ratio undefined and produce
        ``inf``/``nan`` through NumPy division, so filter them beforehand.
    y_pred : array-like
        Predicted values with the same number of elements as ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Raises
    ------
    ValueError
        Raised by NumPy when the flattened inputs cannot be broadcast
        (different lengths).
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [7]:
def preprocess_weather(df,scaler):
    """Return a one-column frame with ``temperatureMax`` rescaled by ``scaler``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``temperatureMax`` column; it is not modified.
    scaler : object
        Anything exposing ``fit`` (returning the fitted scaler) and
        ``transform``. It is fitted on the ``temperatureMax`` column here.

    Returns
    -------
    pandas.DataFrame
        New frame holding only the scaled ``temperatureMax`` column.
    """
    # Explicit copy: writing into the result of df[[...]] is a chained
    # assignment that triggers SettingWithCopyWarning.
    weather = df[['temperatureMax']].copy()
    scaler = scaler.fit(weather)
    # transform() yields an (n, 1) array; flatten it for the column assignment.
    weather['temperatureMax'] = np.asarray(scaler.transform(weather)).ravel()
    return weather

In [10]:
def preprocess_grocery(df,scaler):
    """Drop the identifier columns and rescale ``unit_sales`` with ``scaler``.

    Works on a new frame instead of dropping in place, so the caller's
    DataFrame is left untouched and the function can be called repeatedly
    on the same input (consistent with ``preprocessing_walmart``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``id``, ``store_nbr``, ``item_nbr``, ``onpromotion``
        and ``unit_sales``.
    scaler : object
        Anything exposing ``fit`` (returning the fitted scaler) and
        ``transform``. It is fitted on the ``unit_sales`` column here.

    Returns
    -------
    pandas.DataFrame
        New frame without the identifier columns and with ``unit_sales`` scaled.
    """
    sales = df.drop(columns=['id', 'store_nbr', 'item_nbr', 'onpromotion'])
    # Scale only the target column so any other remaining column cannot
    # break the single-column assignment below.
    scaler = scaler.fit(sales[['unit_sales']])
    sales['unit_sales'] = np.asarray(scaler.transform(sales[['unit_sales']])).ravel()
    return sales

In [11]:
def preprocessing_walmart(df,scaler):
    """Drop the Walmart metadata columns and rescale ``Weekly_Sales``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Weekly_Sales``. ``Store``, ``Dept`` and ``IsHoliday``
        are removed when present; it is not an error if an earlier step has
        already dropped them.
    scaler : object
        Anything exposing ``fit`` (returning the fitted scaler) and
        ``transform``. It is fitted on the ``Weekly_Sales`` column here.

    Returns
    -------
    pandas.DataFrame
        New frame without the metadata columns and with ``Weekly_Sales`` scaled.
    """
    # errors='ignore' makes the helper safe on frames whose metadata columns
    # were already removed; df.drop always returns a new frame.
    sales = df.drop(columns=['Store', 'Dept', 'IsHoliday'], errors='ignore')
    scaler = scaler.fit(sales[['Weekly_Sales']])
    # transform() yields an (n, 1) array; flatten it for the column assignment.
    sales['Weekly_Sales'] = np.asarray(scaler.transform(sales[['Weekly_Sales']])).ravel()
    return sales
    
    

In [9]:
def x_y_split(df,ratio):
    """Split ``df`` by row position into leading and trailing parts, dropping NaN rows.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Data to split; it is not modified.
    ratio : float
        Fraction of rows (between 0 and 1) that goes into the first part.

    Returns
    -------
    tuple
        ``(train, test)``: the first ``int(len(df) * ratio)`` rows and the
        remaining rows, each with rows containing NaN removed.

    Raises
    ------
    ValueError
        If ``ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError("ratio must be between 0 and 1, got {!r}".format(ratio))
    interval = int(len(df) * ratio)
    # Non-inplace dropna returns independent frames, so later column
    # assignments on train/test do not write into a slice of df.
    train = df.iloc[:interval].dropna()
    test = df.iloc[interval:].dropna()
    return train, test
    
    

In [55]:
# Replace df with its preprocessed version (metadata columns dropped, Weekly_Sales scaled).
# NOTE(review): df is overwritten with its own output, so re-running this cell
# re-applies the preprocessing to already-processed data.
df=preprocessing_walmart(df,scaler)

In [61]:
# Split the rows 50/50 by position: first half into train, second half into test.
train,test=x_y_split(df,0.5)

In [62]:
# Number of rows in the test split.
len(test)

72

In [25]:
# Row position of the 80/20 split point used by the manual split below.
interval=int(len(df)*0.8)

In [26]:
# Fit the scaler on the whole frame.
# NOTE(review): this includes the rows later used as test data, so test-set
# min/max leak into the scaling — confirm this is intended.
scaler=scaler.fit(df)

In [27]:
# Split the rows at `interval` and scale both parts with the fitted scaler.
# Explicit copies avoid writing into slices of df (SettingWithCopyWarning).
train = df.iloc[:interval].copy()
test = df.iloc[interval:].copy()
train['Weekly_Sales'] = scaler.transform(train)
test['Weekly_Sales'] = scaler.transform(test)
train = train.dropna()
test = test.dropna()
# Integer positions of the first and last forecast step, counted from the
# start of train; the model cells pass them to predict().
start = len(train)
end = len(train) + len(test) - 1
print('Train-size', len(train))
print('Test-size', len(test))

Train-size 114
Tesr-size 29


In [20]:
# Silence library warnings while the candidate models are fitted.
warnings.filterwarnings("ignore")
# Grid-search the (order, seasonal_order) pair on the training split; the
# ARIMA and SARIMAX cells reuse these values.
order,seasonal_order = get_sarima_params(train)

          pda  seasonal_pda          aic
46  (1, 0, 1)  (1, 1, 0, 2)  2133.706834
          pda  seasonal_pda          aic
0   (0, 0, 0)  (0, 0, 0, 2)  2722.714421
1   (0, 0, 0)  (0, 0, 1, 2)  2650.047775
2   (0, 0, 0)  (0, 1, 0, 2)  2173.594522
3   (0, 0, 0)  (0, 1, 1, 2)  2152.333570
4   (0, 0, 0)  (1, 0, 0, 2)  2223.205930
..        ...           ...          ...
59  (1, 1, 1)  (0, 1, 1, 2)  2155.972557
60  (1, 1, 1)  (1, 0, 0, 2)  2155.438886
61  (1, 1, 1)  (1, 0, 1, 2)  2153.294815
62  (1, 1, 1)  (1, 1, 0, 2)  2156.826843
63  (1, 1, 1)  (1, 1, 1, 2)  2157.742346

[64 rows x 3 columns]


In [30]:
# ARIMA baseline using the non-seasonal order chosen by the grid search.
model = ARIMA(train, order=order)
model_fit = model.fit()
# statsmodels.tsa.arima.model results always predict on the original scale.
# The legacy typ='levels' keyword is not a parameter of this predict() and is
# reported as an unknown keyword argument, so it is omitted.
predictions = model_fit.predict(start, end).rename("Predictions")
test['ARIMA'] = np.array(predictions)

In [36]:
# Prophet expects a frame with a 'ds' (timestamp) and a 'y' (value) column.
train_prophet = pd.DataFrame()
train_prophet['ds'] = train.index
train_prophet['y'] = np.array(train['Weekly_Sales'])
# NOTE(review): weekly_seasonality=True is kept from the original cell; confirm
# it is meaningful for the sampling frequency of this series.
m1 = Prophet(weekly_seasonality=True)
m1.fit(train_prophet)
# Predict on the exact test timestamps. Building the horizon with
# make_future_dataframe(freq='D') and slicing by `interval` only lines up with
# the test rows for gap-free daily data whose train split lost no NaN rows.
future1 = pd.DataFrame()
future1['ds'] = test.index
forecast1 = m1.predict(future1)
pred_prophet = forecast1[['yhat']]
test['PROPHET'] = pred_prophet['yhat'].to_numpy()

INFO:numexpr.utils:NumExpr defaulting to 4 threads.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [38]:
# SARIMA: seasonal model using both orders chosen by get_sarima_params.
# NOTE(review): the order search fits with enforce_stationarity=True while this
# fit uses enforce_stationarity=False — confirm the mismatch is intended.
model = SARIMAX(train, order=order,seasonal_order=seasonal_order, enforce_stationarity=False,enforce_invertibility=False)
model_fit = model.fit(disp=False)
# Forecast the positions start..end (the test window) and store them next to the actuals.
yhat= model_fit.predict(start,end)
test['SARIMAX']=np.array(yhat)

In [47]:
# Holt-Winters with an additive seasonal component, fitted on the training values.
# NOTE(review): seasonal_periods is set to the test length rather than a
# property of the data — confirm this is the intended season length.
fit1 = ExponentialSmoothing(np.asarray(train) ,seasonal_periods=len(test), seasonal='add').fit(optimized=True, use_brute=True)
# Leftover from an earlier version; y_hat_avg / test_data are not defined in the cells shown here.
#y_hat_avg['Holt_Winter'] = fit1.forecast(len(test_data))
# Forecast the positions start..end (the test window).
predictions_holt_winter = fit1.predict(start, end)

test['HOLT_WINTER']=np.array(predictions_holt_winter)

In [50]:
# Simple exponential smoothing with a fixed smoothing level.
# alpha = 2 / (span + 1) is the usual span-to-alpha conversion for an
# exponential moving average; the span is taken to be the test length.
span = len(test)
alpha = 2/(span+1)
model = SimpleExpSmoothing(train).fit(smoothing_level=alpha)
# Forecast one value per test row and store it next to the actuals.
test_predictions = model.forecast(len(test))
test['SES']=np.array(test_predictions)

In [26]:
import os
import logging, sys
logging.disable(sys.maxsize)
from math import sqrt
# Benchmark ARIMA, Prophet, SARIMA, Holt-Winters and SES on every CSV found in
# `paths_to_folders`, recording RMSE and MAPE per file in `error`.
scaler = MinMaxScaler(feature_range=(0, 1))
# paths_to_folders = ['C:/Users/RAHAT/Downloads/Untitled Folder/Sales_f/data/Wallmart_Splitted_Train_Data/S__5']
paths_to_folders = ['C:/Users/RAHAT/Downloads/Untitled Folder/Sales_f/data/Practice/weather']
x = 0  # row label of the next entry in `error` (files processed so far)
error = pd.DataFrame(columns=['File_name', 'ARIMA_RMSE','HOLT_WINTER_RMSE','PROPHET_RMSE','SES_RMSE','SARIMA_RMSE',
                              'ARIMA_MAPE_ERROR','HOLT_WINTER_MAPE_ERROR','PROPHET_MAPE_ERROR','SES_MAPE_ERROR','SARIMA_MAPE_ERROR'])
# Column names under which each model's forecast is stored in `test`.
model_names = ['ARIMA', 'SES', 'SARIMA', 'PROPHET', 'HOLT_WINTER']
warnings.filterwarnings("ignore")
for folder in paths_to_folders:
    # sorted() gives a reproducible row order in `error`.
    for csv_file in sorted(os.listdir(folder)):
        # Skip directory entries that are not CSV files (e.g. checkpoint folders).
        if not csv_file.lower().endswith('.csv'):
            continue
        # Build the path from the folder being iterated so every entry of
        # paths_to_folders is honoured.
        df = pd.read_csv(os.path.join(folder, csv_file), parse_dates=['time'], index_col='time')
        # Series shorter than 100 rows are not benchmarked.
        if len(df.index) < 100:
            continue

        df = preprocess_weather(df, scaler)
        train, test = x_y_split(df, 0.8)
        # Own copy so the forecast columns are not written into a slice of df.
        test = test.copy()
        # Integer positions of the first and last forecast step, counted from
        # the start of train.
        start = len(train)
        end = len(train) + len(test) - 1
        order, seasonal_order = get_sarima_params(train)

        # ARIMA MODEL
        # The legacy typ='levels' keyword is not a parameter of the
        # statsmodels.tsa.arima.model predict(), so it is omitted.
        model = ARIMA(train, order=order)
        model_fit = model.fit()
        predictions = model_fit.predict(start, end).rename("Predictions")
        test['ARIMA'] = np.array(predictions)

        # Prophet Model: predict on the exact test timestamps so the forecast
        # always lines up with the test rows, whatever the sampling frequency
        # and however many NaN rows were dropped from train.
        train_prophet = pd.DataFrame()
        train_prophet['ds'] = train.index
        train_prophet['y'] = np.array(train['temperatureMax'])
        m1 = Prophet(weekly_seasonality=True)
        m1.fit(train_prophet)
        future1 = pd.DataFrame()
        future1['ds'] = test.index
        forecast1 = m1.predict(future1)
        test['PROPHET'] = forecast1['yhat'].to_numpy()

        # SARIMA MODEL
        model = SARIMAX(train, order=order, seasonal_order=seasonal_order,
                        enforce_stationarity=False, enforce_invertibility=False)
        model_fit = model.fit(disp=False)
        yhat = model_fit.predict(start, end)
        test['SARIMA'] = np.array(yhat)

        # HOLT WINTER
        fit1 = ExponentialSmoothing(np.asarray(train), seasonal_periods=len(test),
                                    seasonal='add').fit(optimized=True, use_brute=True)
        predictions_holt_winter = fit1.predict(start, end)
        test['HOLT_WINTER'] = np.array(predictions_holt_winter)

        # SES with alpha derived from a span equal to the test length.
        span = len(test)
        alpha = 2/(span+1)
        model = SimpleExpSmoothing(train).fit(smoothing_level=alpha)
        test_predictions = model.forecast(len(test))
        test['SES'] = np.array(test_predictions)

        # One row of metrics per file.
        actual = test['temperatureMax']
        error.loc[x, ['File_name']] = csv_file
        for name in model_names:
            error.loc[x, [name + '_RMSE']] = rmse(actual, test[name])
            error.loc[x, [name + '_MAPE_ERROR']] = mean_absolute_percentage_error(actual, test[name])

        x = x + 1
        print(csv_file)
        print(x)

          pda  seasonal_pda         aic
21  (0, 1, 0)  (1, 0, 1, 2) -170.782742
          pda  seasonal_pda         aic
0   (0, 0, 0)  (0, 0, 0, 2)  138.699672
1   (0, 0, 0)  (0, 0, 1, 2)   52.262300
2   (0, 0, 0)  (0, 1, 0, 2) -115.333056
3   (0, 0, 0)  (0, 1, 1, 2) -118.679159
4   (0, 0, 0)  (1, 0, 0, 2) -111.138922
..        ...           ...         ...
59  (1, 1, 1)  (0, 1, 1, 2) -153.192418
60  (1, 1, 1)  (1, 0, 0, 2) -167.909490
61  (1, 1, 1)  (1, 0, 1, 2) -168.202776
62  (1, 1, 1)  (1, 1, 0, 2) -126.120100
63  (1, 1, 1)  (1, 1, 1, 2) -151.312560

[64 rows x 3 columns]
Armenia.csv
1


In [27]:
# Display the per-file error metrics collected by the benchmark loop.
error

Unnamed: 0,File_name,ARIMA_RMSE,HOLT_WINTER_RMSE,PROPHET_RMSE,SES_RMSE,SARIMA_RMSE,ARIMA_MAPE_ERROR,HOLT_WINTER_MAPE_ERROR,PROPHET_MAPE_ERROR,SES_MAPE_ERROR,SARIMA_MAPE_ERROR
0,Armenia.csv,0.167742,0.150409,0.119524,0.132093,0.144902,16.6143,14.2572,15.0944,13.8045,14.8725


In [28]:
# Persist the metrics table as CSV in the current working directory.
error.to_csv('weather_ts.csv')

In [None]:
import os
import logging, sys
logging.disable(sys.maxsize)
from math import sqrt
# Benchmark a DNN, a 1-D CNN and an LSTM on every Walmart CSV in
# `paths_to_folders`, recording RMSE and MAPE per file in `error`.
# NOTE(review): convert2matrix, model_dnn and the Keras names (Sequential,
# Conv1D, Flatten, Dense, LSTM, Dropout, EarlyStopping) are not defined or
# imported in the cells shown here — confirm they exist before running.
paths_to_folders = ['C:/Users/RAHAT/Downloads/Untitled Folder/Sales_f/data/Wallmart_Splitted_Train_Data/S__5']
x = 0  # row label of the next entry in `error` (files processed so far)
error = pd.DataFrame(columns=['File_name','DNN_RMSE','DNN_MAPE','CNN_RMSE','CNN_MAPE','LSTM_RMSE','LSTM_MAPE'])
look_back = 4  # window length passed to convert2matrix and model_dnn
# Own scaler so the cell does not depend on state left behind by other cells.
scaler = MinMaxScaler(feature_range=(0, 1))
for folder in paths_to_folders:
    # sorted() gives a reproducible row order in `error`.
    for csv_file in sorted(os.listdir(folder)):
        # Skip directory entries that are not CSV files (e.g. checkpoint folders).
        if not csv_file.lower().endswith('.csv'):
            continue
        # Build the path from the folder being iterated so every entry of
        # paths_to_folders is honoured.
        df = pd.read_csv(os.path.join(folder, csv_file), parse_dates=['Date'], index_col='Date')
        df = df.drop(columns=['Store', 'Dept', 'IsHoliday'])
        # Series shorter than 140 rows are not benchmarked.
        if len(df.index) < 140:
            continue

        i = int(len(df.index) * 0.8)
        train = df.iloc[0:i]
        test = df.iloc[i:]
        # Fit the scaler on the training part only and reuse it for the test
        # part. Re-fitting on test would put the two splits on different
        # scales and leak test statistics.
        train_scal = pd.DataFrame(scaler.fit_transform(train[['Weekly_Sales']]))
        test_scal = pd.DataFrame(scaler.transform(test[['Weekly_Sales']]))
        # Exact zeros are removed because MAPE divides by the actual value.
        train_scal = train_scal.replace(0, np.nan).dropna()
        test_scal = test_scal.replace(0, np.nan).dropna()

        xtrain, ytrain = convert2matrix(train_scal.values, look_back)
        xtest, ytest = convert2matrix(test_scal.values, look_back)
        # Targets and predictions are flattened to 1-D so the metrics compare
        # them element-wise; Keras predict() returns an (n, 1) array.
        ytest = np.ravel(ytest)

        # DNN MODEL
        model = model_dnn(look_back)
        # No validation data is passed to fit(), so 'val_loss' is never
        # available; monitor the training loss so early stopping can trigger.
        history = model.fit(xtrain, ytrain, epochs=60, batch_size=32, verbose=1,
                            callbacks=[EarlyStopping(monitor='loss', patience=10)], shuffle=False)
        test_predict = model.predict(xtest).ravel()

        error.loc[x, ['File_name']] = csv_file
        error.loc[x, ['DNN_RMSE']] = np.sqrt(mean_squared_error(ytest, test_predict))
        error.loc[x, ['DNN_MAPE']] = mean_absolute_percentage_error(ytest, test_predict)

        # The convolutional and recurrent layers take (samples, timesteps, features).
        xtrain = xtrain.reshape(xtrain.shape[0], xtrain.shape[1], 1)
        xtest = xtest.reshape(xtest.shape[0], xtest.shape[1], 1)

        # CNN MODEL
        model = Sequential()
        model.add(Conv1D(filters=128, kernel_size=2, activation='relu', input_shape=(xtrain.shape[1], 1)))
        model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
        model.add(Flatten())
        model.add(Dense(100, activation='relu'))
        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mse')
        model.fit(xtrain, ytrain, epochs=60, verbose=1)
        cnn_preds = model.predict(xtest).ravel()

        error.loc[x, ['CNN_RMSE']] = np.sqrt(mean_squared_error(ytest, cnn_preds))
        error.loc[x, ['CNN_MAPE']] = mean_absolute_percentage_error(ytest, cnn_preds)

        # LSTM_MODEL
        model = Sequential()
        model.add(LSTM(100, return_sequences=True, input_shape=(xtrain.shape[1], xtrain.shape[2])))
        model.add(Dropout(0.2))
        model.add(LSTM(50))
        model.add(Dropout(0.1))
        model.add(Dense(1))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(xtrain, ytrain, epochs=60, batch_size=32, verbose=1, shuffle=False)
        lstm_preds = model.predict(xtest).ravel()

        error.loc[x, ['LSTM_RMSE']] = np.sqrt(mean_squared_error(ytest, lstm_preds))
        error.loc[x, ['LSTM_MAPE']] = mean_absolute_percentage_error(ytest, lstm_preds)

        x = x + 1
        print(csv_file+"\n")
        print(x)