In [1]:
import numpy as np
import pandas as pd
from math import *
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib as mpl

from statsmodels.tsa.exponential_smoothing.ets import ETSModel
from statsmodels.tsa.arima_model import ARIMA

In [2]:
# This sets some nicer defaults for plotting.
# This must be run in a separate cell from importing matplotlib due to a bug.
params = {'legend.fontsize': 'large',
          'figure.figsize': (11.0, 11.0),
          'axes.labelsize': 'x-large',
          'axes.titlesize':'xx-large',
          'xtick.labelsize':'large',
          'ytick.labelsize':'large'}
mpl.rcParams.update(params)

# This makes it so that the pandas dataframes don't get truncated horizontally.
pd.options.display.max_columns = 200

In [3]:
df = pd.read_csv("topics_pm_txn_filt.csv")

  df = pd.read_csv("topics_pm_txn_filt.csv")


In [5]:
df["transactiondescription"][df["Name_Clean"]=="Cleaning Supplies"]

7                                             BLEACH CLOROX
8                                          WIPER LINT FREE 
21                                         WIPER LINT FREE 
22                                         WIPER LINT FREE 
24                          OIL ABSORBENT PADS " X " SQUARE
                                ...                        
183166    HYDROGEN PEROXIDE SOLUTION  WT. % IN HO, STABI...
183167    ALDRICH UREA HYDROGEN PEROXIDE, %  EACH - THOM...
183168    ALDRICH UREA HYDROGEN PEROXIDE, %  EACH - THOM...
183184                                           PAN LINERS
183185                                           PAN LINERS
Name: transactiondescription, Length: 4465, dtype: object

In [58]:
df = df[df['transactionamount']>=0]

In [74]:
df['date_column'] = pd.to_datetime(df['transactiondate'])

# Extract month and year into a column
df['year_month'] = df['date_column'].dt.strftime('%Y-%m')
df = df[df["Topic"].notnull()]
df = df[df["Name_Clean"]=="Batteries"]
df = df[df['year_month']>='2019-01']

In [75]:
df['transactionamount_mnth'] = df.groupby('year_month')['transactionamount'].transform('sum')


In [76]:
unique_df = df[["year_month","transactionamount_mnth"]].drop_duplicates().sort_values(by='year_month').reset_index(drop=True) 
unique_df["transactionamount_mnth"] = unique_df["transactionamount_mnth"].clip(lower=0)

In [77]:
ets_add_add_false = ETSModel(unique_df['transactionamount_mnth'], error="add", trend="add", damped_trend=False).fit(maxiter=1000)
ets_add_add_false.summary()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.02201D+01    |proj g|=  9.99000D-02

At iterate    1    f=  1.02084D+01    |proj g|=  9.12514D-04

At iterate    2    f=  1.02083D+01    |proj g|=  9.12159D-04

At iterate    3    f=  1.01995D+01    |proj g|=  7.85327D-04

At iterate    4    f=  1.01768D+01    |proj g|=  1.06226D-04

At iterate    5    f=  1.01764D+01    |proj g|=  1.27899D-05

At iterate    6    f=  1.01764D+01    |proj g|=  1.29675D-05

At iterate    7    f=  1.01764D+01    |proj g|=  1.27899D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function 

0,1,2,3
Dep. Variable:,transactionamount_mnth,No. Observations:,41.0
Model:,ETS(AAN),Log Likelihood,-417.233
Date:,"Tue, 23 Jul 2024",AIC,844.466
Time:,03:10:06,BIC,853.033
Sample:,0,HQIC,847.585
,- 41,Scale,40424250.754
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
smoothing_level,0.0001,,,,,
smoothing_trend,1e-08,,,,,
initial_level,3706.3613,,,,,
initial_trend,139.8969,,,,,

0,1,2,3
Ljung-Box (Q):,5.27,Jarque-Bera (JB):,319.89
Prob(Q):,0.07,Prob(JB):,0.0
Heteroskedasticity (H):,2.27,Skew:,3.02
Prob(H) (two-sided):,0.14,Kurtosis:,15.28


In [78]:
ets_mul_add_false = ETSModel(unique_df['transactionamount_mnth'], error="mul", trend="add", damped_trend=False).fit(maxiter=1000)
ets_mul_add_false.summary()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.01330D+01    |proj g|=  7.14045D-01

At iterate    1    f=  1.00549D+01    |proj g|=  2.35593D-01

At iterate    2    f=  1.00185D+01    |proj g|=  8.86761D-02

At iterate    3    f=  1.00173D+01    |proj g|=  5.84480D-02

At iterate    4    f=  1.00164D+01    |proj g|=  6.80807D-03

At iterate    5    f=  1.00164D+01    |proj g|=  8.29559D-04

At iterate    6    f=  1.00164D+01    |proj g|=  1.53832D-04

At iterate    7    f=  1.00164D+01    |proj g|=  1.53832D-04

At iterate    8    f=  1.00164D+01    |proj g|=  3.18323D-04

At iterate    9    f=  1.00164D+01    |proj g|=  8.49809D-04

At iterate   10    f=  1.00164D+01    |proj g|=  1.56035D-03

At iterate   11    f=  1.00164D+01    |proj g|=  2.80220D-03

At iterate   12    f=  1.00164D+01    |proj g|=  4.70166D-03

At iterate   13    f=  1.0

0,1,2,3
Dep. Variable:,transactionamount_mnth,No. Observations:,41.0
Model:,ETS(MAN),Log Likelihood,-404.303
Date:,"Tue, 23 Jul 2024",AIC,818.606
Time:,03:10:06,BIC,827.174
Sample:,0,HQIC,821.726
,- 41,Scale,0.387
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
smoothing_level,0.9999,0.153,6.548,0.000,0.701,1.299
smoothing_trend,0.0076,0.022,0.343,0.731,-0.036,0.051
initial_level,3653.4430,,,,,
initial_trend,1842.5387,908.509,2.028,0.043,61.894,3623.183

0,1,2,3
Ljung-Box (Q):,4.16,Jarque-Bera (JB):,16.69
Prob(Q):,0.12,Prob(JB):,0.0
Heteroskedasticity (H):,0.59,Skew:,1.3
Prob(H) (two-sided):,0.34,Kurtosis:,4.75


In [79]:
ets_add_mul_false = ETSModel(unique_df['transactionamount_mnth'], error="add", trend="mul", damped_trend=False).fit(maxiter=1000)
ets_add_mul_false.summary()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.03055D+01    |proj g|=  6.41160D-01

At iterate    1    f=  1.02981D+01    |proj g|=  1.19243D-01

At iterate    2    f=  1.02941D+01    |proj g|=  1.02876D-01

At iterate    3    f=  1.02459D+01    |proj g|=  3.12478D+00
  ys=-3.187E-02  -gs= 3.596E-02 BFGS update SKIPPED

At iterate    4    f=  1.02048D+01    |proj g|=  1.43944D-01

At iterate    5    f=  1.02045D+01    |proj g|=  8.69848D-02

At iterate    6    f=  1.02039D+01    |proj g|=  1.39694D-01

At iterate    7    f=  1.02033D+01    |proj g|=  2.49908D-01

At iterate    8    f=  1.01923D+01    |proj g|=  1.47393D-01
  ys=-1.694E-02  -gs= 7.385E-03 BFGS update SKIPPED

At iterate    9    f=  1.01923D+01    |proj g|=  1.19957D-03

At iterate   10    f=  1.01923D+01    |proj g|=  2.57574D-05

           * * *

Tit   = total number of iter

0,1,2,3
Dep. Variable:,transactionamount_mnth,No. Observations:,41.0
Model:,ETS(AMN),Log Likelihood,-417.886
Date:,"Tue, 23 Jul 2024",AIC,845.772
Time:,03:10:06,BIC,854.339
Sample:,0,HQIC,848.892
,- 41,Scale,41732706.982
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
smoothing_level,0.0001,,,,,
smoothing_trend,1e-08,0.005,2.2e-06,1.000,-0.009,0.009
initial_level,3707.9093,1981.939,1.871,0.061,-176.620,7592.439
initial_trend,1.0231,0.040,25.875,0.000,0.946,1.101

0,1,2,3
Ljung-Box (Q):,5.37,Jarque-Bera (JB):,316.41
Prob(Q):,0.07,Prob(JB):,0.0
Heteroskedasticity (H):,2.39,Skew:,3.0
Prob(H) (two-sided):,0.11,Kurtosis:,15.22


In [80]:
ets_mul_mul_false = ETSModel(unique_df['transactionamount_mnth'], error="mul", trend="mul", damped_trend=False).fit()
ets_mul_mul_false.summary()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            4     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.01970D+01    |proj g|=  8.99900D-01

At iterate    1    f=  1.01124D+01    |proj g|=  2.31237D-01

At iterate    2    f=  1.00655D+01    |proj g|=  1.50833D-01

At iterate    3    f=  1.00643D+01    |proj g|=  5.51545D-02

At iterate    4    f=  1.00640D+01    |proj g|=  2.89379D-02

At iterate    5    f=  1.00636D+01    |proj g|=  1.86569D-02

At iterate    6    f=  1.00634D+01    |proj g|=  7.47455D-03

At iterate    7    f=  1.00634D+01    |proj g|=  2.35385D-03

At iterate    8    f=  1.00634D+01    |proj g|=  1.18305D-04

At iterate    9    f=  1.00634D+01    |proj g|=  1.63425D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = nu

0,1,2,3
Dep. Variable:,transactionamount_mnth,No. Observations:,41.0
Model:,ETS(MMN),Log Likelihood,-412.6
Date:,"Tue, 23 Jul 2024",AIC,835.199
Time:,03:10:06,BIC,843.767
Sample:,0,HQIC,838.319
,- 41,Scale,0.438
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
smoothing_level,0.4621,0.070,6.581,0.000,0.325,0.600
smoothing_trend,0.0315,0.021,1.505,0.132,-0.010,0.072
initial_level,3707.9093,,,,,
initial_trend,1.2286,0.108,11.324,0.000,1.016,1.441

0,1,2,3
Ljung-Box (Q):,4.45,Jarque-Bera (JB):,19.72
Prob(Q):,0.11,Prob(JB):,0.0
Heteroskedasticity (H):,0.45,Skew:,1.44
Prob(H) (two-sided):,0.15,Kurtosis:,4.8


In [81]:
ets_add_add_true = ETSModel(unique_df['transactionamount_mnth'], error="add", trend="add", damped_trend=True).fit()
ets_add_add_true.summary()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         1 variables are exactly at the bounds

At iterate    0    f=  1.02128D+01    |proj g|=  1.80000D-01

At iterate    1    f=  1.01893D+01    |proj g|=  7.76638D-02

At iterate    2    f=  1.01849D+01    |proj g|=  7.65379D-02

At iterate    3    f=  1.01666D+01    |proj g|=  2.44485D-02
  ys=-1.569E-02  -gs= 1.239E-02 BFGS update SKIPPED

At iterate    4    f=  1.01629D+01    |proj g|=  1.46119D-02

At iterate    5    f=  1.01626D+01    |proj g|=  1.30438D-01

At iterate    6    f=  1.01625D+01    |proj g|=  1.06772D-02

At iterate    7    f=  1.01625D+01    |proj g|=  9.28857D-04

At iterate    8    f=  1.01625D+01    |proj g|=  3.81029D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of ac

0,1,2,3
Dep. Variable:,transactionamount_mnth,No. Observations:,41.0
Model:,ETS(AAdN),Log Likelihood,-416.661
Date:,"Tue, 23 Jul 2024",AIC,845.321
Time:,03:10:06,BIC,855.603
Sample:,0,HQIC,849.065
,- 41,Scale,39311707.815
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
smoothing_level,0.0001,,,,,
smoothing_trend,2.989e-06,,,,,
damping_trend,0.9696,,,,,
initial_level,3707.9093,2227.260,1.665,0.096,-657.440,8073.259
initial_trend,207.7233,113.649,1.828,0.068,-15.025,430.472

0,1,2,3
Ljung-Box (Q):,5.14,Jarque-Bera (JB):,335.14
Prob(Q):,0.08,Prob(JB):,0.0
Heteroskedasticity (H):,1.95,Skew:,3.09
Prob(H) (two-sided):,0.22,Kurtosis:,15.57


In [82]:
ets_mul_add_true = ETSModel(unique_df['transactionamount_mnth'], error="mul", trend="add", damped_trend=True).fit()
ets_mul_add_true.summary()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         1 variables are exactly at the bounds

At iterate    0    f=  1.01310D+01    |proj g|=  6.62646D-01

At iterate    1    f=  1.00567D+01    |proj g|=  3.03325D-02

At iterate    2    f=  1.00095D+01    |proj g|=  7.73532D-03

At iterate    3    f=  1.00095D+01    |proj g|=  3.76108D-03

At iterate    4    f=  1.00095D+01    |proj g|=  1.82609D-04

At iterate    5    f=  1.00095D+01    |proj g|=  1.82609D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5      5      6      6     0     2   

0,1,2,3
Dep. Variable:,transactionamount_mnth,No. Observations:,41.0
Model:,ETS(MAdN),Log Likelihood,-410.39
Date:,"Tue, 23 Jul 2024",AIC,832.781
Time:,03:10:06,BIC,843.062
Sample:,0,HQIC,836.525
,- 41,Scale,0.806
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
smoothing_level,0.8595,0.095,9.010,0.000,0.673,1.047
smoothing_trend,8.595e-05,0.019,0.004,0.996,-0.038,0.038
damping_trend,0.9800,,,,,
initial_level,3707.9093,,,,,
initial_trend,207.7234,182.308,1.139,0.255,-149.594,565.041

0,1,2,3
Ljung-Box (Q):,4.75,Jarque-Bera (JB):,21.83
Prob(Q):,0.09,Prob(JB):,0.0
Heteroskedasticity (H):,0.33,Skew:,1.48
Prob(H) (two-sided):,0.05,Kurtosis:,5.0


In [83]:
ets_add_mul_true = ETSModel(unique_df['transactionamount_mnth'], error="add", trend="mul", damped_trend=True).fit()
ets_add_mul_true.summary()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         1 variables are exactly at the bounds

At iterate    0    f=  1.02576D+01    |proj g|=  1.80000D-01

At iterate    1    f=  1.01997D+01    |proj g|=  3.58329D-01

At iterate    2    f=  1.01474D+01    |proj g|=  1.87282D-01
  ys=-2.329E-03  -gs= 4.333E-02 BFGS update SKIPPED

At iterate    3    f=  1.01471D+01    |proj g|=  3.20789D-02

At iterate    4    f=  1.01471D+01    |proj g|=  3.96872D-02

At iterate    5    f=  1.01466D+01    |proj g|=  5.99195D-02

At iterate    6    f=  1.01464D+01    |proj g|=  1.89276D-02

At iterate    7    f=  1.01464D+01    |proj g|=  1.29567D-03

At iterate    8    f=  1.01464D+01    |proj g|=  3.36264D-04

At iterate    9    f=  1.01464D+01    |proj g|=  2.69651D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy search

0,1,2,3
Dep. Variable:,transactionamount_mnth,No. Observations:,41.0
Model:,ETS(AMdN),Log Likelihood,-416.003
Date:,"Tue, 23 Jul 2024",AIC,844.006
Time:,03:10:06,BIC,854.287
Sample:,0,HQIC,847.75
,- 41,Scale,38070276.44
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
smoothing_level,0.0001,,,,,
smoothing_trend,1.367e-06,,,,,
damping_trend,0.8565,,,,,
initial_level,3707.9093,1718.631,2.157,0.031,339.454,7076.365
initial_trend,1.1383,,,,,

0,1,2,3
Ljung-Box (Q):,5.07,Jarque-Bera (JB):,336.46
Prob(Q):,0.08,Prob(JB):,0.0
Heteroskedasticity (H):,1.65,Skew:,3.12
Prob(H) (two-sided):,0.36,Kurtosis:,15.57


In [84]:
ets_mul_mul_true = ETSModel(unique_df['transactionamount_mnth'], error="mul", trend="mul", damped_trend=True).fit()
ets_mul_mul_true.summary()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         1 variables are exactly at the bounds

At iterate    0    f=  1.01656D+01    |proj g|=  8.37878D-01

At iterate    1    f=  1.00371D+01    |proj g|=  5.21792D-02

At iterate    2    f=  1.00328D+01    |proj g|=  8.91173D-02
  ys=-1.483E-03  -gs= 3.555E-03 BFGS update SKIPPED

At iterate    3    f=  1.00249D+01    |proj g|=  1.12263D-01
  ys=-2.112E-03  -gs= 6.463E-03 BFGS update SKIPPED

At iterate    4    f=  1.00179D+01    |proj g|=  7.12221D-02

At iterate    5    f=  1.00101D+01    |proj g|=  5.04951D-02

At iterate    6    f=  1.00049D+01    |proj g|=  3.53140D-02

At iterate    7    f=  1.00047D+01    |proj g|=  4.67265D-02

At iterate    8    f=  1.00045D+01    |proj g|=  2.96769D-02

At iterate    9    f=  1.00041D+01    |proj g|=  1.97083D-02

At iterate   10    f=  1.00002D+01    |proj g|=  7.71895D-02
  Positive dir derivative in projection 
  Usin

0,1,2,3
Dep. Variable:,transactionamount_mnth,No. Observations:,41.0
Model:,ETS(MMdN),Log Likelihood,-409.914
Date:,"Tue, 23 Jul 2024",AIC,831.828
Time:,03:10:07,BIC,842.109
Sample:,0,HQIC,835.572
,- 41,Scale,0.496
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
smoothing_level,0.4326,0.105,4.139,0.000,0.228,0.637
smoothing_trend,4.326e-05,,,,,
damping_trend,0.9211,0.057,16.134,0.000,0.809,1.033
initial_level,3707.9092,,,,,
initial_trend,1.2661,,,,,

0,1,2,3
Ljung-Box (Q):,4.47,Jarque-Bera (JB):,13.74
Prob(Q):,0.11,Prob(JB):,0.0
Heteroskedasticity (H):,0.62,Skew:,1.3
Prob(H) (two-sided):,0.38,Kurtosis:,4.12


In [89]:
pred_ets_mul_add_false = ets_mul_add_false.get_prediction(start=65, end=65+30)

In [86]:
pred_ets_mul_add_false

<statsmodels.tsa.exponential_smoothing.ets.PredictionResultsWrapper at 0x7fbf5c88c9d0>

In [87]:
pred = pd.DataFrame(({
    'forecast': pred_ets_mul_add_false.predicted_mean
}))

In [88]:
pred

Unnamed: 0,forecast
65,35542.269183
66,36872.223189
67,38202.177195
68,39532.131201
69,40862.085207
70,42192.039214
71,43521.99322
72,44851.947226
73,46181.901232
74,47511.855238


In [49]:
df[["year_month","transactionamount_mnth"]].drop_duplicates().sort_values(by='year_month').reset_index(drop=True) 

Unnamed: 0,year_month,transactionamount_mnth
0,2019-01,1275.18
1,2019-02,2543.29
2,2019-03,1212.94
3,2019-04,1128.36
4,2019-05,2395.03
...,...,...
60,2024-01,8286.11
61,2024-02,14119.92
62,2024-03,4835.83
63,2024-04,1548.75


In [50]:
date_range = pd.date_range(start='2024-06-01', periods=len(pred), freq='MS')

# Format the date range as 'MM-YYYY' and convert it to a string
date_strings = date_range.strftime('%m-%Y')

# Add the formatted date strings as a new column to the DataFrame
pred['date'] = date_strings

In [51]:
pred_df = pd.DataFrame(({
    'month': date_strings,
    'forecast': pred['forecast']
}))

In [52]:
pred_df

Unnamed: 0,month,forecast
65,06-2024,2889.332562
66,07-2024,3147.685386
67,08-2024,3406.038211
68,09-2024,3664.391036
69,10-2024,3922.74386
70,11-2024,4181.096685
71,12-2024,4439.449509
72,01-2025,4697.802334
73,02-2025,4956.155159
74,03-2025,5214.507983


In [53]:
pred.to_csv("batteries_prediction.csv")

In [54]:
hist = df[["year_month","transactionamount_mnth"]].drop_duplicates().sort_values(by='year_month').reset_index(drop=True) 

In [53]:
hist.to_csv("batteries_history.csv")