In [None]:
# Notebook-wide imports and plotting configuration.
import warnings
import itertools
import numpy as np
import matplotlib.pyplot as plt
# Suppress every warning from this point on, including any raised while the
# libraries below are being imported.
warnings.filterwarnings("ignore")
# Apply the 'fivethirtyeight' style first; the rcParams assignments further
# down then override individual settings of that style.
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm
import matplotlib
# Axis-label / tick-label font sizes and black text for all later figures.
matplotlib.rcParams['axes.labelsize'] = 14
matplotlib.rcParams['xtick.labelsize'] = 12
matplotlib.rcParams['ytick.labelsize'] = 12
matplotlib.rcParams['text.color'] = 'k'
from fbprophet import Prophet
# NOTE(review): the sklearn metrics and `ml_metrics` are not referenced by any
# cell visible in this notebook - confirm whether they are still needed.
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import ml_metrics as metrics
from pylab import rcParams
import plotly.graph_objects as go
import seaborn as sns

In [None]:
# Load the cleaned meter readings and keep only the rows whose longitude lies
# in the half-open band [35.28709, 35.29983).
eldoret_data = pd.read_excel('../input/refined-readings/clean_data.xlsx')
longitude = eldoret_data['Longitude']
in_longitude_band = (longitude >= 35.28709) & (longitude < 35.29983)
eldoret_locs = eldoret_data.loc[in_longitude_band]
eldoret_locs.head()

In [None]:
# Discard rows whose raw 'TimeTaken' text contains a dash, parse the remaining
# values as datetimes and order the frame chronologically.
dashed_timestamp = eldoret_locs['TimeTaken'].str.contains("-", na=False)
eldoret_locs = (
    eldoret_locs[~dashed_timestamp]
    .assign(TimeTaken=lambda frame: pd.to_datetime(frame['TimeTaken']))
    .sort_values('TimeTaken')
)
eldoret_locs.info()

In [None]:
# Preview the first rows of the filtered readings (default: five rows).
eldoret_locs.head()

In [None]:
# Reduce each timestamp to its calendar date so readings can be grouped per day.
# Not idempotent: the `.dt` accessor needs a datetime column, so this cell
# cannot be run a second time on its own output.
reading_dates = eldoret_locs['TimeTaken'].dt.date
eldoret_locs = eldoret_locs.assign(TimeTaken=reading_dates)
eldoret_locs.head()

In [None]:
# Correct the misspelled source column name, then total the flow rate per date.
eldoret_locs = eldoret_locs.rename(columns={'DaillyFowRate': 'DailyFlowRate'})
daily_consumption_info = (
    eldoret_locs
    .groupby('TimeTaken', as_index=False)['DailyFlowRate']
    .sum()
)
daily_consumption_info.head()

In [None]:
# Keep an untouched copy of the per-day totals, index the working frame by
# date and plot the complete daily series.
daily_readings = daily_consumption_info.copy()
daily_consumption_info = daily_consumption_info.set_index('TimeTaken')
daily_consumption = daily_consumption_info['DailyFlowRate']

overview_ax = daily_consumption.plot(figsize=(15, 6))
overview_ax.set_title('Daily consumption Patterns for Eldowas Meters')
plt.show()

In [None]:
# Mirror the date index into a regular column, then keep only the rows between
# 2020-08-01 and 2020-12-08 (label slicing includes both end points).
daily_consumption_info['Time-Stamp'] = daily_consumption_info.index
start_date = pd.Timestamp('2020-08-01').date()
end_date = pd.Timestamp('2020-12-08').date()
daily_consumption_info = daily_consumption_info.loc[start_date:end_date]
daily_consumption_info.head()

In [None]:
# Plot the windowed daily series and write the figure to disk.
daily_consumption2 = daily_consumption_info['DailyFlowRate']
windowed_ax = daily_consumption2.plot(figsize=(15, 6))
windowed_ax.set_title('Daily consumption Patterns for Eldowas Meters from Aug 2020')
windowed_ax.figure.savefig('Eldoret_consumption.png')
plt.show()

In [None]:
# Keep only the days whose total flow rate is strictly below 100.
# NOTE(review): 100 looks like an outlier cutoff - confirm its origin and units.
below_cutoff = daily_consumption_info['DailyFlowRate'].lt(100)
daily_consumption_info = daily_consumption_info.loc[below_cutoff]
daily_consumption_info.head()


In [None]:
# Histogram plus KDE curve of the daily totals.
# NOTE(review): `distplot` is flagged as deprecated by recent seaborn releases -
# confirm the pinned seaborn version before upgrading.
ax = sns.distplot(
    daily_consumption_info.DailyFlowRate,
    hist=True,
    hist_kws={"edgecolor": 'w', "linewidth": 3},
    kde_kws={"linewidth": 3},
)

# Tick labels on both axes share one font size.
for resize_tick_labels in (plt.xticks, plt.yticks):
    resize_tick_labels(fontsize=14)

# Axis labels and title; the trailing ';' suppresses the cell's text output.
ax.set_xlabel('Daily Flow Rate', fontsize=14)
ax.set_ylabel('frequency', fontsize=14)
ax.set_title('Distribution of Daily Cumulative Consumption in Eldoret', fontsize=20);

In [None]:
# Rebuild the frame on a datetime index derived from the 'Time-Stamp' column,
# then display that index.
time_series_df = (
    daily_consumption_info
    .assign(TimeTaken=pd.to_datetime(daily_consumption_info['Time-Stamp']))
    .set_index('TimeTaken')
)
time_series_df.index

In [None]:
# Daily aggregates on a gap-free calendar: resampling to 'D' inserts a row for
# every missing day, and summing gives those rows a total of 0.
target = time_series_df['DailyFlowRate'].resample('D').sum()
target_df = pd.DataFrame(target)

# Replace every zero total with the most recent non-zero total (forward fill).
# `Series.replace(..., method='ffill')` is deprecated in current pandas, so the
# same result is built from mask + ffill. Zeros at the very start of the series
# have no earlier value to copy and therefore stay 0, as they did before.
daily_flow = target_df['DailyFlowRate']
target_df['DailyFlowRate'] = daily_flow.mask(daily_flow == 0.0).ffill().fillna(0.0)
target_df.info()
    

In [None]:
# Expose the datetime index as a column and derive month-name / year columns.
target_df['Date'] = target_df.index
calendar = target_df['Date'].dt
target_df['Month'] = calendar.month_name()
target_df['Year'] = calendar.year

In [None]:
# Plot the gap-filled daily series; this Series is also the input of the
# decomposition and SARIMAX cells that follow.
daily_consumption = target_df['DailyFlowRate'].resample('D').sum()
cleaned_ax = daily_consumption.plot(figsize=(15, 6))
cleaned_ax.set_title('Cleaned Daily consumption Patterns between Aug 2020 and Dec 2020')
plt.show()

In [None]:
# Additive decomposition of the daily series into its components; the figure
# is saved to disk and then shown.
rcParams['figure.figsize'] = (18, 8)
decomposition = sm.tsa.seasonal_decompose(daily_consumption, model='additive')
fig = decomposition.plot()
fig.savefig('seasonality.png')
plt.show()


# IMPLEMENTATION OF THE SEASONAL ARIMA (SARIMAX) MODEL

In [None]:
# Hyperparameter grid: every (p, d, q) combination with each term in {0, 1},
# and the same combinations extended with a seasonal period of 12.
# NOTE(review): the series is daily, so a period of 12 may not match its
# seasonality - confirm the intended period.
p = d = q = range(2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(*combo, 12) for combo in itertools.product(p, d, q)]

print('Examples of parameter combinations for Seasonal ARIMA...')
for order_pos, seasonal_pos in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print('SARIMAX: {} x {}'.format(pdq[order_pos], seasonal_pdq[seasonal_pos]))

In [None]:
# Grid search: fit one SARIMAX model per (order, seasonal_order) pair, print its
# AIC, and remember the pair with the lowest AIC.
best_aic = float('inf')
best_order = best_seasonal_order = None

for param, param_seasonal in itertools.product(pdq, seasonal_pdq):
    try:
        mod = sm.tsa.statespace.SARIMAX(daily_consumption,
                                        order=param,
                                        seasonal_order=param_seasonal,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False)
        results = mod.fit()
    except Exception as fit_error:
        # Stay best-effort (one failing combination must not stop the search),
        # but report the failure instead of hiding it. Catching `Exception`
        # rather than using a bare `except` lets KeyboardInterrupt through.
        print('ARIMA{}x{} - skipped: {}'.format(param, param_seasonal, fit_error))
        continue

    print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
    # A NaN AIC compares False here and is therefore never selected.
    if results.aic < best_aic:
        best_aic = results.aic
        best_order, best_seasonal_order = param, param_seasonal

if best_order is None:
    print('No parameter combination could be fitted.')
else:
    print('Lowest AIC: ARIMA{}x{} - AIC:{}'.format(best_order, best_seasonal_order, best_aic))

In [None]:
# Fit the chosen specification, (1, 1, 1) x (1, 1, 1, 12), and print the
# coefficient table of the fit summary.
selected_order = (1, 1, 1)
selected_seasonal_order = (1, 1, 1, 12)
mod = sm.tsa.statespace.SARIMAX(
    daily_consumption,
    order=selected_order,
    seasonal_order=selected_seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
results = mod.fit()
coefficient_table = results.summary().tables[1]
print(coefficient_table)

In [None]:
# Diagnostic plots of the fitted model.
diagnostics_fig = results.plot_diagnostics(figsize=(16, 8))
plt.show()

In [None]:
# One-step-ahead (non-dynamic) predictions from 2020-09-01 onward, drawn over
# the observed series together with their confidence band.
pred = results.get_prediction(start=pd.to_datetime('2020-09-01'), dynamic=False)
pred_ci = pred.conf_int()
lower_bound, upper_bound = pred_ci.iloc[:, 0], pred_ci.iloc[:, 1]

ax = daily_consumption['2020':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))
ax.fill_between(pred_ci.index, lower_bound, upper_bound, color='k', alpha=.2)
ax.set(xlabel='Date', ylabel='Daily consumption')
ax.legend()
ax.set_title('Actual Consumption vs Forecasted consumption for the Period Starting Aug 2020')
ax.figure.savefig('comparisonvsforecast.png')
plt.show()

# IMPLEMENTING THE FBPROPHET MODEL

In [None]:
# Build the two-column frame handed to Prophet: 'ds' (date) and 'y' (value).
# A dedicated name is used here so that `daily_consumption` - the Series the
# decomposition and SARIMAX cells read - is not rebound to a DataFrame, which
# would break those cells when they are re-run after this one.
prophet_input = target_df.assign(Date=target_df.index)
consumption_df = prophet_input[['Date', 'DailyFlowRate']].rename(
    columns={'Date': 'ds', 'DailyFlowRate': 'y'}
)

# Model configuration. Linear growth is used instead of logistic because, while
# some saturation is anticipated, it won't be realized soon. The built-in
# seasonalities are switched off and replaced by the custom ones below.
consumption_model1 = Prophet(
    growth='linear',
    seasonality_mode='multiplicative',
    daily_seasonality=False,
    weekly_seasonality=False,
    yearly_seasonality=False,
)
consumption_model1.add_seasonality(name='monthly', period=30.5, fourier_order=20)
# NOTE(review): the input holds one observation per day, so a period=1
# seasonality (and an order-20 Fourier series for period=7) may not be
# identifiable - confirm these settings are intended.
consumption_model1.add_seasonality(name='daily', period=1, fourier_order=20)
consumption_model1.add_seasonality(name='weekly', period=7, fourier_order=20)

consumption_model1.fit(consumption_df)

# Forecast over the history plus 120 further days.
future_dates = consumption_model1.make_future_dataframe(periods=120, freq='D')
daily_consumption_prediction = consumption_model1.predict(future_dates)



In [None]:
# Prophet's plot() creates its own figure, so the size is passed to it directly
# instead of opening a separate, empty figure beforehand with plt.figure().
forecast_fig = consumption_model1.plot(
    daily_consumption_prediction,
    xlabel='Date',
    ylabel='Daily Predicted Consumption',
    figsize=(18, 6),
)
# Set the title before saving so that the PNG on disk includes it.
forecast_fig.axes[0].set_title('Projected Daily Consumption for Eldoret Smart IoT Meters Upto March 2021')
# Re-run the layout so the newly added title has room inside the figure.
forecast_fig.tight_layout()
forecast_fig.savefig('prophet.png')

In [None]:
# Component plot of the fitted forecast. The returned figure is bound to a name
# so the notebook does not echo it as the cell's value in addition to the
# inline rendering of the figure itself.
components_fig = consumption_model1.plot_components(daily_consumption_prediction)

In [None]:
# Put the forecast ('yhat') next to the observed values ('y'), one row per date.
metrics_eval_df = (
    daily_consumption_prediction.set_index('ds')[['yhat']]
    .join(consumption_df.set_index('ds')['y'])
    .reset_index()
)

# Calendar columns used for the monthly roll-ups (also read by the 2021 cell).
cumulative_consumption = metrics_eval_df.copy()
cumulative_consumption['Month'] = cumulative_consumption['ds'].dt.month_name()
cumulative_consumption['Year'] = cumulative_consumption['ds'].dt.year

# 2020 rows only, with December excluded.
cumulative_consumption_2020 = cumulative_consumption[cumulative_consumption['Year'] == 2020]
cumulative_consumption_2020 = cumulative_consumption_2020[
    cumulative_consumption_2020['Month'] != 'December'
]
cdf = cumulative_consumption_2020.copy()

# sort=False keeps the groups in order of first appearance. The rows follow the
# forecast frame's date order, so months come out chronologically rather than
# alphabetically, which is the order the bar charts then display.
cdf_group = cdf.groupby(['Month', 'Year'], sort=False)
cdf_monthly_accumulated_flow = cdf_group[['yhat', 'y']].sum().rename(
    columns={'yhat': 'PredictedConsumption', 'y': 'ActualConsumption'}
)

# NOTE(review): 70 is presumably the tariff per unit consumed - confirm the
# value and its currency.
cdf_monthly_accumulated_flow['Predicted_Revenue'] = cdf_monthly_accumulated_flow['PredictedConsumption'] * 70
cdf_monthly_accumulated_flow['Actual_Revenue'] = cdf_monthly_accumulated_flow['ActualConsumption'] * 70
cdf_monthly_accumulated_flow = cdf_monthly_accumulated_flow.reset_index()

In [None]:
# 2021 data: forecast-only rows, so the observed column 'y' is dropped.
cumulative_consumption_2021 = cumulative_consumption[cumulative_consumption['Year'] == 2021]
df_2021 = cumulative_consumption_2021.drop(columns=['y'])

# sort=False keeps the months in order of first appearance (the row order of
# `cumulative_consumption`) instead of sorting the month names alphabetically.
df_2021_groups = df_2021.groupby(['Month', 'Year'], sort=False)
predicted_consumption = df_2021_groups[['yhat']].sum().rename(
    columns={'yhat': 'PredictedMonthlyConsumption'}
)

# NOTE(review): 70 is presumably the tariff per unit consumed - confirm the
# value and its currency.
predicted_consumption['PredictedRevenue'] = predicted_consumption['PredictedMonthlyConsumption'] * 70
predicted_consumption = predicted_consumption.reset_index()

In [None]:
# Bar chart with three traces: actual and forecast monthly consumption for
# 2020, plus the projected monthly consumption for 2021.
monthly_2020 = cdf_monthly_accumulated_flow
consumption_traces = [
    go.Bar(
        name='ActualMonthlyConsumption',
        x=monthly_2020['Month'],
        y=monthly_2020['ActualConsumption'],
    ),
    go.Bar(
        name='Forecasted Consumption',
        x=monthly_2020['Month'],
        y=monthly_2020['PredictedConsumption'],
    ),
    go.Bar(
        name='2021 Projected Consumption',
        x=predicted_consumption['Month'],
        y=predicted_consumption['PredictedMonthlyConsumption'],
    ),
]
plot = go.Figure(data=consumption_traces)
plot.show()

In [None]:
# Bar chart with three traces: actual and forecast monthly revenue for 2020,
# plus the projected monthly revenue for 2021.
revenue_2020 = cdf_monthly_accumulated_flow
revenue_traces = [
    go.Bar(
        name='ActualMonthlyRevenue',
        x=revenue_2020['Month'],
        y=revenue_2020['Actual_Revenue'],
    ),
    go.Bar(
        name='Forecasted Revenue',
        x=revenue_2020['Month'],
        y=revenue_2020['Predicted_Revenue'],
    ),
    go.Bar(
        name='2021 Projected Revenue',
        x=predicted_consumption['Month'],
        y=predicted_consumption['PredictedRevenue'],
    ),
]
plot = go.Figure(data=revenue_traces)
plot.show()