In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#%matplotlib inline
import seaborn as sn
from matplotlib import style
import matplotlib.font_manager as font_manager
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.arima_model import ARIMA

In [25]:
style.use('ggplot')
font_path = 'C:\Windows\Fonts\Arial.ttf'
font_prop = font_manager.FontProperties(fname=font_path, size=12)
title_font = {'fontname':'Arial', 'size':'15', 'color':'black', 'weight':'normal',
              'verticalalignment':'bottom'} # Bottom vertical alignment for more space
axis_font = {'fontname':'Arial', 'size':'12'}

In [26]:
filePathMonthly = 'C:\\Users\\lenovo\\Documents\\Dissertation\\Report\\DataFiles\\PoissonDataMonthly.csv'
filePathDaily = 'C:\\Users\\lenovo\\Documents\\Dissertation\\Report\\DataFiles\\PoissonDataDaily.csv'

In [27]:
dateparseMonthly = lambda dates: pd.datetime.strptime(dates, '%m/%Y')
timeSeriesMonthly = pd.read_csv(filePathMonthly, sep=',', parse_dates=[0], header=1,
                         names=['MonthYear','Index', 'Fat', 'Sanitary Products', 'Roots','Other','Total'],
                         date_parser=dateparseMonthly, index_col='MonthYear')

In [28]:
rolmeanMonthly = timeSeriesMonthly['Fat'].rolling(center=False,window=3).mean()
rolstdMonthly = timeSeriesMonthly['Fat'].rolling(center=False,window=3).std()

In [29]:
plt.figure(figsize=(8, 6), dpi=400)
plt.plot(timeSeriesMonthly['Fat'], color='r', marker='o')
plt.rcParams['font.size'] = 50
plt.plot(rolmeanMonthly, label='Rolling Mean - Fat', color='orange', marker='^')
#plt.title('Blockage trend for Fat - Per Month', **title_font)
plt.xlabel('Month/Year', **axis_font)
plt.ylabel('Blockage Count', **axis_font)
plt.legend(prop=font_prop, numpoints=1,loc='upper right')
plt.savefig("C:\\Users\\lenovo\\Documents\\Dissertation\\Report\\Images\\CompleteMonthlyBTforFat.png", dpi=400)
plt.show()

In [30]:
lag_acf = acf(timeSeriesMonthly['Fat'], nlags=20)
lag_pacf = pacf(timeSeriesMonthly['Fat'], nlags=20, method='ols')

plt.subplot(121) 
plt.plot(lag_acf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(timeSeriesMonthly['Fat'])),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(timeSeriesMonthly['Fat'])),linestyle='--',color='gray')
plt.title('Autocorrelation Function')

plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(timeSeriesMonthly['Fat'])),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(timeSeriesMonthly['Fat'])),linestyle='--',color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()

ValueError: left cannot be >= right

In [None]:
model = ARIMA((timeSeriesMonthly['Fat']), order=(0, 1, 2))  
results_AR = model.fit(disp=-1)  
plt.plot(timeSeriesMonthly['Fat'], color='blue')
plt.plot(results_AR.fittedvalues, color='red')
plt.title('RSS: %.4f'% sum((results_AR.fittedvalues-timeSeriesMonthly['Fat'])**2))
plt.show()

In [None]:
predictionsARFat = pd.Series(results_AR.fittedvalues, copy=True)
predictionsARFatCumulativeSum = predictionsARFat.cumsum()
print(predictionsARFat.head())
print(predictionsARFatCumulativeSum.head())
predictionFat = np.exp(predictionsARFatCumulativeSum)
plt.plot(timeSeriesMonthly['Fat'])
plt.plot(predictionFat)

In [None]:
rolmeanMonthlySP = timeSeriesMonthly['Sanitary Products'].rolling(center=False,window=3).mean()
rolstdMonthlySP = timeSeriesMonthly['Sanitary Products'].rolling(center=False,window=3).std()
plt.plot(timeSeriesMonthly['Sanitary Products'], color='r', marker='o')
plt.plot(rolmeanMonthlySP, label='Rolling Mean - Sanitary Products', color='orange', marker='^')
#plt.plot(rolstdMonthlySP, label='Rolling SD - SP',color = 'purple')
#plt.plot(timeSeriesMonthly['Roots'], color='g', marker='^')
#plt.plot(timeSeriesMonthly['Sanitary Products'], color='b', marker='o')
#plt.title('Blockage trend for Sanitary Products - Per Month', **title_font)
plt.xlabel('Month/Year', **axis_font)
plt.ylabel('Blockage Count', **axis_font)
plt.legend(prop=font_prop,numpoints=1,loc='lower right')
plt.savefig("C:\\Users\\lenovo\\Documents\\Dissertation\\Report\\Images\\CompleteMonthlyBTforSP.png", dpi=400)
plt.show()

In [None]:
rolmeanMonthlyTotal = timeSeriesMonthly['Total'].rolling(center=False,window=3).mean()
rolstdMonthlyTotal = timeSeriesMonthly['Total'].rolling(center=False,window=3).std()
plt.plot(timeSeriesMonthly['Total'], color='r', marker='o', label='Total Blockages')
plt.plot(rolmeanMonthlyTotal, label='Rolling Mean - Total Blockages', color='orange', marker='^')
#plt.title('Blockage trend for Total Blockages - Per Month', **title_font)
plt.xlabel('Month/Year', **axis_font)
plt.ylabel('Blockage Count', **axis_font)
plt.legend(prop=font_prop, numpoints=1, loc='center right')
plt.savefig("C:\\Users\\lenovo\\Documents\\Dissertation\\Report\\Images\\CompleteMonthlyBTforTotal.png", dpi=400)
plt.show()

In [None]:
rolmeanMonthlyRoots = timeSeriesMonthly['Roots'].rolling(center=False,window=3).mean()
rolstdMonthlyRoots = timeSeriesMonthly['Roots'].rolling(center=False,window=3).std()
plt.plot(timeSeriesMonthly['Roots'], color='r', marker='o', label='Roots')
plt.plot(rolmeanMonthlyRoots, label='Rolling Mean - Roots', color='orange', marker='^')
#plt.title('Blockage trend for Roots - Per Month', **title_font)
plt.xlabel('Month/Year', **axis_font)
plt.ylabel('Blockage Count', **axis_font)
plt.legend(prop=font_prop, numpoints=1, loc='upper right')
plt.savefig("C:\\Users\\lenovo\\Documents\\Dissertation\\Report\\Images\\CompleteMonthlyBTforRoots.png", dpi=400)
plt.show()

In [None]:
dateparseDaily = lambda dates: pd.datetime.strptime(dates, '%d/%m/%Y')
timeSeriesDaily = pd.read_csv(filePathDaily, sep=',', parse_dates=[0], header=1,
                         names=['Date','Index', 'Fat', 'Sanitary Products', 'Roots','Other','Total'],
                         date_parser=dateparseDaily, index_col='Date')

In [None]:
plt.plot(timeSeriesDaily['Fat'], color='r', marker='o')
plt.title('Blockage trend for Fat - Per Day', **title_font)
plt.xlabel('Date', **axis_font)
plt.ylabel('Blockage Count', **axis_font)
plt.legend(prop=font_prop, numpoints=1)
plt.show()

In [None]:
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats.stats import pearsonr

data= pd.DataFrame(pd.read_csv(filePathMonthly))

In [None]:
modFat = smf.poisson('FatCount~Index', data=data).fit(method='bfgs')
print(modFat.summary2())


modSP = smf.poisson('SanitaryProductCount~Index', data=data).fit(method='bfgs')
print(modSP.summary2())

modRoots = smf.poisson('RootsCount~Index', data=data).fit(method='bfgs')
print(modRoots.summary2())

In [None]:
dataDaily = pd.DataFrame(pd.read_csv(filePathDaily))

In [None]:
modFat = smf.poisson('FatCount~Index', data=dataDaily).fit(method='bfgs')
print(modFat.summary2())

modSP = smf.poisson('SanitaryProductCount~Index', data=dataDaily).fit(method='bfgs')
print(modSP.summary2())

modRoots = smf.poisson('RootsCount~Index', data=dataDaily).fit(method='bfgs')
print(modRoots.summary2())

modTotal = smf.poisson('TotalCount~Index', data=dataDaily).fit(method='bfgs')
print(modTotal.summary2())

In [None]:
fileGrid0 = 'C:\\Users\\lenovo\\Documents\\Dissertation\\Report\\DataFiles\\Grid1_Size20000_Poisson.csv'
dateparseMonthly = lambda dates: pd.datetime.strptime(dates, '%m/%Y')
dataGrid0Size20 = pd.read_csv(fileGrid0, sep=',', parse_dates=[1], header=1,
                         names=['GridIndex','Date','Index', 'Fat', 'Sanitary Products', 'Roots','Other','Total'],
                         date_parser=dateparseMonthly, index_col='Date')
rolmeanMonthlyGrid1 = dataGrid0Size20['Fat'].rolling(center=False,window=3).mean()
rolstdMonthlyGrid1 = dataGrid0Size20['Fat'].rolling(center=False,window=3).std()
plt.figure(figsize=(8, 6), dpi=400)
plt.plot(dataGrid0Size20['Fat'], color='r', marker='o')
plt.plot(rolmeanMonthlyGrid1, color='orange', marker='^', label="Rolling Mean - Fat")
plt.xlabel('Month/Year', **axis_font)
plt.ylabel('Blockage Count', **axis_font)
plt.legend(prop=font_prop, numpoints=1,loc='upper right')
plt.savefig("C:\\Users\\lenovo\\Documents\\Dissertation\\Report\\Images\\Grid1Size20.png", dpi=400)
plt.show()