In this notebook we take a closer look at the data from the Doganella Aquifer.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import sklearn as sk  # machine learning
import matplotlib.pyplot as plt  # data visualization
import seaborn as sns  # data visualization
%matplotlib inline

In [None]:
import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# /kaggle/input/acea-water-prediction/Aquifer_Doganella.csv
# NOTE(review): the ACEA files appear to store Date as dd/mm/yyyy — confirm by
# looking at the raw column. Without dayfirst=True pandas may read ambiguous
# dates (day <= 12) month-first or leave the column unparsed, which scrambles
# the timeline used by every date-based slice below.
dog = pd.read_csv(
    "/kaggle/input/acea-water-prediction/Aquifer_Doganella.csv",
    parse_dates=['Date'],
    dayfirst=True,
)

In [None]:
# Summary of the frame: column names, dtypes and non-null counts per column
dog.info()

In [None]:
# Drop the literal '+' from column names so they can be used with attribute
# access (dog.Volume_Pozzo_56 below). '+' is a regex metacharacter and
# str.replace treated patterns as regexes by default before pandas 2.0, so
# request a plain-text replacement explicitly.
dog.columns = dog.columns.str.replace('+', '', regex=False)

In [None]:
# Show the last five rows of the frame
dog.tail()

In [None]:
# Overview figure: four stacked panels, one per variable family, with every
# column of a family drawn on the same axes. The frame has not been re-indexed
# yet at this point, so the x-axis is the frame's current index.
panel_columns = [
    ('Rainfall', ['Rainfall_Monteporzio', 'Rainfall_Velletri']),
    ('Depth_to_Groundwater',
     ['Depth_to_Groundwater_Pozzo_{}'.format(well) for well in range(1, 10)]),
    # There is no separate 5 and 6 here: the volume column is named Volume_Pozzo_56.
    ('Volume',
     ['Volume_Pozzo_{}'.format(well) for well in ('1', '2', '3', '4', '56', '7', '8', '9')]),
    ('Temperature', ['Temperature_Monteporzio', 'Temperature_Velletri']),
]

fig, axs = plt.subplots(4, figsize=(20, 20))
for panel, (title, columns) in zip(axs, panel_columns):
    panel.set_title(title)
    for column in columns:
        panel.plot(dog[column])
plt.show()

In [None]:
# Use the Date column as the index so the frame can be sliced by date below.
# Not idempotent: Date stops being a column, so re-running this cell fails.
dog = dog.set_index('Date')

In [None]:
# Rainfall vs. groundwater depth over the most recent three years of data.
# DataFrame.last() is deprecated in recent pandas, and its '3Y' alias is
# anchored on year ends rather than spanning three years back from the last
# observation. An explicit mask on the index avoids both issues and does not
# require the index to be sorted.
cutoff = dog.index.max() - pd.DateOffset(years=3)
recent = dog[dog.index >= cutoff]

fig, ax = plt.subplots(figsize=(18, 6))
recent['Rainfall_Monteporzio'].plot(ax=ax)
recent['Depth_to_Groundwater_Pozzo_1'].plot(ax=ax)
plt.show()

In [None]:
# Zoom in on one year: label-based slicing includes both end dates.
window_2019 = dog.loc['2019-01-01':'2020-01-01']

fig, ax = plt.subplots(figsize=(18, 8))
for column in ('Rainfall_Monteporzio', 'Depth_to_Groundwater_Pozzo_1'):
    window_2019[column].plot(ax=ax)
plt.show()

In [None]:
## -- display a heatmap of missing values
## with thanks to https://www.kaggle.com/iamleonie/intro-to-time-series-forecasting

# Rows are columns of the frame (transposed), shaded where the value is NaN.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(16,5))
# Draw on the axes created above instead of relying on the "current axes".
sns.heatmap(dog.T.isna(), cmap='Blues', ax=ax)
ax.set_title('Fields with Missing Values', fontsize=16)
# Tick.label no longer exists in current Matplotlib; tick_params sets the
# major y-tick label size through the supported API.
ax.tick_params(axis='y', labelsize=14)
plt.show()

In [None]:
## -- let's look at the more complete recent data
# Take an explicit copy: dog1 gets columns reassigned further down, and writing
# into a bare slice of dog triggers SettingWithCopyWarning (and is ambiguous
# about whether dog itself is modified).
dog1 = dog.loc['2017-01-01':'2020-06-01'].copy()

In [None]:
# Missing-value map for the recent slice: one row per column, shaded where NaN.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(16,5))
# Draw on the axes created above instead of relying on the "current axes".
sns.heatmap(dog1.T.isna(), cmap='Blues', ax=ax)
ax.set_title('Fields with Missing Values', fontsize=16)
# Tick.label no longer exists in current Matplotlib; tick_params sets the
# major y-tick label size through the supported API.
ax.tick_params(axis='y', labelsize=14)
plt.show()

In [None]:
# Preview the three series with gaps forward-filled; this is for display only,
# dog1 itself is not modified.
fig, ax = plt.subplots(figsize=(18, 8))
for column in ('Rainfall_Monteporzio', 'Rainfall_Velletri', 'Depth_to_Groundwater_Pozzo_1'):
    dog1[column].ffill().plot(ax=ax)
plt.show()

In [None]:
## -- interpolate to fill missing values (first three columns)
# Series.interpolate() with its defaults; each column is replaced by its
# interpolated version.
for column in ('Rainfall_Monteporzio', 'Rainfall_Velletri', 'Depth_to_Groundwater_Pozzo_1'):
    dog1[column] = dog1[column].interpolate()

In [None]:
# Same three series as stored in dog1 now, all drawn on one set of axes.
fig, ax = plt.subplots(figsize=(18, 8))
for column in ('Rainfall_Monteporzio', 'Rainfall_Velletri', 'Depth_to_Groundwater_Pozzo_1'):
    dog1[column].plot(ax=ax)
plt.show()

In [None]:
# Missing-value map of dog1 again: one row per column, shaded where NaN.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(16,5))
# Draw on the axes created above instead of relying on the "current axes".
sns.heatmap(dog1.T.isna(), cmap='Blues', ax=ax)
ax.set_title('Fields with Missing Values', fontsize=16)
# Tick.label no longer exists in current Matplotlib; tick_params sets the
# major y-tick label size through the supported API.
ax.tick_params(axis='y', labelsize=14)
plt.show()

In [None]:
#dog_week = dog.resample('w').mean()
#dog_week.tail()

In [None]:
#dog1_norm = np.log(dog1).diff()

In [None]:
# Line plot of the Pozzo 1 groundwater depth; trailing ';' hides the Axes repr.
dog1['Depth_to_Groundwater_Pozzo_1'].plot.line(figsize=(18, 6));

In [None]:
# Date is the index of dog1, not a column, so sort on the index directly rather
# than going through sort_values' index-level-name fallback.
# NOTE(review): the interpolation earlier in the notebook already assumes
# chronological order — consider sorting right after dog1 is created.
dog1 = dog1.sort_index()

In [None]:
!pip install hampel

In [None]:
from hampel import hampel

In [None]:
dog1.Depth_to_Groundwater_Pozzo_1 = hampel(dog1.Depth_to_Groundwater_Pozzo_1, window_size=19, n=2)

In [None]:
# Re-plot the depth column as it stands after the Hampel step.
dog1['Depth_to_Groundwater_Pozzo_1'].plot.line(figsize=(18, 6));

In [None]:
#dog1.Depth_to_Groundwater_Pozzo_1.tail(56)

In [None]:
#dog1['logdiff'] = np.log(dog1['Depth_to_Groundwater_Pozzo_1']).diff()

In [None]:
#dog1.Depth_to_Groundwater_Pozzo_1.dropna(inplace=True)

In [None]:
# Depth column once more; the cells in between are commented out, so this
# shows the same data as the previous plot.
dog1['Depth_to_Groundwater_Pozzo_1'].plot.line(figsize=(18, 6));

In [None]:
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

In [None]:
# 20-observation rolling window over the depth series; the first 19 entries of
# each result are NaN because the window is not yet full.
depth_window = dog1['Depth_to_Groundwater_Pozzo_1'].rolling(20)
rolmean = depth_window.mean()
rolstd = depth_window.std()

In [None]:
# Visual stationarity check: the series together with its rolling mean and
# rolling standard deviation on one set of axes.
rolling_fig, rolling_ax = plt.subplots(figsize=(18, 6))
orig = rolling_ax.plot(dog1['Depth_to_Groundwater_Pozzo_1'], label='Original', color='blue')
mean = rolling_ax.plot(rolmean, label='Rolling Mean', color='red')
std = rolling_ax.plot(rolstd, label='Rolling Std Deviation', color='black')
rolling_ax.set_title('Rolling Mean & Standard Deviation')
rolling_ax.legend(loc='best')
plt.show(block=False)

In [None]:
# Perform Dickey-Fuller test
# adfuller cannot handle missing values, and interpolation does not fill gaps
# at the very start of a series, so drop any remaining NaNs first.
adf_input = dog1['Depth_to_Groundwater_Pozzo_1'].dropna()
dftest = adfuller(adf_input, autolag='AIC')

# The first four entries of the result are the statistic, p-value, lag count
# and sample size; entry 4 maps significance levels to critical values.
dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
for key, value in dftest[4].items():
    dfoutput['Critical Value ({0})'.format(key)] = value

dfoutput

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Correlogram: correlation of the series with lagged copies of itself, up to lag 10
depth_values = dog1['Depth_to_Groundwater_Pozzo_1'].to_numpy()
fig, ax = plt.subplots(figsize=(12,5))
plot_acf(depth_values, lags=10, ax=ax)
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

# Partial autocorrelation of the depth series, up to lag 10
depth_values = dog1['Depth_to_Groundwater_Pozzo_1'].to_numpy()
fig, ax = plt.subplots(figsize=(12,5))
plot_pacf(depth_values, lags=10, ax=ax)
plt.show()

In [None]:
# statsmodels.tsa.arima_model.ARIMA has been removed from statsmodels; the
# supported implementation lives in statsmodels.tsa.arima.model.
from statsmodels.tsa.arima.model import ARIMA

# Fit ARIMA(p=2, d=1, q=1) on the original levels: d=1 makes the model
# difference the series internally, so no pre-differenced input is needed.
# A plain array is passed (not the Series), so the results are positional
# arrays rather than date-indexed. With this implementation fittedvalues are
# on the level scale of the input, and no trend term is included by default
# when d=1.
ar1 = ARIMA(dog1['Depth_to_Groundwater_Pozzo_1'].values, order=(2, 1, 1)).fit()
ar1.summary()

In [None]:
# Observed depth (blue) against the model's in-sample fitted values (red),
# both plotted by position.
# NOTE(review): whether fittedvalues is on the level or the differenced scale
# depends on the ARIMA implementation that produced ar1 — confirm the two
# curves are comparable.
preds = ar1.fittedvalues
fit_fig, fit_ax = plt.subplots(figsize=(12, 8))
fit_ax.plot(dog1['Depth_to_Groundwater_Pozzo_1'].values, color='blue')
fit_ax.plot(preds, color='red')
plt.show()