In [None]:
# loading packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
# Global matplotlib setting: every figure created below is rendered at 300 dpi.
plt.rcParams['figure.dpi'] = 300

import matplotlib.dates as mdates

# ignoring warnings
# NOTE(review): simplefilter("ignore") silences every warning category for the
# rest of the notebook (including library deprecation notices) — consider
# narrowing the filter to the specific warnings that are known to be noise.
import warnings
warnings.simplefilter("ignore")

# Loading and a first look at the data

In [None]:
# Print a heading, then list the entries of the competition input directory.
# The bare last expression is what the cell displays as its output.
print('Datasets:')
os.listdir('../input/acea-water-prediction')

In [None]:
def _load_waterbody(csv_path):
    """Read one waterbody CSV, using its 'Date' column as the frame index."""
    return pd.read_csv(csv_path, index_col = 'Date')

# One frame per waterbody; the variable names are reused throughout the notebook.
Aquifer_Doganella = _load_waterbody('../input/acea-water-prediction/Aquifer_Doganella.csv')
Aquifer_Auser = _load_waterbody('../input/acea-water-prediction/Aquifer_Auser.csv')
Water_Spring_Amiata = _load_waterbody('../input/acea-water-prediction/Water_Spring_Amiata.csv')
Lake_Bilancino = _load_waterbody('../input/acea-water-prediction/Lake_Bilancino.csv')
Water_Spring_Madonna_di_Canneto = _load_waterbody('../input/acea-water-prediction/Water_Spring_Madonna_di_Canneto.csv')
Aquifer_Luco = _load_waterbody('../input/acea-water-prediction/Aquifer_Luco.csv')
Aquifer_Petrignano = _load_waterbody('../input/acea-water-prediction/Aquifer_Petrignano.csv')
Water_Spring_Lupa = _load_waterbody('../input/acea-water-prediction/Water_Spring_Lupa.csv')
River_Arno = _load_waterbody('../input/acea-water-prediction/River_Arno.csv')

# Report (rows, columns) for every frame, one line per dataset.
shape_report = [
    'Aquifer_Doganella: {}'.format(Aquifer_Doganella.shape),
    'Aquifer_Auser: {}'.format(Aquifer_Auser.shape),
    'Water_Spring_Amiata: {}'.format(Water_Spring_Amiata.shape),
    'Lake_Bilancino: {}'.format(Lake_Bilancino.shape),
    'Water_Spring_Madonna_di_Canneto: {}'.format(Water_Spring_Madonna_di_Canneto.shape),
    'Aquifer_Luco: {}'.format(Aquifer_Luco.shape),
    'Aquifer_Petrignano: {}'.format(Aquifer_Petrignano.shape),
    'Water_Spring_Lupa: {}'.format(Water_Spring_Lupa.shape),
    'River_Arno: {}'.format(River_Arno.shape),
]
print('Datasets shape:')
print('*'*30)
print(*shape_report, sep = '\n')

In [None]:
# Pair each display name with its frame once, then derive the two parallel
# lists (same order) that the loops and per-dataset sections below index into.
named_datasets = {
    'Aquifer_Doganella': Aquifer_Doganella,
    'Aquifer_Auser': Aquifer_Auser,
    'Water_Spring_Amiata': Water_Spring_Amiata,
    'Lake_Bilancino': Lake_Bilancino,
    'Water_Spring_Madonna_di_Canneto': Water_Spring_Madonna_di_Canneto,
    'Aquifer_Luco': Aquifer_Luco,
    'Aquifer_Petrignano': Aquifer_Petrignano,
    'Water_Spring_Lupa': Water_Spring_Lupa,
    'River_Arno': River_Arno,
}

datasets = list(named_datasets.values())

datasets_names = list(named_datasets.keys())

In [None]:
# For every dataset, show how many columns there are of each dtype.
print('Datasets dtypes:')
print('*'*30)
for name, frame in zip(datasets_names, datasets):
    dtype_counts = frame.dtypes.value_counts()
    print('{}: \n{}'.format(name, dtype_counts))
    print('-'*20)

# EDA

In [None]:
def bar_plot(x, y, title, palette_len, xlim = None, ylim = None, 
             xticklabels = None, yticklabels = None, 
             top_visible = False, right_visible = False, 
             bottom_visible = True, left_visible = False,
             xlabel = None, ylabel = None, figsize = (10, 4),
             axis_grid = 'y'):
    """Draw a seaborn bar chart on a new figure and show it.

    Parameters
    ----------
    x, y
        Forwarded unchanged to ``sns.barplot``.
    title
        Figure title (bold serif, size 15).
    palette_len
        Any sized object; ``len(palette_len)`` colours are taken from the
        "viridis" palette and used in reversed order.
    xlim, ylim
        Forwarded to ``ax.set_xlim`` / ``ax.set_ylim``.
    xticklabels, yticklabels
        Optional replacement tick labels. When left as ``None`` the tick
        labels produced by the plot are kept (previously ``None`` was passed
        on to matplotlib, which fails on a non-sequence).
    top_visible, right_visible, bottom_visible, left_visible
        Whether the corresponding axes spine is drawn.
    xlabel, ylabel
        Axis labels.
    figsize
        Figure size passed to ``plt.subplots``.
    axis_grid
        Axis ('x', 'y' or 'both') on which dashed grid lines are drawn.
    """
    fig, ax = plt.subplots(figsize = figsize)
    ax.set_title(title, size = 15, fontweight = 'bold', fontfamily = 'serif')

    spine_visibility = {'top': top_visible, 'right': right_visible,
                        'bottom': bottom_visible, 'left': left_visible}
    for side, visible in spine_visibility.items():
        ax.spines[side].set_color('black')
        ax.spines[side].set_visible(visible)

    # Materialise the reversed palette as a list: reversed() alone returns a
    # one-shot iterator, which has no length and can only be consumed once.
    palette = list(reversed(sns.color_palette("viridis", len(palette_len))))
    sns.barplot(x = x, y = y, edgecolor = 'black', ax = ax,
                palette = palette)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    # Only override tick labels the caller actually supplied.
    if xticklabels is not None:
        ax.set_xticklabels(xticklabels, fontfamily = 'serif')
    if yticklabels is not None:
        ax.set_yticklabels(yticklabels, fontfamily = 'serif')
    ax.set_xlabel(xlabel, fontfamily = 'serif')
    ax.set_ylabel(ylabel, fontfamily = 'serif')
    ax.grid(axis = axis_grid, linestyle = '--', alpha = 0.9)
    plt.show()

In [None]:
# One horizontal bar chart per dataset: percentage of missing values in each
# column, largest share first.
for name, frame in zip(datasets_names, datasets):
    missing_counts = frame.isnull().sum()
    missing_pct = (missing_counts / len(frame) * 100).sort_values(ascending = False)
    columns_by_missing = missing_pct.index
    bar_plot(x = missing_pct,
             y = columns_by_missing,
             title = '{}: NaN values (%)'.format(name),
             palette_len = columns_by_missing,
             xlim = (0, 100),
             xticklabels = range(0, 101, 20),
             yticklabels = columns_by_missing,
             left_visible = True,
             figsize = (10, 8), axis_grid = 'x')

In [None]:
def line_plot(data, y, title, color,
              top_visible = False, right_visible = False, 
              bottom_visible = True, left_visible = False,
              ylabel = None, figsize = (10, 4), axis_grid = 'y'):
    """Plot column `y` of `data` against its index (parsed as datetimes).

    A new figure is created, a thin line in `color` is drawn, the x axis is
    limited to at most 20 major ticks with labels rotated by 90 degrees, and
    the figure is shown. The ``*_visible`` flags control which spines are
    drawn; `axis_grid` selects the axis that gets dashed grid lines.
    """
    fig, ax = plt.subplots(figsize = figsize)
    ax.set_title(title, size = 15, fontweight = 'bold', fontfamily = 'serif')

    spine_visibility = {'top': top_visible, 'right': right_visible,
                        'bottom': bottom_visible, 'left': left_visible}
    for side, visible in spine_visibility.items():
        spine = ax.spines[side]
        spine.set_color('black')
        spine.set_visible(visible)

    # The index holds the dates; convert it for a proper time axis.
    timestamps = pd.to_datetime(data.index)
    sns.lineplot(x = timestamps, y = data[y], dashes = False,
                 color = color, linewidth = .5, ax = ax)
    ax.xaxis.set_major_locator(plt.MaxNLocator(20))

    plt.xticks(rotation = 90)
    ax.set_xlabel('')
    ax.set_ylabel(ylabel, fontfamily = 'serif')
    ax.grid(axis = axis_grid, linestyle = '--', alpha = 0.9)
    plt.show()

def corr_plot(data,
              top_visible = False, right_visible = False, 
              bottom_visible = True, left_visible = False,
              ylabel = None, figsize = (15, 11), axis_grid = 'y'):
    """Show the lower triangle of `data`'s correlation matrix as a heatmap.

    Parameters
    ----------
    data
        DataFrame whose ``.corr()`` matrix is plotted, rounded to two decimals
        and annotated in each cell.
    figsize
        Figure size passed to ``plt.subplots``.
    top_visible, right_visible, bottom_visible, left_visible, ylabel, axis_grid
        Accepted but not used by this function.
    """
    fig, ax = plt.subplots(figsize = figsize)
    ax.set_title('Correlations (Pearson)', size = 15, fontweight = 'bold', fontfamily = 'serif')

    # Compute the correlation matrix once and reuse it for mask and heatmap.
    corr = data.corr()
    # Hide the upper triangle (diagonal included): it mirrors the lower one.
    mask = np.triu(np.ones_like(corr, dtype = bool))
    sns.heatmap(corr.round(2), mask = mask, cmap = 'viridis', annot = True, ax = ax)
    plt.show()
    
def columns_viz(data, color):
    """Draw one `line_plot` per column of `data`, all in the same `color`."""
    for column in data.columns:
        line_plot(data = data, y = column,
                  color = color,
                  title = '{} dynamics'.format(column),
                  bottom_visible = False, figsize = (10, 3))

![](https://storage.cloud.google.com/kaggle-media/competitions/Acea/Screen%20Shot%202020-12-02%20at%2012.40.17%20PM.png)

### Aquifer ([Wikipedia](https://en.wikipedia.org/wiki/Aquifer))

An aquifer is an underground layer of water-bearing permeable rock, rock fractures or unconsolidated materials (gravel, sand, or silt).

![](https://upload.wikimedia.org/wikipedia/commons/thumb/0/04/Aquifer_en.svg/465px-Aquifer_en.svg.png)

### Spring ([Wikipedia](https://en.wikipedia.org/wiki/Spring_%28hydrology%29))

A spring is a point at which water flows from an aquifer to the Earth's surface. It is a component of the hydrosphere.

### River ([Wikipedia](https://en.wikipedia.org/wiki/River))

A river is a natural flowing watercourse, usually freshwater, flowing towards an ocean, sea, lake or another river. In some cases a river flows into the ground and becomes dry at the end of its course without reaching another body of water.

### Lake ([Wikipedia](https://en.wikipedia.org/wiki/Lake))

A lake is an area filled with water, localized in a basin, surrounded by land, apart from any river or other outlet that serves to feed or drain the lake.

# Aquifer_Doganella

Description: The wells field Doganella is fed by two underground aquifers not fed by rivers or lakes but fed by meteoric infiltration. The upper aquifer is a water table with a thickness of about 30m. The lower aquifer is a semi-confined artesian aquifer with a thickness of 50m and is located inside lavas and tufa products. These aquifers are accessed through wells called Well 1, ..., Well 9. Approximately 80% of the drainage volumes come from the artesian aquifer. The aquifer levels are influenced by the following parameters: rainfall, humidity, subsoil, temperatures and drainage volumes.

In [None]:
datasets[0].head()

In [None]:
print('The earliest date: \t%s' %datasets[0].index.values[[0, -1]][0])
print('The latest date: \t%s' %datasets[0].index.values[[0, -1]][1])

In [None]:
corr_plot(datasets[0])

In [None]:
columns_viz(datasets[0], color = '#FFC30F')

# Aquifer_Auser

Description: This waterbody consists of two subsystems, called NORTH and SOUTH, where the former partly influences the behavior of the latter. Indeed, the north subsystem is a water table (or unconfined) aquifer while the south subsystem is an artesian (or confined) groundwater.

The levels of the NORTH sector are represented by the values of the SAL, PAG, CoS and DIEC wells, while the levels of the SOUTH sector by the LT2 well.

In [None]:
datasets[1].head()

In [None]:
print('The earliest date: \t%s' %datasets[1].index.values[[0, -1]][0])
print('The latest date: \t%s' %datasets[1].index.values[[0, -1]][1])

In [None]:
corr_plot(datasets[1])

In [None]:
columns_viz(datasets[1], color = '#FF5733')

# Water_Spring_Amiata

Description: The Amiata waterbody is composed of a volcanic aquifer not fed by rivers or lakes but fed by meteoric infiltration. This aquifer is accessed through Ermicciolo, Arbure, Bugnano and Galleria Alta water springs. The levels and volumes of the four sources are influenced by the parameters: rainfall, depth to groundwater, hydrometry, temperatures and drainage volumes.

In [None]:
datasets[2].head()

In [None]:
print('The earliest date: \t%s' %datasets[2].index.values[[0, -1]][0])
print('The latest date: \t%s' %datasets[2].index.values[[0, -1]][1])

In [None]:
corr_plot(datasets[2])

In [None]:
columns_viz(datasets[2], color = '#C70039')

# Lake_Bilancino

Description: Bilancino lake is an artificial lake located in the municipality of Barberino di Mugello (about 50 km from Florence). It is used to refill the Arno river during the summer months. Indeed, during the winter months, the lake is filled up and then, during the summer months, the water of the lake is poured into the Arno river.

In [None]:
datasets[3].head()

In [None]:
print('The earliest date: \t%s' %datasets[3].index.values[[0, -1]][0])
print('The latest date: \t%s' %datasets[3].index.values[[0, -1]][1])

In [None]:
corr_plot(datasets[3])

In [None]:
columns_viz(datasets[3], color = '#900C3F')

# Water_Spring_Madonna_di_Canneto

Description: The Madonna di Canneto spring is situated at an altitude of 1010m above sea level in the Canneto valley. It does not consist of an aquifer and its source is supplied by the water catchment area of the river Melfa.

In [None]:
datasets[4].head()

In [None]:
print('The earliest date: \t%s' %datasets[4].index.values[[0, -1]][0])
print('The latest date: \t%s' %datasets[4].index.dropna().values[[0, -1]][1])

In [None]:
corr_plot(datasets[4])

In [None]:
columns_viz(datasets[4], color = '#581845')

# Aquifer_Luco

Description: The Luco wells field is fed by an underground aquifer. This aquifer is not fed by rivers or lakes but by meteoric infiltration at the extremes of the impermeable sedimentary layers. The aquifer is accessed through wells called Well 1, Well 3 and Well 4 and is influenced by the following parameters: rainfall, depth to groundwater, temperature and drainage volumes.

In [None]:
datasets[5].head()

In [None]:
print('The earliest date: \t%s' %datasets[5].index.values[[0, -1]][0])
print('The latest date: \t%s' %datasets[5].index.values[[0, -1]][1])

In [None]:
corr_plot(datasets[5])

In [None]:
columns_viz(datasets[5], color = '#547980')

# Aquifer_Petrignano

Description: The wells field of the alluvial plain between Ospedalicchio di Bastia Umbra and Petrignano is fed by three underground aquifers separated by low permeability septa. The aquifer can be considered a water table groundwater and is also fed by the Chiascio river. The groundwater levels are influenced by the following parameters: rainfall, depth to groundwater, temperatures and drainage volumes, level of the Chiascio river.

In [None]:
datasets[6].head()

In [None]:
print('The earliest date: \t%s' %datasets[6].index.values[[0, -1]][0])
print('The latest date: \t%s' %datasets[6].index.values[[0, -1]][1])

In [None]:
corr_plot(datasets[6])

In [None]:
columns_viz(datasets[6], color = '#45ADA8')

# Water_Spring_Lupa

Description: this water spring is located in the Rosciano Valley, on the left side of the Nera river. The waters emerge at an altitude of about 375 meters above sea level through a long draining tunnel that crosses, in its final section, lithotypes and essentially calcareous rocks. It provides drinking water to the city of Terni and the towns around it.

In [None]:
datasets[7].head()

In [None]:
print('The earliest date: \t%s' %datasets[7].index.values[[0, -1]][0])
print('The latest date: \t%s' %datasets[7].index.values[[0, -1]][1])

In [None]:
corr_plot(datasets[7])

In [None]:
columns_viz(datasets[7], color = '#9DE0AD')

# River_Arno

Description: Arno is the second largest river in peninsular Italy and the main waterway in Tuscany and it has a relatively torrential regime, due to the nature of the surrounding soils (marl and impermeable clays). Arno results to be the main source of water supply of the metropolitan area of Florence-Prato-Pistoia. The availability of water for this waterbody is evaluated by checking the hydrometric level of the river at the section of Nave di Rosano.

In [None]:
datasets[8].head()

In [None]:
print('The earliest date: \t%s' %datasets[8].index.values[[0, -1]][0])
print('The latest date: \t%s' %datasets[8].index.values[[0, -1]][1])

In [None]:
corr_plot(datasets[8])

In [None]:
columns_viz(datasets[8], color = '#474747')

# Some ideas for prediction

The challenge is to determine how features influence the water availability of each presented waterbody. Put simply, with a better understanding of volumes, Acea will be able to ensure water availability for each time interval of the year.

The time interval is defined as day/month depending on the available measures for each waterbody. Models should capture volumes for each waterbody (for instance, for a model working on a monthly interval, a forecast over the month is expected).

![](https://www.googleapis.com/download/storage/v1/b/kaggle-user-content/o/inbox%2F6195295%2Fcca952eecc1e49c54317daf97ca2cca7%2FAcea-Input.png?generation=1606932492951317&alt=media)

## River Arno (features)
![**(Arno river on map)**](https://upload.wikimedia.org/wikipedia/commons/f/f6/Arno_%28fleuve%29.png)
The Arno is a river in the Tuscany region of Italy. It is the most important river of central Italy after the Tiber. With a length of 241 kilometres (150 mi), it is the largest river in the region. It has many tributaries: Sieve at 60 kilometres (37 mi) long, Bisenzio at 49 kilometres (30 mi), Ombrone Pistoiese at 47 kilometres (29 mi), and the Era, Elsa, Pesa, and Pescia. The drainage basin amounts to more than 8,200 square kilometres (3,200 sq mi).


The main indicator that will be predicted for the river is **hydrometry**. The other two features in this dataset are **rainfall** and **temperature**. Let's look at their dynamics. **Below I'll take some liberties with axes scales in order to make the graphs more visual (somewhere the values will be logarithmized, somewhere they'll be adjusted)**.

In [None]:
# Keep the target (hydrometry) and the temperature, and collapse the remaining
# columns into one daily rainfall total.
df = River_Arno[['Hydrometry_Nave_di_Rosano', 'Temperature_Firenze']].reset_index()
# Every column except the last two is summed as rainfall.
# NOTE(review): this relies on the CSV column order (the two non-rainfall
# columns being last) — confirm against the file.
df['rainfall'] = River_Arno.iloc[:, 0:-2].sum(axis = 1).values

# Parse the date strings once and reuse the result for every derived column.
# NOTE(review): no explicit format is given; if the CSV stores day-first dates
# (dd/mm/yyyy) ambiguous days will be mis-parsed — confirm and pass
# dayfirst=True or format=... if so.
arno_dates = pd.to_datetime(df.Date)
df['year'] = arno_dates.dt.year
df['month'] = arno_dates.dt.month

# Monthly dynamics
df['month_year'] = arno_dates.dt.strftime('%Y/%m')
arno_monthly = df.groupby('month_year')

# Monthly means are log-transformed for display only; np.log gives -inf for a
# zero mean and NaN for a negative one. Hydrometry is additionally scaled by 10
# before the log.
r_means = np.log(arno_monthly['Hydrometry_Nave_di_Rosano'].mean() * 10).reset_index()
r_means['month_year'] = pd.to_datetime(r_means['month_year'], format = '%Y/%m')

r_rain = np.log(arno_monthly['rainfall'].mean()).reset_index()
r_rain['month_year'] = pd.to_datetime(r_rain['month_year'], format = '%Y/%m')

r_temp = np.log(arno_monthly['Temperature_Firenze'].mean()).reset_index()
r_temp['month_year'] = pd.to_datetime(r_temp['month_year'], format = '%Y/%m')

In [None]:
# Overlay the three monthly Arno series on a single axis.
fig, ax = plt.subplots(figsize = (15, 5))
ax.set_title('Monthly dynamics (Arno River)', size = 15, fontweight = 'bold', fontfamily = 'serif')

sns.lineplot(data = r_rain, x = 'month_year', y = 'rainfall', ax = ax,
             color = 'gray', label = 'Rainfall', alpha = 0.4)
sns.lineplot(data = r_temp, x = 'month_year', y = 'Temperature_Firenze', ax = ax,
             color = 'green', label = 'Temperature_Firenze', alpha = 0.6)
sns.lineplot(data = r_means, x = 'month_year', y = 'Hydrometry_Nave_di_Rosano', ax = ax,
             color = 'blue', label = 'Hydrometry')

for spine in ax.spines.values():
    spine.set_visible(False)

# One tick every 12 monthly rows, labelled with the year of that very row, so
# labels and tick positions cannot drift apart if the covered period changes
# (a hard-coded year range only fits one exact number of ticks).
year_ticks = r_means['month_year'][::12]
ax.set_xticks(year_ticks)
ax.set_xticklabels(year_ticks.dt.year.tolist(), fontfamily = 'serif', rotation = 45)
ax.set_xlabel('')
ax.set_ylabel('')
ax.grid(axis = 'y', linestyle = '--', alpha = 0.9)
plt.show()

The graph above demonstrates quite logical patterns: water level depends on average monthly temperatures (temperature peaks coincide with the lowest annual hydrometry data; water level in rivers is always lower in summer) and in general, depends on the amount of rainfall. There is a strange drop in hydrometry between 2008 and 2009.

In [None]:
# Yearly dynamics
# Yearly means of the same three series, log-transformed for display only
# (hydrometry is scaled by 10 before the log).
arno_yearly = df.groupby('year')
r_means_y = np.log(arno_yearly['Hydrometry_Nave_di_Rosano'].mean() * 10).reset_index()
r_rain_y = np.log(arno_yearly['rainfall'].mean()).reset_index()
r_temp_y = np.log(arno_yearly['Temperature_Firenze'].mean()).reset_index()


fig, ax = plt.subplots(figsize = (15, 5))
ax.set_title('Yearly dynamics (Arno River)', size = 15, fontweight = 'bold', fontfamily = 'serif')

sns.lineplot(data = r_rain_y, x = 'year', y = 'rainfall', ax = ax,
             color = 'gray', label = 'Rainfall', alpha = 0.4)
sns.lineplot(data = r_temp_y, x = 'year', y = 'Temperature_Firenze', ax = ax,
             color = 'green', label = 'Temperature_Firenze', alpha = 0.6)
sns.lineplot(data = r_means_y, x = 'year', y = 'Hydrometry_Nave_di_Rosano', ax = ax,
             color = 'blue', label = 'Hydrometry')

for spine in ax.spines.values():
    spine.set_visible(False)

# Tick on every year present in the data and label it with that same year,
# instead of a hard-coded range that must match the number of ticks exactly.
years = r_means_y['year']
ax.set_xticks(years)
ax.set_xticklabels(years.astype(int).tolist(), fontfamily = 'serif', rotation = 45)
ax.set_xlabel('')
ax.set_ylabel('')
ax.grid(axis = 'y', linestyle = '--', alpha = 0.9)
plt.show()

In general, the average annual temperature remains stable throughout the study period. The hydrometry indicator is also relatively stable from year to year, apart from small fluctuations and the significant deviation in 2008. But the amount of rainfall is gradually decreasing, which may be caused by climate change (or by a large number of missing values).

## Lake Bilancino (features)
![](https://www.hoteldeivicari.com/images/demo/gallery/intestazione1280x500/mugello_florence_lake_bilancino_near_florence.jpg)
Bilancino lake is an artificial lake located in the municipality of Barberino di Mugello (about 50 km from Florence). It is used to refill the Arno river during the summer months. Indeed, during the winter months, the lake is filled up and then, during the summer months, the water of the lake is poured into the Arno river.

In [None]:
# Keep lake level, flow rate and temperature, and collapse the remaining
# columns into one daily rainfall total.
df = Lake_Bilancino[['Lake_Level', 'Flow_Rate', 'Temperature_Le_Croci']].reset_index()
# Every column except the last three is summed as rainfall.
# NOTE(review): this relies on the CSV column order (the three non-rainfall
# columns being last) — confirm against the file.
df['rainfall'] = Lake_Bilancino.iloc[:, 0:-3].sum(axis = 1).values

# Parse the date strings once and reuse the result for every derived column.
# NOTE(review): no explicit format is given; if the CSV stores day-first dates
# (dd/mm/yyyy) ambiguous days will be mis-parsed — confirm and pass
# dayfirst=True or format=... if so.
lake_dates = pd.to_datetime(df.Date)
df['year'] = lake_dates.dt.year
df['month'] = lake_dates.dt.month

# Monthly dynamics
df['month_year'] = lake_dates.dt.strftime('%Y/%m')
lake_monthly = df.groupby('month_year')

# Flow rate, rainfall and temperature are log-transformed for display only
# (np.log gives -inf for a zero mean and NaN for a negative one); the lake
# level is kept on its original scale.
l_means = np.log(lake_monthly['Flow_Rate'].mean()).reset_index()
l_means['month_year'] = pd.to_datetime(l_means['month_year'], format = '%Y/%m')

l_means_LL = lake_monthly['Lake_Level'].mean().reset_index()
l_means_LL['month_year'] = pd.to_datetime(l_means_LL['month_year'], format = '%Y/%m')

l_rain = np.log(lake_monthly['rainfall'].mean()).reset_index()
l_rain['month_year'] = pd.to_datetime(l_rain['month_year'], format = '%Y/%m')

l_temp = np.log(lake_monthly['Temperature_Le_Croci'].mean()).reset_index()
l_temp['month_year'] = pd.to_datetime(l_temp['month_year'], format = '%Y/%m')

In [None]:
# Top panel: rainfall, temperature and flow rate; bottom panel: lake level.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize = (15, 8))
fig.suptitle('Monthly dynamics (Lake Bilancino)', size = 15, fontweight = 'bold', fontfamily = 'serif')

sns.lineplot(data = l_rain, x = 'month_year', y = 'rainfall', ax = ax1, 
             color = 'gray', label = 'Rainfall', alpha = 0.4)
sns.lineplot(data = l_temp, x = 'month_year', y = 'Temperature_Le_Croci', ax = ax1,
             color = 'green', label = 'Temperature_Le_Croci', alpha = 0.6)
sns.lineplot(data = l_means, x = 'month_year', y = 'Flow_Rate', ax = ax1,
             color = 'blue', label = 'Flow Rate')

sns.lineplot(data = l_means_LL, x = 'month_year', y = 'Lake_Level', ax = ax2,
             color = 'orange', label = 'Lake Level')

# Shared cosmetics for both panels. Ticks fall on every 12th monthly row and
# are labelled with the year of that row, so labels always match the ticks
# (a hard-coded year range only fits one exact number of ticks).
for panel, frame in ((ax1, l_means), (ax2, l_means_LL)):
    for spine in panel.spines.values():
        spine.set_visible(False)

    year_ticks = frame['month_year'][::12]
    panel.set_xticks(year_ticks)
    panel.set_xticklabels(year_ticks.dt.year.tolist(), fontfamily = 'serif')
    panel.set_xlabel('')
    panel.set_ylabel('')
    panel.grid(axis = 'y', linestyle = '--', alpha = 0.9)
plt.show()

The graph above demonstrates the same patterns: flow rate and lake level depend on average monthly temperatures and, in general, on the amount of rainfall. The lake level decreases in summer and autumn every year, as the water from it flows into the Arno River. There are two strong decreases in 2008 and 2012-2013. In 2008, the water level in the river was still much lower (as can be seen in the graphs above), but in 2012-2013, a serious discharge of water from the lake helped to keep the water level in the Arno River stable.

In [None]:
# Yearly dynamics
# Yearly means; flow rate, rainfall and temperature are log-transformed for
# display only, the lake level is kept on its original scale.
lake_yearly = df.groupby('year')
l_means_y = np.log(lake_yearly['Flow_Rate'].mean()).reset_index()
l_means_LL_y = lake_yearly['Lake_Level'].mean().reset_index()
l_rain_y = np.log(lake_yearly['rainfall'].mean()).reset_index()
l_temp_y = np.log(lake_yearly['Temperature_Le_Croci'].mean()).reset_index()

fig, (ax1, ax2) = plt.subplots(2, 1, figsize = (15, 8))
fig.suptitle('Yearly dynamics (Lake Bilancino)', size = 15, fontweight = 'bold', fontfamily = 'serif')

sns.lineplot(data = l_rain_y, x = 'year', y = 'rainfall', ax = ax1, 
             color = 'gray', label = 'Rainfall', alpha = 0.4)
sns.lineplot(data = l_temp_y, x = 'year', y = 'Temperature_Le_Croci', ax = ax1,
             color = 'green', label = 'Temperature_Le_Croci', alpha = 0.6)
sns.lineplot(data = l_means_y, x = 'year', y = 'Flow_Rate', ax = ax1,
             color = 'blue', label = 'Flow Rate')

sns.lineplot(data = l_means_LL_y, x = 'year', y = 'Lake_Level', ax = ax2,
             color = 'orange', label = 'Lake Level')

# Shared cosmetics for both panels. Each year present in the data gets a tick
# labelled with that same year, instead of a hard-coded range that must match
# the number of ticks exactly.
for panel, frame in ((ax1, l_means_y), (ax2, l_means_LL_y)):
    for spine in panel.spines.values():
        spine.set_visible(False)

    years = frame['year']
    panel.set_xticks(years)
    panel.set_xticklabels(years.astype(int).tolist(), fontfamily = 'serif')
    panel.set_xlabel('')
    panel.set_ylabel('')
    panel.grid(axis = 'y', linestyle = '--', alpha = 0.9)
plt.show()

In general, the annual dynamics of flow rate and lake level are the same. Most importantly, the rainfall indicator is relatively stable throughout the observation period (in contrast to the analogous indicator in the River_Arno dataset). Considering that both water bodies are located in the same region of Italy and the Lake_Bilancino dataset has fewer missing values, the conclusion about a decrease in rainfall caused by climate change is incorrect.

## Aquifer Petrignano (features)
![](https://www.researchgate.net/profile/Elisabetta_Preziosi/publication/26812692/figure/fig8/AS:279197076475915@1443577178810/The-Petrignano-dAssisi-plain-Gray-areas-alluvial-deposits-White-areas-lacustrine-and.png)
(Source: [The Sustainable Pumping Rate Concept: Lessons from a Case Study in Central Italy](https://www.researchgate.net/publication/26812692_The_Sustainable_Pumping_Rate_Concept_Lessons_from_a_Case_Study_in_Central_Italy))

Gray areas: alluvial deposits. White areas: lacustrine and fluvio-lacustrine deposits. White stars: wells belonging to the Petrignano pumping station. Black dots: other wells located within the area. The bold line indicates the boundaries of the model area.

In [None]:
df = Aquifer_Petrignano.reset_index().copy()

# Parse the date strings once and reuse the result for every derived column.
# NOTE(review): no explicit format is given; if the CSV stores day-first dates
# (dd/mm/yyyy) ambiguous days will be mis-parsed — confirm and pass
# dayfirst=True or format=... if so.
petrignano_dates = pd.to_datetime(df.Date)
df['year'] = petrignano_dates.dt.year
df['month'] = petrignano_dates.dt.month

# Monthly dynamics
df['month_year'] = petrignano_dates.dt.strftime('%Y/%m')

# Aggregate all plotted columns in one groupby, then expose one two-column
# frame (month_year + value) per series under the names used by the plots.
petrignano_monthly = df.groupby('month_year')[[
    'Depth_to_Groundwater_P24',
    'Depth_to_Groundwater_P25',
    'Rainfall_Bastia_Umbra',
    'Temperature_Bastia_Umbra',
    'Temperature_Petrignano',
    'Volume_C10_Petrignano',
    'Hydrometry_Fiume_Chiascio_Petrignano',
]].mean().reset_index()
petrignano_monthly['month_year'] = pd.to_datetime(petrignano_monthly['month_year'], format = '%Y/%m')

ap_means_P24 = petrignano_monthly[['month_year', 'Depth_to_Groundwater_P24']]

ap_means_P25 = petrignano_monthly[['month_year', 'Depth_to_Groundwater_P25']]

ap_rain = petrignano_monthly[['month_year', 'Rainfall_Bastia_Umbra']]

ap_temp_B = petrignano_monthly[['month_year', 'Temperature_Bastia_Umbra']]

ap_temp_P = petrignano_monthly[['month_year', 'Temperature_Petrignano']]

ap_C10 = petrignano_monthly[['month_year', 'Volume_C10_Petrignano']]

ap_hydro = petrignano_monthly[['month_year', 'Hydrometry_Fiume_Chiascio_Petrignano']]

In [None]:
# Four stacked panels, top to bottom: groundwater depth (P24/P25), drainage
# volume, rainfall with river hydrometry, and the two temperature series.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize = (15, 15))
fig.suptitle('Monthly dynamics (Aquifer Petrignano)', size = 15, fontweight = 'bold', fontfamily = 'serif')

sns.lineplot(data = ap_means_P24, x = 'month_year', y = 'Depth_to_Groundwater_P24', 
             color = 'green', label = 'Depth_to_Groundwater_P24', alpha = 0.6,
             ax = ax1)
sns.lineplot(data = ap_means_P25, x = 'month_year', y = 'Depth_to_Groundwater_P25', 
             color = 'blue', label = 'Depth_to_Groundwater_P25', alpha = 0.6,
             ax = ax1)

sns.lineplot(data = ap_C10, x = 'month_year', y = 'Volume_C10_Petrignano', 
             color = 'purple', label = 'Volume_C10_Petrignano', alpha = 0.6,
             ax = ax2)

sns.lineplot(data = ap_rain, x = 'month_year', y = 'Rainfall_Bastia_Umbra', 
             color = 'grey', label = 'Rainfall_Bastia_Umbra', alpha = 0.6,
             ax = ax3)
sns.lineplot(data = ap_hydro, x = 'month_year', y = 'Hydrometry_Fiume_Chiascio_Petrignano', 
             color = 'orange', label = 'Hydrometry_Fiume_Chiascio_Petrignano', alpha = 0.6,
             ax = ax3)

sns.lineplot(data = ap_temp_B, x = 'month_year', y = 'Temperature_Bastia_Umbra', 
             color = 'gray', label = 'Temperature_Bastia_Umbra', alpha = 0.6,
             ax = ax4)
sns.lineplot(data = ap_temp_P, x = 'month_year', y = 'Temperature_Petrignano', 
             color = 'red', label = 'Temperature_Petrignano', alpha = 0.6,
             ax = ax4)

# Shared cosmetics for all panels. Ticks fall on every 12th monthly row of a
# series drawn in that panel and are labelled with the year of that row, so
# labels always match the ticks (a hard-coded year range only fits one exact
# number of ticks).
for panel, frame in ((ax1, ap_means_P24), (ax2, ap_C10),
                     (ax3, ap_rain), (ax4, ap_temp_P)):
    for spine in panel.spines.values():
        spine.set_visible(False)

    year_ticks = frame['month_year'][::12]
    panel.set_xticks(year_ticks)
    panel.set_xticklabels(year_ticks.dt.year.tolist(), fontfamily = 'serif')
    panel.set_xlabel('')
    panel.set_ylabel('')
    panel.grid(axis = 'y', linestyle = '--', alpha = 0.9)
plt.show()

Depth_to_Groundwater_P24 and Depth_to_Groundwater_P25 are completely correlated, and therefore averaged values can be used.
There is a strange dip in monthly temperatures for Temperature_Petrignano in 2015. This is most likely due to missing values since, in general, the dynamics are almost identical to Temperature_Bastia_Umbra. The same dip is seen in the Hydrometry metric.

Most of all (judging by the graphs and correlation indices) Depth_to_Groundwater_P24 and Depth_to_Groundwater_P25 are affected by the drainage volume (Volume_C10).

In [None]:
# Yearly dynamics
# Aggregate all plotted columns in one groupby, then expose one two-column
# frame (year + value) per series under the names used by the plots.
petrignano_yearly = df.groupby('year')[[
    'Depth_to_Groundwater_P24',
    'Depth_to_Groundwater_P25',
    'Rainfall_Bastia_Umbra',
    'Temperature_Bastia_Umbra',
    'Temperature_Petrignano',
    'Volume_C10_Petrignano',
    'Hydrometry_Fiume_Chiascio_Petrignano',
]].mean().reset_index()

ap_means_P24_y = petrignano_yearly[['year', 'Depth_to_Groundwater_P24']]
ap_means_P25_y = petrignano_yearly[['year', 'Depth_to_Groundwater_P25']]
ap_rain_y = petrignano_yearly[['year', 'Rainfall_Bastia_Umbra']]
ap_temp_B_y = petrignano_yearly[['year', 'Temperature_Bastia_Umbra']]
ap_temp_P_y = petrignano_yearly[['year', 'Temperature_Petrignano']]
ap_C10_y = petrignano_yearly[['year', 'Volume_C10_Petrignano']]
ap_hydro_y = petrignano_yearly[['year', 'Hydrometry_Fiume_Chiascio_Petrignano']]


# Four stacked panels, top to bottom: groundwater depth (P24/P25), drainage
# volume, rainfall with river hydrometry, and the two temperature series.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize = (15, 15))
fig.suptitle('Yearly dynamics (Aquifer Petrignano)', size = 15, fontweight = 'bold', fontfamily = 'serif')

sns.lineplot(data = ap_means_P24_y, x = 'year', y = 'Depth_to_Groundwater_P24', 
             color = 'green', label = 'Depth_to_Groundwater_P24', alpha = 0.6,
             ax = ax1)
sns.lineplot(data = ap_means_P25_y, x = 'year', y = 'Depth_to_Groundwater_P25', 
             color = 'blue', label = 'Depth_to_Groundwater_P25', alpha = 0.6,
             ax = ax1)

sns.lineplot(data = ap_C10_y, x = 'year', y = 'Volume_C10_Petrignano', 
             color = 'purple', label = 'Volume_C10_Petrignano', alpha = 0.6,
             ax = ax2)

sns.lineplot(data = ap_rain_y, x = 'year', y = 'Rainfall_Bastia_Umbra', 
             color = 'grey', label = 'Rainfall_Bastia_Umbra', alpha = 0.6,
             ax = ax3)
sns.lineplot(data = ap_hydro_y, x = 'year', y = 'Hydrometry_Fiume_Chiascio_Petrignano', 
             color = 'orange', label = 'Hydrometry_Fiume_Chiascio_Petrignano', alpha = 0.6,
             ax = ax3)

sns.lineplot(data = ap_temp_B_y, x = 'year', y = 'Temperature_Bastia_Umbra', 
             color = 'gray', label = 'Temperature_Bastia_Umbra', alpha = 0.6,
             ax = ax4)
sns.lineplot(data = ap_temp_P_y, x = 'year', y = 'Temperature_Petrignano', 
             color = 'red', label = 'Temperature_Petrignano', alpha = 0.6,
             ax = ax4)

# Shared cosmetics for all panels. Each year present in the data gets a tick
# labelled with that same year, instead of a hard-coded range that must match
# the number of ticks exactly.
for panel, frame in ((ax1, ap_means_P24_y), (ax2, ap_C10_y),
                     (ax3, ap_rain_y), (ax4, ap_temp_P_y)):
    for spine in panel.spines.values():
        spine.set_visible(False)

    years = frame['year']
    panel.set_xticks(years)
    panel.set_xticklabels(years.astype(int).tolist(), fontfamily = 'serif')
    panel.set_xlabel('')
    panel.set_ylabel('')
    panel.grid(axis = 'y', linestyle = '--', alpha = 0.9)
plt.show()

In 2008 and 2012, a significant increase in groundwater depth was observed. In 2012, there was a significant decrease in rainfall that was also accompanied by a change in drainage volume. From 2012 up to 2014, as annual rainfall increased, the depth to groundwater and the drainage volume decreased, which looks logical. There is no rainfall data for 2008.

## WORK IN PROGRESS...