# Aquifer data

## Imports

In [None]:
#pip install geopy pandas folium  # sqlite3 ships with the Python standard library

In [None]:
# Data manipulation
import pandas as pd
import sqlite3

# Geographical data plotting
import folium
from folium.map import Popup
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import time

## Loading the database

In [None]:
# Open the SQLite database file that holds the raw data
conn = sqlite3.connect('../data/external/data.db')

# A cursor is used to query the schema catalogue
cursor = conn.cursor()

# Ask sqlite_master for the name of every table in the database
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()
table_names = [name for (name,) in tables]
print("Tables in the database:", table_names)

# Read each table completely into its own pandas DataFrame, keyed by table name
dataframes = {
    table: pd.read_sql_query(f"SELECT * FROM {table}", conn)
    for table in table_names
}

# Pick the four datasets used in the rest of the notebook.
# NOTE(review): the tables are selected by their position (4-7) in the
# sqlite_master listing, not by name - confirm this order is stable.
aquifer_stations, aquifer_measurements, weather_locations, weather = (
    dataframes[table_names[4]],
    dataframes[table_names[5]],
    dataframes[table_names[6]],
    dataframes[table_names[7]],
)

# Show the first rows of each dataset as a quick sanity check
print("aquifer stations:", aquifer_stations.head(), sep="\n")
print("aquifer measurements:", aquifer_measurements.head(), sep="\n")
print("weather stations:", weather_locations.head(), sep="\n")
print("Weather measurements:", weather.head(), sep="\n")


## Data manipulation

### Aquifer data

In [None]:
# Parse the 'date' column into pandas datetimes (done in place on the frame)
aquifer_measurements['date'] = pd.to_datetime(aquifer_measurements['date'])

# Keep only the measurements taken in the years 2010 through 2017
aquifer_measurements = aquifer_measurements.loc[
    aquifer_measurements['date'].dt.year.between(2010, 2017)
]

# For every station, count in how many distinct years it has measurements
station_years_counts = (
    aquifer_measurements
    .groupby('station_id')['date']
    .apply(lambda dates: dates.dt.year.nunique())
)

# A station qualifies when it covers all 8 years of the 2010-2017 period
station_ids_all_years = station_years_counts.loc[station_years_counts.eq(8)].index.tolist()

# Drop the measurements of stations that do not cover the whole period
aquifer_measurements = aquifer_measurements.loc[
    aquifer_measurements['station_id'].isin(station_ids_all_years)
]

# Show the remaining measurements
print(aquifer_measurements)

# Write the remaining measurements to disk (CSV content, .txt extension)
aquifer_measurements.to_csv("aquifer_measurements.txt", index=False)

# Keep only the station records that belong to the qualifying stations
aquifer_stations = aquifer_stations.loc[aquifer_stations['id'].isin(station_ids_all_years)]

In [None]:
# Save the aquifer stations that made the final selection to a CSV file
aquifer_stations.to_csv("aquifer_stations.csv", index=False)

# Print how many stations have measurements in every year from 2010 to 2017
print(len(station_ids_all_years))

### Weather data

In [None]:
# Convert 'time' column from Unix timestamp (seconds) to datetime
weather['time'] = pd.to_datetime(weather['time'], unit='s')

# Convert 'day_time' from a number of seconds to a pandas timedelta
weather['day_time'] = pd.to_timedelta(weather['day_time'], unit='s')

# Group by location_id and count the number of unique years.
# Only years inside 2010-2017 are counted, mirroring the aquifer filtering:
# without this restriction "== 8" would accept any eight distinct years and
# would reject locations that cover the whole period plus additional years.
location_years_counts = (
    weather[weather['time'].dt.year.between(2010, 2017)]
    .groupby('location_id')['time']
    .apply(lambda x: x.dt.year.nunique())
)

# Filter location_ids that have instances in all 8 years from 2010 to 2017
location_ids_all_years = location_years_counts[location_years_counts == 8].index.tolist()

# Filter weather to include only locations with instances in all years from 2010 to 2017
# (all rows of the selected locations are kept, including rows outside the period)
weather = weather[weather['location_id'].isin(location_ids_all_years)]

# Save as a .csv file
weather.to_csv("weather.csv", index=False)
print(weather)

# Fetch all of the location_ids that remain after filtering
location_ids = weather['location_id'].unique()

print("Locations with instances all years from 2010 to 2017:")
print(len(location_ids))

# Keep only the location records that belong to the selected locations
weather_locations = weather_locations[weather_locations['id'].isin(location_ids_all_years)]

### Splitting the dataframes based on the stations

In [None]:
# Make a dictionary with dataframes for specific aquifer stations
aquifer_measurements_by_stations = {aquifer: data for aquifer, data in aquifer_measurements.groupby('station_id')}

# Make a dictionary with dataframes for specific weather locations
weather_by_locatons = {location: data for location, data in weather.groupby('location_id')}

### Completing the aquifer_stations dataset

In [None]:
# Geocoding of the aquifer stations is currently disabled: everything below is
# wrapped in a triple-quoted string, so none of it is executed.
# The snippet would look up each station name with Nominatim and store the
# result in new 'latitude' and 'longitude' columns of aquifer_stations
# (None for both when the place is not found or the request times out).
'''geolocator = Nominatim(user_agent="myGeocoder")

# Function to geocode a place name
def geocode_place(place):
    try:
        location = geolocator.geocode(place)
        if location:
            return location.latitude, location.longitude
        else:
            return None, None
    except GeocoderTimedOut:
        return None, None
print(geocode_place("Miren"))
# Geocode each station name
aquifer_stations['latitude'], aquifer_stations['longitude'] = zip(*aquifer_stations['name'].apply(geocode_place))'''

### Plotting the aquifer stations and weather locations on a map

In [None]:
# Create the base map, centred on Slovenia
slovenia_map = folium.Map(location=[46.151241, 14.995463], zoom_start=8)

# Weather locations: one red marker per location, with the location id as popup
for _, row in weather_locations.iterrows():
    marker = folium.Marker(
        location=[row['lat'], row['lng']],
        popup=row['id'],
        icon=folium.Icon(color='red'),
    )
    marker.add_to(slovenia_map)

# Aquifer stations (intended colour: blue) are not plotted yet: the loop below
# is disabled by wrapping it in a string literal. Note that the snippet reads
# 'lat'/'lng' columns and still uses a red icon.
'''for _, row in aquifer_stations.iterrows():
    folium.Marker(
        location=[row['lat'], row['lng']],
        popup=row['id'],
        icon=folium.Icon(color='red')
    ).add_to(slovenia_map)'''


# Write the finished map to a standalone HTML file
slovenia_map.save('slovenia_map.html')

### Combining the weather and aquifer data