In [26]:
import os, re
from dotenv import dotenv_values

from sqlalchemy import create_engine, text

from datetime import date,timedelta
import pandas as pd

import requests

## Configuration (SQL, WeatherAPI, station information)

In [27]:
# Settings from .env file (loaded once and shared by the SQL and WeatherAPI configuration)

settings = dotenv_values()

# SQL CONFIG

ROOT='..' # relative path to the root of the project

# The SSL file locations stored in the settings are resolved against ROOT.
db_uri = (
    f"mysql+pymysql://{settings['SQL_USER']}:{settings['SQL_PWD']}"
    f"@{settings['SQL_HOST']}/{settings['SQL_DB']}"
    f"?ssl_ca={os.path.join(ROOT,settings['SQL_SSL_CA'])}"
    f"&ssl_cert={os.path.join(ROOT,settings['SQL_SSL_CERT'])}"
    f"&ssl_key={os.path.join(ROOT,settings['SQL_SSL_KEY'])}"
    f"&ssl_check_hostname=false"
)

# `future=False` is deliberately not passed: it is already the default on
# SQLAlchemy 1.4 and SQLAlchemy 2.x refuses any value other than True.
engine = create_engine(db_uri,echo=False)

# WeatherAPI CONFIG

# HTTPS, because the API key travels in the query string of every request
url = 'https://api.weatherapi.com/v1/history.json'
key = settings['WA_KEY']

# Stations Dataframe (indexed by station_id)

stations = (
    pd.read_csv(os.path.join(ROOT,'data','stations','stations.csv'))
    .set_index('station_id')
)

## Weather History From WeatherAPI

In [28]:
def get_weatherapi_history(nb_of_day,station_id,station_coord):
    """
    Requests the daily weather history of the last `nb_of_day` days
    (ending yesterday, today excluded) from WeatherAPI.

    nb_of_day : number of past days to request
    station_id : the id of the concerned station, copied in the 'station_id' column
    station_coord : location sent to WeatherAPI as the `q` parameter

    Returns a DataFrame with one row per day returned by the API and the columns
    day, temperature, precipitation, maxwind, description, station_id.
    When nb_of_day <= 0 no request is sent and the DataFrame is empty.

    Raises RuntimeError when the API answers with an HTTP error status or
    when the answer holds no forecast days.
    """

    columns = [
        'day',
        'temperature',    # °C
        'precipitation',  # mm
        'maxwind',        # km/h
        'description'     # description
    ]

    forecasts = []

    if nb_of_day > 0 :

        # Only both ends of the period are needed by the API
        today = date.today()
        dt = (today - timedelta(nb_of_day)).strftime('%Y-%m-%d')
        end_dt = (today - timedelta(1)).strftime('%Y-%m-%d')

        params = { 'key':key, 'q':station_coord, 'dt':dt,'end_dt':end_dt }

        # A timeout prevents a stalled request from blocking the notebook forever
        response = requests.get(url,params=params,timeout=30)

        if not response.ok :
            # raise_for_status() is avoided on purpose: its message embeds the
            # full request URL, API key included.
            raise RuntimeError(
                f"WeatherAPI request failed for station {station_id} : "
                f"HTTP {response.status_code} - {response.text[:200]}"
            )

        jr = response.json()
        forecasts = (jr.get('forecast') or {}).get('forecastday')

        if forecasts is None :
            raise RuntimeError(
                f"WeatherAPI answer for station {station_id} holds no forecast days"
            )

    # One tuple per day, in the same order as `columns`
    history_list = [
        (
            f['date'],
            f['day']['avgtemp_c'],
            f['day']['totalprecip_mm'],
            f['day']['maxwind_kph'],
            f['day']['condition']['text'],
        )
        for f in forecasts
    ]

    history = pd.DataFrame(history_list,columns=columns)
    history['day'] = pd.to_datetime(history.day).dt.date # converts strings to dates
    history['station_id'] = station_id # adds station_id column to match SQL Table

    return history

## Known Days in our SQL Database

In [29]:
def get_sql_known_days(nb_of_days,station_id):
    """
    Lists the days already stored in the SQL `weather` table for one station,
    among the last `nb_of_days` days (ending yesterday, today excluded).

    nb_of_days : number of days to check in the db
    station_id : the id of the concerned station

    Returns the list of the `day` values of the matching rows
    (an empty list when nb_of_days <= 0, without querying the db).
    """

    if nb_of_days <= 0 :
        return []

    # Both ends of the checked period, as strings
    today = date.today()
    first_day = (today - timedelta(nb_of_days)).strftime("%Y-%m-%d")
    last_day = (today - timedelta(1)).strftime("%Y-%m-%d")

    query = """
                SELECT day FROM weather 
                WHERE station_id = :st_id AND day BETWEEN :first_day AND :last_day ;
            """

    params = { 'st_id':station_id, 'first_day':first_day, 'last_day':last_day }

    with engine.connect() as conn:
        result = conn.execute(text(query),params)
        # Rows are read while the connection is still open: a result should
        # not be consumed after its connection has been released.
        known_days = [ row.day for row in result ]

    return known_days

## Update the SQL DB

In [30]:
def update_sql_station(station_id,history,known_days):
    """
    Appends to the SQL `weather` table the rows of `history` whose day
    is not listed in `known_days`.

    station_id : the id of the concerned station (not read by this function)
    history : the weather history from weatherAPI
    known_days : list of days not to update, already in the DB
    """

    # Boolean mask flagging the rows whose day is already stored
    already_stored = history['day'].isin(known_days)

    # Only the remaining rows are new for the DB
    new_rows = history.loc[~already_stored]

    new_rows.to_sql(
        'weather',
        engine,
        if_exists='append',
        index=False,
    )
    

## Updating all the stations

In [35]:
# Checks every station and completes its missing days in the SQL weather table
for station_id in stations.index :

    nb_of_days = 25 # We check and update the last 25 days

    # "lat,lon" string used as the WeatherAPI location
    lat = stations.loc[station_id,'lat']
    lon = stations.loc[station_id,'lon']
    station_coord = f"{lat},{lon}"

    known_days = get_sql_known_days(nb_of_days,station_id)
    print(f'Station {station_id} : {len(known_days)}/{nb_of_days} days on the SQL DB')

    # Nothing to request when the whole period is already stored
    if len(known_days) >= nb_of_days :
        continue

    pad = 15*' ' # aligns the progress messages under the station line

    print(pad + 'requesting Weather API...',end='')
    history = get_weatherapi_history(nb_of_days,station_id,station_coord)
    print('done')

    print(pad + 'Updating SQL table...',end='')
    update_sql_station(station_id,history,known_days)
    print('done')

Station 6000990 : 25/25 days on the SQL DB
Station 6000998 : 25/25 days on the SQL DB
Station 6000993 : 25/25 days on the SQL DB
Station 6001000 : 25/25 days on the SQL DB
Station 6003600 : 25/25 days on the SQL DB
Station 6002500 : 25/25 days on the SQL DB
Station 6005500 : 25/25 days on the SQL DB
Station 6011000 : 25/25 days on the SQL DB
Station 6017050 : 25/25 days on the SQL DB
Station 6017070 : 25/25 days on the SQL DB
Station 6039500 : 25/25 days on the SQL DB
Station 6037400 : 25/25 days on the SQL DB
Station 6053800 : 25/25 days on the SQL DB
Station 6045800 : 25/25 days on the SQL DB
Station 6810010 : 25/25 days on the SQL DB
Station 6059500 : 25/25 days on the SQL DB
