# Forecast

Open Weather One-call API documentation:
https://openweathermap.org/api/one-call-3

Response parameters:

lat Geographical coordinates of the location (latitude)

lon Geographical coordinates of the location (longitude)

timezone Timezone name for the requested location

timezone_offset Shift in seconds from UTC


data

data.dt Requested time, Unix, UTC

data.sunrise Sunrise time, Unix, UTC

data.sunset Sunset time, Unix, UTC

data.temp Temperature. Units – default: kelvin, metric: Celsius, imperial: Fahrenheit. How to change units used

data.feels_like Temperature. This accounts for the human perception of weather. Units – default: kelvin, metric: Celsius, imperial: Fahrenheit.

data.pressure Atmospheric pressure on the sea level, hPa

data.humidity Humidity, %

data.dew_point Atmospheric temperature (varying according to pressure and humidity) below which water droplets begin to condense and dew can form. Units – default: kelvin, metric: Celsius, imperial: Fahrenheit.

data.clouds Cloudiness, %

data.uvi (where available) UV index

data.visibility Average visibility, metres. The maximum value of the visibility is 10km

data.wind_speed Wind speed. Units – default: metre/sec, metric: metre/sec, imperial: miles/hour. How to change units used

data.wind_gust (where available) Wind gust. Wind speed. Units – default: metre/sec, metric: metre/sec, imperial: miles/hour. How to change units used

data.wind_deg Wind direction, degrees (meteorological)

data.weather

data.weather.id Weather condition id

data.weather.main Group of weather parameters (Rain, Snow, Extreme etc.)

data.weather.description Weather condition within the group (full list of weather conditions). Get the output in your language

data.weather.icon Weather icon id. How to get icons

data.rain (where available) Precipitation intensity, mm/hour

data.snow (where available) Snow intensity, mm/hour


In [11]:
import pandas as pd
import requests
import os
import csv

appid = os.environ["OPENWEATHER_API_KEY"]



# Using nearby strategic locations to forecast local weather 
position_dict = {
    'Cabella Ligure': (44.674764307997286, 9.09444094802635),
    'Pavia': (45.24107300209213, 9.099799444822875),
    'Piacenza': (45.058644269058696, 9.699800703799545),
    'Genova': (44.4243667310768, 8.960239025427146),
    'Asti': (44.93277995705124, 8.214539905740455),
    'Bardi': (44.63180230890582, 9.738377467960671),
    'La Spezia': (44.12963871565086, 9.815603836725689),
    'Finale Ligure': (44.173025909303284, 8.383861052691618)
}

date_intervals = dict.fromkeys(position_dict)



n_days = 998 # Number of days to predict




def get_daily_forecast(name):

    lat, lon = position_dict[name]

    r = requests.get(f'https://api.openweathermap.org/data/3.0/onecall?lat={lat}&lon={lon}&units=metric&appid={appid}')
    r_dict = dict(r.json())
    df_onecall = pd.DataFrame(r_dict['hourly'])
    df_daily = pd.DataFrame(r_dict['daily'])

    from operator import itemgetter
    for key in df_daily['temp'][0]:
        df_daily['temp_'+key] = df_daily['temp'].apply(itemgetter(key))

    def getting_through(x,key):
        return x[0][key]
    
    for key in df_daily['weather'][0][0]:
        df_daily['weather_'+key] = df_daily['weather'].apply(lambda x: getting_through(x,key))

    df_daily.index = pd.to_datetime(df_daily.dt,unit='s').round('D')
    df_daily.drop(columns=['temp','weather','dt'], inplace=True)

    today = pd.Timestamp.now().date()

    df_daily.to_csv(f'weather_data/{name}_{today}_forecast.csv')

    return df_daily


def prepare_df(rh):
    
    df = pd.DataFrame(dict(rh.json())['data'])

    def getting_through(x,key):
        return x[0][key]

    for key in df['weather'][0][0]:
        df['weather_'+key] = df['weather'].apply(lambda x: getting_through(x,key))

    df.set_index(pd.to_datetime(df['dt'], unit='s').dt.date, inplace = True)
    df.drop(columns=['weather', 'dt'], inplace=True)


    return df



# Getting historical weather.
# To implement: download specific seasons, or even better: the respective week/month in the previous years

def get_historical_weather(name='Cabella Ligure', n_days=999, interval=24*60*60, start:pd.Timestamp=None, rang=None):
    
    today =pd.Timestamp.now().round('D')
    if start is None:
        start = today
        
    midday_today = (start - pd.Timestamp("1970-01-01 12:00:00")) // pd.Timedelta("1s")
    lat, lon = position_dict[name]

    # Making the first line
    try:
        df = retrieve_downloaded_data(name)
        print('Found history')
    except:
        rh = requests.get(f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={midday_today}&appid={appid}')
        df = prepare_df(rh)

    existing_dates = df.index.date


#   Readability needs improvement here: Better to create a list of dts in the if/else and put the rh-concat lines outside 
    if rang is None:
        n = 0
        i = 0
        while n<n_days:
            dt = midday_today - interval*i 
            if pd.to_datetime(dt,unit='s').date() in existing_dates:
                i += 1
                continue
            rh = requests.get(f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={dt}&appid={appid}')
            r_df = prepare_df(rh)
            df = pd.concat([df, r_df], join='outer')
            i += 1
            n += 1
            print(f'Completed {pd.to_datetime(dt,unit="s").date()}', end='  ')
    else:
        dt_final = midday_today - interval*(n_days-1)
        rang = range(midday_today,dt_final,  -interval)    
        for dt in  rang:
            rh = requests.get(f'https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={dt}&appid={appid}')
            r_df = prepare_df(rh)
            df = pd.concat([df, r_df], join='outer')
            print(f'Completed {pd.to_datetime(dt,unit="s").date()}', end='  ')

    name_lowered = name.replace(' ','_').lower()
    df.to_csv(f'weather_data/{name}_{today.date()}_historical.csv')
    df = merge_to_history(df, name)

    return df

    
def get_data(n_days=1000//7, exclude:list = ['Cabella Ligure']):
    dfs = {name:0 for name in position_dict if name not in exclude}
    for name in dfs:
        dfs[name] = get_historical_weather(name=name, n_days=n_days)
        merge_to_history(dfs[name], name)
    return dfs


# Database-related functions

def merge_to_history(df, name):
    file_name = name.replace(' ','_').lower()+'_weather.csv'
    try:
        df_0 = pd.read_csv('weather_data/'+file_name)        
        df_0 = pd.concat([df,df_0]).drop_duplicates(subset='sunrise', keep='first')
        print('Found old file!')
    except:
        df_0 = df
        print('No history. A new file is created.')
    finally:
        df_0.to_csv('weather_data/'+file_name)

    return df_0
        
def retrieve_downloaded_data(name):
    df = pd.read_csv('weather_data/'+name.replace(' ','_').lower()+'_weather.csv', index_col=0, parse_dates=True) 
    return df


def get_retrieved_dates():
    global date_intervals
    for name in position_dict:
        df = retrieve_downloaded_data(name)
        date_intervals[name] = (str(df.index.sort_values()[0].date()), str(df.index.sort_values()[-1].date()))



In [None]:
# Get forecast

forecast = get_daily_forecast('Cabella Ligure')



In [None]:
get_data(n_days=1000//7 ,exclude=['Pavia', 'Cabella Ligure'])

# Download History

- 31/10: Cabella Ligure 900 days
- 01/11: All other cities 100 days


In [7]:
get_retrieved_dates()

date_intervals


{'Cabella Ligure': ('2020-04-07', '2022-11-01'),
 'Pavia': ('2022-07-26', '2022-11-01'),
 'Piacenza': ('2022-07-26', '2022-11-01'),
 'Genova': ('2022-07-21', '2022-11-01'),
 'Asti': ('2022-07-26', '2022-11-01'),
 'Bardi': ('2022-07-26', '2022-11-01'),
 'La Spezia': ('2022-07-26', '2022-11-01'),
 'Finale Ligure': ('2022-07-26', '2022-11-01')}

# ML

## Preprocessing

In [91]:
# To be documented


def make_wind_dummies(df):
    wind_dir = pd.get_dummies(pd.cut(df['wind_deg'], bins = 6)).set_index(df.index)
    wind_dir.columns = ['wind_NE', 'wind_N', 'wind_NW', 'wind_SW', 'wind_S', 'wind_SE']
    return pd.concat([df.drop(columns=['wind_deg']),wind_dir],axis=1)

def get_season_dummies(df):
    seasons = {1:'Spring', 2: 'Summer', 3: 'Autumn', 4:'Winter'}
    seasons_dummies = pd.get_dummies((pd.to_datetime(df.index).month%12 // 3 + 1).map(seasons)).set_index(df.index)
    return pd.concat([df,seasons_dummies], axis=1)

def is_rain_snow(df):
    df.rain = df.rain.isna()*1
    df.snow = df.snow.isna()*1
    return df


# In a future version, one should predict weather_main as a regression
def drop_columns(df):
    columns_to_drop = [ 'visibility', 'uvi', 'dew_point', 'pressure', 'humidity', 'sunrise', 'sunset', 'feels_like', 'weather_main', 'weather_description', 'weather_id', 'weather_icon', 'wind_gust']
    return df.drop(columns=columns_to_drop).fillna(0)

def preprocessing(df):
    functions = [drop_columns, is_rain_snow, make_wind_dummies, get_season_dummies]
    for foo in functions:
        df = foo(df)
    return df


In [92]:
preprocessing(df)


Unnamed: 0_level_0,temp,clouds,wind_speed,rain,snow,wind_NE,wind_N,wind_NW,wind_SW,wind_S,wind_SE,Autumn,Spring,Summer,Winter
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2022-10-30,295.87,99,1.03,0,0,0,0,0,0,1,0,0,0,0,1
2022-10-30,295.87,99,1.03,0,0,0,0,0,0,1,0,0,0,0,1
2022-10-29,297.00,3,1.13,0,0,0,0,0,0,0,1,0,0,0,1
2022-10-28,292.81,100,1.36,0,0,0,0,0,0,0,1,0,0,0,1
2022-10-27,292.82,20,1.45,0,0,0,0,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-05-19,292.09,20,4.10,0,0,0,0,0,0,0,1,0,0,1,0
2020-05-18,296.07,100,4.86,0,0,0,0,0,0,0,1,0,0,1,0
2020-05-17,293.49,87,2.90,0,0,0,0,0,0,0,1,0,0,1,0
2020-05-16,289.92,20,2.60,0,0,0,0,0,0,0,1,0,0,1,0


In [100]:
pd.options.plotting.backend='plotly'
df.temp.plot()