In [1]:
# Load IPython's autoreload extension and enable mode 2 so edited modules are picked up live.
%load_ext autoreload

%autoreload 2

In [2]:

        #end_time is the next 24th full hour
        #end_time = ((datetime.now() + relativedelta(days=1)).replace(minute=0, second=0, microsecond=0)+timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M')
        
        # create a list of all full hours:
        #full_hours = []
        #start_time = datetime.now().replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)
        #while start_time < datetime.strptime(end_time, '%Y-%m-%dT%H:%M'):
        #    full_hours.append(start_time.strftime('%Y-%m-%dT%H:%M'))
        #    start_time += timedelta(hours=1)

In [1]:
import pandas as pd
import requests
from datetime import date
from geopy.geocoders import Nominatim
from datetime import datetime,timedelta


# TODO: rewrite WeatherForecast to accept a list of cities (only if we need to improve it).

class WeatherForecast:
    """Hourly weather forecast for a single city, fetched from the Open-Meteo API.

    Typical use is ``WeatherForecast("Amiens").rename_columns()``, which geocodes
    the city, downloads a 2-day hourly forecast, keeps the next 24 hours and
    relabels the columns with the names used by the historical dataset.
    """

    _FORECAST_URL = 'https://api.open-meteo.com/v1/forecast'

    # Seconds to wait for the forecast API before giving up (requests has no
    # default timeout and would otherwise block indefinitely).
    _REQUEST_TIMEOUT = 30

    # Hourly variables requested from the forecast endpoint.
    _FORECAST_PARAMS = ['temperature_2m','relativehumidity_2m','dewpoint_2m',
                        'apparent_temperature','pressure_msl','surface_pressure',
                        'precipitation','rain','snowfall','cloudcover',
                        'cloudcover_low','cloudcover_mid','cloudcover_high',
                        'shortwave_radiation','direct_radiation','direct_normal_irradiance',
                        'diffuse_radiation','windspeed_10m','windspeed_120m',
                        'winddirection_10m','winddirection_120m','windgusts_10m',
                        'et0_fao_evapotranspiration','weathercode','vapor_pressure_deficit',
                        'soil_temperature_0cm','soil_temperature_6cm',
                        'soil_temperature_18cm','soil_temperature_54cm',
                        'soil_moisture_0_1cm','soil_moisture_3_9cm','soil_moisture_9_27cm',
                        'soil_moisture_27_81cm']

    # Column names of the historical dataset, aligned position-by-position with
    # _FORECAST_PARAMS (the i-th forecast variable is relabelled to the i-th name).
    _HISTORY_COLUMNS = ['temperature_2m','relativehumidity_2m','dewpoint_2m',
                        'apparent_temperature','pressure_msl','surface_pressure',
                        'precipitation','rain','snowfall','cloudcover',
                        'cloudcover_low','cloudcover_mid','cloudcover_high',
                        'shortwave_radiation','direct_radiation','direct_normal_irradiance',
                        'diffuse_radiation','windspeed_10m','windspeed_100m',
                        'winddirection_10m','winddirection_100m','windgusts_10m',
                        'et0_fao_evapotranspiration','weathercode','vapor_pressure_deficit',
                        'soil_temperature_0_to_7cm','soil_temperature_7_to_28cm',
                        'soil_temperature_28_to_100cm','soil_temperature_100_to_255cm',
                        'soil_moisture_0_to_7cm','soil_moisture_7_to_28cm',
                        'soil_moisture_28_to_100cm','soil_moisture_100_to_255cm']

    def __init__(self, city:str):
        """Store the name of the city to forecast; no network access happens here."""
        self.city = city

    def get_city_lonlan(self):
        '''
        Geocode ``self.city`` with Nominatim.

        Returns a dictionary ``{city: [latitude, longitude]}``. When the geocoder
        finds nothing, a message is printed and an empty dictionary is returned.
        '''
        geolocator = Nominatim(user_agent="my_app")
        coordinates = {}

        location = geolocator.geocode(self.city)
        if location:
            coordinates[self.city] = [location.latitude, location.longitude]
        else:
            print(f"Could not retrieve coordinates for {self.city}")

        return coordinates

    def _fetch_forecast(self, lat, lon):
        """Download the raw 2-day hourly forecast JSON for the given coordinates."""
        response = requests.get(self._FORECAST_URL,
                                params={'latitude': lat,
                                        'longitude': lon,
                                        'forecast_days': 2,
                                        'hourly': self._FORECAST_PARAMS,
                                        'timezone': 'auto'},
                                timeout=self._REQUEST_TIMEOUT)
        # Surface HTTP errors here instead of failing later on a missing 'hourly' key.
        response.raise_for_status()
        return response.json()

    def get_weather_forecast(self):
        '''
        Return the hourly forecast of ``self.city`` for the next 24 hours.

        The result is a dataframe indexed by ``time`` with one column per
        requested weather variable, using the forecast API's own names.

        Raises ``KeyError`` when the city could not be geocoded.

        Side effect: sets the global pandas display float format to one decimal.
        '''
        coordinates = self.get_city_lonlan()
        if self.city not in coordinates:
            raise KeyError(f"Could not retrieve coordinates for {self.city}")
        lat, lon = coordinates[self.city]

        payload = self._fetch_forecast(lat, lon)

        forecast = pd.DataFrame(payload['hourly'],
                                columns=['time'] + self._FORECAST_PARAMS)
        # The API sends ISO 8601 timestamps with an hour component, so let pandas
        # infer the format rather than forcing a date-only pattern.
        forecast['time'] = pd.to_datetime(forecast['time'])

        # 'timezone': 'auto' makes the API answer in the city's local time, so the
        # window must start at "now" in that zone, not in the machine's zone.
        utc_offset = payload.get('utc_offset_seconds')
        if utc_offset is None:
            # No offset in the response: fall back to the machine's local clock.
            window_start = pd.Timestamp.now()
        else:
            utc_now = pd.Timestamp.now(tz='UTC').tz_localize(None)
            window_start = utc_now + pd.Timedelta(seconds=utc_offset)
        window_end = window_start + pd.Timedelta(hours=24)

        in_window = (forecast['time'] >= window_start) & (forecast['time'] < window_end)
        forecast = forecast[in_window].set_index('time')

        # Show floats with one decimal (global pandas display option).
        pd.options.display.float_format = "{:,.1f}".format

        return forecast

    def rename_columns(self):
        """Return the 24-hour forecast with the historical dataset's column names.

        Columns are renamed by name using the _FORECAST_PARAMS -> _HISTORY_COLUMNS
        pairing; for example the 120 m wind columns become the 100 m ones and the
        soil columns take the depth-range names.
        """
        forecast = self.get_weather_forecast()
        name_map = dict(zip(self._FORECAST_PARAMS, self._HISTORY_COLUMNS))
        return forecast.rename(columns=name_map)


In [2]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler, RobustScaler, PowerTransformer
import pandas as pd
import numpy as np

class FeaturePreprocessing:
    """Turn an hourly weather dataframe into scaled, encoded features.

    The public methods form a chain in which each step calls the previous one:
    ``get_wind_components`` -> ``feature_processing`` -> ``get_season`` ->
    ``get_weekday`` -> ``get_period_day``. Call ``get_period_day()`` to get
    the complete feature set.

    Parameters
    ----------
    df : pandas.DataFrame
        Weather data whose index holds timestamps and whose columns use the
        historical-data names (``windspeed_100m``, ``soil_moisture_0_to_7cm``, ...).
    target : optional
        Column label in ``df``. When given, that column is kept unscaled and
        merged back onto the features by ``get_period_day``.
    """

    def __init__(self,df,target=None):
        self.df = df
        self.target = target
        if target is not None:
            # Keep the target values themselves (a Series), not just the label.
            self.target = df[target]

    @staticmethod
    def _season_label(day_of_year):
        """Map a day of the year to a northern-hemisphere season name."""
        if 80 <= day_of_year < 172:
            return 'Spring'
        if 172 <= day_of_year < 264:
            return 'Summer'
        if 264 <= day_of_year < 355:
            return 'Fall'
        return 'Winter'

    @staticmethod
    def _period_label(hour):
        """Map an hour (0-23) to 'Morning' (4-11), 'Afternoon' (12-19) or 'Night'."""
        if 4 <= hour <= 11:
            return 'Morning'
        if 12 <= hour <= 19:
            return 'Afternoon'
        return 'Night'

    @staticmethod
    def _with_dummies(frame, labels, prefix):
        """Return ``frame`` joined with one indicator column per distinct label."""
        label_series = pd.Series(list(labels), index=frame.index, dtype=object)
        # Only labels present in the data get a column (e.g. a single season).
        return frame.join(pd.get_dummies(label_series, prefix=prefix))

    def get_wind_components(self):
        """Replace wind speed/direction at 10 m and 100 m by x/y components.

        Works on a copy, so ``self.df`` (and the caller's dataframe) keep their
        original columns and the method can be called repeatedly.

        Returns a dataframe without ``windspeed_*``/``winddirection_*`` for both
        heights and with ``Wx_10``, ``Wy_10``, ``Wx_100``, ``Wy_100`` appended.
        """
        frame = self.df.copy()

        for height in ('10', '100'):
            # Directions are in degrees; convert to radians for cos/sin.
            direction_rad = frame.pop(f'winddirection_{height}m') * np.pi / 180
            speed = frame.pop(f'windspeed_{height}m')
            frame[f'Wx_{height}'] = speed * np.cos(direction_rad)
            frame[f'Wy_{height}'] = speed * np.sin(direction_rad)

        return frame

    def feature_processing(self):
        """Scale the numeric weather features.

        Applies StandardScaler, RobustScaler and PowerTransformer to three fixed
        column groups; every other column (including any target) is dropped by
        the column transformer. Output columns carry the transformer prefix,
        e.g. ``standardscaler__temperature_2m``, and the original index is kept.
        """
        unprocessed_dataframe = self.get_wind_components()
        columns_for_standardscaler = ['temperature_2m','dewpoint_2m',
                                    'apparent_temperature','pressure_msl','surface_pressure',
                                    'Wx_10','Wx_100','Wy_10',
                                    'Wy_100','windgusts_10m','soil_temperature_0_to_7cm',
                                    'soil_temperature_7_to_28cm','soil_temperature_28_to_100cm',
                                    'soil_temperature_100_to_255cm','soil_moisture_0_to_7cm',
                                    'soil_moisture_7_to_28cm','soil_moisture_28_to_100cm',
                                    'soil_moisture_100_to_255cm']

        columns_for_robustscaler = ['cloudcover','cloudcover_low',
                                    'cloudcover_mid','cloudcover_high']

        columns_for_powertransformer = ['relativehumidity_2m','precipitation','rain',
                                        'snowfall', 'shortwave_radiation','direct_radiation',
                                        'direct_normal_irradiance','diffuse_radiation',
                                        'et0_fao_evapotranspiration','vapor_pressure_deficit']

        # NOTE(review): the scalers are fitted on the very frame being transformed,
        # so the statistics come from this data only. Confirm this is intended when
        # the frame is a short forecast rather than the training set.
        scaler = make_column_transformer(
            (StandardScaler(),columns_for_standardscaler),
            (RobustScaler(),columns_for_robustscaler),
            (PowerTransformer(),columns_for_powertransformer))

        scaled_data = scaler.fit_transform(unprocessed_dataframe)
        return pd.DataFrame(scaled_data,
                            columns=scaler.get_feature_names_out(),
                            index=unprocessed_dataframe.index)

    def get_season(self):
        """
        Add one-hot ``season_*`` columns to the scaled features.

        The season of each row is derived from the day of the year of that row's
        own timestamp, using northern-hemisphere day ranges.
        """
        processed_dataframe = self.feature_processing()
        days_of_year = pd.DatetimeIndex(processed_dataframe.index).dayofyear
        seasons = [self._season_label(day) for day in days_of_year]
        return self._with_dummies(processed_dataframe, seasons, 'season')

    def get_weekday(self):
        """Add one-hot ``weekday_Weekday`` / ``weekday_Weekend`` columns (Sat/Sun = weekend)."""
        processed_dataframe = self.get_season()
        weekdays = pd.DatetimeIndex(processed_dataframe.index).weekday
        labels = ['Weekday' if day < 5 else 'Weekend' for day in weekdays]
        return self._with_dummies(processed_dataframe, labels, 'weekday')

    def get_period_day(self):
        """Add one-hot ``period_*`` columns and, if a target was given, merge it back.

        Returns the full feature dataframe. When the instance was built with a
        target, the target values are merged onto the features on the index.
        """
        processed_dataframe = self.get_weekday()
        hours = pd.DatetimeIndex(processed_dataframe.index).hour
        periods = [self._period_label(hour) for hour in hours]
        processed_dataframe = self._with_dummies(processed_dataframe, periods, 'period')

        # self.target is a Series when set, so it must be tested with `is None`;
        # `== None` would compare element-wise and cannot be used in an `if`.
        if self.target is None:
            return processed_dataframe
        return pd.merge(processed_dataframe, self.target,
                        left_index=True, right_index=True)

In [3]:
# Build a forecast client for the city of Amiens.
weather_amiens = WeatherForecast("Amiens")

In [4]:
# Fetch the forecast and relabel its columns with the historical-data names.
renamed_columns = weather_amiens.rename_columns()

In [5]:
# Preview the first forecast row.
renamed_columns.head(1)

Unnamed: 0_level_0,temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,...,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-17 17:00:00,15.6,60,7.9,13.7,1006.5,1002.5,0.0,0.0,0.0,100,...,3,0.7,15.4,14.3,10.3,8.3,0.3,0.3,0.3,0.3


In [48]:
# Wrap the renamed forecast for feature engineering (no target column is passed).
process_data = FeaturePreprocessing(renamed_columns)

In [49]:
# Run the whole chain: wind components, scaling, then season/weekday/period dummies.
processed_forecast = process_data.get_period_day()

In [50]:
# Display the processed forecast.
processed_forecast

Unnamed: 0_level_0,standardscaler__temperature_2m,standardscaler__dewpoint_2m,standardscaler__apparent_temperature,standardscaler__pressure_msl,standardscaler__surface_pressure,standardscaler__Wx_10,standardscaler__Wx_100,standardscaler__Wy_10,standardscaler__Wy_100,standardscaler__windgusts_10m,...,powertransformer__direct_normal_irradiance,powertransformer__diffuse_radiation,powertransformer__et0_fao_evapotranspiration,powertransformer__vapor_pressure_deficit,season_Winter,weekday_Weekday,weekday_Weekend,period_Afternoon,period_Morning,period_Night
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-17 17:00:00,2.0,-0.7,2.0,-2.0,-1.9,0.3,1.4,-1.7,-1.0,0.6,...,1.4,1.2,1.5,1.9,1,1,0,1,0,0
2023-03-17 18:00:00,1.8,-0.3,1.9,-2.2,-2.1,0.2,1.1,-0.9,-0.4,0.4,...,1.2,0.9,0.9,1.7,1,1,0,1,0,0
2023-03-17 19:00:00,1.3,0.5,1.6,-2.0,-2.0,1.0,0.9,-0.5,-0.7,-0.4,...,0.8,0.4,0.0,1.1,1,1,0,1,0,0
2023-03-17 20:00:00,0.9,0.3,1.2,-1.4,-1.4,0.9,1.0,0.4,0.0,-1.0,...,-0.9,-1.0,-0.9,0.9,1,1,0,0,0,1
2023-03-17 21:00:00,0.3,1.4,0.5,-0.5,-0.5,-0.3,-0.1,0.2,-0.4,-0.8,...,-0.9,-1.0,-0.9,0.0,1,1,0,0,0,1
2023-03-17 22:00:00,0.3,2.4,0.6,-0.5,-0.5,0.3,0.6,-0.9,0.2,-0.6,...,-0.9,-1.0,-0.9,-0.3,1,1,0,0,0,1
2023-03-17 23:00:00,0.0,2.4,0.5,-0.8,-0.8,0.8,1.2,-0.5,-1.2,-0.9,...,-0.9,-1.0,-0.9,-0.7,1,1,0,0,0,1
2023-03-18 00:00:00,-0.1,1.4,0.2,-0.4,-0.4,0.8,0.6,-0.7,-0.9,-1.0,...,-0.9,-1.0,-0.9,-0.4,1,0,1,0,0,1
2023-03-18 01:00:00,-0.3,0.4,-0.1,0.3,0.3,0.7,0.2,-0.7,-1.2,-0.8,...,-0.9,-1.0,-0.9,-0.3,1,0,1,0,0,1
2023-03-18 02:00:00,-0.6,0.1,-0.3,0.7,0.7,0.9,0.7,0.1,-0.3,-0.8,...,-0.9,-1.0,-0.9,-0.6,1,0,1,0,0,1
