In [39]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import requests

In [40]:
# Study period (inclusive bounds, ISO "YYYY-MM-DD" strings) shared by the
# weather download and the holiday calendar below.
start_date = "2013-06-16"
end_date = "2021-05-13"

In [41]:
def get_weather_data_open_meteo(lat, lon, start_date, end_date, timeout=30):
    """Fetch daily historical weather from the Open-Meteo archive API.

    Parameters
    ----------
    lat, lon : float
        Sent as the ``latitude`` / ``longitude`` query parameters.
    start_date, end_date : str
        Sent unchanged as the ``start_date`` / ``end_date`` query parameters.
    timeout : float, optional
        Seconds ``requests`` waits for the server before raising, so a
        stalled connection cannot hang the notebook forever (default 30).

    Returns
    -------
    pandas.DataFrame or None
        One row per entry of the response's ``daily`` block, with the
        ``time`` column parsed to datetimes and renamed to ``date``.
        ``None`` (after printing an error) when the response is not JSON
        or contains no ``daily`` key.
    """
    # Base URL for Open-Meteo historical data API
    base_url = 'https://archive-api.open-meteo.com/v1/archive'

    # Daily variables requested from the API
    daily_variables = [
        'temperature_2m_max',
        'temperature_2m_min',
        'temperature_2m_mean',
        'precipitation_sum',
        'wind_speed_10m_max',
        'shortwave_radiation_sum',
        'et0_fao_evapotranspiration',
    ]

    params = {
        'latitude': lat,
        'longitude': lon,
        'start_date': start_date,
        'end_date': end_date,
        'daily': daily_variables,
        'timezone': 'Asia/Kathmandu'
    }

    response = requests.get(base_url, params=params, timeout=timeout)

    # A non-JSON body (e.g. a proxy error page) is reported the same way as
    # any other failed retrieval instead of surfacing a decode traceback.
    try:
        data = response.json()
    except ValueError:
        data = None

    if isinstance(data, dict) and 'daily' in data:
        df = pd.DataFrame(data['daily'])
        df['time'] = pd.to_datetime(df['time'])
        return df.rename(columns={'time': 'date'})

    # NOTE(review): the API appears to describe failures in a 'reason' field;
    # it is echoed when present so the cause is visible - confirm against docs.
    reason = data.get('reason') if isinstance(data, dict) else None
    detail = f' Reason: {reason}' if reason else ''
    print(f'Error: Could not retrieve weather data. (HTTP {response.status_code}){detail}')
    return None
    
# Coordinates of Kathmandu, Nepal (decimal degrees)
latitude = 27.7172
longitude = 85.3240

# Fetch the daily weather for the configured study period
weather_df = get_weather_data_open_meteo(latitude, longitude, start_date, end_date)

# The fetcher reports failure by returning None; stop with a clear message
# rather than an AttributeError from calling .set_index on None.
if weather_df is None:
    raise RuntimeError(
        'Weather data could not be retrieved from Open-Meteo; '
        'see the error message printed by get_weather_data_open_meteo.'
    )

# Index by date (rebinding instead of mutating in place)
weather_df = weather_df.set_index('date')

# Display the resulting DataFrame
weather_df.head()

Unnamed: 0_level_0,temperature_2m_max,temperature_2m_min,temperature_2m_mean,precipitation_sum,wind_speed_10m_max,shortwave_radiation_sum,et0_fao_evapotranspiration
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-06-16,22.0,18.8,20.4,41.7,7.6,5.65,1.27
2013-06-17,24.0,19.2,21.4,9.6,6.4,16.08,3.14
2013-06-18,24.9,19.4,21.7,11.0,7.4,14.58,2.99
2013-06-19,22.0,19.2,20.3,100.1,5.4,3.96,0.97
2013-06-20,25.1,17.7,21.0,3.4,6.6,19.9,3.92


In [42]:
# Holiday calendar as "day month" strings; every entry is applied to each
# year of the study period. Repeated entries are harmless because flagging
# a date twice yields the same result.
dates = [
    "11 Jan", "15 Jan", "30 Jan", "10 Feb", "10 Feb", "19 Feb", "8 Mar", "8 Mar",
    "8 Apr", "10 Apr", "13 Apr", "16 Apr", "24 Apr", "1 May", "23 May", "29 May",
    "17 Jun", "19 Aug", "20 Aug", "26 Aug", "26 Aug", "6 Sep", "7 Sep", "8 Sep",
    "17 Sep", "19 Sep", "3 Oct", "10 Oct", "11 Oct", "12 Oct", "13 Oct", "14 Oct",
    "15 Oct", "17 Oct", "1 Nov", "1 Nov", "2 Nov", "3 Nov", "7 Nov", "15 Nov",
    "15 Dec", "25 Dec", "30 Dec"
]

# Parse the period bounds once and compare timestamps, not strings:
# str(Timestamp) is "YYYY-MM-DD 00:00:00", which sorts after "YYYY-MM-DD",
# so a string comparison would drop a holiday falling exactly on end_date.
range_start = pd.Timestamp(start_date)
range_end = pd.Timestamp(end_date)

# Expand the calendar over every year touched by the study period.
# errors='coerce' turns an unparseable combination into NaT.
candidate_dates = [
    pd.to_datetime(f"{day_month} {year}", format="%d %b %Y", errors='coerce')
    for year in range(range_start.year, range_end.year + 1)
    for day_month in dates
]

# Keep only the dates inside the inclusive period; any comparison with NaT
# is False, so coerced failures are filtered out here as well.
date_series = pd.Series(
    [day for day in candidate_dates if range_start <= day <= range_end]
).sort_values()

# One row per calendar day of the study period
date_index = pd.date_range(start=start_date, end=end_date, freq='D')
holiday_df = pd.DataFrame(index=date_index)

# Holiday flag: 1 when the day is in the holiday list, otherwise 0
holiday_df['Holiday'] = holiday_df.index.isin(date_series).astype('int64')

holiday_df.index.name = 'date'
# Display the DataFrame
holiday_df.tail(15)

Unnamed: 0_level_0,Holiday
date,Unnamed: 1_level_1
2021-04-29,0
2021-04-30,0
2021-05-01,1
2021-05-02,0
2021-05-03,0
2021-05-04,0
2021-05-05,0
2021-05-06,0
2021-05-07,0
2021-05-08,0


## **Merge Data**

In [45]:
# Inner-join holiday flags with weather on the shared date index (only dates
# present in both frames survive), then turn the index back into a column.
merged_df = holiday_df.join(weather_df, how='inner').reset_index()

# Missing-value count per column
merged_df.isna().sum()

date                          0
Holiday                       0
temperature_2m_max            0
temperature_2m_min            0
temperature_2m_mean           0
precipitation_sum             0
wind_speed_10m_max            0
shortwave_radiation_sum       0
et0_fao_evapotranspiration    0
dtype: int64

In [46]:
# Write the merged feature table, creating the target folder first so the
# export does not fail when the directory does not exist yet.
features_path = Path("../data/processed/features.csv")
features_path.parent.mkdir(parents=True, exist_ok=True)
merged_df.to_csv(features_path, index=False)