In [None]:
import requests
import os
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# --- Run parameters -------------------------------------------------------
# The trailing "#@param" markers are Colab form annotations; keep them on the
# same line as the assignment so the form widgets keep working.
date_from = '2020-01-01' #@param {type:"date"}
date_to = "2021-01-01" #@param {type:"date"}
path_to_read = "La Molina" #@param ["La Molina", "San Ramon"]
latitude = -12.08 #@param {type:"number"}
longitude = -76.95 #@param {type:"number"}


# --- Station observations -------------------------------------------------
# Any selection other than "La Molina" falls back to the San Ramon workbook.
if path_to_read == "La Molina":
    station_df = pd.read_excel("/content/01_Meteorology data_La Molina CIP station 2013-2022.xlsx")
else:
    station_df = pd.read_excel("/content/01_Meteorology data_San Ramon CIP station 2019-2022.xlsx")

# --- WeatherAPI credential ------------------------------------------------
# Prefer the WEATHER_API_KEY environment variable so a real key never has to
# be pasted into (and saved with) the notebook. The literal placeholder is
# kept as the fallback so the notebook behaves as before when it is unset.
api_key = os.getenv("WEATHER_API_KEY", "api_key")

In [None]:
# Function to fetch data from Open-Meteo
def fetch_weather_data_open_meteo(latitude, longitude, date_from, date_to, timeout=60):
    """
    Fetch hourly temperature, relative humidity and rain from the Open-Meteo archive API.

    Parameters:
        latitude (float): Latitude of the location.
        longitude (float): Longitude of the location.
        date_from (str): Value sent as the API's ``start_date`` parameter.
        date_to (str): Value sent as the API's ``end_date`` parameter.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        dict | None: The decoded JSON response, or None if the request failed,
        timed out, returned an HTTP error status, or the body was not valid JSON.
        Failures are reported with ``print`` rather than raised.
    """
    base_url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": date_from,
        "end_date": date_to,
        "hourly": "temperature_2m,relative_humidity_2m,rain",
        # NOTE(review): timestamps are requested in UTC. If the station and
        # WeatherAPI series are in local time, hour-of-day comparisons will be
        # shifted relative to this source -- confirm before interpreting plots.
        "timezone": "UTC"
    }
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions where
        # the decode error is not a RequestException subclass.
        print(f"Error fetching data from Open-Meteo: {e}")
        return None

# Function to fetch data from WeatherAPI
def fetch_weather_data_weatherapi(latitude, longitude, start_date, end_date, timeout=30):
    """
    Fetch weather data from WeatherAPI for a given latitude, longitude, and date range.

    One request is issued per calendar day, from ``start_date`` through
    ``end_date`` inclusive. Days whose request fails are reported with
    ``print`` and skipped, so the returned list may be shorter than the range.

    The API key is read from the module-level ``api_key`` variable.

    Parameters:
        latitude (float): Latitude of the location.
        longitude (float): Longitude of the location.
        start_date (str): Start date in 'YYYY-MM-DD' format.
        end_date (str): End date in 'YYYY-MM-DD' format.
        timeout (float): Seconds to wait for each request before giving up.

    Returns:
        list | None: A list of JSON responses, one per successfully fetched
        date, or None when no API key is configured.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not 'YYYY-MM-DD'.
    """
    if not api_key:
        print("API key for WeatherAPI is not set.")
        return None

    # HTTPS: the API key travels in the query string and must not be sent in clear text.
    base_url = "https://api.weatherapi.com/v1/history.json"
    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.strptime(end_date, '%Y-%m-%d')
    all_data = []

    # A single Session reuses the underlying connection across the per-day
    # requests instead of opening a new one for every date.
    with requests.Session() as session:
        current_date = first_day
        while current_date <= last_day:
            date_str = current_date.strftime('%Y-%m-%d')
            params = {
                "key": api_key,
                "q": f"{latitude},{longitude}",
                "dt": date_str
            }
            try:
                response = session.get(base_url, params=params, timeout=timeout)
                response.raise_for_status()
                all_data.append(response.json())
            except (requests.RequestException, ValueError) as e:
                # Best effort: log the failing day and carry on with the next one.
                print(f"Error fetching data from WeatherAPI for {date_str}: {e}")
            current_date += timedelta(days=1)

    return all_data

def process_open_meteo_data(data):
    """
    Turn an Open-Meteo JSON response into a time-indexed DataFrame.

    Parameters:
        data (dict | None): Decoded response; its 'hourly' entry is used to
            build the frame and must contain a 'time' field.

    Returns:
        pd.DataFrame: Frame indexed by the parsed 'time' values, with the
        temperature, humidity and rain columns renamed to carry an
        '_OpenMeteo' suffix. An empty DataFrame is returned when ``data`` is
        falsy or has no 'hourly' entry.
    """
    has_hourly = bool(data) and 'hourly' in data
    if not has_hourly:
        return pd.DataFrame()

    column_names = {
        'temperature_2m': 'Temperature_OpenMeteo',
        'relative_humidity_2m': 'Humidity_OpenMeteo',
        'rain': 'Rain_OpenMeteo'
    }
    hourly = pd.DataFrame(data['hourly'])
    hourly['time'] = pd.to_datetime(hourly['time'])
    return hourly.set_index('time').rename(columns=column_names)

def process_weatherapi_data(data_list):
    """
    Process a list of weather data JSON responses into a single DataFrame.

    For each response the hourly records of the first 'forecastday' entry are
    used. The per-day frames are concatenated, duplicate timestamps are
    dropped (first occurrence wins), and the result is reindexed onto a
    gap-free hourly range between the earliest and latest timestamp.
    Temperature and humidity gaps are filled by time-based interpolation;
    rain gaps are filled with 0.

    Parameters:
        data_list (list): List of JSON responses from WeatherAPI.

    Returns:
        pd.DataFrame: DataFrame containing processed weather data, or an empty
        DataFrame when ``data_list`` is empty/None or no response contained
        hourly data.
    """
    if not data_list:
        return pd.DataFrame()

    all_dfs = []
    for data in data_list:
        # Tolerate responses where 'forecast', 'forecastday' or 'hour' is
        # absent or empty instead of raising KeyError/IndexError.
        forecast_days = (data.get('forecast') or {}).get('forecastday') or []
        hourly_data = forecast_days[0].get('hour') if forecast_days else None
        if not hourly_data:
            print("Warning: 'forecast' data missing in response.")
            continue

        df = pd.DataFrame(hourly_data)
        df['time'] = pd.to_datetime(df['time'])
        df = df.set_index('time').rename(columns={
            'temp_c': 'Temperature_WeatherAPI',
            'humidity': 'Humidity_WeatherAPI',
            'precip_mm': 'Rain_WeatherAPI'
        })
        all_dfs.append(df)

    if not all_dfs:
        return pd.DataFrame()

    # Concatenate all daily DataFrames
    combined_df = pd.concat(all_dfs)

    # Handle overlapping date ranges by removing duplicate indices, keeping the first occurrence
    combined_df = combined_df[~combined_df.index.duplicated(keep='first')]

    # Ensure consistent hourly data by reindexing. A Timedelta is used for the
    # step because the 'H' string alias is deprecated in recent pandas.
    full_time_range = pd.date_range(
        start=combined_df.index.min(),
        end=combined_df.index.max(),
        freq=pd.Timedelta(hours=1),
    )
    combined_df = combined_df.reindex(full_time_range)

    # Handle missing data (e.g., due to inconsistent hourly data).
    # Results are assigned back explicitly: calling an inplace method on
    # `combined_df[col]` acts on a temporary object and does not update the
    # frame under pandas Copy-on-Write.
    for column in ('Temperature_WeatherAPI', 'Humidity_WeatherAPI'):
        combined_df[column] = combined_df[column].interpolate(method='time')
    # Assuming missing rain data means no rain
    combined_df['Rain_WeatherAPI'] = combined_df['Rain_WeatherAPI'].fillna(0)

    return combined_df

def _hourly_stats_by_month(series):
    """Return (mean, std) tables of `series`: one row per hour of day, one column per month."""
    months = series.index.month.rename('Month')
    hours = series.index.hour.rename('Hour')
    grouped = series.groupby([months, hours])
    return grouped.mean().unstack(level=0), grouped.std().unstack(level=0)


def plot_comparison(df_combined, variable, ylabel, df_station, variable_station):
    """
    Plots monthly trends with hourly averages and standard deviations for two data sources and a station.
    Each month is displayed in a separate subplot (arranged from January to December).

    The input frames are not modified: numeric coercion and NaN filtering are
    done on local copies.

    Parameters:
    - df_combined: DataFrame with a datetime index containing the columns
      '<variable>_OpenMeteo' and '<variable>_WeatherAPI'. Rows where either
      column is missing or non-numeric are excluded for both sources.
    - variable: String representing the variable name to compare (e.g., 'Temperature').
    - ylabel: Label for the Y-axis (e.g., 'Temperature (°C)').
    - df_station: DataFrame with a datetime index containing station data for comparison.
    - variable_station: String representing the column name for the station variable.

    Returns:
    None
    """
    col_open_meteo = f'{variable}_OpenMeteo'
    col_weatherapi = f'{variable}_WeatherAPI'

    # Ensure relevant columns are numeric, on a copy so the caller's frame is untouched
    api_values = df_combined[[col_open_meteo, col_weatherapi]].copy()
    api_values[col_open_meteo] = pd.to_numeric(api_values[col_open_meteo], errors='coerce')
    api_values[col_weatherapi] = pd.to_numeric(api_values[col_weatherapi], errors='coerce')
    station_values = pd.to_numeric(df_station[variable_station], errors='coerce')

    # Drop rows with NaN values (for the API frame: NaN in either source)
    api_values = api_values.dropna()
    station_values = station_values.dropna()

    # Hourly mean / standard deviation per month for each source
    mean_open_meteo, std_open_meteo = _hourly_stats_by_month(api_values[col_open_meteo])
    mean_weatherapi, std_weatherapi = _hourly_stats_by_month(api_values[col_weatherapi])
    mean_station, std_station = _hourly_stats_by_month(station_values)

    # Create subplots
    fig, axes = plt.subplots(4, 3, figsize=(15, 12), sharex=True, sharey=True)
    fig.suptitle(f'Monthly {ylabel} Trends by Hour (with Standard Deviation)', fontsize=16)

    for month in range(1, 13):
        ax = axes[(month - 1) // 3, (month - 1) % 3]  # Calculate subplot position

        if month in mean_open_meteo.columns:
            ax.errorbar(mean_open_meteo.index, mean_open_meteo[month], yerr=std_open_meteo[month],
                        fmt='-o', label='Open-Meteo', capsize=3)
        if month in mean_weatherapi.columns:
            ax.errorbar(mean_weatherapi.index, mean_weatherapi[month], yerr=std_weatherapi[month],
                        fmt='-x', label='WeatherAPI', capsize=3)
        if month in mean_station.columns:
            ax.errorbar(mean_station.index, mean_station[month], yerr=std_station[month],
                        fmt='-*', label='Station', capsize=3)

        ax.set_title(f'Month {month}')
        ax.grid(True)
        ax.set_xticks(range(0, 24, 3))  # Show every 3rd hour for clarity
        ax.set_xlabel('Hour of the Day')
        ax.set_ylabel(ylabel)

    # Add a single legend for all plots. Entries are gathered from every
    # subplot (first handle per label) so a source is still listed when the
    # last subplot happens to have no data for it.
    legend_entries = {}
    for subplot in axes.flat:
        for handle, label in zip(*subplot.get_legend_handles_labels()):
            legend_entries.setdefault(label, handle)
    fig.legend(list(legend_entries.values()), list(legend_entries.keys()),
               loc='upper center', ncol=3)

    plt.tight_layout(rect=[0, 0, 1, 0.96])  # Adjust layout to fit the title and legend
    plt.show()

In [None]:
# --- Build a DatetimeIndex for the station observations -------------------
# 'Time' values made of one or two digits are treated as bare hours and
# expanded to "HH:00:00"; every other value is passed through unchanged.
station_df['Time'] = station_df['Time'].astype(str)
station_df['Time'] = station_df['Time'].apply(
    lambda x: x.zfill(2) + ":00:00" if x.isdigit() and len(x) <= 2 else x
)

station_df['DateTime'] = pd.to_datetime(
    station_df["Year"].astype(str) + "-" +
    station_df['Month'].astype(str).str.zfill(2) + "-" +
    station_df['Day'].astype(str).str.zfill(2) + " " +
    station_df['Time'],
    errors='coerce'  # Handle errors by setting invalid values to NaT
)

# Keep 'DateTime' available both as the index and as a regular column.
station_df.set_index('DateTime', drop=False, inplace=True)

# --- Restrict the station data to the requested period --------------------
# The WeatherAPI fetch requests every day through `date_to` inclusive, so the
# whole of `date_to` is kept here as well (not just its midnight timestamp).
# Rows whose timestamp could not be parsed (NaT) never satisfy the comparison
# and are therefore excluded.
date_from_dt = pd.to_datetime(date_from)
date_to_dt = pd.to_datetime(date_to)
end_exclusive = date_to_dt + pd.Timedelta(days=1)
in_requested_range = (station_df['DateTime'] >= date_from_dt) & (station_df['DateTime'] < end_exclusive)
# .copy() so later column assignments on the subset do not touch station_df.
station_df_subset = station_df.loc[in_requested_range].copy()

# Fetch data from both APIs
data_open_meteo = fetch_weather_data_open_meteo(latitude, longitude, date_from, date_to)
data_weatherapi = fetch_weather_data_weatherapi(latitude, longitude, date_from, date_to)

# Process the data
df_open_meteo = process_open_meteo_data(data_open_meteo)
df_weatherapi = process_weatherapi_data(data_weatherapi)

# Merge the dataframes on time index
df_combined = pd.merge(df_open_meteo, df_weatherapi, left_index=True, right_index=True, how='outer')
df_combined


In [None]:
# Compare against the station observations restricted to the requested
# period (station_df_subset); the unfiltered station_df spans every year in
# the spreadsheet and would not match the date range of the API data.
plot_comparison(df_combined, 'Temperature', 'Temperature (°C)', station_df_subset, "Ta")