In [1]:
import requests
import pandas as pd
import time
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import copy
from tqdm import tqdm


In [2]:
# Load the table of Indian monitoring locations from the local CSV export.
df = pd.read_csv('/Users/user/Desktop/india_monitoring_locations_filtered.csv')

# Keep the rows whose `city` text contains "Guwahati" in any letter case;
# rows with a missing city are treated as non-matching (na=False).
df_guwahati = df.loc[df['city'].str.contains('Guwahati', case=False, na=False)]


In [3]:
# Display the rows matched by the Guwahati city filter.
df_guwahati

Unnamed: 0,id,name,city,country,latitude,longitude,parameters
171,6941,"Railway Colony, Guwahati - APCB","Railway Colony, Guwahati - APCB",IN,26.1445,91.7362,
293,10903,"Railway Colony, Guwahati - APCB","Railway Colony, Guwahati - APCB",IN,26.181742,91.78063,
339,42240,"Pan Bazaar, Guwahati - APCB","Pan Bazaar, Guwahati - APCB",IN,26.1875,91.744194,
452,361411,"IITG, Guwahati - PCBA","IITG, Guwahati - PCBA",IN,26.202864,91.700464,
495,3409360,"IITG, Guwahati - PCBA","IITG, Guwahati - PCBA",IN,26.202864,91.700464,
525,3409390,"LGBI Airport, Guwahati - PCBA","LGBI Airport, Guwahati - PCBA",IN,26.10887,91.589544,


In [4]:
# Restrict to the identifying columns and discard any station with a missing value.
guwahati_locations = df_guwahati[['id', 'name', 'latitude', 'longitude']].dropna()

# One record per station. The label is "<name>_<id>", so stations that share
# a name still get distinct labels.
city_data = [
    {
        "Station": station_name + "_" + str(station_id),
        "Latitude": lat,
        "Longitude": lon,
    }
    for station_id, station_name, lat, lon in zip(
        guwahati_locations['id'],
        guwahati_locations['name'],
        guwahati_locations['latitude'],
        guwahati_locations['longitude'],
    )
]

df_stations = pd.DataFrame(city_data)


In [5]:
import time
import requests
import pandas as pd
from tqdm import tqdm

def get_historical_weather(latitude, longitude, start_date, end_date,
                           max_retries=None, retry_wait=60, timeout=30):
    """
    Fetches historical daily weather data for a given location and date range
    from the Open-Meteo archive API.

    Transient failures (HTTP 429, HTTP 5xx, connection errors, timeouts) are
    retried after sleeping `retry_wait` seconds. Any other HTTP 4xx response is
    NOT retried, because repeating the same invalid request cannot succeed; it
    is raised to the caller as `requests.exceptions.HTTPError`.

    Parameters:
        latitude, longitude: coordinates sent to the API.
        start_date, end_date: date range sent to the API as given.
        max_retries: maximum number of retries after a transient failure.
            None (the default) retries indefinitely.
        retry_wait: seconds to sleep before each retry (default 60).
        timeout: timeout in seconds handed to `requests.get`, so a stalled
            connection becomes a retryable error instead of blocking forever.

    Returns: DataFrame with columns: date, temp_min, temp_max, humidity, pressure, wind_speed, precipitation, day_of_year, month, day

    Raises:
        requests.exceptions.HTTPError: on a non-retryable 4xx response, or when
            a retryable HTTP status persists beyond `max_retries`.
        requests.exceptions.RequestException: when any other request failure
            persists beyond `max_retries`.
    """
    base_url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "daily": "temperature_2m_min,temperature_2m_max,precipitation_sum,relative_humidity_2m_mean,surface_pressure_mean,windspeed_10m_max",
        "timezone": "auto"
    }

    retries_used = 0
    while True:
        try:
            response = requests.get(base_url, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
            break
        except requests.exceptions.RequestException as e:
            # HTTP errors carry the failed response; network-level errors do not.
            status = getattr(getattr(e, "response", None), "status_code", None)
            rate_limited = status == 429
            # A 4xx other than 429 means the request itself is wrong, so a
            # retry would fail the same way forever.
            client_error = status is not None and 400 <= status < 500 and not rate_limited
            retries_exhausted = max_retries is not None and retries_used >= max_retries
            if client_error or retries_exhausted:
                raise

            if rate_limited:
                print(f"Rate limit exceeded. Waiting {retry_wait} seconds before retrying...")
            else:
                print(f"Request failed: {e}. Retrying in {retry_wait} seconds...")
            retries_used += 1
            time.sleep(retry_wait)

    daily = data["daily"]
    weather = pd.DataFrame({
        "date": pd.to_datetime(daily["time"]),
        "temp_min": daily["temperature_2m_min"],
        "temp_max": daily["temperature_2m_max"],
        "humidity": daily["relative_humidity_2m_mean"],
        "pressure": daily["surface_pressure_mean"],
        "wind_speed": daily["windspeed_10m_max"],
        "precipitation": daily["precipitation_sum"]
    })

    # Calendar features derived from the date column.
    weather["day_of_year"] = weather["date"].dt.dayofyear
    weather["month"] = weather["date"].dt.month
    weather["day"] = weather["date"].dt.day

    return weather


In [6]:
import time
from tqdm import tqdm

start_date = "2015-01-01"
end_date = "2024-12-31"
weather_data = {}

# df_stations is read by column position: 0 = station label, 1 = latitude,
# 2 = longitude. Each fetched frame is stored under its station label.
for city, latitude, longitude in tqdm(
    df_stations.iloc[:, :3].itertuples(index=False, name=None),
    total=df_stations.shape[0],
    desc="Fetching Weather Data",
    unit="city",
):
    try:
        weather_data[city] = get_historical_weather(latitude, longitude, start_date, end_date)
    except requests.exceptions.HTTPError as e:
        print(f"Error fetching data for {city}: {e}")

    # Pause 2 seconds between stations (presumably to stay under the API rate limit).
    time.sleep(2)


Fetching Weather Data:  83%|████████████████▋   | 5/6 [00:27<00:04,  4.43s/city]

Rate limit exceeded. Waiting 60 seconds before retrying...


Fetching Weather Data: 100%|████████████████████| 6/6 [01:35<00:00, 15.94s/city]


In [7]:
# Display the fetched weather frames, keyed by station label (shows every frame in the dict).
weather_data

{'Railway Colony, Guwahati - APCB_6941':            date  temp_min  temp_max  humidity  pressure  wind_speed  \
 0    2015-01-01      13.9      25.8        69    1009.9         9.2   
 1    2015-01-02      16.6      26.3        69    1010.3         7.0   
 2    2015-01-03      18.0      24.5        72    1010.2         7.1   
 3    2015-01-04      17.2      22.8        85    1007.0        11.5   
 4    2015-01-05      14.9      22.1        78    1005.8        20.2   
 ...         ...       ...       ...       ...       ...         ...   
 3648 2024-12-27      11.1      24.1        81    1012.3         8.2   
 3649 2024-12-28      12.0      24.6        79    1010.8         9.7   
 3650 2024-12-29      12.1      23.9        78    1009.0         6.9   
 3651 2024-12-30      13.0      24.5        79    1010.2         7.3   
 3652 2024-12-31      12.4      24.2        83    1010.1         6.7   
 
       precipitation  day_of_year  month  day  
 0               0.0            1      1    1 

In [8]:
# Stack the per-station weather frames into one long table, tagging every row
# with its station label in a trailing "Station" column.
#
# All frames are concatenated in one call: growing a DataFrame with pd.concat
# inside a loop re-copies the accumulated rows on every iteration. Using a
# comprehension also keeps the loop variables local, so the notebook-level `df`
# (the monitoring-locations table) is no longer overwritten by this cell.
merged_df = (
    pd.concat(
        [frame.assign(Station=station) for station, frame in weather_data.items()],
        ignore_index=True,
    )
    if weather_data
    else pd.DataFrame()  # pd.concat raises ValueError when given no frames
)


In [9]:
# Display the combined weather table for all stations.
merged_df

Unnamed: 0,date,temp_min,temp_max,humidity,pressure,wind_speed,precipitation,day_of_year,month,day,Station
0,2015-01-01,13.9,25.8,69,1009.9,9.2,0.0,1,1,1,"Railway Colony, Guwahati - APCB_6941"
1,2015-01-02,16.6,26.3,69,1010.3,7.0,0.0,2,1,2,"Railway Colony, Guwahati - APCB_6941"
2,2015-01-03,18.0,24.5,72,1010.2,7.1,2.7,3,1,3,"Railway Colony, Guwahati - APCB_6941"
3,2015-01-04,17.2,22.8,85,1007.0,11.5,1.7,4,1,4,"Railway Colony, Guwahati - APCB_6941"
4,2015-01-05,14.9,22.1,78,1005.8,20.2,0.9,5,1,5,"Railway Colony, Guwahati - APCB_6941"
...,...,...,...,...,...,...,...,...,...,...,...
21913,2024-12-27,13.3,24.4,77,1012.7,11.2,0.0,362,12,27,"LGBI Airport, Guwahati - PCBA_3409390"
21914,2024-12-28,12.8,25.1,75,1011.2,11.3,0.0,363,12,28,"LGBI Airport, Guwahati - PCBA_3409390"
21915,2024-12-29,13.4,23.8,80,1009.6,6.9,0.0,364,12,29,"LGBI Airport, Guwahati - PCBA_3409390"
21916,2024-12-30,14.3,24.4,78,1010.7,8.6,0.0,365,12,30,"LGBI Airport, Guwahati - PCBA_3409390"


In [10]:
# Save the combined weather table to a local CSV file. With to_csv's default
# index=True, the row index is written as an extra leading column.
merged_df.to_csv('/Users/user/Desktop/Guwahati_weather.csv')