In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!apt-get install awscli -y

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  docutils-common fonts-droid-fallback fonts-noto-mono fonts-urw-base35
  ghostscript groff gsfonts imagemagick imagemagick-6-common imagemagick-6.q16
  libdjvulibre-text libdjvulibre21 libfftw3-double3 libgs9 libgs9-common
  libidn12 libijs-0.35 libimagequant0 libjbig2dec0 libjxr-tools libjxr0
  liblqr-1-0 libmagickcore-6.q16-6 libmagickcore-6.q16-6-extra
  libmagickwand-6.q16-6 libnetpbm10 libraqm0 libwmflite-0.2-7 netpbm
  poppler-data psutils python3-botocore python3-certifi python3-chardet
  python3-colorama python3-dateutil python3-docutils python3-idna
  python3-jmespath python3-olefile python3-pil python3-pyasn1 python3-pygments
  python3-requests python3-roman python3-rsa python3-s3transfer
  python3-urllib3 python3-yaml sgml-base xml-core
Suggested packages:
  fonts-noto fonts-freefont-otf | fonts-freefont-ttf fonts-texgyre
  gh

In [None]:
!pip install --upgrade pip
!pip install --upgrade urllib3==1.26.16 pyOpenSSL

Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.2
Collecting urllib3==1.26.16
  Downloading urllib3-1.26.16-py2.py3-none-any.whl.metadata (48 kB)
Collecting pyOpenSSL
  Downloading pyopenssl-25.3.0-py3-none-any.whl.metadata (17 kB)
Collecting cryptography<47,>=45.0.7 (from pyOpenSSL)
  Downloading cryptography-46.0.2-cp311-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)
Downloading urllib3-1.26.16-py2.py3-none-any.whl (143 kB)
Downloading pyopenssl-25.3.0-py3-none-any.whl (57 kB)
Downloading cryptography-46.0.2-cp311-abi3-manylinux_2_34_x86_64.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━

In [None]:
!aws --version

/bin/bash: line 1: aws: command not found


In [None]:
import subprocess
import os
import json
import pandas as pd
import glob

# Load extracted location IDs
with open("extracted_locations.json", "r") as f:
    locations = json.load(f)

def download_location_years(location_ids, start_year, end_year, output_dir="/content/openaq_merged"):
    os.makedirs(output_dir, exist_ok=True)

    for loc in location_ids:
        loc_id = loc["id"]
        loc_name = loc.get("name", f"loc_{loc_id}")
        print(f"\n=== Processing location: {loc_id} ({loc_name}) ===")

        for year in range(start_year, end_year + 1):
            s3_path = f"s3://openaq-data-archive/records/csv.gz/locationid={loc_id}/year={year}/"
            local_raw_dir = f"/content/openaq_raw/locationid={loc_id}/year={year}/"
            merged_output_path_parquet = f"{output_dir}/locationid={loc_id}_year={year}.parquet"
            merged_output_path_csv = f"{output_dir}/locationid={loc_id}_year={year}.csv"

            # Skip if merged file already exists
            if os.path.exists(merged_output_path_parquet) or os.path.exists(merged_output_path_csv):
                print(f"Merged data for location {loc_id}, year {year} already exists. Skipping download and merge.")
                continue

            # Check if folder exists on S3
            result = subprocess.run(
                ["aws", "s3", "ls", s3_path, "--no-sign-request"],
                capture_output=True, text=True
            )

            if result.stdout.strip() == "":
                print(f"❌ No data for location {loc_id} in {year}")
                continue

            # Download recursively
            print(f"✅ Downloading data for location {loc_id}, year {year} to {local_raw_dir}")
            os.makedirs(local_raw_dir, exist_ok=True)
            subprocess.run([
                "aws", "s3", "cp", "--recursive", "--no-sign-request", s3_path,
                local_raw_dir
            ])

            # Merge CSVs and save as Parquet
            print(f"Merging CSVs for location {loc_id}, year {year} and saving as Parquet")
            all_files = glob.glob(os.path.join(local_raw_dir, "*.csv.gz"))
            if not all_files:
                print(f"No CSV files found for location {loc_id}, year {year}. Skipping merge.")
                continue

            try:
                df = pd.concat((pd.read_csv(f, compression='gzip', low_memory=False) for f in all_files), ignore_index=True)
                df.to_parquet(merged_output_path_parquet)
                print(f"Successfully merged and saved data for location {loc_id}, year {year} to {merged_output_path_parquet}")
            except Exception as e:
                print(f"Error merging and saving data for location {loc_id}, year {year}: {e}")

            # Clean up raw CSVs to save space
            print(f"Cleaning up raw CSV files in {local_raw_dir}")
            subprocess.run(["rm", "-rf", local_raw_dir])


# Example: run for all locations in the file
download_location_years(locations, start_year=2000, end_year=2010)

# Example: run for a subset of locations to test
# download_location_years(locations[:2], start_year=2020, end_year=2024)


=== Processing location: 12 (SPARTAN - IIT Kanpur) ===
❌ No data for location 12 in 2000
❌ No data for location 12 in 2001
❌ No data for location 12 in 2002
❌ No data for location 12 in 2003
❌ No data for location 12 in 2004
❌ No data for location 12 in 2005
❌ No data for location 12 in 2006
❌ No data for location 12 in 2007
❌ No data for location 12 in 2008
❌ No data for location 12 in 2009
❌ No data for location 12 in 2010

=== Processing location: 13 (Delhi Technological University, Delhi - CPCB) ===
❌ No data for location 13 in 2000
❌ No data for location 13 in 2001
❌ No data for location 13 in 2002
❌ No data for location 13 in 2003
❌ No data for location 13 in 2004
❌ No data for location 13 in 2005
❌ No data for location 13 in 2006
❌ No data for location 13 in 2007
❌ No data for location 13 in 2008
❌ No data for location 13 in 2009
❌ No data for location 13 in 2010

=== Processing location: 15 (IGI Airport) ===
❌ No data for location 15 in 2000
❌ No data for location 15 in 2001
❌

In [None]:
import os
import glob
import pandas as pd

def merge_raw_data_by_year(raw_dir="/content/openaq_raw", output_dir="/content/openaq_merged_yearly"):
    os.makedirs(output_dir, exist_ok=True)
    year_data = {}

    # Iterate through the raw data directory structure
    for location_dir in glob.glob(os.path.join(raw_dir, "locationid=*")):
        for year_dir in glob.glob(os.path.join(location_dir, "year=*")):
            year = os.path.basename(year_dir).split("=")[1]
            all_files_year = glob.glob(os.path.join(year_dir, "**/*.csv.gz"), recursive=True)

            if not all_files_year:
                print(f"No CSV files found for year {year} in {location_dir}. Skipping.")
                continue

            print(f"Processing year {year} in {location_dir}...")

            # Read and concatenate CSVs for the current year
            try:
                df_year = pd.concat((pd.read_csv(f, compression='gzip', low_memory=False) for f in all_files_year), ignore_index=True)
                if year not in year_data:
                    year_data[year] = []
                year_data[year].append(df_year)
            except Exception as e:
                print(f"Error processing year {year} in {location_dir}: {e}")


    # Merge dataframes for each year and save as Parquet
    for year, dfs in year_data.items():
        merged_df = pd.concat(dfs, ignore_index=True)
        output_path = os.path.join(output_dir, f"merged_openaq_{year}.parquet")
        try:
            merged_df.to_parquet(output_path)
            print(f"Successfully merged and saved data for year {year} to {output_path}")
        except Exception as e:
            print(f"Error saving merged data for year {year}: {e}")


# Run the merging function
merge_raw_data_by_year()

Processing year 2019 in /content/openaq_raw/locationid=5622...
Processing year 2018 in /content/openaq_raw/locationid=5622...
Processing year 2016 in /content/openaq_raw/locationid=716...
Processing year 2017 in /content/openaq_raw/locationid=716...
Processing year 2016 in /content/openaq_raw/locationid=594...
Processing year 2017 in /content/openaq_raw/locationid=594...
Processing year 2018 in /content/openaq_raw/locationid=594...
Processing year 2016 in /content/openaq_raw/locationid=2594...
Processing year 2017 in /content/openaq_raw/locationid=2594...
Processing year 2018 in /content/openaq_raw/locationid=2594...
Processing year 2019 in /content/openaq_raw/locationid=5613...
Processing year 2018 in /content/openaq_raw/locationid=5613...
Processing year 2019 in /content/openaq_raw/locationid=5593...
Processing year 2018 in /content/openaq_raw/locationid=5593...
Processing year 2016 in /content/openaq_raw/locationid=378...
Processing year 2017 in /content/openaq_raw/locationid=378...

In [None]:
import os
import glob
import shutil

def save_to_drive(source_dir="/content/openaq_merged_yearly", destination_dir="/content/drive/MyDrive/openaq_data"):
    os.makedirs(destination_dir, exist_ok=True)
    parquet_files = glob.glob(os.path.join(source_dir, "*.parquet"))

    if not parquet_files:
        print(f"No Parquet files found in {source_dir}")
        return

    for parquet_file in parquet_files:
        file_name = os.path.basename(parquet_file)
        destination_path = os.path.join(destination_dir, file_name)

        if os.path.exists(destination_path):
            print(f"File {file_name} already exists in {destination_dir}. Skipping copy.")
            continue

        try:
            shutil.copy(parquet_file, destination_dir)
            print(f"Copied {parquet_file} to {destination_dir}")
        except Exception as e:
            print(f"Error copying {parquet_file} to {destination_dir}: {e}")

save_to_drive()

File merged_openaq_2017.parquet already exists in /content/drive/MyDrive/openaq_data. Skipping copy.
File merged_openaq_2019.parquet already exists in /content/drive/MyDrive/openaq_data. Skipping copy.
File merged_openaq_2014.parquet already exists in /content/drive/MyDrive/openaq_data. Skipping copy.
File merged_openaq_2013.parquet already exists in /content/drive/MyDrive/openaq_data. Skipping copy.
File merged_openaq_2016.parquet already exists in /content/drive/MyDrive/openaq_data. Skipping copy.
File merged_openaq_2018.parquet already exists in /content/drive/MyDrive/openaq_data. Skipping copy.
File merged_openaq_2015.parquet already exists in /content/drive/MyDrive/openaq_data. Skipping copy.


In [None]:
import os
import glob
import pandas as pd

def combine_parquet_files(source_dir="/content/drive/MyDrive/openaq_data", output_path="/content/drive/MyDrive/combined_openaq_data.parquet"):
    parquet_files = glob.glob(os.path.join(source_dir, "*.parquet"))

    if not parquet_files:
        print(f"No Parquet files found in {source_dir}")
        return

    print(f"Combining {len(parquet_files)} Parquet files from {source_dir}")

    # Read and concatenate all parquet files
    try:
        combined_df = pd.concat((pd.read_parquet(f) for f in parquet_files), ignore_index=True)
        print("Successfully combined Parquet files.")
    except Exception as e:
        print(f"Error combining Parquet files: {e}")
        return

    # Save the combined dataframe to a new parquet file
    try:
        combined_df.to_parquet(output_path)
        print(f"Successfully saved combined data to {output_path}")
    except Exception as e:
        print(f"Error saving combined data to {output_path}: {e}")

# Run the combine function
combine_parquet_files()

Combining 10 Parquet files from /content/drive/MyDrive/openaq_data
Successfully combined Parquet files.
Successfully saved combined data to /content/drive/MyDrive/combined_openaq_data.parquet


In [None]:
import pandas as pd

# Load the combined parquet file
combined_df = pd.read_parquet("/content/drive/MyDrive/combined_openaq_data.parquet")

# Convert 'datetime' column to datetime objects
combined_df['datetime'] = pd.to_datetime(combined_df['datetime'])

# Extract year, month, day, and time
combined_df['year'] = combined_df['datetime'].dt.year
combined_df['month'] = combined_df['datetime'].dt.month
combined_df['day'] = combined_df['datetime'].dt.day
combined_df['time'] = combined_df['datetime'].dt.time

# Display the first few rows with new columns
display(combined_df.head())

Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,parameter,units,value,year,month,day,time
0,5622,34957,"NSIT Dwarka, Delhi - CPCB-5622",2022-09-26 00:30:00+05:30,28.60909,77.032541,pm10,µg/m³,102.4,2022,9,26,00:30:00
1,5622,34957,"NSIT Dwarka, Delhi - CPCB-5622",2022-09-26 02:30:00+05:30,28.60909,77.032541,pm10,µg/m³,130.6,2022,9,26,02:30:00
2,5622,34957,"NSIT Dwarka, Delhi - CPCB-5622",2022-09-26 03:30:00+05:30,28.60909,77.032541,pm10,µg/m³,126.0,2022,9,26,03:30:00
3,5622,34957,"NSIT Dwarka, Delhi - CPCB-5622",2022-09-26 04:30:00+05:30,28.60909,77.032541,pm10,µg/m³,99.4,2022,9,26,04:30:00
4,5622,34957,"NSIT Dwarka, Delhi - CPCB-5622",2022-09-26 05:30:00+05:30,28.60909,77.032541,pm10,µg/m³,94.1,2022,9,26,05:30:00


# Task
Use the `combined_df` to create new columns for each unique parameter, including their respective units and values.

## Identify unique parameters

### Subtask:
Identify all unique values in the 'parameter' column of the `combined_df`.


**Reasoning**:
Access the 'parameter' column and find the unique values to identify all unique parameters.



In [None]:
unique_parameters = combined_df['parameter'].unique()
print(unique_parameters)

['pm10' 'pm25' 'o3' 'co' 'no2' 'so2']


## Pivot the dataframe

### Subtask:
Reshape the DataFrame so that each unique parameter becomes a separate set of columns for its 'value' and 'units'.


**Reasoning**:
Reshape the DataFrame by pivoting it to have parameters as columns with their corresponding values and units.



**Reasoning**:
The pivot_table method defaults to using the mean aggregation function, which fails on the 'units' column because it is of object dtype (strings). I need to explicitly specify an aggregation function that works for both numeric ('value') and object ('units') columns, such as 'first' or 'last', since for a given datetime, location, and parameter, there should only be one value and one unit.



In [None]:
pivoted_df = combined_df.pivot_table(
    index=['location_id', 'sensors_id', 'location', 'datetime', 'lat', 'lon', 'year', 'month', 'day', 'time'],
    columns='parameter',
    values=['value', 'units'],
    aggfunc='first' # Use 'first' as the aggregation function
)

# Reset the index to make location_id, sensors_id, and location regular columns
pivoted_df = pivoted_df.reset_index()

display(pivoted_df.head())

Unnamed: 0_level_0,location_id,sensors_id,location,datetime,lat,lon,year,month,day,time,...,units,units,units,units,value,value,value,value,value,value
parameter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,o3,pm10,pm25,so2,co,no2,o3,pm10,pm25,so2
0,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 16:00:00+05:30,26.519,80.233,2013,12,14,16:00:00,...,,,µg/m³,,,,,,106.5,
1,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 17:00:00+05:30,26.519,80.233,2013,12,14,17:00:00,...,,,µg/m³,,,,,,127.6,
2,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 18:00:00+05:30,26.519,80.233,2013,12,14,18:00:00,...,,,µg/m³,,,,,,124.0,
3,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 19:00:00+05:30,26.519,80.233,2013,12,14,19:00:00,...,,,µg/m³,,,,,,84.9,
4,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 20:00:00+05:30,26.519,80.233,2013,12,14,20:00:00,...,,,µg/m³,,,,,,36.8,


In [None]:
display(pivoted_df.head())
display(pivoted_df.info())

Unnamed: 0_level_0,location_id,sensors_id,location,datetime,lat,lon,year,month,day,time,...,units,units,units,units,value,value,value,value,value,value
parameter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,o3,pm10,pm25,so2,co,no2,o3,pm10,pm25,so2
0,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 16:00:00+05:30,26.519,80.233,2013,12,14,16:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,106.5,0.0
1,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 17:00:00+05:30,26.519,80.233,2013,12,14,17:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,127.6,0.0
2,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 18:00:00+05:30,26.519,80.233,2013,12,14,18:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,124.0,0.0
3,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 19:00:00+05:30,26.519,80.233,2013,12,14,19:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,84.9,0.0
4,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 20:00:00+05:30,26.519,80.233,2013,12,14,20:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,36.8,0.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14841314 entries, 0 to 14841313
Data columns (total 22 columns):
 #   Column           Dtype                    
---  ------           -----                    
 0   (location_id, )  int64                    
 1   (sensors_id, )   int64                    
 2   (location, )     object                   
 3   (datetime, )     datetime64[ns, UTC+05:30]
 4   (lat, )          float64                  
 5   (lon, )          float64                  
 6   (year, )         int32                    
 7   (month, )        int32                    
 8   (day, )          int32                    
 9   (time, )         object                   
 10  (units, co)      object                   
 11  (units, no2)     object                   
 12  (units, o3)      object                   
 13  (units, pm10)    object                   
 14  (units, pm25)    object                   
 15  (units, so2)     object                   
 16  (value, co)     

None

In [None]:
# Fill missing values appropriately based on column data type
for col in pivoted_df.columns:
    if pivoted_df[col].dtype == 'object':
        # Fill object type columns (like units) with an empty string
        pivoted_df[col] = pivoted_df[col].fillna('').astype(str) # Explicitly convert to string
    else:
        # Fill numeric columns (like values) with 0
        pivoted_df[col] = pivoted_df[col].fillna(0)


# Display the head and null counts to verify
display(pivoted_df.head())
display(pivoted_df.isnull().sum())

Unnamed: 0_level_0,location_id,sensors_id,location,datetime,lat,lon,year,month,day,time,...,units,units,units,units,value,value,value,value,value,value
parameter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,o3,pm10,pm25,so2,co,no2,o3,pm10,pm25,so2
0,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 16:00:00+05:30,26.519,80.233,2013,12,14,16:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,106.5,0.0
1,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 17:00:00+05:30,26.519,80.233,2013,12,14,17:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,127.6,0.0
2,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 18:00:00+05:30,26.519,80.233,2013,12,14,18:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,124.0,0.0
3,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 19:00:00+05:30,26.519,80.233,2013,12,14,19:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,84.9,0.0
4,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 20:00:00+05:30,26.519,80.233,2013,12,14,20:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,36.8,0.0


Unnamed: 0_level_0,Unnamed: 1_level_0,0
Unnamed: 0_level_1,parameter,Unnamed: 2_level_1
location_id,,0
sensors_id,,0
location,,0
datetime,,0
lat,,0
lon,,0
year,,0
month,,0
day,,0
time,,0


In [None]:
# Save the pivoted DataFrame to a Feather file as an alternative
output_path = "/content/drive/MyDrive/pivoted_openaq_data.feather"
try:
    pivoted_df.to_feather(output_path)
    print(f"Successfully saved pivoted data to {output_path}")
except Exception as e:
    print(f"Error saving pivoted data to {output_path}: {e}")

Successfully saved pivoted data to /content/drive/MyDrive/pivoted_openaq_data.feather


In [None]:
# Check the timezone of the datetime column and get unique timezones
timezone_info = combined_df['datetime'].dt.tz

# Check if timezone_info is a single timezone object or a Series of timezone objects
if isinstance(timezone_info, object) and isinstance(timezone_info.tzname(None), str):
    # It's a single timezone object
    unique_timezones = [timezone_info]
    num_unique_timezones = 1
else:
    # It's likely a Series of timezone objects
    unique_timezones = timezone_info.unique()
    num_unique_timezones = len(unique_timezones)


print(f"There are {num_unique_timezones} unique timezones in the 'datetime' column.")
print("The unique timezones are:")
print(unique_timezones)

There are 1 unique timezones in the 'datetime' column.
The unique timezones are:
[datetime.timezone(datetime.timedelta(seconds=19800))]


In [None]:
%pip install requests



## Fetch weather data for unique combinations

### Subtask:
Retry fetching historical weather data from the Open-Meteo API for the unique location-time combinations, implementing a delay to avoid hitting the rate limit.


In [None]:
import pandas as pd

output_path = "/content/drive/MyDrive/pivoted_openaq_data.feather"

try:
    pivoted_df = pd.read_feather(output_path)
    print(f"Successfully loaded data from {output_path}")
    display(pivoted_df.head())
except Exception as e:
    print(f"Error loading data from {output_path}: {e}")

Successfully loaded data from /content/drive/MyDrive/pivoted_openaq_data.feather


Unnamed: 0_level_0,location_id,sensors_id,location,datetime,lat,lon,year,month,day,time,...,units,units,units,units,value,value,value,value,value,value
parameter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,o3,pm10,pm25,so2,co,no2,o3,pm10,pm25,so2
0,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 16:00:00+05:30,26.519,80.233,2013,12,14,16:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,106.5,0.0
1,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 17:00:00+05:30,26.519,80.233,2013,12,14,17:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,127.6,0.0
2,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 18:00:00+05:30,26.519,80.233,2013,12,14,18:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,124.0,0.0
3,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 19:00:00+05:30,26.519,80.233,2013,12,14,19:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,84.9,0.0
4,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 20:00:00+05:30,26.519,80.233,2013,12,14,20:00:00,...,,,µg/m³,,0.0,0.0,0.0,0.0,36.8,0.0


In [None]:
# Get unique location-year pairs
unique_loc_year = pivoted_df[['lat','lon','year']].drop_duplicates()
print("Total unique (lat,lon,year) pairs:", len(unique_loc_year))

Total unique (lat,lon,year) pairs: 392


In [None]:
import requests
import os
import pandas as pd
import time

def fetch_weather(lat, lon, year, save_dir="/content/drive/MyDrive/weather_data", max_retries=5):
    """Fetch weather for a given lat, lon, year and save as parquet with retries."""
    lat = float(lat)
    lon = float(lon)
    year = int(year)

    os.makedirs(save_dir, exist_ok=True)
    outfile = f"{save_dir}/weather_{lat}_{lon}_{year}.parquet"

    if os.path.exists(outfile):
        print(f"Skipping {lat},{lon},{year} (already exists)")
        return outfile

    start = f"{year}-01-01"
    end   = f"{year}-12-31"

    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start,
        "end_date": end,
        "hourly": ["temperature_2m", "windspeed_10m", "relative_humidity_2m"],
        "timezone": "Asia/Kolkata"
    }

    retries = 0
    while retries < max_retries:
        try:
            r = requests.get("https://archive-api.open-meteo.com/v1/archive", params=params, timeout=60)
            r.raise_for_status()
            data = r.json()

            if "hourly" not in data:
                print(f"No data for {lat},{lon},{year}")
                return None

            weather_df = pd.DataFrame({
                "datetime": pd.to_datetime(data["hourly"]["time"]),
                "temperature": data["hourly"]["temperature_2m"],
                "wind": data["hourly"]["windspeed_10m"],
                "humidity": data["hourly"]["relative_humidity_2m"]
            })
            weather_df["lat"] = lat
            weather_df["lon"] = lon

            weather_df.to_parquet(outfile, index=False)
            print(f"Saved {outfile}")
            return outfile

        except requests.exceptions.RequestException as e:
            retries += 1
            wait_time = 2 ** retries   # exponential backoff (2,4,8,16,32 sec)
            print(f"Error fetching {lat},{lon},{year} ({e}), retry {retries}/{max_retries} in {wait_time}s...")
            time.sleep(wait_time)

    print(f"Failed after {max_retries} retries for {lat},{lon},{year}")
    return None


In [None]:
from tqdm import tqdm

for row in tqdm(unique_loc_year.itertuples(index=False), total=len(unique_loc_year)):
    # Access elements by index instead of attribute name
    lat = float(row[0])
    lon = float(row[1])
    year = int(row[2])

    fetch_weather(lat, lon, year)

  0%|          | 1/392 [00:01<06:46,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.519_80.233_2013.parquet


  1%|          | 2/392 [00:01<06:22,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_26.519_80.233_2014.parquet


  1%|          | 3/392 [00:03<06:32,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.744_77.12_2015.parquet


  1%|          | 4/392 [00:04<07:00,  1.08s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.744_77.12_2016.parquet


  1%|▏         | 5/392 [00:05<06:52,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.744_77.12_2017.parquet


  2%|▏         | 6/392 [00:06<06:53,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.744_77.12_2018.parquet


  2%|▏         | 7/392 [00:07<06:48,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.56_77.094_2015.parquet


  2%|▏         | 8/392 [00:08<06:32,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6787_77.2262_2015.parquet


  2%|▏         | 9/392 [00:09<06:22,  1.00it/s]

Saved /content/drive/MyDrive/weather_data/weather_28.5648_77.1744_2015.parquet


  3%|▎         | 10/392 [00:10<06:26,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5648_77.1744_2016.parquet


  3%|▎         | 11/392 [00:11<06:30,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5648_77.1744_2017.parquet


  3%|▎         | 12/392 [00:12<06:31,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5648_77.1744_2018.parquet


  3%|▎         | 13/392 [00:13<06:19,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6683_77.1167_2015.parquet


  4%|▎         | 14/392 [00:14<06:22,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6683_77.1167_2016.parquet


  4%|▍         | 15/392 [00:15<06:27,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6683_77.1167_2017.parquet


  4%|▍         | 16/392 [00:16<06:27,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6683_77.1167_2018.parquet


  4%|▍         | 17/392 [00:17<06:27,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6235_77.2494_2016.parquet


  5%|▍         | 18/392 [00:18<06:25,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6235_77.2494_2017.parquet


  5%|▍         | 19/392 [00:19<06:25,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6235_77.2494_2018.parquet


  5%|▌         | 20/392 [00:20<06:15,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6508_77.3152_2015.parquet


  5%|▌         | 21/392 [00:21<06:18,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6508_77.3152_2016.parquet


  6%|▌         | 22/392 [00:22<06:18,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6508_77.3152_2017.parquet


  6%|▌         | 23/392 [00:23<06:14,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6508_77.3152_2018.parquet


  6%|▌         | 24/392 [00:24<06:05,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_28.6341_77.2005_2015.parquet


  6%|▋         | 25/392 [00:25<06:08,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6341_77.2005_2016.parquet


  7%|▋         | 26/392 [00:26<06:11,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6341_77.2005_2017.parquet


  7%|▋         | 27/392 [00:27<06:15,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6341_77.2005_2018.parquet


  7%|▋         | 28/392 [00:28<06:03,  1.00it/s]

Saved /content/drive/MyDrive/weather_data/weather_24.7489694_84.9438395_2016.parquet


  7%|▋         | 29/392 [00:29<05:56,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_24.7489694_84.9438395_2017.parquet


  8%|▊         | 30/392 [00:30<06:02,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.7489694_84.9438395_2018.parquet


  8%|▊         | 31/392 [00:31<05:54,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_28.4501238_77.0263051_2016.parquet


  8%|▊         | 32/392 [00:32<06:01,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4501238_77.0263051_2017.parquet


  8%|▊         | 33/392 [00:33<06:05,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4501238_77.0263051_2018.parquet


  9%|▊         | 34/392 [00:34<06:07,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4501238_77.0263051_2019.parquet


  9%|▉         | 35/392 [00:35<06:07,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4501238_77.0263051_2020.parquet


  9%|▉         | 36/392 [00:36<06:06,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4501238_77.0263051_2021.parquet


  9%|▉         | 37/392 [00:37<06:06,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4501238_77.0263051_2022.parquet


 10%|▉         | 38/392 [00:38<06:06,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.876028_76.619464_2016.parquet


 10%|▉         | 39/392 [00:39<06:05,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.876028_76.619464_2017.parquet


 10%|█         | 40/392 [00:40<06:04,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.876028_76.619464_2018.parquet


 10%|█         | 41/392 [00:41<05:55,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.84578611_80.93874444_2016.parquet


 11%|█         | 42/392 [00:42<05:56,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.84578611_80.93874444_2017.parquet


 11%|█         | 43/392 [00:43<05:46,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_26.84578611_80.93874444_2018.parquet


 11%|█         | 44/392 [00:44<05:43,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.99711389_80.19151667_2016.parquet


 11%|█▏        | 45/392 [00:45<05:38,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.99711389_80.19151667_2017.parquet


 12%|█▏        | 46/392 [00:46<05:35,  1.03it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.99711389_80.19151667_2018.parquet


 12%|█▏        | 47/392 [00:47<06:04,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.349694_78.451437_2016.parquet


 12%|█▏        | 48/392 [00:49<06:02,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.349694_78.451437_2017.parquet


 12%|█▎        | 49/392 [00:50<06:02,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.349694_78.451437_2018.parquet


 13%|█▎        | 50/392 [00:51<06:00,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.349694_78.451437_2019.parquet


 13%|█▎        | 51/392 [00:52<05:57,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.349694_78.451437_2020.parquet


 13%|█▎        | 52/392 [00:53<05:55,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.349694_78.451437_2021.parquet


 14%|█▎        | 53/392 [00:54<05:43,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.349694_78.451437_2022.parquet


 14%|█▍        | 54/392 [00:55<05:34,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_13.0339_77.51321111_2016.parquet


 14%|█▍        | 55/392 [00:56<06:14,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_13.0339_77.51321111_2017.parquet


 14%|█▍        | 56/392 [00:57<06:07,  1.09s/it]

Saved /content/drive/MyDrive/weather_data/weather_13.0339_77.51321111_2018.parquet


 15%|█▍        | 57/392 [00:58<05:50,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6821_77.305_2016.parquet


 15%|█▍        | 58/392 [00:59<05:51,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6821_77.305_2017.parquet


 15%|█▌        | 59/392 [01:00<05:50,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6821_77.305_2018.parquet


 15%|█▌        | 60/392 [01:01<06:05,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.91281111_77.60921944_2016.parquet


 16%|█▌        | 61/392 [01:02<05:48,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.91281111_77.60921944_2017.parquet


 16%|█▌        | 62/392 [01:03<05:46,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.91281111_77.60921944_2018.parquet


 16%|█▌        | 63/392 [01:05<06:03,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.06047_88.109737_2016.parquet


 16%|█▋        | 64/392 [01:06<06:04,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.06047_88.109737_2017.parquet


 17%|█▋        | 65/392 [01:07<06:02,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.06047_88.109737_2018.parquet


 17%|█▋        | 66/392 [01:08<05:49,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.06047_88.109737_2019.parquet


 17%|█▋        | 67/392 [01:09<05:35,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.06047_88.109737_2020.parquet


 17%|█▋        | 68/392 [01:10<05:26,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.06047_88.109737_2021.parquet


 18%|█▊        | 69/392 [01:11<05:20,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_22.06047_88.109737_2022.parquet


 18%|█▊        | 70/392 [01:12<05:24,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.2920639_73.0379111111111_2016.parquet


 18%|█▊        | 71/392 [01:13<05:26,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.2920639_73.0379111111111_2017.parquet


 18%|█▊        | 72/392 [01:14<05:25,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.2920639_73.0379111111111_2018.parquet


 19%|█▊        | 73/392 [01:15<05:17,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_22.627875_88.3804_2016.parquet


 19%|█▉        | 74/392 [01:16<05:13,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_22.627875_88.3804_2017.parquet


 19%|█▉        | 75/392 [01:17<05:07,  1.03it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9773472_77.5706972222222_2016.parquet


 19%|█▉        | 76/392 [01:18<05:11,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9773472_77.5706972222222_2017.parquet


 20%|█▉        | 77/392 [01:18<05:06,  1.03it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9773472_77.5706972222222_2018.parquet


 20%|█▉        | 78/392 [01:19<05:02,  1.04it/s]

Saved /content/drive/MyDrive/weather_data/weather_19.95_79.3_2016.parquet


 20%|██        | 79/392 [01:20<05:00,  1.04it/s]

Saved /content/drive/MyDrive/weather_data/weather_19.95_79.3_2017.parquet


 20%|██        | 80/392 [01:21<04:58,  1.05it/s]

Saved /content/drive/MyDrive/weather_data/weather_19.95_79.3_2018.parquet


 21%|██        | 81/392 [01:22<05:05,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_27.19865833_78.00598056_2016.parquet


 21%|██        | 82/392 [01:23<05:11,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_27.19865833_78.00598056_2017.parquet


 21%|██        | 83/392 [01:24<05:13,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_27.19865833_78.00598056_2018.parquet


 21%|██▏       | 84/392 [01:25<05:06,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_27.19865833_78.00598056_2019.parquet


 22%|██▏       | 85/392 [01:26<05:09,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_27.19865833_78.00598056_2020.parquet


 22%|██▏       | 86/392 [01:27<05:01,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_27.19865833_78.00598056_2021.parquet


 22%|██▏       | 87/392 [01:28<05:04,  1.00it/s]

Saved /content/drive/MyDrive/weather_data/weather_27.19865833_78.00598056_2022.parquet


 22%|██▏       | 88/392 [01:29<04:58,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_22.572646_88.363895_2016.parquet


 23%|██▎       | 89/392 [01:30<05:03,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.572646_88.363895_2017.parquet


 23%|██▎       | 90/392 [01:31<04:56,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_26.0762_85.4115_2016.parquet


 23%|██▎       | 91/392 [01:32<05:02,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.0762_85.4115_2017.parquet


 23%|██▎       | 92/392 [01:33<04:55,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_26.0762_85.4115_2018.parquet


 24%|██▎       | 93/392 [01:34<04:58,  1.00it/s]

Saved /content/drive/MyDrive/weather_data/weather_26.9738778_75.7738777777777_2016.parquet


 24%|██▍       | 94/392 [01:35<05:02,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.9738778_75.7738777777777_2017.parquet


 24%|██▍       | 95/392 [01:36<04:55,  1.00it/s]

Saved /content/drive/MyDrive/weather_data/weather_26.83399722_80.8917361_2016.parquet


 24%|██▍       | 96/392 [01:37<04:58,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.83399722_80.8917361_2017.parquet


 25%|██▍       | 97/392 [01:38<05:00,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.83399722_80.8917361_2018.parquet


 25%|██▌       | 98/392 [01:39<05:01,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.83399722_80.8917361_2019.parquet


 25%|██▌       | 99/392 [01:41<05:01,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.83399722_80.8917361_2020.parquet


 26%|██▌       | 100/392 [01:42<05:01,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.83399722_80.8917361_2021.parquet


 26%|██▌       | 101/392 [01:43<05:00,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.83399722_80.8917361_2022.parquet


 26%|██▌       | 102/392 [01:44<04:51,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.443464_78.47489_2016.parquet


 26%|██▋       | 103/392 [01:44<04:44,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_22.547142_88.351048_2016.parquet


 27%|██▋       | 104/392 [01:45<04:39,  1.03it/s]

Saved /content/drive/MyDrive/weather_data/weather_13.052371_80.251932_2016.parquet


 27%|██▋       | 105/392 [01:46<04:36,  1.04it/s]

Saved /content/drive/MyDrive/weather_data/weather_26.85272778_80.99632778_2016.parquet


 27%|██▋       | 106/392 [01:47<04:41,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_26.85272778_80.99632778_2017.parquet


 27%|██▋       | 107/392 [01:48<04:45,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.85272778_80.99632778_2018.parquet


 28%|██▊       | 108/392 [01:50<05:05,  1.08s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6517_77.1581_2016.parquet


 28%|██▊       | 109/392 [01:51<05:00,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6517_77.1581_2017.parquet


 28%|██▊       | 110/392 [01:52<04:57,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6517_77.1581_2018.parquet


 28%|██▊       | 111/392 [01:53<04:47,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.99251389_80.23744722_2016.parquet


 29%|██▊       | 112/392 [01:54<04:38,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.99251389_80.23744722_2017.parquet


 29%|██▉       | 113/392 [01:55<04:40,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.99251389_80.23744722_2018.parquet


 29%|██▉       | 114/392 [01:56<04:33,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_30.7057778_76.8531805555555_2016.parquet


 29%|██▉       | 115/392 [01:57<04:37,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.7057778_76.8531805555555_2017.parquet


 30%|██▉       | 116/392 [01:58<04:39,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.7057778_76.8531805555555_2018.parquet


 30%|██▉       | 117/392 [01:59<04:39,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.7057778_76.8531805555555_2019.parquet


 30%|███       | 118/392 [02:00<04:40,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.7057778_76.8531805555555_2020.parquet


 30%|███       | 119/392 [02:01<04:41,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.7057778_76.8531805555555_2021.parquet


 31%|███       | 120/392 [02:02<04:42,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.7057778_76.8531805555555_2022.parquet


 31%|███       | 121/392 [02:03<04:33,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_18.497484_73.81349_2016.parquet


 31%|███       | 122/392 [02:04<04:30,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_18.497484_73.81349_2017.parquet


 31%|███▏      | 123/392 [02:05<04:33,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_18.497484_73.81349_2018.parquet


 32%|███▏      | 124/392 [02:06<04:26,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_13.164544_80.26285_2016.parquet


 32%|███▏      | 125/392 [02:07<04:23,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_13.164544_80.26285_2017.parquet


 32%|███▏      | 126/392 [02:08<04:18,  1.03it/s]

Saved /content/drive/MyDrive/weather_data/weather_13.164544_80.26285_2018.parquet


 32%|███▏      | 127/392 [02:09<04:15,  1.04it/s]

Saved /content/drive/MyDrive/weather_data/weather_13.164544_80.26285_2019.parquet


 33%|███▎      | 128/392 [02:10<04:12,  1.05it/s]

Saved /content/drive/MyDrive/weather_data/weather_13.164544_80.26285_2020.parquet


 33%|███▎      | 129/392 [02:10<04:09,  1.05it/s]

Saved /content/drive/MyDrive/weather_data/weather_13.164544_80.26285_2021.parquet


 33%|███▎      | 130/392 [02:11<04:07,  1.06it/s]

Saved /content/drive/MyDrive/weather_data/weather_13.164544_80.26285_2022.parquet


 33%|███▎      | 131/392 [02:13<04:21,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.41012222_77.313775_2016.parquet


 34%|███▎      | 132/392 [02:14<04:22,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.41012222_77.313775_2017.parquet


 34%|███▍      | 133/392 [02:15<04:24,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.41012222_77.313775_2018.parquet


 34%|███▍      | 134/392 [02:16<04:20,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9916694_77.5458305555555_2016.parquet


 34%|███▍      | 135/392 [02:17<04:19,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9916694_77.5458305555555_2017.parquet


 35%|███▍      | 136/392 [02:18<04:12,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9916694_77.5458305555555_2018.parquet


 35%|███▍      | 137/392 [02:18<04:07,  1.03it/s]

Saved /content/drive/MyDrive/weather_data/weather_25.35055556_82.97833333_2016.parquet


 35%|███▌      | 138/392 [02:20<04:11,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_25.35055556_82.97833333_2017.parquet


 35%|███▌      | 139/392 [02:21<04:13,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_25.35055556_82.97833333_2018.parquet


 36%|███▌      | 140/392 [02:21<04:08,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.93890556_77.69727222_2016.parquet


 36%|███▌      | 141/392 [02:22<04:04,  1.03it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.93890556_77.69727222_2017.parquet


 36%|███▌      | 142/392 [02:23<04:07,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.93890556_77.69727222_2018.parquet


 36%|███▋      | 143/392 [02:24<04:02,  1.03it/s]

Saved /content/drive/MyDrive/weather_data/weather_25.3636_85.0755_2016.parquet


 37%|███▋      | 144/392 [02:25<04:05,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_25.3636_85.0755_2017.parquet


 37%|███▋      | 145/392 [02:26<04:08,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_25.3636_85.0755_2018.parquet
Error fetching 17.42329722,78.47469722,2016 (HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=60)), retry 1/5 in 2s...


 37%|███▋      | 146/392 [03:30<1:20:35, 19.66s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.42329722_78.47469722_2016.parquet


 38%|███▊      | 147/392 [03:31<57:28, 14.07s/it]  

Saved /content/drive/MyDrive/weather_data/weather_17.42329722_78.47469722_2017.parquet


 38%|███▊      | 148/392 [03:32<41:13, 10.14s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.42329722_78.47469722_2018.parquet


 38%|███▊      | 149/392 [03:34<31:26,  7.76s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.4703088_80.3251749_2016.parquet


 38%|███▊      | 150/392 [03:35<23:10,  5.75s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.4703088_80.3251749_2017.parquet


 39%|███▊      | 151/392 [03:36<17:25,  4.34s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.4703088_80.3251749_2018.parquet


 39%|███▉      | 152/392 [03:37<13:16,  3.32s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.6599188_75.9063906_2016.parquet


 39%|███▉      | 153/392 [03:38<10:29,  2.63s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.6599188_75.9063906_2017.parquet


 39%|███▉      | 154/392 [03:39<08:32,  2.16s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.6599188_75.9063906_2018.parquet


 40%|███▉      | 155/392 [03:40<07:04,  1.79s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.6599188_75.9063906_2019.parquet


 40%|███▉      | 156/392 [03:41<06:09,  1.56s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.6599188_75.9063906_2020.parquet


 40%|████      | 157/392 [03:42<05:30,  1.41s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.6599188_75.9063906_2021.parquet


 40%|████      | 158/392 [03:43<05:04,  1.30s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.6599188_75.9063906_2022.parquet


 41%|████      | 159/392 [03:44<04:46,  1.23s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.598096_77.189066_2016.parquet


 41%|████      | 160/392 [03:45<04:24,  1.14s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.1494_72.9986_2016.parquet


 41%|████      | 161/392 [03:46<04:16,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.1494_72.9986_2017.parquet


 41%|████▏     | 162/392 [03:47<04:03,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.1494_72.9986_2018.parquet


 42%|████▏     | 163/392 [03:48<03:53,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.1494_72.9986_2019.parquet


 42%|████▏     | 164/392 [03:49<03:46,  1.00it/s]

Saved /content/drive/MyDrive/weather_data/weather_19.1494_72.9986_2020.parquet


 42%|████▏     | 165/392 [03:51<04:40,  1.24s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.5908256_80.8574279_2018.parquet


 42%|████▏     | 166/392 [03:52<04:25,  1.17s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.647622_77.315809_2018.parquet


 43%|████▎     | 167/392 [03:53<04:16,  1.14s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.647622_77.315809_2021.parquet


 43%|████▎     | 168/392 [03:54<04:07,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.647622_77.315809_2022.parquet


 43%|████▎     | 169/392 [03:55<04:01,  1.08s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6670856_77.1301247_2018.parquet


 43%|████▎     | 170/392 [03:56<03:58,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.7256504_77.2011573_2018.parquet


 44%|████▎     | 171/392 [03:57<03:55,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.7256504_77.2011573_2019.parquet


 44%|████▍     | 172/392 [03:58<03:52,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.7256504_77.2011573_2021.parquet


 44%|████▍     | 173/392 [03:59<03:52,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.7256504_77.2011573_2022.parquet


 44%|████▍     | 174/392 [04:00<03:49,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.321907_75.578914_2018.parquet


 45%|████▍     | 175/392 [04:01<03:50,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.321907_75.578914_2019.parquet


 45%|████▍     | 176/392 [04:02<03:49,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.321907_75.578914_2020.parquet


 45%|████▌     | 177/392 [04:03<03:40,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.321907_75.578914_2021.parquet


 45%|████▌     | 178/392 [04:04<03:40,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.321907_75.578914_2022.parquet


 46%|████▌     | 179/392 [04:05<03:40,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.349388_76.366642_2018.parquet


 46%|████▌     | 180/392 [04:06<03:38,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.349388_76.366642_2019.parquet


 46%|████▌     | 181/392 [04:07<03:39,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.349388_76.366642_2020.parquet


 46%|████▋     | 182/392 [04:08<03:32,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.349388_76.366642_2021.parquet


 47%|████▋     | 183/392 [04:09<03:34,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.349388_76.366642_2022.parquet


 47%|████▋     | 184/392 [04:11<03:42,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.707909_86.41467_2018.parquet


 47%|████▋     | 185/392 [04:11<03:32,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.707909_86.41467_2019.parquet


 47%|████▋     | 186/392 [04:12<03:26,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.707909_86.41467_2020.parquet


 48%|████▊     | 187/392 [04:13<03:27,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.707909_86.41467_2021.parquet


 48%|████▊     | 188/392 [04:14<03:27,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.707909_86.41467_2022.parquet


 48%|████▊     | 189/392 [04:15<03:27,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9352049_77.6814488_2018.parquet


 48%|████▊     | 190/392 [04:16<03:20,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9352049_77.6814488_2019.parquet


 49%|████▊     | 191/392 [04:17<03:16,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9352049_77.6814488_2020.parquet


 49%|████▉     | 192/392 [04:18<03:20,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9352049_77.6814488_2021.parquet


 49%|████▉     | 193/392 [04:19<03:21,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9352049_77.6814488_2022.parquet


 49%|████▉     | 194/392 [04:20<03:16,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9135218_77.5950804_2018.parquet


 50%|████▉     | 195/392 [04:21<03:19,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9135218_77.5950804_2019.parquet


 50%|█████     | 196/392 [04:23<03:47,  1.16s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9135218_77.5950804_2020.parquet


 50%|█████     | 197/392 [04:24<03:38,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9135218_77.5950804_2021.parquet


 51%|█████     | 198/392 [04:25<03:32,  1.09s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9135218_77.5950804_2022.parquet


 51%|█████     | 199/392 [04:26<03:22,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.62_74.876512_2018.parquet


 51%|█████     | 200/392 [04:27<03:22,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.62_74.876512_2019.parquet


 51%|█████▏    | 201/392 [04:28<03:20,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.62_74.876512_2020.parquet


 52%|█████▏    | 202/392 [04:29<03:18,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.62_74.876512_2021.parquet


 52%|█████▏    | 203/392 [04:30<03:11,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_31.62_74.876512_2022.parquet


 52%|█████▏    | 204/392 [04:31<03:11,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.870083_76.6205_2018.parquet


 52%|█████▏    | 205/392 [04:32<03:15,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.870083_76.6205_2019.parquet


 53%|█████▎    | 206/392 [04:34<03:41,  1.19s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.870083_76.6205_2020.parquet


 53%|█████▎    | 207/392 [04:35<03:27,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9099161_80.1076538_2018.parquet


 53%|█████▎    | 208/392 [04:36<03:21,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9099161_80.1076538_2019.parquet


 53%|█████▎    | 209/392 [04:37<03:12,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9099161_80.1076538_2020.parquet


 54%|█████▎    | 210/392 [04:38<03:06,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_12.9099161_80.1076538_2021.parquet


 54%|█████▍    | 211/392 [04:39<03:00,  1.00it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9099161_80.1076538_2022.parquet


 54%|█████▍    | 212/392 [04:40<02:57,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_24.7955_84.9994_2018.parquet


 54%|█████▍    | 213/392 [04:41<02:59,  1.00s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.7955_84.9994_2019.parquet


 55%|█████▍    | 214/392 [04:42<03:00,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.7955_84.9994_2020.parquet


 55%|█████▍    | 215/392 [04:43<03:00,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.7955_84.9994_2021.parquet


 55%|█████▌    | 216/392 [04:44<02:55,  1.01it/s]

Saved /content/drive/MyDrive/weather_data/weather_24.7955_84.9994_2022.parquet


 55%|█████▌    | 217/392 [04:45<03:00,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.9775302_79.2337086_2018.parquet


 56%|█████▌    | 218/392 [04:46<03:01,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.9775302_79.2337086_2019.parquet


 56%|█████▌    | 219/392 [04:47<03:03,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.9775302_79.2337086_2020.parquet


 56%|█████▌    | 220/392 [04:48<03:01,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.9775302_79.2337086_2021.parquet


 56%|█████▋    | 221/392 [04:49<02:54,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.9775302_79.2337086_2022.parquet


 57%|█████▋    | 222/392 [04:50<02:49,  1.00it/s]

Saved /content/drive/MyDrive/weather_data/weather_20.9360711_85.1707021_2018.parquet


 57%|█████▋    | 223/392 [04:51<02:56,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_20.9360711_85.1707021_2019.parquet


 57%|█████▋    | 224/392 [04:52<02:59,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_20.9360711_85.1707021_2020.parquet


 57%|█████▋    | 225/392 [04:53<03:04,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_20.9360711_85.1707021_2021.parquet


 58%|█████▊    | 226/392 [04:54<03:10,  1.15s/it]

Saved /content/drive/MyDrive/weather_data/weather_20.9360711_85.1707021_2022.parquet


 58%|█████▊    | 227/392 [04:56<03:03,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.9028_75.8086_2018.parquet


 58%|█████▊    | 228/392 [04:56<02:53,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.9028_75.8086_2019.parquet


 58%|█████▊    | 229/392 [04:57<02:50,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.9028_75.8086_2020.parquet


 59%|█████▊    | 230/392 [04:59<02:49,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.9028_75.8086_2021.parquet


 59%|█████▉    | 231/392 [05:00<02:48,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_30.9028_75.8086_2022.parquet


 59%|█████▉    | 232/392 [05:01<02:47,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4706914_77.1099364_2018.parquet


 59%|█████▉    | 233/392 [05:02<02:41,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4706914_77.1099364_2019.parquet


 60%|█████▉    | 234/392 [05:03<02:43,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4706914_77.1099364_2020.parquet


 60%|█████▉    | 235/392 [05:04<02:39,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4706914_77.1099364_2021.parquet


 60%|██████    | 236/392 [05:05<02:39,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4706914_77.1099364_2022.parquet


 60%|██████    | 237/392 [05:06<02:34,  1.00it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9756843_77.5660749_2018.parquet


 61%|██████    | 238/392 [05:07<02:31,  1.02it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9756843_77.5660749_2019.parquet


 61%|██████    | 239/392 [05:07<02:28,  1.03it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9756843_77.5660749_2020.parquet


 61%|██████    | 240/392 [05:08<02:26,  1.04it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9756843_77.5660749_2021.parquet


 61%|██████▏   | 241/392 [05:09<02:24,  1.05it/s]

Saved /content/drive/MyDrive/weather_data/weather_12.9756843_77.5660749_2022.parquet


 62%|██████▏   | 242/392 [05:11<02:31,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_27.554793_76.611536_2018.parquet


 62%|██████▏   | 243/392 [05:12<02:37,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_27.554793_76.611536_2019.parquet


 62%|██████▏   | 244/392 [05:13<02:36,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_27.554793_76.611536_2020.parquet


 62%|██████▎   | 245/392 [05:14<02:35,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_27.554793_76.611536_2021.parquet


 63%|██████▎   | 246/392 [05:15<02:35,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_27.554793_76.611536_2022.parquet


 63%|██████▎   | 247/392 [05:16<02:34,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.610304_77.0996943_2018.parquet


 63%|██████▎   | 248/392 [05:17<02:44,  1.14s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.5886166_73.6321397_2018.parquet


 64%|██████▎   | 249/392 [05:18<02:34,  1.08s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.5886166_73.6321397_2019.parquet


 64%|██████▍   | 250/392 [05:19<02:31,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.5886166_73.6321397_2020.parquet


 64%|██████▍   | 251/392 [05:20<02:30,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.5886166_73.6321397_2021.parquet


 64%|██████▍   | 252/392 [05:21<02:28,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.5886166_73.6321397_2022.parquet


 65%|██████▍   | 253/392 [05:22<02:25,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5504249_77.2159377_2018.parquet


 65%|██████▍   | 254/392 [05:23<02:24,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5504249_77.2159377_2019.parquet


 65%|██████▌   | 255/392 [05:24<02:23,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5504249_77.2159377_2020.parquet


 65%|██████▌   | 256/392 [05:25<02:20,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5504249_77.2159377_2021.parquet


 66%|██████▌   | 257/392 [05:26<02:19,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5504249_77.2159377_2022.parquet


 66%|██████▌   | 258/392 [05:28<02:19,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.268249_73.0193853_2018.parquet


 66%|██████▌   | 259/392 [05:29<02:17,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.268249_73.0193853_2019.parquet


 66%|██████▋   | 260/392 [05:30<02:17,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.268249_73.0193853_2020.parquet


 67%|██████▋   | 261/392 [05:31<02:16,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.268249_73.0193853_2021.parquet


 67%|██████▋   | 262/392 [05:32<02:14,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.268249_73.0193853_2022.parquet


 67%|██████▋   | 263/392 [05:33<02:14,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.194909_76.862296_2018.parquet


 67%|██████▋   | 264/392 [05:34<02:13,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.194909_76.862296_2019.parquet


 68%|██████▊   | 265/392 [05:35<02:12,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.194909_76.862296_2020.parquet


 68%|██████▊   | 266/392 [05:36<02:09,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.194909_76.862296_2021.parquet


 68%|██████▊   | 267/392 [05:37<02:09,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.194909_76.862296_2022.parquet


 68%|██████▊   | 268/392 [05:38<02:09,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_25.3505986_82.9083074_2018.parquet


 69%|██████▊   | 269/392 [05:39<02:09,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_25.3505986_82.9083074_2019.parquet


 69%|██████▉   | 270/392 [05:40<02:09,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_25.3505986_82.9083074_2020.parquet


 69%|██████▉   | 271/392 [05:41<02:09,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_25.3505986_82.9083074_2021.parquet


 69%|██████▉   | 272/392 [05:42<02:06,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_25.3505986_82.9083074_2022.parquet


 70%|██████▉   | 273/392 [05:43<02:05,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_20.0073285_73.7762427_2018.parquet


 70%|██████▉   | 274/392 [05:44<02:04,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_20.0073285_73.7762427_2019.parquet


 70%|███████   | 275/392 [05:45<02:03,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_20.0073285_73.7762427_2020.parquet


 70%|███████   | 276/392 [05:46<02:02,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_20.0073285_73.7762427_2021.parquet


 71%|███████   | 277/392 [05:47<02:00,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_20.0073285_73.7762427_2022.parquet


 71%|███████   | 278/392 [05:49<01:59,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.192056_72.9585188_2018.parquet


 71%|███████   | 279/392 [05:50<01:59,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.192056_72.9585188_2019.parquet


 71%|███████▏  | 280/392 [05:51<01:55,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.192056_72.9585188_2020.parquet


 72%|███████▏  | 281/392 [05:52<01:55,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.192056_72.9585188_2021.parquet


 72%|███████▏  | 282/392 [05:53<01:51,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.192056_72.9585188_2022.parquet


 72%|███████▏  | 283/392 [05:54<01:59,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.1209_85.3647_2018.parquet


 72%|███████▏  | 284/392 [06:11<10:23,  5.78s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.1209_85.3647_2019.parquet


 73%|███████▎  | 285/392 [06:12<07:52,  4.42s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.1209_85.3647_2020.parquet


 73%|███████▎  | 286/392 [06:13<06:07,  3.47s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.1209_85.3647_2021.parquet


 73%|███████▎  | 287/392 [06:15<05:02,  2.88s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.1209_85.3647_2022.parquet


 73%|███████▎  | 288/392 [06:16<04:04,  2.35s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.5150833_80.5181667_2018.parquet


 74%|███████▎  | 289/392 [06:17<03:23,  1.98s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.5150833_80.5181667_2019.parquet


 74%|███████▍  | 290/392 [06:18<02:53,  1.70s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.5150833_80.5181667_2020.parquet


 74%|███████▍  | 291/392 [06:19<02:31,  1.50s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.5150833_80.5181667_2021.parquet


 74%|███████▍  | 292/392 [06:20<02:20,  1.41s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.5150833_80.5181667_2022.parquet


 75%|███████▍  | 293/392 [06:21<02:12,  1.34s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5447608_77.3231257_2018.parquet


 75%|███████▌  | 294/392 [06:22<02:02,  1.25s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5447608_77.3231257_2019.parquet


 75%|███████▌  | 295/392 [06:23<01:57,  1.22s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5447608_77.3231257_2020.parquet


 76%|███████▌  | 296/392 [06:24<01:49,  1.15s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5447608_77.3231257_2021.parquet


 76%|███████▌  | 297/392 [06:25<01:46,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.5447608_77.3231257_2022.parquet


 76%|███████▌  | 298/392 [06:27<01:47,  1.15s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.540891_78.358528_2018.parquet


 76%|███████▋  | 299/392 [06:28<01:46,  1.14s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.540891_78.358528_2019.parquet


 77%|███████▋  | 300/392 [06:29<01:45,  1.15s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.540891_78.358528_2020.parquet


 77%|███████▋  | 301/392 [06:30<01:44,  1.15s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.540891_78.358528_2021.parquet


 77%|███████▋  | 302/392 [06:31<01:44,  1.16s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.540891_78.358528_2022.parquet


 77%|███████▋  | 303/392 [06:32<01:39,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.825341_78.7213009_2018.parquet


 78%|███████▊  | 304/392 [06:33<01:35,  1.08s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.825341_78.7213009_2019.parquet


 78%|███████▊  | 305/392 [06:34<01:31,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.825341_78.7213009_2020.parquet


 78%|███████▊  | 306/392 [06:35<01:27,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.825341_78.7213009_2021.parquet


 78%|███████▊  | 307/392 [06:36<01:27,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.825341_78.7213009_2022.parquet


 79%|███████▊  | 308/392 [06:38<01:30,  1.08s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.685297_86.945968_2018.parquet


 79%|███████▉  | 309/392 [06:39<01:29,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.685297_86.945968_2019.parquet


 79%|███████▉  | 310/392 [06:40<01:27,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.685297_86.945968_2020.parquet


 79%|███████▉  | 311/392 [06:41<01:25,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.685297_86.945968_2021.parquet


 80%|███████▉  | 312/392 [06:42<01:24,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.685297_86.945968_2022.parquet


 80%|███████▉  | 313/392 [06:43<01:27,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.624758_75.675238_2018.parquet


 80%|████████  | 314/392 [06:44<01:28,  1.14s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.624758_75.675238_2019.parquet


 80%|████████  | 315/392 [06:45<01:30,  1.17s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.624758_75.675238_2020.parquet


 81%|████████  | 316/392 [06:46<01:26,  1.14s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.624758_75.675238_2021.parquet


 81%|████████  | 317/392 [06:48<01:27,  1.17s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.624758_75.675238_2022.parquet


 81%|████████  | 318/392 [06:49<01:24,  1.14s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.871428_80.957145_2018.parquet


 81%|████████▏ | 319/392 [06:50<01:21,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.871428_80.957145_2019.parquet


 82%|████████▏ | 320/392 [06:51<01:19,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_13.0270199_77.494094_2018.parquet


 82%|████████▏ | 321/392 [06:52<01:18,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_13.0270199_77.494094_2019.parquet


 82%|████████▏ | 322/392 [06:53<01:15,  1.09s/it]

Saved /content/drive/MyDrive/weather_data/weather_13.0270199_77.494094_2020.parquet


 82%|████████▏ | 323/392 [06:54<01:14,  1.08s/it]

Saved /content/drive/MyDrive/weather_data/weather_13.0270199_77.494094_2021.parquet


 83%|████████▎ | 324/392 [06:55<01:14,  1.09s/it]

Saved /content/drive/MyDrive/weather_data/weather_13.0270199_77.494094_2022.parquet


 83%|████████▎ | 325/392 [06:56<01:14,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.8004996_83.8396977_2018.parquet


 83%|████████▎ | 326/392 [06:57<01:12,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.8004996_83.8396977_2019.parquet


 83%|████████▎ | 327/392 [06:59<01:12,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.8004996_83.8396977_2020.parquet


 84%|████████▎ | 328/392 [07:00<01:11,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.8004996_83.8396977_2021.parquet


 84%|████████▍ | 329/392 [07:01<01:09,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.8004996_83.8396977_2022.parquet


 84%|████████▍ | 330/392 [07:02<01:05,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.9872867_81.7363176_2018.parquet


 84%|████████▍ | 331/392 [07:03<01:05,  1.08s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.9872867_81.7363176_2019.parquet


 85%|████████▍ | 332/392 [07:04<01:06,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.9872867_81.7363176_2020.parquet


 85%|████████▍ | 333/392 [07:05<01:05,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.9872867_81.7363176_2021.parquet


 85%|████████▌ | 334/392 [07:06<01:04,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_16.9872867_81.7363176_2022.parquet


 85%|████████▌ | 335/392 [07:07<01:02,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6573814_77.1585447_2018.parquet


 86%|████████▌ | 336/392 [07:09<01:01,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6573814_77.1585447_2019.parquet


 86%|████████▌ | 337/392 [07:10<00:59,  1.09s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6573814_77.1585447_2020.parquet


 86%|████████▌ | 338/392 [07:11<00:56,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6573814_77.1585447_2021.parquet


 86%|████████▋ | 339/392 [07:12<00:55,  1.05s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6573814_77.1585447_2022.parquet


 87%|████████▋ | 340/392 [07:13<00:55,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.645324_77.6345232_2018.parquet


 87%|████████▋ | 341/392 [07:14<00:56,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.645324_77.6345232_2019.parquet


 87%|████████▋ | 342/392 [07:15<00:54,  1.09s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.645324_77.6345232_2020.parquet


 88%|████████▊ | 343/392 [07:16<00:54,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.645324_77.6345232_2021.parquet


 88%|████████▊ | 344/392 [07:17<00:55,  1.15s/it]

Saved /content/drive/MyDrive/weather_data/weather_19.645324_77.6345232_2022.parquet


 88%|████████▊ | 345/392 [07:19<00:56,  1.20s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.9502929_75.730943_2018.parquet


 88%|████████▊ | 346/392 [07:20<00:53,  1.16s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.9502929_75.730943_2019.parquet


 89%|████████▊ | 347/392 [07:21<00:50,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.9502929_75.730943_2020.parquet


 89%|████████▉ | 348/392 [07:22<00:49,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.9502929_75.730943_2021.parquet


 89%|████████▉ | 349/392 [07:23<00:46,  1.09s/it]

Saved /content/drive/MyDrive/weather_data/weather_26.9502929_75.730943_2022.parquet


 89%|████████▉ | 350/392 [07:24<00:45,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6316945_77.2494387_2018.parquet
Error fetching 28.6316945,77.2494387,2019 (HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=60)), retry 1/5 in 2s...


 90%|████████▉ | 351/392 [08:27<13:29, 19.73s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6316945_77.2494387_2019.parquet


 90%|████████▉ | 352/392 [08:28<09:25, 14.13s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6316945_77.2494387_2020.parquet


 90%|█████████ | 353/392 [08:29<06:38, 10.21s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.5687319_88.2797276_2018.parquet


 90%|█████████ | 354/392 [08:30<04:42,  7.43s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.5687319_88.2797276_2019.parquet


 91%|█████████ | 355/392 [08:31<03:23,  5.50s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.5687319_88.2797276_2020.parquet


 91%|█████████ | 356/392 [08:32<02:29,  4.16s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.5687319_88.2797276_2021.parquet


 91%|█████████ | 357/392 [08:33<01:52,  3.22s/it]

Saved /content/drive/MyDrive/weather_data/weather_22.5687319_88.2797276_2022.parquet


 91%|█████████▏| 358/392 [08:34<01:28,  2.59s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.5404352_87.2892225_2018.parquet


 92%|█████████▏| 359/392 [08:35<01:10,  2.13s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.5404352_87.2892225_2019.parquet


 92%|█████████▏| 360/392 [08:37<00:57,  1.80s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.5404352_87.2892225_2020.parquet


 92%|█████████▏| 361/392 [08:38<00:48,  1.57s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.5404352_87.2892225_2021.parquet


 92%|█████████▏| 362/392 [08:39<00:42,  1.43s/it]

Saved /content/drive/MyDrive/weather_data/weather_23.5404352_87.2892225_2022.parquet


 93%|█████████▎| 363/392 [08:40<00:37,  1.31s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6245479_77.3577104_2018.parquet


 93%|█████████▎| 364/392 [08:41<00:34,  1.23s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6245479_77.3577104_2019.parquet


 93%|█████████▎| 365/392 [08:42<00:31,  1.17s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6245479_77.3577104_2020.parquet


 93%|█████████▎| 366/392 [08:43<00:29,  1.14s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6245479_77.3577104_2021.parquet


 94%|█████████▎| 367/392 [08:44<00:27,  1.12s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.6245479_77.3577104_2022.parquet


 94%|█████████▍| 368/392 [08:45<00:26,  1.10s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4088421_77.3099081_2018.parquet


 94%|█████████▍| 369/392 [08:46<00:25,  1.11s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4088421_77.3099081_2019.parquet


 94%|█████████▍| 370/392 [08:47<00:24,  1.09s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4088421_77.3099081_2020.parquet


 95%|█████████▍| 371/392 [08:48<00:22,  1.08s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4088421_77.3099081_2021.parquet


 95%|█████████▍| 372/392 [08:49<00:21,  1.07s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.4088421_77.3099081_2022.parquet


 95%|█████████▌| 373/392 [08:50<00:20,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.10897_82.64558_2018.parquet


 95%|█████████▌| 374/392 [08:51<00:19,  1.06s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.10897_82.64558_2019.parquet


 96%|█████████▌| 375/392 [08:52<00:17,  1.02s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.10897_82.64558_2020.parquet


 96%|█████████▌| 376/392 [08:53<00:16,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.10897_82.64558_2021.parquet


 96%|█████████▌| 377/392 [08:54<00:15,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_24.10897_82.64558_2022.parquet


 96%|█████████▋| 378/392 [08:55<00:14,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.152875_79.0517531_2018.parquet


 97%|█████████▋| 379/392 [08:56<00:13,  1.01s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.152875_79.0517531_2019.parquet


 97%|█████████▋| 380/392 [08:57<00:12,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.152875_79.0517531_2020.parquet


 97%|█████████▋| 381/392 [08:58<00:11,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.152875_79.0517531_2021.parquet


 97%|█████████▋| 382/392 [09:00<00:10,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_21.152875_79.0517531_2022.parquet


 98%|█████████▊| 383/392 [09:01<00:09,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.60909_77.0325413_2018.parquet


 98%|█████████▊| 384/392 [09:02<00:08,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.60909_77.0325413_2019.parquet


 98%|█████████▊| 385/392 [09:03<00:07,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.60909_77.0325413_2020.parquet


 98%|█████████▊| 386/392 [09:04<00:06,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.60909_77.0325413_2021.parquet


 99%|█████████▊| 387/392 [09:05<00:05,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_28.60909_77.0325413_2022.parquet


 99%|█████████▉| 388/392 [09:06<00:04,  1.03s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.460103_78.334361_2018.parquet


 99%|█████████▉| 389/392 [09:07<00:03,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.460103_78.334361_2019.parquet


 99%|█████████▉| 390/392 [09:08<00:02,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.460103_78.334361_2020.parquet


100%|█████████▉| 391/392 [09:09<00:01,  1.04s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.460103_78.334361_2021.parquet


100%|██████████| 392/392 [09:10<00:00,  1.40s/it]

Saved /content/drive/MyDrive/weather_data/weather_17.460103_78.334361_2022.parquet





In [None]:
import glob
import pandas as pd

# Load all saved weather data
files = glob.glob("/content/drive/MyDrive/weather_data/*.parquet")
weather_all = pd.concat([pd.read_parquet(f) for f in files], ignore_index=True)

# Save the combined weather data to a parquet file
output_path = "/content/drive/MyDrive/combined_weather_data.parquet"
try:
    weather_all.to_parquet(output_path, index=False)
    print(f"Successfully saved combined weather data to {output_path}")
except Exception as e:
    print(f"Error saving combined weather data: {e}")

display(weather_all.head())
display(weather_all.info())

Successfully saved combined weather data to /content/drive/MyDrive/combined_weather_data.parquet


Unnamed: 0,datetime,temperature,wind,humidity,lat,lon
0,2013-01-01 00:00:00,10.9,8.4,81,26.519,80.233
1,2013-01-01 01:00:00,10.2,8.3,82,26.519,80.233
2,2013-01-01 02:00:00,9.6,7.6,84,26.519,80.233
3,2013-01-01 03:00:00,9.0,6.3,86,26.519,80.233
4,2013-01-01 04:00:00,8.5,6.2,87,26.519,80.233


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3436248 entries, 0 to 3436247
Data columns (total 6 columns):
 #   Column       Dtype         
---  ------       -----         
 0   datetime     datetime64[ns]
 1   temperature  float64       
 2   wind         float64       
 3   humidity     int64         
 4   lat          float64       
 5   lon          float64       
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 157.3 MB


None

In [None]:
import pandas as pd
weather_all = pd.read_parquet("/content/drive/MyDrive/combined_weather_data.parquet")
weather_all.head()
weather_all.shape

(3436248, 6)

In [None]:
weather_all.head()

Unnamed: 0,datetime,temperature,wind,humidity,lat,lon,datetime_hour,lat_round,lon_round
0,2013-01-01 00:00:00,10.9,8.4,81,26.519,80.233,2013-01-01 00:00:00,26.519,80.233
1,2013-01-01 01:00:00,10.2,8.3,82,26.519,80.233,2013-01-01 01:00:00,26.519,80.233
2,2013-01-01 02:00:00,9.6,7.6,84,26.519,80.233,2013-01-01 02:00:00,26.519,80.233
3,2013-01-01 03:00:00,9.0,6.3,86,26.519,80.233,2013-01-01 03:00:00,26.519,80.233
4,2013-01-01 04:00:00,8.5,6.2,87,26.519,80.233,2013-01-01 04:00:00,26.519,80.233


In [None]:
pivoted_df.head()

Unnamed: 0_level_0,location_id,sensors_id,location,datetime,lat,lon,year,month,day,time,...,units,value,value,value,value,value,value,datetime_hour,lat_round,lon_round
parameter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,so2,co,no2,o3,pm10,pm25,so2,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 16:00:00,26.519,80.233,2013,12,14,16:00:00,...,,0.0,0.0,0.0,0.0,106.5,0.0,2013-12-14 16:00:00,26.519,80.233
1,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 17:00:00,26.519,80.233,2013,12,14,17:00:00,...,,0.0,0.0,0.0,0.0,127.6,0.0,2013-12-14 17:00:00,26.519,80.233
2,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 18:00:00,26.519,80.233,2013,12,14,18:00:00,...,,0.0,0.0,0.0,0.0,124.0,0.0,2013-12-14 18:00:00,26.519,80.233
3,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 19:00:00,26.519,80.233,2013,12,14,19:00:00,...,,0.0,0.0,0.0,0.0,84.9,0.0,2013-12-14 19:00:00,26.519,80.233
4,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 20:00:00,26.519,80.233,2013,12,14,20:00:00,...,,0.0,0.0,0.0,0.0,36.8,0.0,2013-12-14 20:00:00,26.519,80.233


In [None]:
# Check for data completeness per hour per day per month per year for each latitude
weather_all['year'] = weather_all['datetime'].dt.year
weather_all['month'] = weather_all['datetime'].dt.month
weather_all['day'] = weather_all['datetime'].dt.day
weather_all['hour'] = weather_all['datetime'].dt.hour

completeness_check = weather_all.groupby(['lat', 'year', 'month', 'day'])['hour'].nunique()

# Identify combinations with less than 24 hours of data
incomplete_days = completeness_check[completeness_check < 24]

if not incomplete_days.empty:
    print("Incomplete data found for the following location-year-month-day combinations (less than 24 hours):")
    display(incomplete_days)
else:
    print("Data seems to be complete for all location-year-month-day combinations.")

# Further check: number of unique datetimes per location-year
datetime_completeness_check = weather_all.groupby(['lat', 'lon', 'year'])['datetime'].nunique()

# Total expected hours in a year (considering leap years)
# This is a simplified check and might not be perfectly accurate for all years
expected_hours = weather_all.groupby('year')['datetime'].apply(lambda x: (x.max() - x.min()).days * 24 if not x.empty else 0)

print("\nNumber of unique datetimes per location-year:")
display(datetime_completeness_check)

# Compare with expected hours (simplified)
print("\nComparison with expected hours (simplified):")

# Reset the index of datetime_completeness_check to align with expected_hours
comparison_df = datetime_completeness_check.reset_index()
comparison_df = comparison_df.rename(columns={'datetime': 'unique_hours'})

# Merge with expected_hours
comparison = comparison_df.merge(
    expected_hours.rename('expected_hours_approx').reset_index(),
    on='year',
    how='left'
)

display(comparison)

Data seems to be complete for all location-year-month-day combinations.

Number of unique datetimes per location-year:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,datetime
lat,lon,year,Unnamed: 3_level_1
12.909916,80.107654,2018,8760
12.909916,80.107654,2019,8760
12.909916,80.107654,2020,8784
12.909916,80.107654,2021,8760
12.909916,80.107654,2022,8760
...,...,...,...
31.620000,74.876512,2018,8760
31.620000,74.876512,2019,8760
31.620000,74.876512,2020,8784
31.620000,74.876512,2021,8760



Comparison with expected hours (simplified):


Unnamed: 0,lat,lon,year,unique_hours,expected_hours_approx
0,12.909916,80.107654,2018,8760,8736
1,12.909916,80.107654,2019,8760,8736
2,12.909916,80.107654,2020,8784,8760
3,12.909916,80.107654,2021,8760,8736
4,12.909916,80.107654,2022,8760,8736
...,...,...,...,...,...
387,31.620000,74.876512,2018,8760,8736
388,31.620000,74.876512,2019,8760,8736
389,31.620000,74.876512,2020,8784,8760
390,31.620000,74.876512,2021,8760,8736


In [None]:
for lat in weather_all['lon'].unique():
    if lat not in pivoted_df['lon'].unique():
        print(f"Missing lat: {lat}")

In [None]:
import pandas as pd

# Normalize datetime (remove timezone info to match)
pivoted_df['datetime'] = pivoted_df['datetime'].dt.tz_localize(None)
weather_all['datetime'] = weather_all['datetime'].dt.tz_localize(None)

pivoted_df['datetime_hour'] = pivoted_df['datetime'].dt.floor('h')
weather_all['datetime_hour'] = weather_all['datetime'].dt.floor('h')

pivoted_df['lat_round'] = pivoted_df['lat'].round(3)
pivoted_df['lon_round'] = pivoted_df['lon'].round(3)
weather_all['lat_round'] = weather_all['lat'].round(3)
weather_all['lon_round'] = weather_all['lon'].round(3)

# Remove index from both DataFrames
pivoted_df = pivoted_df.reset_index(drop=True)
weather_all = weather_all.reset_index(drop=True)

# Flatten the MultiIndex columns of pivoted_df
pivoted_df.columns = ['_'.join(str(s) for s in col if s) for col in pivoted_df.columns]


# Perform the left join
merged_df = pivoted_df.merge(
    weather_all[['datetime_hour', 'lat_round', 'lon_round', 'temperature', 'wind', 'humidity']],
    on=['datetime_hour', 'lat_round', 'lon_round'],
    how='left'
)

# Display the head of the merged DataFrame
display(merged_df.head())
display(merged_df.info())

Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,year,month,day,time,...,value_o3,value_pm10,value_pm25,value_so2,datetime_hour,lat_round,lon_round,temperature,wind,humidity
0,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 16:00:00,26.519,80.233,2013,12,14,16:00:00,...,0.0,0.0,106.5,0.0,2013-12-14 16:00:00,26.519,80.233,22.2,3.8,40
1,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 17:00:00,26.519,80.233,2013,12,14,17:00:00,...,0.0,0.0,127.6,0.0,2013-12-14 17:00:00,26.519,80.233,20.6,7.2,46
2,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 18:00:00,26.519,80.233,2013,12,14,18:00:00,...,0.0,0.0,124.0,0.0,2013-12-14 18:00:00,26.519,80.233,18.8,9.0,52
3,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 19:00:00,26.519,80.233,2013,12,14,19:00:00,...,0.0,0.0,84.9,0.0,2013-12-14 19:00:00,26.519,80.233,17.2,9.4,59
4,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 20:00:00,26.519,80.233,2013,12,14,20:00:00,...,0.0,0.0,36.8,0.0,2013-12-14 20:00:00,26.519,80.233,15.8,8.2,65


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14841314 entries, 0 to 14841313
Data columns (total 28 columns):
 #   Column         Dtype         
---  ------         -----         
 0   location_id    int64         
 1   sensors_id     int64         
 2   location       object        
 3   datetime       datetime64[ns]
 4   lat            float64       
 5   lon            float64       
 6   year           int32         
 7   month          int32         
 8   day            int32         
 9   time           object        
 10  units_co       object        
 11  units_no2      object        
 12  units_o3       object        
 13  units_pm10     object        
 14  units_pm25     object        
 15  units_so2      object        
 16  value_co       float64       
 17  value_no2      float64       
 18  value_o3       float64       
 19  value_pm10     float64       
 20  value_pm25     float64       
 21  value_so2      float64       
 22  datetime_hour  datetime64[ns]
 23  lat_r

None

In [None]:
display(merged_df.isnull().sum())

Unnamed: 0,0
location_id,0
sensors_id,0
location,0
datetime,0
lat,0
lon,0
year,0
month,0
day,0
time,0


In [None]:
output_path = "/content/drive/MyDrive/air_quality_with_weather.parquet"

try:
    merged_df.to_parquet(output_path, index=False)
    print(f"Successfully saved merged data to {output_path}")
except Exception as e:
    print(f"Error saving merged data to {output_path}: {e}")

Successfully saved merged data to /content/drive/MyDrive/air_quality_with_weather.parquet


In [None]:
import pandas as pd
combined = pd.read_parquet("/content/drive/MyDrive/air_quality_with_weather.parquet")
combined.head()
combined.shape

(14841314, 28)

In [None]:
# Conversion factor for O3 from ppm to µg/m³ at 25°C and 1 atm
# 1 ppm O3 = 1.96 µg/m³ O3
conversion_factor_o3 = 1.96

# Identify rows where units_o3 is 'ppm'
ppm_o3_mask = combined['units_o3'] == 'ppm'

# Convert values from ppm to µg/m³ for these rows
combined.loc[ppm_o3_mask, 'value_o3'] = combined.loc[ppm_o3_mask, 'value_o3'] * conversion_factor_o3

# Update the units to 'µg/m³' for these rows
combined.loc[ppm_o3_mask, 'units_o3'] = 'µg/m³'

# Verify the changes
print("Unique values for 'units_o3' after conversion:")
display(combined['units_o3'].unique())

print("\nHead of DataFrame after conversion:")
display(combined[combined['units_o3'] == 'µg/m³'].head())

Unique values for 'units_o3' after conversion:


array(['', 'µg/m³'], dtype=object)


Head of DataFrame after conversion:


Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,year,month,day,time,...,value_o3,value_pm10,value_pm25,value_so2,datetime_hour,lat_round,lon_round,temperature,wind,humidity
1684,13,24,"Delhi Technological University, Delhi - CPCB-13",2015-04-09 11:30:00,28.744,77.12,2015,4,9,11:30:00,...,2.0,0.0,0.0,0.0,2015-04-09 11:00:00,28.744,77.12,28.0,12.7,42
47091,15,29,IGI Airport-15,2015-07-10 12:00:00,28.56,77.094,2015,7,10,12:00:00,...,61.66,0.0,0.0,0.0,2015-07-10 12:00:00,28.56,77.094,27.1,6.0,92
126199,17,394,"R K Puram, Delhi - DPCC-17",2015-06-29 20:00:00,28.5648,77.1744,2015,6,29,20:00:00,...,24.2,0.0,0.0,0.0,2015-06-29 20:00:00,28.565,77.174,30.2,10.4,63
126200,17,394,"R K Puram, Delhi - DPCC-17",2015-06-30 12:40:00,28.5648,77.1744,2015,6,30,12:40:00,...,147.2,0.0,0.0,0.0,2015-06-30 12:00:00,28.565,77.174,32.3,8.3,57
126201,17,394,"R K Puram, Delhi - DPCC-17",2015-06-30 18:40:00,28.5648,77.1744,2015,6,30,18:40:00,...,25.5,0.0,0.0,0.0,2015-06-30 18:00:00,28.565,77.174,29.5,13.1,66


In [None]:
# List of unit columns to drop
units_to_drop = [
    'units_co',
    'units_no2',
    'units_o3',
    'units_pm10',
    'units_pm25',
    'units_so2'
]

# Drop the individual unit columns
combined = combined.drop(columns=units_to_drop)

# Create a single 'units' column with 'µg/m³'
combined['units'] = 'µg/m³'

# Display the head and info to verify the changes
display(combined.head())
display(combined.info())

Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,year,month,day,time,...,value_pm10,value_pm25,value_so2,datetime_hour,lat_round,lon_round,temperature,wind,humidity,units
0,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 16:00:00,26.519,80.233,2013,12,14,16:00:00,...,0.0,106.5,0.0,2013-12-14 16:00:00,26.519,80.233,22.2,3.8,40,µg/m³
1,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 17:00:00,26.519,80.233,2013,12,14,17:00:00,...,0.0,127.6,0.0,2013-12-14 17:00:00,26.519,80.233,20.6,7.2,46,µg/m³
2,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 18:00:00,26.519,80.233,2013,12,14,18:00:00,...,0.0,124.0,0.0,2013-12-14 18:00:00,26.519,80.233,18.8,9.0,52,µg/m³
3,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 19:00:00,26.519,80.233,2013,12,14,19:00:00,...,0.0,84.9,0.0,2013-12-14 19:00:00,26.519,80.233,17.2,9.4,59,µg/m³
4,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 20:00:00,26.519,80.233,2013,12,14,20:00:00,...,0.0,36.8,0.0,2013-12-14 20:00:00,26.519,80.233,15.8,8.2,65,µg/m³


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14841314 entries, 0 to 14841313
Data columns (total 23 columns):
 #   Column         Dtype         
---  ------         -----         
 0   location_id    int64         
 1   sensors_id     int64         
 2   location       object        
 3   datetime       datetime64[ns]
 4   lat            float64       
 5   lon            float64       
 6   year           int32         
 7   month          int32         
 8   day            int32         
 9   time           object        
 10  value_co       float64       
 11  value_no2      float64       
 12  value_o3       float64       
 13  value_pm10     float64       
 14  value_pm25     float64       
 15  value_so2      float64       
 16  datetime_hour  datetime64[ns]
 17  lat_round      float64       
 18  lon_round      float64       
 19  temperature    float64       
 20  wind           float64       
 21  humidity       int64         
 22  units          object        
dtypes: da

None

In [None]:
import pandas as pd
import numpy as np

# --- 1. Breakpoints Dictionary (Remains the same) ---
breakpoints = {
    "pm25": [(0, 30, 0, 50), (31, 60, 51, 100), (61, 90, 101, 200), (91, 120, 201, 300), (121, 250, 301, 400), (251, 9999, 401, 500)],
    "pm10": [(0, 50, 0, 50), (51, 100, 51, 100), (101, 250, 101, 200), (251, 350, 201, 300), (351, 430, 301, 400), (431, 9999, 401, 500)],
    "no2":  [(0, 40, 0, 50), (41, 80, 51, 100), (81, 180, 101, 200), (181, 280, 201, 300), (281, 400, 301, 400), (401, 9999, 401, 500)],
    "so2":  [(0, 40, 0, 50), (41, 80, 51, 100), (81, 380, 101, 200), (381, 800, 201, 300), (801, 1600, 301, 400), (1601, 9999, 401, 500)],
    "co":   [(0, 1, 0, 50), (1.1, 2, 51, 100), (2.1, 10, 101, 200), (10.1, 17, 201, 300), (17.1, 34, 301, 400), (34.1, 9999, 401, 500)],
    "o3":   [(0, 50, 0, 50), (51, 100, 51, 100), (101, 168, 101, 200), (169, 208, 201, 300), (209, 748, 301, 400), (749, 9999, 401, 500)]
}

# --- 2. The Core AQI Calculation Function (Remains the same) ---

def compute_aqi(cp, pollutant):
    """
    Computes the Air Quality Index (AQI) for a given pollutant concentration (Cp).
    """
    if cp is None or pd.isna(cp):
        return None

    # Ensure Cp is treated as a float for calculations
    cp = float(cp)

    # Check for division by zero risk (Bphi - Bplo == 0)
    for bp_lo, bp_hi, i_lo, i_hi in breakpoints[pollutant]:
        if bp_hi == bp_lo:
             continue # Skip invalid breakpoint ranges

        if bp_lo <= cp <= bp_hi:
            # The AQI formula: I = [(Ihi - Ilo) / (Bphi - Bplo)] * (Cp - Bplo) + Ilo
            return ((i_hi - i_lo) / (bp_hi - bp_lo)) * (cp - bp_lo) + i_lo

    return None


# --- 3. Pandas Implementation (Replaces Spark Transformations) ---


# List of pollutants and their corresponding DataFrame columns
pollutants = {
    "pm25": "value_pm25",
    "pm10": "value_pm10",
    "no2":  "value_no2",
    "so2":  "value_so2",
    "co":   "value_co",
    "o3":   "value_o3"
}

# 3a. Calculate individual AQI columns using .apply()
aqi_columns = []
for pol, col_name in pollutants.items():
    aqi_col_name = f"aqi_{pol}"

    # Use .apply() to execute the function row-wise on the concentration column.
    # The 'lambda' handles passing the pollutant name for the breakpoints lookup.
    df[aqi_col_name] = df[col_name].apply(lambda cp: compute_aqi(cp, pol))

    aqi_columns.append(aqi_col_name)

# 3b. Final AQI calculation (Replaces Spark's 'greatest')
# Use the .max(axis=1) method on the subset of AQI columns.
# skipna=True ensures that if one AQI value is None/NaN, the max of the others is taken.
df["AQI"] = df[aqi_columns].max(axis=1, skipna=True)

# print(df) # Uncomment to see the resulting DataFrame

In [2]:
import pandas as pd

df = pd.read_parquet('/content/drive/MyDrive/air_quality_with_weather1.parquet')
print(df.head())

   location_id  sensors_id                 location            datetime  \
0           12          23  SPARTAN - IIT Kanpur-12 2013-12-14 16:00:00   
1           12          23  SPARTAN - IIT Kanpur-12 2013-12-14 17:00:00   
2           12          23  SPARTAN - IIT Kanpur-12 2013-12-14 18:00:00   
3           12          23  SPARTAN - IIT Kanpur-12 2013-12-14 19:00:00   
4           12          23  SPARTAN - IIT Kanpur-12 2013-12-14 20:00:00   

      lat     lon  year  month  day      time  ...  wind  humidity  units  \
0  26.519  80.233  2013     12   14  16:00:00  ...   3.8        40  µg/m³   
1  26.519  80.233  2013     12   14  17:00:00  ...   7.2        46  µg/m³   
2  26.519  80.233  2013     12   14  18:00:00  ...   9.0        52  µg/m³   
3  26.519  80.233  2013     12   14  19:00:00  ...   9.4        59  µg/m³   
4  26.519  80.233  2013     12   14  20:00:00  ...   8.2        65  µg/m³   

     aqi_pm25  aqi_pm10  aqi_no2 aqi_so2  aqi_co  aqi_o3         AQI  
0  253.913793  

In [3]:
# List the columns you want to remove
columns_to_drop = [
    'aqi_pm25',
    'aqi_pm10',
    'aqi_no2',
    'aqi_so2',
    'aqi_co',
    'aqi_o3'
]

df.drop(columns=columns_to_drop, axis=1, inplace=True)

print("Individual AQI columns successfully dropped from df.")

Individual AQI columns successfully dropped from df.


In [4]:
df.head()

Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,year,month,day,time,...,value_pm25,value_so2,datetime_hour,lat_round,lon_round,temperature,wind,humidity,units,AQI
0,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 16:00:00,26.519,80.233,2013,12,14,16:00:00,...,106.5,0.0,2013-12-14 16:00:00,26.519,80.233,22.2,3.8,40,µg/m³,253.913793
1,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 17:00:00,26.519,80.233,2013,12,14,17:00:00,...,127.6,0.0,2013-12-14 17:00:00,26.519,80.233,20.6,7.2,46,µg/m³,306.065116
2,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 18:00:00,26.519,80.233,2013,12,14,18:00:00,...,124.0,0.0,2013-12-14 18:00:00,26.519,80.233,18.8,9.0,52,µg/m³,303.302326
3,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 19:00:00,26.519,80.233,2013,12,14,19:00:00,...,84.9,0.0,2013-12-14 19:00:00,26.519,80.233,17.2,9.4,59,µg/m³,182.589655
4,12,23,SPARTAN - IIT Kanpur-12,2013-12-14 20:00:00,26.519,80.233,2013,12,14,20:00:00,...,36.8,0.0,2013-12-14 20:00:00,26.519,80.233,15.8,8.2,65,µg/m³,60.8


In [5]:
output_path = "/content/drive/MyDrive/air_quality_with_weather1.parquet"

try:
    df.to_parquet(output_path, index=False)
    print(f"Successfully saved merged data to {output_path}")
except Exception as e:
    print(f"Error saving merged data to {output_path}: {e}")

Successfully saved merged data to /content/drive/MyDrive/air_quality_with_weather1.parquet
