In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import zipfile
import os

zip_file_path = '/content/drive/MyDrive/Air Quality Monitoring Data/ raw/Air Quality Data in India (2015 - 2020).zip'
extract_path = './extracted_data'

# Create the extraction directory if it doesn't exist
os.makedirs(extract_path, exist_ok=True)

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print(f"Files extracted to: {extract_path}")

Files extracted to: ./extracted_data


## Install pyspark

### Subtask:
Install the PySpark library to work with Spark in Colab.


**Reasoning**:
Install the PySpark library using pip.



In [None]:
!pip install pyspark

Collecting pyspark
  Downloading pyspark-4.0.1.tar.gz (434.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m434.2/434.2 MB[0m [31m960.6 kB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting py4j==0.10.9.9 (from pyspark)
  Downloading py4j-0.10.9.9-py2.py3-none-any.whl.metadata (1.3 kB)
Downloading py4j-0.10.9.9-py2.py3-none-any.whl (203 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.0/203.0 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pyspark
  Building wheel for pyspark (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-4.0.1-py2.py3-none-any.whl size=434813860 sha256=0543f88d105ce96850ced283459a19b4a1ec987f178fff66a3219790d77467b6
  Stored in directory: /root/.cache/pip/wheels/31/9f/68/f89fb34ccd88690

## Create sparksession

### Subtask:
Create a SparkSession, which is the entry point to any Spark functionality.


**Reasoning**:
Create a SparkSession to enable Spark functionality.



In [None]:
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("CSV Loader").getOrCreate()
print(spark)

<pyspark.sql.session.SparkSession object at 0x7aad02be3d40>


## Load data

### Subtask:
Read the `/content/extracted_data/stations.csv` file into a Spark DataFrame.


**Reasoning**:
Read the `/content/extracted_data/stations.csv` file into a Spark DataFrame using the specified options.



In [None]:
stations_df = spark.read.csv("/content/extracted_data/stations.csv", header=True, inferSchema=True)
stations_df.show()

+---------+--------------------+-----------------+--------------+------+
|StationId|         StationName|             City|         State|Status|
+---------+--------------------+-----------------+--------------+------+
|    AP001|Secretariat, Amar...|        Amaravati|Andhra Pradesh|Active|
|    AP002|Anand Kala Kshetr...|Rajamahendravaram|Andhra Pradesh|  NULL|
|    AP003|Tirumala, Tirupat...|         Tirupati|Andhra Pradesh|  NULL|
|    AP004|PWD Grounds, Vija...|       Vijayawada|Andhra Pradesh|  NULL|
|    AP005|GVM Corporation, ...|    Visakhapatnam|Andhra Pradesh|Active|
|    AS001|Railway Colony, G...|         Guwahati|         Assam|Active|
|    BR001|Collectorate, Gay...|             Gaya|         Bihar|  NULL|
|    BR002|SFTI Kusdihra, Ga...|             Gaya|         Bihar|  NULL|
|    BR003|Industrial Area, ...|          Hajipur|         Bihar|  NULL|
|    BR004|Muzaffarpur Colle...|      Muzaffarpur|         Bihar|  NULL|
|    BR005|DRM Office Danapu...|            Patna| 

## Display data

### Subtask:
Show the first few rows and the schema of the loaded Spark DataFrame.


**Reasoning**:
Display the first few rows and the schema of the Spark DataFrame as requested.



In [None]:
stations_df.show(n=5)
stations_df.printSchema()

+---------+--------------------+-----------------+--------------+------+
|StationId|         StationName|             City|         State|Status|
+---------+--------------------+-----------------+--------------+------+
|    AP001|Secretariat, Amar...|        Amaravati|Andhra Pradesh|Active|
|    AP002|Anand Kala Kshetr...|Rajamahendravaram|Andhra Pradesh|  NULL|
|    AP003|Tirumala, Tirupat...|         Tirupati|Andhra Pradesh|  NULL|
|    AP004|PWD Grounds, Vija...|       Vijayawada|Andhra Pradesh|  NULL|
|    AP005|GVM Corporation, ...|    Visakhapatnam|Andhra Pradesh|Active|
+---------+--------------------+-----------------+--------------+------+
only showing top 5 rows

root
 |-- StationId: string (nullable = true)
 |-- StationName: string (nullable = true)
 |-- City: string (nullable = true)
 |-- State: string (nullable = true)
 |-- Status: string (nullable = true)



## Summary:

### Data Analysis Key Findings

*   The `stations.csv` file was successfully loaded into a Spark DataFrame named `stations_df`.
*   The DataFrame `stations_df` contains columns: `StationId`, `StationName`, `City`, `State`, and `Status`.
*   All columns in the loaded DataFrame were inferred as `string` type and are nullable.

### Insights or Next Steps

*   The data types for numerical columns like `StationId` might need to be cast to appropriate numeric types if numerical operations are required.
*   Further data cleaning or transformation might be needed depending on the subsequent analysis tasks.


# Task
Clean the `StationName` column in the `stations_df` Spark DataFrame by removing everything after the hyphen '-', then fetch the longitude and latitude for each station using a combination of `StationName`, `City`, and `State`, falling back to `City` and `State` if the full combination is not found.

## Clean 'stationname'

### Subtask:
Remove the substring starting from '-' in the 'StationName' column.


**Reasoning**:
Remove the substring starting from '-' in the 'StationName' column using regexp_replace.



In [None]:
from pyspark.sql.functions import regexp_replace

stations_df = stations_df.withColumn("StationName", regexp_replace("StationName", "-.*", ""))
stations_df.show(n=5)

+---------+--------------------+-----------------+--------------+------+
|StationId|         StationName|             City|         State|Status|
+---------+--------------------+-----------------+--------------+------+
|    AP001|Secretariat, Amar...|        Amaravati|Andhra Pradesh|Active|
|    AP002|Anand Kala Kshetr...|Rajamahendravaram|Andhra Pradesh|  NULL|
|    AP003| Tirumala, Tirupati |         Tirupati|Andhra Pradesh|  NULL|
|    AP004|PWD Grounds, Vija...|       Vijayawada|Andhra Pradesh|  NULL|
|    AP005|GVM Corporation, ...|    Visakhapatnam|Andhra Pradesh|Active|
+---------+--------------------+-----------------+--------------+------+
only showing top 5 rows



## Geocoding strategy

### Subtask:
Outline the strategy for fetching longitude and latitude based on the available location information.


## Implement geocoding

### Subtask:
Implement the geocoding process using a suitable library or service.


**Reasoning**:
Install the geopy library for geocoding.



In [None]:
!pip install geopy



In [None]:
from pyspark.sql.functions import udf, concat_ws, when
from pyspark.sql.types import DoubleType, StructType, StructField
import time
from geopy.geocoders import Nominatim # Import Nominatim here as well

def geocode_location_safe(location):
    """Geocodes a location string and returns longitude and latitude, initializing geolocator within."""
    geolocator_local = Nominatim(user_agent="air_quality_app") # Initialize geolocator here
    try:
        location_data = geolocator_local.geocode(location)
        if location_data:
            # Add a small delay to avoid hitting API rate limits
            time.sleep(2) # Increased delay
            return (location_data.longitude, location_data.latitude)
        return (None, None)
    except Exception as e:
        print(f"Error geocoding {location}: {e}")
        return (None, None)

# Define a UDF for geocoding that returns a StructType
geocode_udf_safe = udf(geocode_location_safe, StructType([StructField("longitude", DoubleType()), StructField("latitude", DoubleType())]))

# Apply the UDF to geocode using the full location
stations_df = stations_df.withColumn(
    "coordinates_full",
    geocode_udf_safe(concat_ws(", ", stations_df["StationName"], stations_df["City"], stations_df["State"]))
)

# Extract longitude and latitude from the full location geocoding result
stations_df = stations_df.withColumn("longitude_full", stations_df["coordinates_full"].getItem("longitude"))
stations_df = stations_df.withColumn("latitude_full", stations_df["coordinates_full"].getItem("latitude"))

# Apply the UDF to geocode using only City and State for failed full location geocodes
stations_df = stations_df.withColumn(
    "coordinates_city_state",
    geocode_udf_safe(concat_ws(", ", stations_df["City"], stations_df["State"]))
)

# Extract longitude and latitude from the City and State geocoding result
stations_df = stations_df.withColumn("longitude_city_state", stations_df["coordinates_city_state"].getItem("longitude"))
stations_df = stations_df.withColumn("latitude_city_state", stations_df["coordinates_city_state"].getItem("latitude"))

# Combine the results, prioritizing the full location result
stations_df = stations_df.withColumn(
    "longitude",
    when(stations_df["longitude_full"].isNotNull(), stations_df["longitude_full"]).otherwise(stations_df["longitude_city_state"])
)

stations_df = stations_df.withColumn(
    "latitude",
    when(stations_df["latitude_full"].isNotNull(), stations_df["latitude_full"]).otherwise(stations_df["latitude_city_state"])
)

# Drop the intermediate columns
stations_df = stations_df.drop("coordinates_full", "longitude_full", "latitude_full", "coordinates_city_state", "longitude_city_state", "latitude_city_state")

stations_df.show(n=5)
stations_df.printSchema()

+---------+--------------------+-----------------+--------------+------+----------+----------+
|StationId|         StationName|             City|         State|Status| longitude|  latitude|
+---------+--------------------+-----------------+--------------+------+----------+----------+
|    AP001|Secretariat, Amar...|        Amaravati|Andhra Pradesh|Active| 80.486211|16.5368542|
|    AP002|Anand Kala Kshetr...|Rajamahendravaram|Andhra Pradesh|  NULL|81.7804732|17.0050454|
|    AP003| Tirumala, Tirupati |         Tirupati|Andhra Pradesh|  NULL|79.3497522|13.6795235|
|    AP004|PWD Grounds, Vija...|       Vijayawada|Andhra Pradesh|  NULL|80.6160469|16.5115306|
|    AP005|GVM Corporation, ...|    Visakhapatnam|Andhra Pradesh|Active|83.2921297|17.6935526|
+---------+--------------------+-----------------+--------------+------+----------+----------+
only showing top 5 rows

root
 |-- StationId: string (nullable = true)
 |-- StationName: string (nullable = true)
 |-- City: string (nullable = t

In [None]:
hours_df = spark.read.csv("/content/extracted_data/station_hour.csv", header=True, inferSchema=True)
hours_df.show()

+---------+-------------------+-----+------+----+-----+-----+-----+----+-----+------+-------+-------+------+-----+----------+
|StationId|           Datetime|PM2.5|  PM10|  NO|  NO2|  NOx|  NH3|  CO|  SO2|    O3|Benzene|Toluene|Xylene|  AQI|AQI_Bucket|
+---------+-------------------+-----+------+----+-----+-----+-----+----+-----+------+-------+-------+------+-----+----------+
|    AP001|2017-11-24 17:00:00| 60.5|  98.0|2.35| 30.8|18.25|  8.5| 0.1|11.85| 126.4|    0.1|    6.1|   0.1| NULL|      NULL|
|    AP001|2017-11-24 18:00:00| 65.5|111.25| 2.7| 24.2|15.07| 9.77| 0.1|13.17|117.12|    0.1|   6.25|  0.15| NULL|      NULL|
|    AP001|2017-11-24 19:00:00| 80.0| 132.0| 2.1|25.18|15.15|12.02| 0.1|12.08| 98.98|    0.2|   5.98|  0.18| NULL|      NULL|
|    AP001|2017-11-24 20:00:00| 81.5|133.25|1.95|16.25|10.23|11.58| 0.1|10.47| 112.2|    0.2|   6.72|   0.1| NULL|      NULL|
|    AP001|2017-11-24 21:00:00|75.25| 116.0|1.43|17.48|10.43|12.03| 0.1| 9.12|106.35|    0.2|   5.75|  0.08| NULL|    

In [None]:
hours_df.printSchema()

root
 |-- StationId: string (nullable = true)
 |-- Datetime: timestamp (nullable = true)
 |-- PM2.5: double (nullable = true)
 |-- PM10: double (nullable = true)
 |-- NO: double (nullable = true)
 |-- NO2: double (nullable = true)
 |-- NOx: double (nullable = true)
 |-- NH3: double (nullable = true)
 |-- CO: double (nullable = true)
 |-- SO2: double (nullable = true)
 |-- O3: double (nullable = true)
 |-- Benzene: double (nullable = true)
 |-- Toluene: double (nullable = true)
 |-- Xylene: double (nullable = true)
 |-- AQI: double (nullable = true)
 |-- AQI_Bucket: string (nullable = true)



In [None]:
from pyspark.sql.functions import col

air_quality_columns = ["`PM2.5`", "PM10", "NO", "NO2", "NOx", "NH3", "CO", "SO2", "O3", "Benzene", "Toluene", "Xylene", "AQI", "AQI_Bucket"]

# Create a condition to check if all air quality columns are null
all_null_condition = None
for column in air_quality_columns:
    if all_null_condition is None:
        all_null_condition = col(column).isNull()
    else:
        all_null_condition = all_null_condition & col(column).isNull()

# Filter out rows where all air quality columns are null
hours_df_cleaned = hours_df.filter(~all_null_condition)

# Show the count of rows before and after filtering
print(f"Number of rows before filtering: {hours_df.count()}")
print(f"Number of rows after filtering: {hours_df_cleaned.count()}")

# Display the first few rows of the cleaned DataFrame
hours_df_cleaned.show(n=5)

Number of rows before filtering: 2589083
Number of rows after filtering: 2352243
+---------+-------------------+-----+------+----+-----+-----+-----+---+-----+------+-------+-------+------+----+----------+
|StationId|           Datetime|PM2.5|  PM10|  NO|  NO2|  NOx|  NH3| CO|  SO2|    O3|Benzene|Toluene|Xylene| AQI|AQI_Bucket|
+---------+-------------------+-----+------+----+-----+-----+-----+---+-----+------+-------+-------+------+----+----------+
|    AP001|2017-11-24 17:00:00| 60.5|  98.0|2.35| 30.8|18.25|  8.5|0.1|11.85| 126.4|    0.1|    6.1|   0.1|NULL|      NULL|
|    AP001|2017-11-24 18:00:00| 65.5|111.25| 2.7| 24.2|15.07| 9.77|0.1|13.17|117.12|    0.1|   6.25|  0.15|NULL|      NULL|
|    AP001|2017-11-24 19:00:00| 80.0| 132.0| 2.1|25.18|15.15|12.02|0.1|12.08| 98.98|    0.2|   5.98|  0.18|NULL|      NULL|
|    AP001|2017-11-24 20:00:00| 81.5|133.25|1.95|16.25|10.23|11.58|0.1|10.47| 112.2|    0.2|   6.72|   0.1|NULL|      NULL|
|    AP001|2017-11-24 21:00:00|75.25| 116.0|1.43|17

In [None]:
from pyspark.sql.functions import year, month, dayofmonth, hour, concat, lit, lpad

hours_df_cleaned = hours_df_cleaned.withColumn("year", year("Datetime"))
hours_df_cleaned = hours_df_cleaned.withColumn("month", month("Datetime"))
hours_df_cleaned = hours_df_cleaned.withColumn("day", dayofmonth("Datetime"))
hours_df_cleaned = hours_df_cleaned.withColumn("hour_int", hour("Datetime")) # Keep original hour as integer

# Format the hour as "HH:00:00" string
hours_df_cleaned = hours_df_cleaned.withColumn(
    "hour_formatted",
    concat(lpad(hours_df_cleaned["hour_int"].cast("string"), 2, "0"), lit(":00:00"))
)

# Drop the intermediate hour columns
hours_df_cleaned = hours_df_cleaned.drop("hour", "hour_int")


hours_df_cleaned.show(n=5)
hours_df_cleaned.printSchema()

+---------+-------------------+-----+------+----+-----+-----+-----+---+-----+------+-------+-------+------+----+----------+----+-----+---+--------------+
|StationId|           Datetime|PM2.5|  PM10|  NO|  NO2|  NOx|  NH3| CO|  SO2|    O3|Benzene|Toluene|Xylene| AQI|AQI_Bucket|year|month|day|hour_formatted|
+---------+-------------------+-----+------+----+-----+-----+-----+---+-----+------+-------+-------+------+----+----------+----+-----+---+--------------+
|    AP001|2017-11-24 17:00:00| 60.5|  98.0|2.35| 30.8|18.25|  8.5|0.1|11.85| 126.4|    0.1|    6.1|   0.1|NULL|      NULL|2017|   11| 24|      17:00:00|
|    AP001|2017-11-24 18:00:00| 65.5|111.25| 2.7| 24.2|15.07| 9.77|0.1|13.17|117.12|    0.1|   6.25|  0.15|NULL|      NULL|2017|   11| 24|      18:00:00|
|    AP001|2017-11-24 19:00:00| 80.0| 132.0| 2.1|25.18|15.15|12.02|0.1|12.08| 98.98|    0.2|   5.98|  0.18|NULL|      NULL|2017|   11| 24|      19:00:00|
|    AP001|2017-11-24 20:00:00| 81.5|133.25|1.95|16.25|10.23|11.58|0.1|10.47

In [None]:
stations_df = spark.read.csv("/content/drive/MyDrive/stations_df2.csv", header=True, inferSchema=True)
stations_df.show()

+------+---------+--------------------+-----------------+--------------+------+----------+----------+
|    id|StationId|         StationName|             City|         State|Status|  latitude| longitude|
+------+---------+--------------------+-----------------+--------------+------+----------+----------+
|100001|    AP001|Secretariat, Amar...|        Amaravati|Andhra Pradesh|Active|16.5373864|80.4859507|
|100002|    AP002|Anand Kala Kshetr...|Rajamahendravaram|Andhra Pradesh|  NULL|   17.0088|  81.77137|
|100003|    AP003| Tirumala, Tirupati |         Tirupati|Andhra Pradesh|  NULL|13.6795235|79.3497522|
|100004|    AP004|PWD Grounds, Vija...|       Vijayawada|Andhra Pradesh|  NULL|  16.50647|  80.63246|
|100005|    AP005|GVM Corporation, ...|    Visakhapatnam|Andhra Pradesh|Active|  17.72433|  83.30349|
|100006|    AS001|Railway Colony, G...|         Guwahati|         Assam|Active|26.1808827|91.7824864|
|100007|    BR001| Collectorate, Gaya |             Gaya|         Bihar|  NULL|24.

In [None]:
# Indicate the start of the join operation
print("Starting join operation...")

# Join the two DataFrames on StationId
joined_df = hours_df_cleaned.join(stations_df, on="StationId", how="left")

# Indicate that the join is complete and results are being displayed
print("Join complete. Displaying the first few rows:")

# Show the schema and a few rows of the joined DataFrame
joined_df.printSchema()
joined_df.show(n=5)

Starting join operation...
Join complete. Displaying the first few rows:
root
 |-- StationId: string (nullable = true)
 |-- Datetime: timestamp (nullable = true)
 |-- PM2.5: double (nullable = true)
 |-- PM10: double (nullable = true)
 |-- NO: double (nullable = true)
 |-- NO2: double (nullable = true)
 |-- NOx: double (nullable = true)
 |-- NH3: double (nullable = true)
 |-- CO: double (nullable = true)
 |-- SO2: double (nullable = true)
 |-- O3: double (nullable = true)
 |-- Benzene: double (nullable = true)
 |-- Toluene: double (nullable = true)
 |-- Xylene: double (nullable = true)
 |-- AQI: double (nullable = true)
 |-- AQI_Bucket: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- day: integer (nullable = true)
 |-- hour_formatted: string (nullable = true)
 |-- id: integer (nullable = true)
 |-- StationName: string (nullable = true)
 |-- City: string (nullable = true)
 |-- State: string (nullable = true)
 |-- Status: string (n

In [None]:
# Save hours_df_cleaned as Parquet
joined_df.write.parquet("/content/drive/MyDrive/historic_2015_2020.parquet", mode="overwrite")

print("DataFrames saved successfully.")

DataFrames saved successfully.


In [None]:
joined_df = spark.read.parquet('/content/drive/MyDrive/historic_2015_2020.parquet',header=True,infraschema=True)
joined_df.show(5)

+---------+-------------------+-----+-----+----+----+----+----+----+-----+-----+-------+-------+------+-----+----------+----+-----+---+--------------+------+--------------------+--------+-------+------+----------+----------+
|StationId|           Datetime|PM2.5| PM10|  NO| NO2| NOx| NH3|  CO|  SO2|   O3|Benzene|Toluene|Xylene|  AQI|AQI_Bucket|year|month|day|hour_formatted|    id|         StationName|    City|  State|Status|  latitude| longitude|
+---------+-------------------+-----+-----+----+----+----+----+----+-----+-----+-------+-------+------+-----+----------+----+-----+---+--------------+------+--------------------+--------+-------+------+----------+----------+
|    HR013|2020-04-24 20:00:00| 31.7|55.95|3.73| 3.0|2.99|NULL|0.49|12.08|94.94|  10.22|   2.41|  1.26|129.0|  Moderate|2020|    4| 24|      20:00:00|100074|Teri Gram, Gurugram |Gurugram|Haryana|Active|77.1488113|28.4271632|
|    HR013|2020-04-24 21:00:00| 39.8|52.57| 3.7|3.03|2.98|NULL|0.54|12.62|78.75|  11.04|   2.28|  1.

In [None]:
# Get unique location-year pairs using Spark DataFrame operations
unique_loc_year_spark = joined_df.select('latitude', 'longitude', 'year').distinct()

# Show the count of unique location-year pairs
print("Total unique (lat,lon,year) pairs:", unique_loc_year_spark.count())

# Display the first few rows of the unique location-year pairs
unique_loc_year_spark.show(n=5)

Total unique (lat,lon,year) pairs: 389
+----------+----------+----+
|  latitude| longitude|year|
+----------+----------+----+
|  28.42572|  77.15077|2020|
|  25.59354|  85.22718|2020|
|28.6149362|77.0227628|2019|
|   28.6828|  77.30493|2018|
|28.6691641| 77.312267|2020|
+----------+----------+----+
only showing top 5 rows



In [None]:
import requests
import os
import pandas as pd
import time
import json # Import the json library for specific error handling
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, DoubleType, TimestampType, StringType
from pyspark.sql.functions import lit

# Initialize SparkSession if not already initialized (though it should be from previous steps)
# spark = SparkSession.builder.appName("Weather Fetcher").getOrCreate() # Removed redundant initialization


def fetch_weather_and_save(lat, lon, year, save_dir="/content/drive/MyDrive/new_weather_data", max_retries=5):
    """Fetch weather for a given lat, lon, year and save as parquet with retries."""
    lat = float(lat)
    lon = float(lon)
    year = int(year)

    os.makedirs(save_dir, exist_ok=True)
    outfile = f"{save_dir}/weather_{lat}_{lon}_{year}.parquet"

    if os.path.exists(outfile):
        print(f"Skipping {lat},{lon},{year} (already exists)")
        return outfile

    start = f"{year}-01-01"
    end   = f"{year}-12-31"

    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start,
        "end_date": end,
        "hourly": ["temperature_2m", "windspeed_10m", "relative_humidity_2m"],
        "timezone": "Asia/Kolkata"
    }

    retries = 0
    while retries < max_retries:
        try:
            r = requests.get("https://archive-api.open-meteo.com/v1/archive", params=params, timeout=60)
            r.raise_for_status()

            # Check if the response has content before trying to parse as JSON
            if not r.text:
                print(f"Empty response for {lat},{lon},{year}")
                return None

            data = r.json()

            if "hourly" not in data or not data["hourly"]["time"]:
                print(f"No hourly data for {lat},{lon},{lon},{year}")
                return None

            # Create pandas DataFrame first, then convert to Spark DataFrame
            weather_pandas_df = pd.DataFrame({
                "datetime": pd.to_datetime(data["hourly"]["time"]),
                "temperature": data["hourly"]["temperature_2m"],
                "wind": data["hourly"]["windspeed_10m"],
                "humidity": data["hourly"]["relative_humidity_2m"]
            })
            weather_pandas_df["lat"] = lat
            weather_pandas_df["lon"] = lon

            # Explicitly cast columns to float to avoid type issues with Spark
            weather_pandas_df["temperature"] = weather_pandas_df["temperature"].astype(float)
            weather_pandas_df["wind"] = weather_pandas_df["wind"].astype(float)
            weather_pandas_df["humidity"] = weather_pandas_df["humidity"].astype(float)


            # Define Spark schema for the weather data
            weather_schema = StructType([
                StructField("datetime", TimestampType(), True),
                StructField("temperature", DoubleType(), True),
                StructField("wind", DoubleType(), True),
                StructField("humidity", DoubleType(), True),
                StructField("lat", DoubleType(), True),
                StructField("lon", DoubleType(), True)
            ])

            # Convert pandas DataFrame to Spark DataFrame
            weather_spark_df = spark.createDataFrame(weather_pandas_df, schema=weather_schema)

            # Save Spark DataFrame to parquet
            weather_spark_df.write.parquet(outfile, mode="overwrite")

            print(f"Saved {outfile}")
            return outfile

        except json.JSONDecodeError as e:
            retries += 1
            wait_time = 2 ** retries
            print(f"JSON decoding error for {lat},{lon},{year} ({e}). Response content: {r.text[:200]}... Retry {retries}/{max_retries} in {wait_time}s...")
            time.sleep(wait_time)
        except requests.exceptions.RequestException as e:
            retries += 1
            wait_time = 2 ** retries   # exponential backoff (2,4,8,16,32 sec)
            print(f"Request error fetching {lat},{lon},{year} ({e}), retry {retries}/{max_retries} in {wait_time}s...")
            time.sleep(wait_time)
        except Exception as e:
            print(f"An unexpected error occurred for {lat},{lon},{year}: {e}")
            return None


    print(f"Failed after {max_retries} retries for {lat},{lon},{year}")
    return None

In [None]:
from tqdm import tqdm

# Collect unique location-year pairs to iterate over (be mindful of memory for large datasets)
# If unique_loc_year_spark is very large, consider saving it and reading in chunks
unique_locations = unique_loc_year_spark.collect()

weather_files = []
for row in tqdm(unique_locations, desc="Fetching weather data"):
    lat = row['latitude']
    lon = row['longitude']
    year = row['year']
    parquet_file = fetch_weather_and_save(lat, lon, year)
    if parquet_file:
        weather_files.append(parquet_file)

# Now, load all saved parquet files into a single Spark DataFrame
if weather_files:
    # Read the first file to infer schema
    weather_df = spark.read.parquet(weather_files[0])

    # If there are more files, union them
    if len(weather_files) > 1:
        for file in weather_files[1:]:
            try:
                df_to_union = spark.read.parquet(file)
                weather_df = weather_df.union(df_to_union)
            except Exception as e:
                print(f"Error reading or unioning file {file}: {e}")

    print("Combined weather DataFrame schema:")
    weather_df.printSchema()
    print("Combined weather DataFrame sample:")
    weather_df.show(n=5)
else:
    print("No weather data files were successfully created.")

Fetching weather data:   0%|          | 1/389 [00:02<15:43,  2.43s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.42572_77.15077_2020.parquet


Fetching weather data:   1%|          | 2/389 [00:04<13:39,  2.12s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.59354_85.22718_2020.parquet


Fetching weather data:   1%|          | 3/389 [00:06<12:38,  1.97s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6149362_77.0227628_2019.parquet


Fetching weather data:   1%|          | 4/389 [00:08<12:59,  2.03s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6828_77.30493_2018.parquet


Fetching weather data:   1%|▏         | 5/389 [00:10<12:50,  2.01s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6691641_77.312267_2020.parquet


Fetching weather data:   2%|▏         | 6/389 [00:11<12:20,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5685108_77.2513847_2020.parquet


Fetching weather data:   2%|▏         | 7/389 [00:13<12:12,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6109477_77.0384563_2016.parquet


Fetching weather data:   2%|▏         | 8/389 [00:15<11:54,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5000822_77.2675147_2020.parquet


Fetching weather data:   2%|▏         | 9/389 [00:17<11:42,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.60086_85.0851_2019.parquet


Fetching weather data:   3%|▎         | 10/389 [00:19<11:24,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7259717_77.162658_2019.parquet


Fetching weather data:   3%|▎         | 11/389 [00:21<12:28,  1.98s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7508153_77.1162765_2015.parquet


Fetching weather data:   3%|▎         | 12/389 [00:23<12:38,  2.01s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6828_77.30493_2020.parquet


Fetching weather data:   3%|▎         | 13/389 [00:25<12:10,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6126342_77.2373304_2020.parquet


Fetching weather data:   4%|▎         | 14/389 [00:27<11:45,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6819261_77.0308105_2018.parquet


Fetching weather data:   4%|▍         | 15/389 [00:28<11:30,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6149362_77.0227628_2020.parquet


Fetching weather data:   4%|▍         | 16/389 [00:30<11:10,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6828_77.30493_2015.parquet


Fetching weather data:   4%|▍         | 17/389 [00:32<11:21,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6515829_77.1582587_2020.parquet


Fetching weather data:   5%|▍         | 18/389 [00:34<12:00,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7199257_77.2481823_2018.parquet


Fetching weather data:   5%|▍         | 19/389 [00:38<15:17,  2.48s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5923949_77.2366541_2017.parquet


Fetching weather data:   5%|▌         | 20/389 [00:40<13:58,  2.27s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6569534_77.2947178_2020.parquet


Fetching weather data:   5%|▌         | 21/389 [00:41<12:48,  2.09s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6314973_77.1984831_2016.parquet


Fetching weather data:   6%|▌         | 22/389 [00:43<12:29,  2.04s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.672995_77.1461239_2016.parquet


Fetching weather data:   6%|▌         | 23/389 [00:45<12:12,  2.00s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7162092_77.1170743_2018.parquet


Fetching weather data:   6%|▌         | 24/389 [00:47<11:46,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.61113_85.1322_2020.parquet


Fetching weather data:   6%|▋         | 25/389 [00:49<12:07,  2.00s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7508153_77.1162765_2017.parquet


Fetching weather data:   7%|▋         | 26/389 [00:51<12:01,  1.99s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5833771_77.2335404_2018.parquet


Fetching weather data:   7%|▋         | 27/389 [00:53<11:34,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5503805_77.1855231_2018.parquet


Fetching weather data:   7%|▋         | 28/389 [00:55<11:16,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_16.5373864_80.4859507_2020.parquet


Fetching weather data:   7%|▋         | 29/389 [00:56<11:11,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5000822_77.2675147_2018.parquet


Fetching weather data:   8%|▊         | 30/389 [00:58<10:51,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6281909_77.2410437_2015.parquet


Fetching weather data:   8%|▊         | 31/389 [01:00<10:47,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_30.7419173_76.7747257_2020.parquet


Fetching weather data:   8%|▊         | 32/389 [01:02<11:28,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5503805_77.1855231_2015.parquet


Fetching weather data:   8%|▊         | 33/389 [01:04<11:41,  1.97s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.72433_83.30349_2019.parquet


Fetching weather data:   9%|▊         | 34/389 [01:06<11:09,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.646841_77.3160332_2016.parquet


Fetching weather data:   9%|▉         | 35/389 [01:08<10:58,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5501575_77.2249099_2018.parquet


Fetching weather data:   9%|▉         | 36/389 [01:10<10:44,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.646841_77.3160332_2018.parquet


Fetching weather data:  10%|▉         | 37/389 [01:12<11:04,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.68824_77.20919_2020.parquet


Fetching weather data:  10%|▉         | 38/389 [01:13<10:54,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6281909_77.2410437_2020.parquet


Fetching weather data:  10%|█         | 39/389 [01:15<11:21,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6314973_77.1984831_2017.parquet


Fetching weather data:  10%|█         | 40/389 [01:18<11:49,  2.03s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5396291_77.2000301_2020.parquet


Fetching weather data:  11%|█         | 41/389 [01:20<11:35,  2.00s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.476491_77.1329132_2017.parquet


Fetching weather data:  11%|█         | 42/389 [01:21<11:08,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.68824_77.20919_2018.parquet


Fetching weather data:  11%|█         | 43/389 [01:23<10:53,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5413727_77.271623_2020.parquet


Fetching weather data:  11%|█▏        | 44/389 [01:25<10:41,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5543559_77.0974746_2019.parquet


Fetching weather data:  12%|█▏        | 45/389 [01:27<10:48,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.9986596_72.6114013_2019.parquet


Fetching weather data:  12%|█▏        | 46/389 [01:29<11:23,  1.99s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5923949_77.2366541_2018.parquet


Fetching weather data:  12%|█▏        | 47/389 [01:32<12:00,  2.11s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5833771_77.2335404_2019.parquet


Fetching weather data:  12%|█▏        | 48/389 [01:33<11:17,  1.99s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.672995_77.1461239_2019.parquet


Fetching weather data:  13%|█▎        | 49/389 [01:35<10:51,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6569534_77.2947178_2016.parquet


Fetching weather data:  13%|█▎        | 50/389 [01:37<10:27,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5503805_77.1855231_2016.parquet


Fetching weather data:  13%|█▎        | 51/389 [01:38<10:17,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7973118_77.138667_2018.parquet


Fetching weather data:  13%|█▎        | 52/389 [01:40<10:27,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6109477_77.0384563_2017.parquet


Fetching weather data:  14%|█▎        | 53/389 [01:42<10:29,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7199257_77.2481823_2020.parquet


Fetching weather data:  14%|█▍        | 54/389 [01:45<11:05,  1.99s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7508153_77.1162765_2018.parquet


Fetching weather data:  14%|█▍        | 55/389 [01:46<10:35,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.1808827_91.7824864_2019.parquet


Fetching weather data:  14%|█▍        | 56/389 [01:48<10:21,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7508153_77.1162765_2016.parquet


Fetching weather data:  15%|█▍        | 57/389 [01:50<10:11,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6124504_76.9865067_2020.parquet


Fetching weather data:  15%|█▍        | 58/389 [01:52<09:53,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.72433_83.30349_2018.parquet


Fetching weather data:  15%|█▌        | 59/389 [01:53<09:49,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6828_77.30493_2017.parquet


Fetching weather data:  15%|█▌        | 60/389 [01:55<09:49,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6362997_77.1576444_2018.parquet


Fetching weather data:  16%|█▌        | 61/389 [01:57<10:22,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5501575_77.2249099_2017.parquet


Fetching weather data:  16%|█▌        | 62/389 [01:59<10:49,  1.99s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6314973_77.1984831_2020.parquet


Fetching weather data:  16%|█▌        | 63/389 [02:01<10:24,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6885171_77.1739339_2018.parquet


Fetching weather data:  16%|█▋        | 64/389 [02:03<09:58,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.61113_85.1322_2016.parquet


Fetching weather data:  17%|█▋        | 65/389 [02:05<09:51,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5500925_77.2751557_2018.parquet


Fetching weather data:  17%|█▋        | 66/389 [02:06<09:40,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5413727_77.271623_2019.parquet


Fetching weather data:  17%|█▋        | 67/389 [02:08<09:41,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6314973_77.1984831_2019.parquet


Fetching weather data:  17%|█▋        | 68/389 [02:10<10:12,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6109477_77.0384563_2015.parquet


Fetching weather data:  18%|█▊        | 69/389 [02:13<10:34,  1.98s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5833771_77.2335404_2020.parquet


Fetching weather data:  18%|█▊        | 70/389 [02:14<10:03,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.61113_85.1322_2015.parquet


Fetching weather data:  18%|█▊        | 71/389 [02:16<09:51,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.61853_85.14513_2020.parquet


Fetching weather data:  19%|█▊        | 72/389 [02:18<09:39,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5543559_77.0974746_2020.parquet


Fetching weather data:  19%|█▉        | 73/389 [02:20<09:34,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5500925_77.2751557_2019.parquet


Fetching weather data:  19%|█▉        | 74/389 [02:21<09:23,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5500925_77.2751557_2015.parquet


Fetching weather data:  19%|█▉        | 75/389 [02:23<09:11,  1.76s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6515829_77.1582587_2019.parquet


Fetching weather data:  20%|█▉        | 76/389 [02:25<09:48,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6115923_77.2905644_2020.parquet


Fetching weather data:  20%|█▉        | 77/389 [02:27<10:01,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5923949_77.2366541_2019.parquet


Fetching weather data:  20%|██        | 78/389 [02:29<09:41,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6115923_77.2905644_2019.parquet


Fetching weather data:  20%|██        | 79/389 [02:31<09:22,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.1808827_91.7824864_2020.parquet


Fetching weather data:  21%|██        | 80/389 [02:32<09:19,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7259717_77.162658_2018.parquet


Fetching weather data:  21%|██        | 81/389 [02:34<09:14,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6109477_77.0384563_2020.parquet


Fetching weather data:  21%|██        | 82/389 [02:36<09:22,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.4271632_77.1488113_2020.parquet


Fetching weather data:  21%|██▏       | 83/389 [02:38<09:39,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.60086_85.0851_2020.parquet


Fetching weather data:  22%|██▏       | 84/389 [02:40<09:51,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.79966_77.0328847_2018.parquet


Fetching weather data:  22%|██▏       | 85/389 [02:42<09:29,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.42572_77.15077_2018.parquet


Fetching weather data:  22%|██▏       | 86/389 [02:44<09:31,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6126342_77.2373304_2018.parquet


Fetching weather data:  22%|██▏       | 87/389 [02:46<09:21,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.646841_77.3160332_2019.parquet


Fetching weather data:  23%|██▎       | 88/389 [02:47<09:12,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6281909_77.2410437_2019.parquet


Fetching weather data:  23%|██▎       | 89/389 [02:49<09:09,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.646841_77.3160332_2020.parquet


Fetching weather data:  23%|██▎       | 90/389 [02:51<09:48,  1.97s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.476491_77.1329132_2018.parquet


Fetching weather data:  23%|██▎       | 91/389 [02:54<10:17,  2.07s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.61113_85.1322_2017.parquet


Fetching weather data:  24%|██▎       | 92/389 [02:56<10:01,  2.02s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7259717_77.162658_2020.parquet


Fetching weather data:  24%|██▍       | 93/389 [02:57<09:35,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7973118_77.138667_2020.parquet


Fetching weather data:  24%|██▍       | 94/389 [02:59<09:22,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7285944_77.1993251_2018.parquet


Fetching weather data:  24%|██▍       | 95/389 [03:01<09:11,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6281909_77.2410437_2017.parquet


Fetching weather data:  25%|██▍       | 96/389 [03:03<09:03,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6569534_77.2947178_2019.parquet


Fetching weather data:  25%|██▍       | 97/389 [03:05<09:49,  2.02s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.9986596_72.6114013_2017.parquet


Fetching weather data:  25%|██▌       | 98/389 [03:07<10:01,  2.07s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5543559_77.0974746_2017.parquet


Fetching weather data:  25%|██▌       | 99/389 [03:09<09:28,  1.96s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.5854515_85.0448266_2020.parquet


Fetching weather data:  26%|██▌       | 100/389 [03:11<09:11,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6124504_76.9865067_2018.parquet


Fetching weather data:  26%|██▌       | 101/389 [03:13<09:11,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.68824_77.20919_2019.parquet


Fetching weather data:  26%|██▌       | 102/389 [03:15<08:54,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6362997_77.1576444_2019.parquet


Fetching weather data:  26%|██▋       | 103/389 [03:16<08:43,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6109477_77.0384563_2018.parquet


Fetching weather data:  27%|██▋       | 104/389 [03:18<09:06,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5503805_77.1855231_2020.parquet


Fetching weather data:  27%|██▋       | 105/389 [03:21<09:29,  2.01s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5500925_77.2751557_2016.parquet


Fetching weather data:  27%|██▋       | 106/389 [03:22<09:11,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_16.5373864_80.4859507_2017.parquet


Fetching weather data:  28%|██▊       | 107/389 [03:24<09:11,  1.96s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.42572_77.15077_2019.parquet


Fetching weather data:  28%|██▊       | 108/389 [03:26<08:57,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6691641_77.312267_2019.parquet


Fetching weather data:  28%|██▊       | 109/389 [03:28<08:46,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6800843_77.1702212_2018.parquet


Fetching weather data:  28%|██▊       | 110/389 [03:30<08:36,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_30.7419173_76.7747257_2019.parquet


Fetching weather data:  29%|██▊       | 111/389 [03:32<08:25,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.72433_83.30349_2017.parquet


Fetching weather data:  29%|██▉       | 112/389 [03:34<09:05,  1.97s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_16.5373864_80.4859507_2018.parquet


Fetching weather data:  29%|██▉       | 113/389 [03:36<08:43,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.6066335_85.1014347_2019.parquet


Fetching weather data:  29%|██▉       | 114/389 [03:37<08:26,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6281909_77.2410437_2016.parquet


Fetching weather data:  30%|██▉       | 115/389 [03:39<08:24,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.672995_77.1461239_2015.parquet


Fetching weather data:  30%|██▉       | 116/389 [03:41<08:28,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6515829_77.1582587_2018.parquet


Fetching weather data:  30%|███       | 117/389 [03:43<08:11,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7508153_77.1162765_2019.parquet


Fetching weather data:  30%|███       | 118/389 [03:45<08:09,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6800843_77.1702212_2020.parquet


Fetching weather data:  31%|███       | 119/389 [03:47<08:45,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.476491_77.1329132_2019.parquet


Fetching weather data:  31%|███       | 120/389 [03:49<08:43,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6149362_77.0227628_2018.parquet


Fetching weather data:  31%|███       | 121/389 [03:51<08:27,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5501575_77.2249099_2019.parquet


Fetching weather data:  31%|███▏      | 122/389 [03:53<08:32,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6885171_77.1739339_2019.parquet


Fetching weather data:  32%|███▏      | 123/389 [03:54<08:08,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5685108_77.2513847_2019.parquet


Fetching weather data:  32%|███▏      | 124/389 [03:56<08:02,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6362997_77.1576444_2020.parquet


Fetching weather data:  32%|███▏      | 125/389 [03:58<07:53,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.9986596_72.6114013_2015.parquet


Fetching weather data:  32%|███▏      | 126/389 [04:00<08:26,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7508153_77.1162765_2020.parquet


Fetching weather data:  33%|███▎      | 127/389 [04:02<08:51,  2.03s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7162092_77.1170743_2020.parquet


Fetching weather data:  33%|███▎      | 128/389 [04:04<08:29,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.672995_77.1461239_2018.parquet


Fetching weather data:  33%|███▎      | 129/389 [04:06<08:12,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6691641_77.312267_2018.parquet


Fetching weather data:  33%|███▎      | 130/389 [04:08<08:02,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6885171_77.1739339_2020.parquet


Fetching weather data:  34%|███▎      | 131/389 [04:09<08:05,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6124504_76.9865067_2019.parquet


Fetching weather data:  34%|███▍      | 132/389 [04:11<07:57,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6314973_77.1984831_2015.parquet


Fetching weather data:  34%|███▍      | 133/389 [04:14<08:52,  2.08s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.476491_77.1329132_2020.parquet


Fetching weather data:  34%|███▍      | 134/389 [04:16<09:18,  2.19s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5543559_77.0974746_2018.parquet


Fetching weather data:  35%|███▍      | 135/389 [04:18<08:57,  2.11s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6569534_77.2947178_2018.parquet


Fetching weather data:  35%|███▍      | 136/389 [04:20<08:33,  2.03s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.8464766_77.0856631_2019.parquet


Fetching weather data:  35%|███▌      | 137/389 [04:22<08:16,  1.97s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6800843_77.1702212_2019.parquet


Fetching weather data:  35%|███▌      | 138/389 [04:24<07:52,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.9986596_72.6114013_2016.parquet


Fetching weather data:  36%|███▌      | 139/389 [04:25<07:35,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.9986596_72.6114013_2018.parquet


Fetching weather data:  36%|███▌      | 140/389 [04:27<07:34,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6115923_77.2905644_2018.parquet


Fetching weather data:  36%|███▌      | 141/389 [04:29<08:04,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6828_77.30493_2019.parquet


Fetching weather data:  37%|███▋      | 142/389 [04:32<08:21,  2.03s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.8464766_77.0856631_2018.parquet


Fetching weather data:  37%|███▋      | 143/389 [04:33<07:54,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.61113_85.1322_2019.parquet


Fetching weather data:  37%|███▋      | 144/389 [04:35<07:40,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.61113_85.1322_2018.parquet


Fetching weather data:  37%|███▋      | 145/389 [04:37<07:31,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.61853_85.14513_2019.parquet


Fetching weather data:  38%|███▊      | 146/389 [04:39<07:39,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5413727_77.271623_2018.parquet


Fetching weather data:  38%|███▊      | 147/389 [04:41<07:23,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5503805_77.1855231_2019.parquet


Fetching weather data:  38%|███▊      | 148/389 [04:43<07:44,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.646841_77.3160332_2015.parquet


Fetching weather data:  38%|███▊      | 149/389 [04:45<08:04,  2.02s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6569534_77.2947178_2017.parquet


Fetching weather data:  39%|███▊      | 150/389 [04:47<07:48,  1.96s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6281909_77.2410437_2018.parquet


Fetching weather data:  39%|███▉      | 151/389 [04:48<07:30,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6515829_77.1582587_2016.parquet


Fetching weather data:  39%|███▉      | 152/389 [04:50<07:24,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5000822_77.2675147_2019.parquet


Fetching weather data:  39%|███▉      | 153/389 [04:52<07:11,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6314973_77.1984831_2018.parquet


Fetching weather data:  40%|███▉      | 154/389 [04:54<07:07,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7285944_77.1993251_2017.parquet


Fetching weather data:  40%|███▉      | 155/389 [04:56<07:25,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5685108_77.2513847_2018.parquet


Fetching weather data:  40%|████      | 156/389 [04:58<07:34,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.79966_77.0328847_2019.parquet


Fetching weather data:  40%|████      | 157/389 [05:00<07:41,  1.99s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6828_77.30493_2016.parquet


Fetching weather data:  41%|████      | 158/389 [05:02<07:29,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5396291_77.2000301_2018.parquet


Fetching weather data:  41%|████      | 159/389 [05:04<07:14,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7162092_77.1170743_2019.parquet


Fetching weather data:  41%|████      | 160/389 [05:05<07:01,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.72433_83.30349_2016.parquet


Fetching weather data:  41%|████▏     | 161/389 [05:07<06:58,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.6066335_85.1014347_2020.parquet


Fetching weather data:  42%|████▏     | 162/389 [05:09<06:54,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5503805_77.1855231_2017.parquet


Fetching weather data:  42%|████▏     | 163/389 [05:11<07:37,  2.02s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.3858766_76.9639317_2020.parquet


Fetching weather data:  42%|████▏     | 164/389 [05:14<07:49,  2.09s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5923949_77.2366541_2020.parquet


Fetching weather data:  42%|████▏     | 165/389 [05:16<07:30,  2.01s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6819261_77.0308105_2020.parquet


Fetching weather data:  43%|████▎     | 166/389 [05:17<07:13,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5500925_77.2751557_2017.parquet


Fetching weather data:  43%|████▎     | 167/389 [05:19<07:13,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6515829_77.1582587_2017.parquet
Request error fetching 28.68824,77.20919,2017 (HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=60)), retry 1/5 in 2s...


Fetching weather data:  43%|████▎     | 168/389 [06:23<1:15:44, 20.57s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.68824_77.20919_2017.parquet


Fetching weather data:  43%|████▎     | 169/389 [06:25<54:46, 14.94s/it]  

Saved /content/drive/MyDrive/new_weather_data/weather_28.672995_77.1461239_2017.parquet


Fetching weather data:  44%|████▎     | 170/389 [06:27<40:03, 10.97s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.72433_83.30349_2020.parquet


Fetching weather data:  44%|████▍     | 171/389 [06:29<30:17,  8.34s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7199257_77.2481823_2019.parquet


Fetching weather data:  44%|████▍     | 172/389 [06:31<23:39,  6.54s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.42572_77.15077_2017.parquet


Fetching weather data:  44%|████▍     | 173/389 [06:33<18:25,  5.12s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6109477_77.0384563_2019.parquet


Fetching weather data:  45%|████▍     | 174/389 [06:35<14:45,  4.12s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5500925_77.2751557_2020.parquet


Fetching weather data:  45%|████▍     | 175/389 [06:37<12:13,  3.43s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5501575_77.2249099_2020.parquet


Fetching weather data:  45%|████▌     | 176/389 [06:39<10:34,  2.98s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.646841_77.3160332_2017.parquet


Fetching weather data:  46%|████▌     | 177/389 [06:40<09:11,  2.60s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7973118_77.138667_2019.parquet


Fetching weather data:  46%|████▌     | 178/389 [06:43<08:38,  2.46s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5501575_77.2249099_2016.parquet


Fetching weather data:  46%|████▌     | 179/389 [06:45<08:27,  2.42s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.79966_77.0328847_2020.parquet


Fetching weather data:  46%|████▋     | 180/389 [06:47<07:57,  2.29s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6819261_77.0308105_2019.parquet


Fetching weather data:  47%|████▋     | 181/389 [06:49<07:19,  2.12s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6515829_77.1582587_2015.parquet


Fetching weather data:  47%|████▋     | 182/389 [06:50<07:04,  2.05s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6126342_77.2373304_2019.parquet


Fetching weather data:  47%|████▋     | 183/389 [06:52<06:42,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.7285944_77.1993251_2019.parquet


Fetching weather data:  47%|████▋     | 184/389 [06:54<06:30,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.8464766_77.0856631_2020.parquet


Fetching weather data:  48%|████▊     | 185/389 [06:56<06:39,  1.96s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.9986596_72.6114013_2020.parquet


Fetching weather data:  48%|████▊     | 186/389 [06:59<07:21,  2.18s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.5396291_77.2000301_2019.parquet


Fetching weather data:  48%|████▊     | 187/389 [07:01<07:03,  2.10s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_16.5373864_80.4859507_2019.parquet


Fetching weather data:  48%|████▊     | 188/389 [07:02<06:44,  2.01s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.672995_77.1461239_2020.parquet


Fetching weather data:  49%|████▊     | 189/389 [07:04<06:28,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.6362997_77.1576444_2017.parquet


Fetching weather data:  49%|████▉     | 190/389 [07:06<06:17,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3839597_78.439861_2017.parquet


Fetching weather data:  49%|████▉     | 191/389 [07:08<06:13,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3560446_78.4554278_2015.parquet


Fetching weather data:  49%|████▉     | 192/389 [07:10<06:14,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.84862_80.9387_2018.parquet


Fetching weather data:  50%|████▉     | 193/389 [07:12<06:23,  1.96s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_9.9861492_76.2840501_2020.parquet


Fetching weather data:  50%|████▉     | 194/389 [07:14<06:12,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_31.6199787_74.8765281_2019.parquet


Fetching weather data:  50%|█████     | 195/389 [07:16<06:05,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0549792_72.8402203_2017.parquet


Fetching weather data:  50%|█████     | 196/389 [07:17<06:00,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.96377_77.69939_2020.parquet


Fetching weather data:  51%|█████     | 197/389 [07:19<05:58,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.37662_72.82964_2019.parquet


Fetching weather data:  51%|█████     | 198/389 [07:21<05:58,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_31.6199787_74.8765281_2020.parquet


Fetching weather data:  51%|█████     | 199/389 [07:23<05:49,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0549792_72.8402203_2019.parquet


Fetching weather data:  51%|█████▏    | 200/389 [07:25<06:10,  1.96s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.96377_77.69939_2019.parquet


Fetching weather data:  52%|█████▏    | 201/389 [07:27<06:02,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99161_77.54538_2017.parquet


Fetching weather data:  52%|█████▏    | 202/389 [07:29<06:01,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.52744_78.19462_2019.parquet


Fetching weather data:  52%|█████▏    | 203/389 [07:31<05:48,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.98702_80.21767_2015.parquet


Fetching weather data:  52%|█████▏    | 204/389 [07:32<05:41,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.86877_80.95445_2018.parquet


Fetching weather data:  53%|█████▎    | 205/389 [07:34<05:32,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9140008_77.6102821_2020.parquet


Fetching weather data:  53%|█████▎    | 206/389 [07:36<05:37,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.9160258_75.8015394_2019.parquet


Fetching weather data:  53%|█████▎    | 207/389 [07:38<05:53,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.9332789_75.7972295_2019.parquet


Fetching weather data:  53%|█████▎    | 208/389 [07:40<05:53,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.495499_88.3709006_2020.parquet


Fetching weather data:  54%|█████▎    | 209/389 [07:42<05:42,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.52744_78.19462_2017.parquet


Fetching weather data:  54%|█████▍    | 210/389 [07:44<05:30,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_8.5549422_76.8855036_2020.parquet


Fetching weather data:  54%|█████▍    | 211/389 [07:45<05:20,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0549792_72.8402203_2018.parquet


Fetching weather data:  54%|█████▍    | 212/389 [07:47<05:26,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_31.6199787_74.8765281_2018.parquet


Fetching weather data:  55%|█████▍    | 213/389 [07:49<05:16,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_8.513992_76.95128_2020.parquet


Fetching weather data:  55%|█████▌    | 214/389 [07:51<05:36,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_23.2286993_77.4002881_2019.parquet


Fetching weather data:  55%|█████▌    | 215/389 [07:54<05:52,  2.03s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_20.95186_85.17714_2017.parquet


Fetching weather data:  56%|█████▌    | 216/389 [07:55<05:34,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8915907_80.9606943_2015.parquet


Fetching weather data:  56%|█████▌    | 217/389 [07:57<05:27,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99161_77.54538_2015.parquet


Fetching weather data:  56%|█████▌    | 218/389 [07:59<05:31,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0549792_72.8402203_2015.parquet


Fetching weather data:  56%|█████▋    | 219/389 [08:01<05:16,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5280337_88.3659084_2020.parquet


Fetching weather data:  57%|█████▋    | 220/389 [08:03<05:07,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.0329814_77.5331801_2015.parquet


Fetching weather data:  57%|█████▋    | 221/389 [08:05<05:44,  2.05s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_20.95186_85.17714_2019.parquet


Fetching weather data:  57%|█████▋    | 222/389 [08:07<05:50,  2.10s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.4569654_78.4434781_2019.parquet


Fetching weather data:  57%|█████▋    | 223/389 [08:09<05:28,  1.98s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.98702_80.21767_2016.parquet


Fetching weather data:  58%|█████▊    | 224/389 [08:11<05:17,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.7908341_80.8701804_2020.parquet


Fetching weather data:  58%|█████▊    | 225/389 [08:13<05:09,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.84862_80.9387_2020.parquet


Fetching weather data:  58%|█████▊    | 226/389 [08:14<04:56,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.1265273_80.2716289_2017.parquet


Fetching weather data:  58%|█████▊    | 227/389 [08:16<05:00,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.45087_77.0285_2015.parquet


Fetching weather data:  59%|█████▊    | 228/389 [08:18<04:58,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9196037_77.583381_2020.parquet


Fetching weather data:  59%|█████▉    | 229/389 [08:20<05:16,  1.98s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.55838_91.90169_2019.parquet


Fetching weather data:  59%|█████▉    | 230/389 [08:22<05:06,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.52744_78.19462_2020.parquet


Fetching weather data:  59%|█████▉    | 231/389 [08:24<04:59,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99161_77.54538_2016.parquet


Fetching weather data:  60%|█████▉    | 232/389 [08:26<05:16,  2.02s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0655397_72.8795636_2019.parquet


Fetching weather data:  60%|█████▉    | 233/389 [08:28<05:02,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99161_77.54538_2019.parquet


Fetching weather data:  60%|██████    | 234/389 [08:30<04:55,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8986698_75.8163567_2017.parquet


Fetching weather data:  60%|██████    | 235/389 [08:32<04:52,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9556699_77.5402492_2018.parquet


Fetching weather data:  61%|██████    | 236/389 [08:34<05:05,  2.00s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9775068_77.5707681_2018.parquet


Fetching weather data:  61%|██████    | 237/389 [08:36<04:52,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.54169_78.36023_2019.parquet


Fetching weather data:  61%|██████    | 238/389 [08:37<04:39,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.58667_88.35852_2019.parquet


Fetching weather data:  61%|██████▏   | 239/389 [08:39<04:31,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99664_80.19303_2019.parquet


Fetching weather data:  62%|██████▏   | 240/389 [08:41<04:23,  1.77s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.1265273_80.2716289_2015.parquet


Fetching weather data:  62%|██████▏   | 241/389 [08:43<04:20,  1.76s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0085959_72.8192943_2019.parquet


Fetching weather data:  62%|██████▏   | 242/389 [08:45<04:27,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_10.94452_76.98093_2020.parquet


Fetching weather data:  62%|██████▏   | 243/389 [08:47<04:43,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.9332789_75.7972295_2017.parquet


Fetching weather data:  63%|██████▎   | 244/389 [08:49<04:53,  2.03s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8605833_81.0029055_2020.parquet


Fetching weather data:  63%|██████▎   | 245/389 [08:51<04:38,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.103908_72.840301_2019.parquet


Fetching weather data:  63%|██████▎   | 246/389 [08:52<04:25,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99161_77.54538_2018.parquet


Fetching weather data:  63%|██████▎   | 247/389 [08:54<04:23,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.84862_80.9387_2019.parquet


Fetching weather data:  64%|██████▍   | 248/389 [08:56<04:19,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_21.85545_83.9259_2018.parquet


Fetching weather data:  64%|██████▍   | 249/389 [08:58<04:11,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.2267228_72.8619328_2020.parquet


Fetching weather data:  64%|██████▍   | 250/389 [09:00<04:28,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_21.85545_83.9259_2017.parquet


Fetching weather data:  65%|██████▍   | 251/389 [09:02<04:39,  2.03s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5122563_88.3635738_2020.parquet


Fetching weather data:  65%|██████▍   | 252/389 [09:04<04:23,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_20.95186_85.17714_2018.parquet


Fetching weather data:  65%|██████▌   | 253/389 [09:06<04:29,  1.98s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8986698_75.8163567_2020.parquet


Fetching weather data:  65%|██████▌   | 254/389 [09:08<04:16,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5497837_88.3391136_2018.parquet


Fetching weather data:  66%|██████▌   | 255/389 [09:09<04:07,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99161_77.54538_2020.parquet


Fetching weather data:  66%|██████▌   | 256/389 [09:11<04:03,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_21.85545_83.9259_2020.parquet


Fetching weather data:  66%|██████▌   | 257/389 [09:13<04:07,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.96377_77.69939_2016.parquet


Fetching weather data:  66%|██████▋   | 258/389 [09:15<04:15,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0465213_72.8632834_2019.parquet


Fetching weather data:  67%|██████▋   | 259/389 [09:17<04:07,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_23.709_86.41399_2020.parquet


Fetching weather data:  67%|██████▋   | 260/389 [09:19<03:57,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.2267228_72.8619328_2019.parquet


Fetching weather data:  67%|██████▋   | 261/389 [09:21<03:52,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.18327_80.29182_2020.parquet


Fetching weather data:  67%|██████▋   | 262/389 [09:22<03:52,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.7908341_80.8701804_2016.parquet


Fetching weather data:  68%|██████▊   | 263/389 [09:24<03:45,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.5187_78.27933_2018.parquet


Fetching weather data:  68%|██████▊   | 264/389 [09:26<03:40,  1.76s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9775068_77.5707681_2020.parquet


Fetching weather data:  68%|██████▊   | 265/389 [09:28<03:51,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9775068_77.5707681_2016.parquet


Fetching weather data:  68%|██████▊   | 266/389 [09:30<04:06,  2.01s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9491453_77.5981385_2018.parquet


Fetching weather data:  69%|██████▊   | 267/389 [09:32<03:52,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5122563_88.3635738_2019.parquet


Fetching weather data:  69%|██████▉   | 268/389 [09:34<03:46,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3560446_78.4554278_2020.parquet


Fetching weather data:  69%|██████▉   | 269/389 [09:35<03:38,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5497837_88.3391136_2020.parquet


Fetching weather data:  69%|██████▉   | 270/389 [09:37<03:34,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.54169_78.36023_2018.parquet


Fetching weather data:  70%|██████▉   | 271/389 [09:39<03:29,  1.78s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_9.9651009_76.3187314_2020.parquet


Fetching weather data:  70%|██████▉   | 272/389 [09:41<03:44,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_23.709_86.41399_2017.parquet


Fetching weather data:  70%|███████   | 273/389 [09:43<03:49,  1.98s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.0329814_77.5331801_2020.parquet


Fetching weather data:  70%|███████   | 274/389 [09:45<03:37,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.86877_80.95445_2017.parquet


Fetching weather data:  71%|███████   | 275/389 [09:47<03:28,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.52744_78.19462_2016.parquet


Fetching weather data:  71%|███████   | 276/389 [09:48<03:22,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_18.915091_72.8259691_2020.parquet


Fetching weather data:  71%|███████   | 277/389 [09:50<03:21,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.1265273_80.2716289_2016.parquet


Fetching weather data:  71%|███████▏  | 278/389 [09:52<03:17,  1.78s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99664_80.19303_2018.parquet


Fetching weather data:  72%|███████▏  | 279/389 [09:54<03:12,  1.75s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8915907_80.9606943_2016.parquet


Fetching weather data:  72%|███████▏  | 280/389 [09:56<03:25,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9196037_77.583381_2019.parquet


Fetching weather data:  72%|███████▏  | 281/389 [09:58<03:33,  1.97s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99664_80.19303_2015.parquet


Fetching weather data:  72%|███████▏  | 282/389 [10:00<03:21,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.1265273_80.2716289_2018.parquet


Fetching weather data:  73%|███████▎  | 283/389 [10:01<03:13,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.5187_78.27933_2017.parquet


Fetching weather data:  73%|███████▎  | 284/389 [10:03<03:10,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9556699_77.5402492_2020.parquet


Fetching weather data:  73%|███████▎  | 285/389 [10:05<03:06,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.96377_77.69939_2015.parquet


Fetching weather data:  74%|███████▎  | 286/389 [10:07<03:04,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9775068_77.5707681_2017.parquet


Fetching weather data:  74%|███████▍  | 287/389 [10:09<03:18,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.54169_78.36023_2020.parquet


Fetching weather data:  74%|███████▍  | 288/389 [10:11<03:22,  2.00s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.4569654_78.4434781_2016.parquet


Fetching weather data:  74%|███████▍  | 289/389 [10:13<03:13,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9556699_77.5402492_2019.parquet


Fetching weather data:  75%|███████▍  | 290/389 [10:15<03:04,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.0329814_77.5331801_2017.parquet


Fetching weather data:  75%|███████▍  | 291/389 [10:16<02:59,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5280337_88.3659084_2019.parquet


Fetching weather data:  75%|███████▌  | 292/389 [10:18<03:02,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9196037_77.583381_2018.parquet


Fetching weather data:  75%|███████▌  | 293/389 [10:20<02:56,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.4569654_78.4434781_2015.parquet


Fetching weather data:  76%|███████▌  | 294/389 [10:22<03:00,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.52744_78.19462_2018.parquet


Fetching weather data:  76%|███████▌  | 295/389 [10:24<03:03,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.0382184_77.5919_2020.parquet


Fetching weather data:  76%|███████▌  | 296/389 [10:26<02:57,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99664_80.19303_2016.parquet


Fetching weather data:  76%|███████▋  | 297/389 [10:28<02:48,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8915907_80.9606943_2019.parquet


Fetching weather data:  77%|███████▋  | 298/389 [10:29<02:43,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.10289_72.87449_2020.parquet


Fetching weather data:  77%|███████▋  | 299/389 [10:31<02:39,  1.78s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8915907_80.9606943_2020.parquet


Fetching weather data:  77%|███████▋  | 300/389 [10:33<02:38,  1.78s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.45087_77.0285_2019.parquet


Fetching weather data:  77%|███████▋  | 301/389 [10:35<02:34,  1.76s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.98702_80.21767_2017.parquet


Fetching weather data:  78%|███████▊  | 302/389 [10:37<02:44,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5544966_88.3380132_2020.parquet


Fetching weather data:  78%|███████▊  | 303/389 [10:39<02:48,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.103908_72.840301_2020.parquet


Fetching weather data:  78%|███████▊  | 304/389 [10:41<02:42,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3839597_78.439861_2020.parquet


Fetching weather data:  78%|███████▊  | 305/389 [10:42<02:35,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.58667_88.35852_2020.parquet


Fetching weather data:  79%|███████▊  | 306/389 [10:44<02:29,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5497837_88.3391136_2019.parquet


Fetching weather data:  79%|███████▉  | 307/389 [10:46<02:31,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.45087_77.0285_2017.parquet


Fetching weather data:  79%|███████▉  | 308/389 [10:48<02:26,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9168583_77.6222833_2019.parquet


Fetching weather data:  79%|███████▉  | 309/389 [10:50<02:32,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8915907_80.9606943_2017.parquet


Fetching weather data:  80%|███████▉  | 310/389 [10:52<02:37,  2.00s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.9160258_75.8015394_2017.parquet


Fetching weather data:  80%|███████▉  | 311/389 [10:54<02:34,  1.98s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.9332789_75.7972295_2020.parquet


Fetching weather data:  80%|████████  | 312/389 [10:56<02:27,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3560446_78.4554278_2018.parquet


Fetching weather data:  80%|████████  | 313/389 [10:58<02:21,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0549792_72.8402203_2020.parquet


Fetching weather data:  81%|████████  | 314/389 [10:59<02:16,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.4569654_78.4434781_2017.parquet


Fetching weather data:  81%|████████  | 315/389 [11:01<02:12,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.98702_80.21767_2018.parquet


Fetching weather data:  81%|████████  | 316/389 [11:03<02:10,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_10.94452_76.98093_2019.parquet


Fetching weather data:  81%|████████▏ | 317/389 [11:05<02:18,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.0382184_77.5919_2018.parquet


Fetching weather data:  82%|████████▏ | 318/389 [11:07<02:13,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9168583_77.6222833_2018.parquet


Fetching weather data:  82%|████████▏ | 319/389 [11:09<02:07,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0465213_72.8632834_2020.parquet


Fetching weather data:  82%|████████▏ | 320/389 [11:10<02:03,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99664_80.19303_2017.parquet


Fetching weather data:  83%|████████▎ | 321/389 [11:12<01:59,  1.76s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_20.95186_85.17714_2020.parquet


Fetching weather data:  83%|████████▎ | 322/389 [11:14<02:02,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_31.6199787_74.8765281_2017.parquet


Fetching weather data:  83%|████████▎ | 323/389 [11:16<02:00,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9140008_77.6102821_2018.parquet


Fetching weather data:  83%|████████▎ | 324/389 [11:18<02:04,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.7908341_80.8701804_2018.parquet


Fetching weather data:  84%|████████▎ | 325/389 [11:20<02:06,  1.98s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.45087_77.0285_2016.parquet


Fetching weather data:  84%|████████▍ | 326/389 [11:22<02:01,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5917378_88.3909399_2020.parquet


Fetching weather data:  84%|████████▍ | 327/389 [11:24<01:55,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.0329814_77.5331801_2016.parquet


Fetching weather data:  84%|████████▍ | 328/389 [11:25<01:49,  1.80s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.99664_80.19303_2020.parquet


Fetching weather data:  85%|████████▍ | 329/389 [11:27<01:46,  1.77s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_23.2286993_77.4002881_2020.parquet


Fetching weather data:  85%|████████▍ | 330/389 [11:29<01:43,  1.75s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.495499_88.3709006_2019.parquet


Fetching weather data:  85%|████████▌ | 331/389 [11:31<01:47,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8986698_75.8163567_2019.parquet


Fetching weather data:  85%|████████▌ | 332/389 [11:33<01:52,  1.97s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.5187_78.27933_2020.parquet


Fetching weather data:  86%|████████▌ | 333/389 [11:35<01:45,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.96377_77.69939_2018.parquet


Fetching weather data:  86%|████████▌ | 334/389 [11:36<01:40,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.58667_88.35852_2018.parquet


Fetching weather data:  86%|████████▌ | 335/389 [11:38<01:36,  1.78s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_8.513992_76.95128_2017.parquet


Fetching weather data:  86%|████████▋ | 336/389 [11:40<01:32,  1.75s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.10289_72.87449_2019.parquet


Fetching weather data:  87%|████████▋ | 337/389 [11:41<01:32,  1.78s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.84862_80.9387_2017.parquet


Fetching weather data:  87%|████████▋ | 338/389 [11:43<01:30,  1.77s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.9160258_75.8015394_2018.parquet


Fetching weather data:  87%|████████▋ | 339/389 [11:46<01:38,  1.97s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8986698_75.8163567_2018.parquet


Fetching weather data:  87%|████████▋ | 340/389 [11:48<01:39,  2.03s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.86877_80.95445_2019.parquet


Fetching weather data:  88%|████████▊ | 341/389 [11:50<01:34,  1.96s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_8.513992_76.95128_2018.parquet


Fetching weather data:  88%|████████▊ | 342/389 [11:51<01:29,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.7908341_80.8701804_2017.parquet


Fetching weather data:  88%|████████▊ | 343/389 [11:53<01:24,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9140008_77.6102821_2019.parquet


Fetching weather data:  88%|████████▊ | 344/389 [11:55<01:22,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3560446_78.4554278_2017.parquet


Fetching weather data:  89%|████████▊ | 345/389 [11:57<01:18,  1.78s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.4569654_78.4434781_2020.parquet


Fetching weather data:  89%|████████▉ | 346/389 [11:59<01:18,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3560446_78.4554278_2019.parquet


Fetching weather data:  89%|████████▉ | 347/389 [12:01<01:24,  2.00s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_23.709_86.41399_2019.parquet


Fetching weather data:  89%|████████▉ | 348/389 [12:03<01:19,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.9332789_75.7972295_2018.parquet


Fetching weather data:  90%|████████▉ | 349/389 [12:04<01:14,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.1187195_72.9073476_2020.parquet


Fetching weather data:  90%|████████▉ | 350/389 [12:06<01:10,  1.81s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_18.915091_72.8259691_2019.parquet


Fetching weather data:  90%|█████████ | 351/389 [12:08<01:09,  1.83s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.37662_72.82964_2020.parquet


Fetching weather data:  90%|█████████ | 352/389 [12:10<01:09,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.4569654_78.4434781_2018.parquet


Fetching weather data:  91%|█████████ | 353/389 [12:12<01:09,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8915907_80.9606943_2018.parquet


Fetching weather data:  91%|█████████ | 354/389 [12:14<01:11,  2.04s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.45087_77.0285_2018.parquet


Fetching weather data:  91%|█████████▏| 355/389 [12:16<01:06,  1.96s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.54169_78.36023_2017.parquet


Fetching weather data:  92%|█████████▏| 356/389 [12:18<01:03,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0655397_72.8795636_2020.parquet


Fetching weather data:  92%|█████████▏| 357/389 [12:20<00:59,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.98702_80.21767_2019.parquet


Fetching weather data:  92%|█████████▏| 358/389 [12:21<00:56,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.1187195_72.9073476_2019.parquet


Fetching weather data:  92%|█████████▏| 359/389 [12:23<00:53,  1.78s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5917378_88.3909399_2019.parquet


Fetching weather data:  93%|█████████▎| 360/389 [12:25<00:50,  1.76s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.1265273_80.2716289_2020.parquet


Fetching weather data:  93%|█████████▎| 361/389 [12:27<00:53,  1.91s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.9160258_75.8015394_2020.parquet


Fetching weather data:  93%|█████████▎| 362/389 [12:29<00:54,  2.01s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_21.85545_83.9259_2019.parquet


Fetching weather data:  93%|█████████▎| 363/389 [12:31<00:54,  2.08s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.7908341_80.8701804_2019.parquet


Fetching weather data:  94%|█████████▎| 364/389 [12:34<00:51,  2.08s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.1265273_80.2716289_2019.parquet


Fetching weather data:  94%|█████████▍| 365/389 [12:35<00:46,  1.95s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3560446_78.4554278_2016.parquet


Fetching weather data:  94%|█████████▍| 366/389 [12:37<00:43,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_8.513992_76.95128_2019.parquet


Fetching weather data:  94%|█████████▍| 367/389 [12:39<00:40,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.7908341_80.8701804_2015.parquet


Fetching weather data:  95%|█████████▍| 368/389 [12:41<00:40,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9775068_77.5707681_2015.parquet


Fetching weather data:  95%|█████████▍| 369/389 [12:43<00:39,  1.99s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.0329814_77.5331801_2018.parquet


Fetching weather data:  95%|█████████▌| 370/389 [12:45<00:36,  1.94s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3839597_78.439861_2018.parquet


Fetching weather data:  95%|█████████▌| 371/389 [12:47<00:34,  1.89s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.98702_80.21767_2020.parquet


Fetching weather data:  96%|█████████▌| 372/389 [12:48<00:31,  1.84s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0085959_72.8192943_2020.parquet


Fetching weather data:  96%|█████████▌| 373/389 [12:50<00:29,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9168583_77.6222833_2020.parquet


Fetching weather data:  96%|█████████▌| 374/389 [12:52<00:26,  1.78s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9775068_77.5707681_2019.parquet


Fetching weather data:  96%|█████████▋| 375/389 [12:54<00:26,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9491453_77.5981385_2019.parquet


Fetching weather data:  97%|█████████▋| 376/389 [12:56<00:25,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_26.8605833_81.0029055_2019.parquet


Fetching weather data:  97%|█████████▋| 377/389 [12:58<00:23,  1.93s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.3839597_78.439861_2019.parquet


Fetching weather data:  97%|█████████▋| 378/389 [13:00<00:20,  1.90s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_28.45087_77.0285_2020.parquet


Fetching weather data:  97%|█████████▋| 379/389 [13:01<00:18,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_19.0549792_72.8402203_2016.parquet


Fetching weather data:  98%|█████████▊| 380/389 [13:03<00:16,  1.82s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_23.71821_92.71958_2020.parquet


Fetching weather data:  98%|█████████▊| 381/389 [13:05<00:14,  1.79s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_17.5187_78.27933_2019.parquet


Fetching weather data:  98%|█████████▊| 382/389 [13:07<00:13,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_25.55838_91.90169_2020.parquet


Fetching weather data:  98%|█████████▊| 383/389 [13:09<00:11,  1.92s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.18327_80.29182_2019.parquet


Fetching weather data:  99%|█████████▊| 384/389 [13:11<00:09,  1.88s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.96377_77.69939_2017.parquet


Fetching weather data:  99%|█████████▉| 385/389 [13:13<00:07,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_23.709_86.41399_2018.parquet


Fetching weather data:  99%|█████████▉| 386/389 [13:14<00:05,  1.87s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.0382184_77.5919_2019.parquet


Fetching weather data:  99%|█████████▉| 387/389 [13:16<00:03,  1.85s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_13.0329814_77.5331801_2019.parquet


Fetching weather data: 100%|█████████▉| 388/389 [13:18<00:01,  1.86s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_12.9491453_77.5981385_2020.parquet


Fetching weather data: 100%|██████████| 389/389 [13:20<00:00,  2.06s/it]

Saved /content/drive/MyDrive/new_weather_data/weather_22.5544966_88.3380132_2019.parquet





Combined weather DataFrame schema:
root
 |-- datetime: timestamp (nullable = true)
 |-- temperature: double (nullable = true)
 |-- wind: double (nullable = true)
 |-- humidity: double (nullable = true)
 |-- lat: double (nullable = true)
 |-- lon: double (nullable = true)

Combined weather DataFrame sample:
+-------------------+-----------+----+--------+--------+--------+
|           datetime|temperature|wind|humidity|     lat|     lon|
+-------------------+-----------+----+--------+--------+--------+
|2020-06-19 16:00:00|       37.3| 4.0|    38.0|28.42572|77.15077|
|2020-06-19 17:00:00|       38.5| 8.4|    39.0|28.42572|77.15077|
|2020-06-19 18:00:00|       37.7|11.4|    42.0|28.42572|77.15077|
|2020-06-19 19:00:00|       36.3| 7.1|    47.0|28.42572|77.15077|
|2020-06-19 20:00:00|       35.8| 9.8|    50.0|28.42572|77.15077|
+-------------------+-----------+----+--------+--------+--------+
only showing top 5 rows



In [None]:
import glob
# Load all saved weather data
files = glob.glob("/content/drive/MyDrive/new_weather_data/*.parquet")

if files:
    # Read the first file to infer schema
    weather_df_combined = spark.read.parquet(files[0])

    # If there are more files, union them
    if len(files) > 1:
        for file in files[1:]:
            try:
                df_to_union = spark.read.parquet(file)
                weather_df_combined = weather_df_combined.union(df_to_union)
            except Exception as e:
                print(f"Error reading or unioning file {file}: {e}")

    # Save the combined weather data to a parquet file
    output_path = "/content/drive/MyDrive/new_combined_weather_data_spark.parquet"
    try:
        weather_df_combined.write.parquet(output_path, mode="overwrite")
        print(f"Successfully saved combined weather data to {output_path}")
    except Exception as e:
        print(f"Error saving combined weather data: {e}")

    print("Combined weather DataFrame schema:")
    weather_df_combined.printSchema()
    print("Combined weather DataFrame sample:")
    weather_df_combined.show(n=5)

else:
    print("No weather data files found to combine.")

Successfully saved combined weather data to /content/drive/MyDrive/new_combined_weather_data_spark.parquet
Combined weather DataFrame schema:
root
 |-- datetime: timestamp (nullable = true)
 |-- temperature: double (nullable = true)
 |-- wind: double (nullable = true)
 |-- humidity: double (nullable = true)
 |-- lat: double (nullable = true)
 |-- lon: double (nullable = true)

Combined weather DataFrame sample:
+-------------------+-----------+----+--------+--------+--------+
|           datetime|temperature|wind|humidity|     lat|     lon|
+-------------------+-----------+----+--------+--------+--------+
|2020-06-19 16:00:00|       37.3| 4.0|    38.0|28.42572|77.15077|
|2020-06-19 17:00:00|       38.5| 8.4|    39.0|28.42572|77.15077|
|2020-06-19 18:00:00|       37.7|11.4|    42.0|28.42572|77.15077|
|2020-06-19 19:00:00|       36.3| 7.1|    47.0|28.42572|77.15077|
|2020-06-19 20:00:00|       35.8| 9.8|    50.0|28.42572|77.15077|
+-------------------+-----------+----+--------+--------+-

In [None]:
from pyspark.sql.functions import date_trunc, round, col


# Round latitude and longitude
joined_df = joined_df.withColumn('lat_round', round(col('latitude'), 3))
joined_df = joined_df.withColumn('lon_round', round(col('longitude'), 3))
weather_df_combined = weather_df_combined.withColumn('lat_round', round(col('lat'), 3))
weather_df_combined = weather_df_combined.withColumn('lon_round', round(col('lon'), 3))

# Perform the left join
merged_df = joined_df.join(
    weather_df_combined.select('datetime', 'lat_round', 'lon_round', 'temperature', 'wind', 'humidity'),
    on=['datetime', 'lat_round', 'lon_round'],
    how='left'
)

# Display the schema and a few rows of the merged DataFrame
print("Merged DataFrame schema:")
merged_df.printSchema()
print("Merged DataFrame sample:")
merged_df.show(n=5)

# Remove pandas display calls if they exist
# display(merged_df.head())
# display(merged_df.info())

Merged DataFrame schema:
root
 |-- Datetime: timestamp (nullable = true)
 |-- lat_round: double (nullable = true)
 |-- lon_round: double (nullable = true)
 |-- StationId: string (nullable = true)
 |-- PM2.5: double (nullable = true)
 |-- PM10: double (nullable = true)
 |-- NO: double (nullable = true)
 |-- NO2: double (nullable = true)
 |-- NOx: double (nullable = true)
 |-- NH3: double (nullable = true)
 |-- CO: double (nullable = true)
 |-- SO2: double (nullable = true)
 |-- O3: double (nullable = true)
 |-- Benzene: double (nullable = true)
 |-- Toluene: double (nullable = true)
 |-- Xylene: double (nullable = true)
 |-- AQI: double (nullable = true)
 |-- AQI_Bucket: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- day: integer (nullable = true)
 |-- hour_formatted: string (nullable = true)
 |-- id: integer (nullable = true)
 |-- StationName: string (nullable = true)
 |-- City: string (nullable = true)
 |-- State: string (nulla

In [None]:
from pyspark.sql.functions import col

# Filter for rows where temperature is negative
negative_temperature_df = merged_df.filter(col("temperature") < 0)

# Show the count of rows with negative temperature
print(f"Number of rows with negative temperature: {negative_temperature_df.count()}")

# Display the first few rows with negative temperature
print("Sample rows with negative temperature:")
negative_temperature_df.show(n=10)

Number of rows with negative temperature: 0
Sample rows with negative temperature:
+--------+---------+---------+---------+-----+----+---+---+---+---+---+---+---+-------+-------+------+---+----------+----+-----+---+--------------+---+-----------+----+-----+------+--------+---------+-----------+----+--------+
|Datetime|lat_round|lon_round|StationId|PM2.5|PM10| NO|NO2|NOx|NH3| CO|SO2| O3|Benzene|Toluene|Xylene|AQI|AQI_Bucket|year|month|day|hour_formatted| id|StationName|City|State|Status|latitude|longitude|temperature|wind|humidity|
+--------+---------+---------+---------+-----+----+---+---+---+---+---+---+---+-------+-------+------+---+----------+----+-----+---+--------------+---+-----------+----+-----+------+--------+---------+-----------+----+--------+
+--------+---------+---------+---------+-----+----+---+---+---+---+---+---+---+-------+-------+------+---+----------+----+-----+---+--------------+---+-----------+----+-----+------+--------+---------+-----------+----+--------+



In [None]:
from pyspark.sql.functions import col

# Check the number of null values in the 'temperature' column
null_temperature_count = merged_df.filter(col("temperature").isNull()).count()

print(f"Number of null values in the 'temperature' column: {null_temperature_count}")

Number of null values in the 'temperature' column: 0


In [None]:
# Save hours_df_cleaned as Parquet
merged_df.write.parquet("/content/drive/MyDrive/historic_2015_2020_final.parquet", mode="overwrite")

print("DataFrames saved successfully.")

DataFrames saved successfully.


In [None]:
# Save hours_df_cleaned as Parquet
aqi_df = spark.read.parquet("/content/drive/MyDrive/historic_2015_2020_final.parquet", mode="overwrite")

aqi_df.show()

+-------------------+---------+---------+---------+------+------+------+------+------+-----+-----+------+------+-------+-------+------+----+----------+----+-----+---+--------------+------+--------------------+---------+-------------+------+----------+----------+-----------+----+--------+
|           Datetime|lat_round|lon_round|StationId| PM2.5|  PM10|    NO|   NO2|   NOx|  NH3|   CO|   SO2|    O3|Benzene|Toluene|Xylene| AQI|AQI_Bucket|year|month|day|hour_formatted|    id|         StationName|     City|        State|Status|  latitude| longitude|temperature|wind|humidity|
+-------------------+---------+---------+---------+------+------+------+------+------+-----+-----+------+------+-------+-------+------+----+----------+----+-----+---+--------------+------+--------------------+---------+-------------+------+----------+----------+-----------+----+--------+
|2015-01-01 01:00:00|   12.987|   80.218|    TN004|  NULL|  NULL|106.46|120.34|139.05| NULL| 0.63|  7.49| 11.42|   0.11|   NULL|  NUL

In [None]:
aqi_df.printSchema()

root
 |-- Datetime: timestamp (nullable = true)
 |-- lat_round: double (nullable = true)
 |-- lon_round: double (nullable = true)
 |-- StationId: string (nullable = true)
 |-- PM2.5: double (nullable = true)
 |-- PM10: double (nullable = true)
 |-- NO: double (nullable = true)
 |-- NO2: double (nullable = true)
 |-- NOx: double (nullable = true)
 |-- NH3: double (nullable = true)
 |-- CO: double (nullable = true)
 |-- SO2: double (nullable = true)
 |-- O3: double (nullable = true)
 |-- Benzene: double (nullable = true)
 |-- Toluene: double (nullable = true)
 |-- Xylene: double (nullable = true)
 |-- AQI: double (nullable = true)
 |-- AQI_Bucket: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- day: integer (nullable = true)
 |-- hour_formatted: string (nullable = true)
 |-- id: integer (nullable = true)
 |-- StationName: string (nullable = true)
 |-- City: string (nullable = true)
 |-- State: string (nullable = true)
 |-- Status: 

In [None]:
from pyspark.sql import functions as F

# Function to compute subindex for each pollutant
def get_subindex(colname, breakpoints):
    expr = None
    for (low, high, sub_low, sub_high) in breakpoints:
        # Linear interpolation formula
        subindex_expr = ((F.col(colname) - low) * (sub_high - sub_low) / (high - low) + sub_low)

        if expr is None:
            expr = F.when((F.col(colname) >= low) & (F.col(colname) <= high), subindex_expr)
        else:
            expr = expr.when((F.col(colname) >= low) & (F.col(colname) <= high), subindex_expr)

    return expr

# Define CPCB breakpoints
bp_pm25  = [(0,30,0,50),(31,60,51,100),(61,90,101,200),(91,120,201,300),(121,250,301,400),(251,500,401,500)]
bp_pm10  = [(0,50,0,50),(51,100,51,100),(101,250,101,200),(251,350,201,300),(351,430,301,400),(431,600,401,500)]
bp_no2   = [(0,40,0,50),(41,80,51,100),(81,180,101,200),(181,280,201,300),(281,400,301,400),(401,1000,401,500)]
bp_o3    = [(0,50,0,50),(51,100,51,100),(101,168,101,200),(169,208,201,300),(209,748,301,500)]
bp_so2   = [(0,40,0,50),(41,80,51,100),(81,380,101,200),(381,800,201,300),(801,1600,301,400),(1601,2000,401,500)]
bp_co    = [(0,1,0,50),(1.1,2,51,100),(2.1,10,101,200),(10.1,17,201,300),(17.1,34,301,400),(34.1,50,401,500)]

# Add subindices
aqi_df = (aqi_df
          .withColumn("si_pm25", get_subindex("`PM2.5`", bp_pm25))
          .withColumn("si_pm10", get_subindex("PM10", bp_pm10))
          .withColumn("si_no2",  get_subindex("NO2", bp_no2))
          .withColumn("si_o3",   get_subindex("O3",  bp_o3))
          .withColumn("si_so2",  get_subindex("SO2", bp_so2))
          .withColumn("si_co",   get_subindex("CO",  bp_co))
)

# AQI = max of all available subindices
aqi_df = aqi_df.withColumn(
    "calc_AQI",
    F.greatest("si_pm25","si_pm10","si_no2","si_o3","si_so2","si_co")
)

# Fill missing AQI only
aqi_df = aqi_df.withColumn(
    "AQI",
    F.when(F.col("AQI").isNull(), F.col("calc_AQI")).otherwise(F.col("AQI"))
)

# AQI Bucket categories
aqi_df = aqi_df.withColumn(
    "AQI_Bucket",
    F.when(F.col("AQI").between(0,50), "Good")
     .when(F.col("AQI").between(51,100), "Satisfactory")
     .when(F.col("AQI").between(101,200), "Moderate")
     .when(F.col("AQI").between(201,300), "Poor")
     .when(F.col("AQI").between(301,400), "Very Poor")
     .when(F.col("AQI") > 400, "Severe")
     .otherwise(F.col("AQI_Bucket"))
)

In [None]:
aqi_df.show(5)

+-------------------+---------+---------+---------+------+----+------+------+------+----+----+----+------+-------+-------+------+------+------------+----+-----+---+--------------+------+--------------------+-------+-------------+------+----------+----------+-----------+----+--------+-------+-------+-----------------+------------------+------+------------------+------------------+
|           Datetime|lat_round|lon_round|StationId| PM2.5|PM10|    NO|   NO2|   NOx| NH3|  CO| SO2|    O3|Benzene|Toluene|Xylene|   AQI|  AQI_Bucket|year|month|day|hour_formatted|    id|         StationName|   City|        State|Status|  latitude| longitude|temperature|wind|humidity|si_pm25|si_pm10|           si_no2|             si_o3|si_so2|             si_co|          calc_AQI|
+-------------------+---------+---------+---------+------+----+------+------+------+----+----+----+------+-------+-------+------+------+------------+----+-----+---+--------------+------+--------------------+-------+-------------+-----

In [None]:
from pyspark.sql.functions import round

# Drop the intermediate subindex columns
aqi_df = aqi_df.drop("si_pm25", "si_pm10", "si_no2", "si_o3", "si_so2", "si_co", "calc_AQI")

# Round off the AQI to 2 decimal places
aqi_df = aqi_df.withColumn("AQI", round("AQI", 2))

# Show the updated DataFrame schema and a few rows
aqi_df.printSchema()
aqi_df.show(5)

root
 |-- Datetime: timestamp (nullable = true)
 |-- lat_round: double (nullable = true)
 |-- lon_round: double (nullable = true)
 |-- StationId: string (nullable = true)
 |-- PM2.5: double (nullable = true)
 |-- PM10: double (nullable = true)
 |-- NO: double (nullable = true)
 |-- NO2: double (nullable = true)
 |-- NOx: double (nullable = true)
 |-- NH3: double (nullable = true)
 |-- CO: double (nullable = true)
 |-- SO2: double (nullable = true)
 |-- O3: double (nullable = true)
 |-- Benzene: double (nullable = true)
 |-- Toluene: double (nullable = true)
 |-- Xylene: double (nullable = true)
 |-- AQI: double (nullable = true)
 |-- AQI_Bucket: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- day: integer (nullable = true)
 |-- hour_formatted: string (nullable = true)
 |-- id: integer (nullable = true)
 |-- StationName: string (nullable = true)
 |-- City: string (nullable = true)
 |-- State: string (nullable = true)
 |-- Status: 

In [None]:
# Save updated dataframe
aqi_df.write.mode("overwrite").parquet("/content/drive/MyDrive/historic_2015_2020_with_AQI.parquet")

In [None]:
aqi_df.select("Datetime","StationName","`PM2.5`","PM10","NO2","O3","SO2","CO","AQI","AQI_Bucket").show(10, truncate=False)

+-------------------+---------------------------------+------+----+------+------+-----+----+------+------------+
|Datetime           |StationName                      |PM2.5 |PM10|NO2   |O3    |SO2  |CO  |AQI   |AQI_Bucket  |
+-------------------+---------------------------------+------+----+------+------+-----+----+------+------------+
|2015-01-01 01:00:00|Velachery Res. Area, Chennai     |NULL  |NULL|120.34|11.42 |7.49 |0.63|140.34|Moderate    |
|2015-01-01 01:00:00|Central School, Lucknow          |NULL  |NULL|19.77 |35.0  |NULL |1.05|35.0  |Good        |
|2015-01-01 02:00:00|Central School, Lucknow          |NULL  |NULL|20.13 |35.0  |NULL |0.06|35.0  |Good        |
|2015-01-01 02:00:00|CRRI Mathura Road, Delhi         |655.93|NULL|35.58 |121.38|NULL |0.55|131.11|Moderate    |
|2015-01-01 02:00:00|Shadipur, Delhi                  |NULL  |NULL|47.0  |8.5   |2.0  |NULL|58.54 |Satisfactory|
|2015-01-01 03:00:00|Maninagar, Ahmedabad             |NULL  |NULL|19.32 |NULL  |52.83|0.08|65.8