In [1]:
import pandas as pd

# Build the monthly, per-country AQI averages from two CSV chunks.
# The code below reads the columns 'Date', 'Country' and 'AQI Value', so both
# files must provide them.
df1 = pd.read_csv('chunk2.csv')
df2 = pd.read_csv('chunk3.csv')

# Stand-in frames for trying this cell without the CSV files:
# df1 = pd.DataFrame({'Date': ['2022-07-01', '2022-07-15', '2022-08-01'],
#                     'Country': ['USA', 'USA', 'USA'],
#                     'AQI Value': [50, 60, 70]})

# df2 = pd.DataFrame({'Date': ['2022-07-10', '2022-08-05', '2022-08-15'],
#                     'Country': ['USA', 'USA', 'USA'],
#                     'AQI Value': [55, 65, 75]})

# Parse 'Date' so the .dt accessor can be used below.
df1['Date'] = pd.to_datetime(df1['Date'])
df2['Date'] = pd.to_datetime(df2['Date'])

# Stack the two chunks. ignore_index=True gives the combined frame a clean
# 0..n-1 index instead of repeating each chunk's own row labels.
combined_df = pd.concat([df1, df2], ignore_index=True)

# Calendar month number only: the same month from different years is pooled.
combined_df['Month'] = combined_df['Date'].dt.month

# Mean AQI for every (Month, Country) pair, flattened back to plain columns.
monthly_avg_aqi = combined_df.groupby(['Month', 'Country'])['AQI Value'].mean().reset_index()

# The averaged column deliberately keeps the name 'AQI Value'. The earlier
# rename(columns={'AQI': 'Average_AQI'}) pointed at a column that does not
# exist, so it silently did nothing, and the later cells read 'AQI Value'.

# Display the resulting DataFrame
print(monthly_avg_aqi)

      Month                                            Country  AQI Value
0         1                                            Andorra      13.00
1         1                                          Argentina      22.40
2         1                                            Armenia      97.80
3         1                                          Australia      41.00
4         1                                            Austria      66.80
...     ...                                                ...        ...
1207     12                               United Arab Emirates      77.50
1208     12  United Kingdom of Great Britain and Northern I...      58.50
1209     12                           United States of America     109.00
1210     12                                            Vatican      62.75
1211     12                                            Vietnam      62.75

[1212 rows x 3 columns]


In [5]:
import pandas as pd
import json

# Assign every country in monthly_avg_aqi an integer code and persist the
# mapping, so the same encoding can be reloaded later.

# Distinct country names taken from the 'Country' column.
unique_countries = monthly_avg_aqi['Country'].unique()

# Pair each name with its position in that array: 0, 1, 2, ...
country_dict = dict(zip(unique_countries, range(len(unique_countries))))

# Persist the lookup as JSON.
with open('countries_dict.json', 'w') as json_file:
    json.dump(country_dict, json_file)

# Echo the mapping for a quick visual check.
print(country_dict)

{'Andorra': 0, 'Argentina': 1, 'Armenia': 2, 'Australia': 3, 'Austria': 4, 'Azerbaijan': 5, 'Bangladesh': 6, 'Belarus': 7, 'Belgium': 8, 'Bosnia and Herzegovina': 9, 'Brazil': 10, 'Brunei': 11, 'Bulgaria': 12, 'Canada': 13, 'Cape Verde': 14, 'Chile': 15, 'China': 16, 'Colombia': 17, 'Croatia': 18, 'Cyprus': 19, 'Czech Republic': 20, 'Denmark': 21, 'Dominican Republic': 22, 'Ecuador': 23, 'Estonia': 24, 'Ethiopia': 25, 'Finland': 26, 'France': 27, 'French Guiana': 28, 'Georgia': 29, 'Germany': 30, 'Ghana': 31, 'Greece': 32, 'Guadeloupe': 33, 'Guatemala': 34, 'Hong Kong': 35, 'Iceland': 36, 'India': 37, 'Indonesia': 38, 'Ireland': 39, 'Israel': 40, 'Italy': 41, 'Japan': 42, 'Jordan': 43, 'Kazakhstan': 44, 'Kenya': 45, 'Kuwait': 46, 'Kyrgyzstan': 47, 'Laos': 48, 'Latvia': 49, 'Liechtenstein': 50, 'Lithuania': 51, 'Macao': 52, 'Macedonia': 53, 'Madagascar': 54, 'Malaysia': 55, 'Malta': 56, 'Martinique': 57, 'Mexico': 58, 'Moldova': 59, 'Monaco': 60, 'Mongolia': 61, 'Montenegro': 62, 'Myanm

In [7]:
import torch

# Encode the monthly averages numerically and save them as a float32 tensor.

# Step 1: Reload the country -> integer-code mapping from the JSON file.
with open('countries_dict.json', 'r') as file:
    country_dict = json.load(file)

# Step 2: Encode the country names into a *copy* of the frame.
# Overwriting monthly_avg_aqi['Country'] in place would replace the names with
# codes for every later cell (the heatmap cells look countries up by name), and
# would turn the column into NaN if this cell were run a second time.
encoded_aqi = monthly_avg_aqi.assign(Country=monthly_avg_aqi['Country'].map(country_dict))

# A name missing from the mapping becomes NaN under .map(); fail loudly here
# instead of writing NaNs into the saved tensor.
unmapped = monthly_avg_aqi.loc[encoded_aqi['Country'].isna(), 'Country'].unique()
if len(unmapped) > 0:
    raise ValueError(f"Countries missing from countries_dict.json: {list(unmapped)}")

# Step 3: Convert the encoded frame to a tensor (columns keep the frame's order).
tensor = torch.tensor(encoded_aqi.to_numpy(), dtype=torch.float32)

# Step 4: Report the shape and persist the tensor.
print(tensor.shape)
torch.save(tensor, 'baseline.pt')


torch.Size([1212, 3])


In [17]:
# Split the monthly averages into one record per calendar month.
# Each record holds two parallel lists: the countries and their AQI values.
data_by_month = {}
for month in range(1, 13):
    month_data = monthly_avg_aqi.loc[monthly_avg_aqi['Month'].eq(month)]
    data_by_month[f'data{month}'] = dict(
        Country=month_data['Country'].tolist(),
        AQI=month_data['AQI Value'].tolist(),
    )

# Every month is reachable through the dictionary, e.g. data_by_month['data1'].
# The same records are also bound to individual names data1 ... data12.
(
    data1, data2, data3, data4, data5, data6,
    data7, data8, data9, data10, data11, data12,
) = (data_by_month[f'data{number}'] for number in range(1, 13))

In [28]:
import pandas as pd
import folium
from folium.plugins import HeatMap
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
import random

# Heatmap for month 1: scatter random points inside each country's polygon,
# one point per whole AQI unit, and render them as a heat layer.

# data1 is a dict with parallel 'Country' and 'AQI' lists.
data = data1

# Create a DataFrame
df = pd.DataFrame(data)

# Load world geometries from the downloaded shapefile
world = gpd.read_file('ne_110m_admin_0_countries.shp')  # Update this path

# Build the name -> polygon lookup once instead of filtering `world` for every
# row. setdefault keeps the first polygon if an 'ADMIN' name appears twice.
shape_by_country = {}
for admin_name, geometry in zip(world['ADMIN'], world.geometry):
    shape_by_country.setdefault(admin_name, geometry)

# Create a map centered around a specific location
m = folium.Map(location=[20, 0], zoom_start=2)

# Add a title to the map
title_html = '''
             <h3 align="center" style="font-size:20px"><b>AQI Heatmap by Country</b></h3>
             '''
m.get_root().html.add_child(folium.Element(title_html))

# Dedicated, seeded generator: the scattered points (and therefore the saved
# map) are identical on every run, and the global `random` state is untouched.
HEATMAP_SEED = 42
rng = random.Random(HEATMAP_SEED)

# [latitude, longitude] pairs fed to the heat layer.
heat_data = []
# Countries for which the shapefile has no polygon under the same name.
unmatched_countries = []

for country_name, aqi_value in zip(df['Country'], df['AQI']):
    country_shape = shape_by_country.get(country_name)
    if country_shape is None:
        unmatched_countries.append(country_name)
        continue

    # The bounding box does not change per point, so compute it once per country.
    minx, miny, maxx, maxy = country_shape.bounds

    # Number of points based on AQI (fractional part is dropped).
    for _ in range(int(aqi_value)):
        # Rejection sampling: draw inside the bounding box until the point
        # actually falls inside the country's polygon.
        while True:
            candidate = Point(rng.uniform(minx, maxx), rng.uniform(miny, maxy))
            if country_shape.contains(candidate):
                heat_data.append([candidate.y, candidate.x])  # [latitude, longitude]
                break

# Report skipped countries instead of dropping them silently.
if unmatched_countries:
    print(f"No polygon found for {len(unmatched_countries)} countries: {unmatched_countries}")

# Create a heatmap layer
HeatMap(heat_data).add_to(m)

# Save the map to an HTML file.
# NOTE: a later cell in this notebook saves to the same file name, which
# replaces this map on disk.
m.save('aqi_heatmap_country_spanning.html')

# Display the map in a Jupyter Notebook (if applicable)
m

In [44]:
import pandas as pd
import folium
from folium.plugins import HeatMap
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
import random

# Weighted heatmap for month 12: the AQI of each country controls both how many
# random points are scattered inside its polygon and the weight of each point.

# data12 is a dict with parallel 'Country' and 'AQI' lists, so the frame
# already has an 'AQI' column and no renaming is needed.
data = data12

# Create a DataFrame from the data.
df = pd.DataFrame(data)

# Load world country geometries from the shapefile.
world = gpd.read_file('ne_110m_admin_0_countries.shp')  # Ensure this path is correct!

# Build the name -> polygon lookup once instead of filtering `world` for every
# row. setdefault keeps the first polygon if an 'ADMIN' name appears twice.
shape_by_country = {}
for admin_name, geometry in zip(world['ADMIN'], world.geometry):
    shape_by_country.setdefault(admin_name, geometry)

# Create a folium map centered on [20, 0].
m = folium.Map(location=[20, 0], zoom_start=2)

# Add a title to the map.
title_html = '''
             <h3 align="center" style="font-size:20px"><b>AQI Heatmap by Country in December</b></h3>
             '''
m.get_root().html.add_child(folium.Element(title_html))

# Normalize the AQI values to the range 0..1.
min_aqi = df['AQI'].min()
max_aqi = df['AQI'].max()
aqi_span = max_aqi - min_aqi

if aqi_span > 0:
    relative_aqi = (df['AQI'] - min_aqi) / aqi_span
else:
    # All values equal (or a single row): dividing by a zero span would give
    # NaN, which cannot be cast to int. Treat every country as the minimum.
    relative_aqi = pd.Series(0.0, index=df.index)

# Scale the number of points between 5 and 50.
df['points_count'] = (relative_aqi * 45).astype(int) + 5
# Scale the weight between 1 and 10 (ensuring an integer weight always).
df['aqi_weight'] = (relative_aqi * 9).astype(int) + 1

# Dedicated, seeded generator: the scattered points (and therefore the saved
# map) are identical on every run, and the global `random` state is untouched.
HEATMAP_SEED = 42
rng = random.Random(HEATMAP_SEED)

# [latitude, longitude, weight] triples fed to the heat layer.
heat_data = []
# Countries for which the shapefile has no polygon under the same name.
unmatched_countries = []

for country_name, num_points, aqi_weight in zip(df['Country'], df['points_count'], df['aqi_weight']):
    country_shape = shape_by_country.get(country_name)
    if country_shape is None:
        unmatched_countries.append(country_name)
        continue

    # The bounding box does not change per point, so compute it once per country.
    minx, miny, maxx, maxy = country_shape.bounds

    for _ in range(int(num_points)):
        # Rejection sampling: draw inside the bounding box until the point
        # actually falls inside the country's polygon.
        while True:
            candidate = Point(rng.uniform(minx, maxx), rng.uniform(miny, maxy))
            if country_shape.contains(candidate):
                heat_data.append([candidate.y, candidate.x, int(aqi_weight)])
                break

# Report skipped countries instead of dropping them silently.
if unmatched_countries:
    print(f"No polygon found for {len(unmatched_countries)} countries: {unmatched_countries}")

# Define the gradient with string keys to avoid errors during Jinja2 conversion.
gradient = {
    "0": '#00ff00',   # Green for low intensity
    "0.5": '#ffff00', # Yellow for moderate intensity
    "1": '#ff0000'    # Red for high intensity
}

# Create and add the HeatMap layer using the heat data and the defined gradient.
heatmap = HeatMap(heat_data, min_opacity=0.3, max_opacity=0.8, radius=15, gradient=gradient)
m.add_child(heatmap)

# Save the final map to an HTML file.
# NOTE: an earlier cell in this notebook saves to the same file name, so this
# map replaces that one on disk.
m.save('aqi_heatmap_country_spanning.html')

# If running in a Jupyter Notebook, display the map.
m