In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# CSV stored under /content/drive/MyDrive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install geopandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.12.2-py3-none-any.whl (1.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 21.3 MB/s eta 0:00:00
Collecting pyproj>=2.6.1.post1
  Downloading pyproj-3.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m62.0 MB/s[0m eta [36m0:00:00[0m
Collecting fiona>=1.8
  Downloading Fiona-1.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.1/16.1 MB 56.4 MB/s eta 0:00:00
Collecting munch>=2.3.2
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the combined trajectory export from the mounted Google Drive.
# The first CSV column has no header and holds a saved row index (it would
# otherwise load as a data column named "Unnamed: 0"); index_col=0 restores it
# as the frame's index instead.
df = pd.read_csv('/content/drive/MyDrive/combined_trajectories.csv', index_col=0)
df.head()

Unnamed: 0,latitude,longitude,altitude,date,time,individual_id,trajectory_id
0,39.984702,116.318417,492.0,2023-10-23,02:53:04,1,1
1,39.984683,116.31845,492.0,2023-10-23,02:53:10,1,1
2,39.984686,116.318417,492.0,2023-10-23,02:53:15,1,1
3,39.984688,116.318385,492.0,2023-10-23,02:53:20,1,1
4,39.984655,116.318263,492.0,2023-10-23,02:53:25,1,1


In [None]:
# Build one 3-D point per row: x = longitude, y = latitude, z = altitude.
geometry = gpd.points_from_xy(df['longitude'], df['latitude'], df['altitude'])

# Declare the coordinate reference system explicitly. Without it the frame has
# no CRS and cannot be reprojected or reliably combined with other layers.
# NOTE(review): EPSG:4326 (WGS 84 lon/lat in degrees) is assumed from the
# degree-valued latitude/longitude columns — confirm against the data source.
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs='EPSG:4326')

In [None]:
# Preview the first rows to check that the geometry column was added.
gdf.head()

Unnamed: 0,latitude,longitude,altitude,date,time,individual_id,trajectory_id,geometry
0,39.984702,116.318417,492.0,2023-10-23,02:53:04,1,1,POINT Z (116.31842 39.98470 492.00000)
1,39.984683,116.31845,492.0,2023-10-23,02:53:10,1,1,POINT Z (116.31845 39.98468 492.00000)
2,39.984686,116.318417,492.0,2023-10-23,02:53:15,1,1,POINT Z (116.31842 39.98469 492.00000)
3,39.984688,116.318385,492.0,2023-10-23,02:53:20,1,1,POINT Z (116.31839 39.98469 492.00000)
4,39.984655,116.318263,492.0,2023-10-23,02:53:25,1,1,POINT Z (116.31826 39.98465 492.00000)


In [None]:
# Convert the 'date' column to pandas datetimes so the .dt accessor can be used
# on it. errors='coerce' turns values that cannot be parsed into NaT instead of
# raising, so malformed dates pass silently here.
gdf['date'] = pd.to_datetime(gdf['date'], errors='coerce')

In [None]:
# Preview the first rows again after the date conversion.
# NOTE(review): this preview renders the same as the one before the conversion,
# so it does not by itself confirm the dtype change; gdf.dtypes would show it.
gdf.head()

Unnamed: 0,latitude,longitude,altitude,date,time,individual_id,trajectory_id,geometry
0,39.984702,116.318417,492.0,2023-10-23,02:53:04,1,1,POINT Z (116.31842 39.98470 492.00000)
1,39.984683,116.31845,492.0,2023-10-23,02:53:10,1,1,POINT Z (116.31845 39.98468 492.00000)
2,39.984686,116.318417,492.0,2023-10-23,02:53:15,1,1,POINT Z (116.31842 39.98469 492.00000)
3,39.984688,116.318385,492.0,2023-10-23,02:53:20,1,1,POINT Z (116.31839 39.98469 492.00000)
4,39.984655,116.318263,492.0,2023-10-23,02:53:25,1,1,POINT Z (116.31826 39.98465 492.00000)


In [None]:
# Monthly mean PM2.5 over all rows of gdf.
# Check for the measurement column up front so that a frame without it fails
# with an actionable message rather than a bare KeyError from deep inside the
# groupby machinery.
if 'PM2.5' not in gdf.columns:
    raise KeyError(
        "'PM2.5' column not found in gdf; available columns: "
        f"{list(gdf.columns)}. Load or join the pollution measurements first."
    )

# Both grouping keys are derived from the 'date' column and would both be
# named 'date'; reset_index() cannot insert two columns with the same name, so
# each key is renamed before grouping. Rows whose date is NaT are dropped by
# groupby's default handling of missing keys.
monthly_data = (
    gdf.groupby(
        [
            gdf['date'].dt.year.rename('year'),
            gdf['date'].dt.month.rename('month'),
        ]
    )['PM2.5']
    .mean()
    .reset_index()
)

KeyError: ignored

In [None]:


def analyze_beijing_pollution_data(csv_file):
    """Plot spatial and temporal PM2.5 hotspots from a CSV of measurements.

    The CSV is read, PM2.5 is averaged per calendar month and grid cell, and
    two figures are shown:

    * a heatmap of z-scores, standardised across grid cells within each month
      (which cells are high or low relative to the others that month);
    * a line chart of the across-cell monthly mean, standardised over time
      (which months are high or low relative to the whole period).

    Parameters
    ----------
    csv_file : str or path-like
        CSV containing the columns ``longitude``, ``latitude``, ``altitude``,
        ``date``, ``grid_cell`` and ``PM2.5``.

    Returns
    -------
    None
        Both figures are displayed with ``plt.show()``.

    Raises
    ------
    KeyError
        If any of the required columns is missing from the CSV.
    ValueError
        If no row has a parseable date, so there is nothing to aggregate.
    """
    required_columns = ['longitude', 'latitude', 'altitude', 'date', 'grid_cell', 'PM2.5']

    # Read the CSV file
    df = pd.read_csv(csv_file)

    missing = [column for column in required_columns if column not in df.columns]
    if missing:
        raise KeyError(f"{csv_file!r} is missing required column(s): {missing}")

    # read_csv leaves 'date' as plain strings; the .dt accessor used below only
    # works on datetimes. Unparseable values become NaT and drop out of the
    # groupby.
    df['date'] = pd.to_datetime(df['date'], errors='coerce')

    # Convert the latitude and longitude columns to a geopandas GeoDataFrame
    geometry = gpd.points_from_xy(df['longitude'], df['latitude'], df['altitude'])
    gdf = gpd.GeoDataFrame(df, geometry=geometry)

    # Aggregate by calendar month and grid cell. A single year-month period key
    # replaces separate year and month keys, which would both be named 'date'
    # and make reset_index() fail on the duplicate column name.
    month = gdf['date'].dt.to_period('M').rename('month')
    monthly_data = gdf.groupby([month, gdf['grid_cell']])['PM2.5'].mean().reset_index()
    if monthly_data.empty:
        raise ValueError(f"{csv_file!r} contains no rows with a parseable date")

    # Pivot to a grid_cell x month matrix of monthly averages. Month labels are
    # converted to 'YYYY-MM' strings so both plots get readable tick labels.
    pivot_data = monthly_data.pivot(index='grid_cell', columns='month', values='PM2.5')
    pivot_data.columns = pivot_data.columns.astype(str)

    # Spatial z-scores: each month (column) is standardised across grid cells.
    z_scores = (pivot_data - pivot_data.mean()) / pivot_data.std()

    # Temporal z-scores: standardise the across-cell monthly mean over time.
    # Averaging the spatial z-scores per month would not work here, because each
    # column is already centred on zero and its mean is ~0 for every month.
    monthly_mean = pivot_data.mean(axis=0)
    temporal_z_scores = (monthly_mean - monthly_mean.mean()) / monthly_mean.std()

    # Create a heatmap of the spatial hotspots
    _, ax = plt.subplots()
    sns.heatmap(z_scores, cmap='coolwarm', center=0, cbar_kws={'label': 'Z-score'}, ax=ax)
    ax.set_title('Spatial Hotspots of Beijing PM2.5 Pollution')
    ax.set_xlabel('Month')
    ax.set_ylabel('Grid Cell')
    plt.show()

    # Create a line graph of the temporal hotspots
    _, ax = plt.subplots()
    ax.plot(temporal_z_scores.index, temporal_z_scores.values, marker='o')
    ax.set_title('Temporal Hotspots of Beijing PM2.5 Pollution')
    ax.set_xlabel('Month')
    ax.set_ylabel('Z-score')
    ax.tick_params(axis='x', rotation=45)
    plt.show()
