In [6]:
import pandas as pd
import rasterio

# Correlate ground-station NO2 readings with the Sentinel-5P raster value
# sampled at each station's coordinates.

# Load air quality station data
station_data = pd.read_csv('Ind_SW_NO2.csv', encoding='unicode_escape')

# Constants used to convert Sentinel pixel values to NO2 concentration
M = 46.0055  # Molar mass of NO2 in g/mol
# NOTE(review): the original author describes V as "vertical column density of
# NO2 in mol/m2", yet it is applied as a fixed divisor below -- confirm the
# intended conversion. A constant scale factor does not change the correlation.
V = 0.0002

sentinel_column = 'Sentinel NO2 Reading_2019 (µg/m³)'

# Load Sentinel data
# NOTE(review): the raster file is named 2021 while the station readings and
# the output column are labelled 2019 -- confirm which year is intended.
file_path = 'S5P_NO2_India_2021.tif'

city_data = []
with rasterio.open(file_path) as src:
    raster_data = src.read(1)
    n_rows, n_cols = raster_data.shape

    # Sample the raster while the dataset is still open, instead of relying on
    # the handle remaining usable after the `with` block has closed it.
    for _, station in station_data.iterrows():
        longitude = station['Longitude']
        latitude = station['Latitude']
        city_name = station['City']

        # Default to NaN; Series.corr() ignores NaN pairs.
        NO2_concentration = float('nan')
        if pd.notna(longitude) and pd.notna(latitude):
            pixel_row, pixel_col = src.index(longitude, latitude)
            # A station outside the raster extent can yield negative indices,
            # which NumPy would silently wrap around to an unrelated pixel,
            # or indices past the edge, which would raise IndexError.
            if 0 <= pixel_row < n_rows and 0 <= pixel_col < n_cols:
                pixel_value = raster_data[pixel_row, pixel_col]
                NO2_concentration = (pixel_value * M) / V

        city_data.append([city_name, NO2_concentration])

# Create DataFrame from Sentinel data, row-aligned with station_data
sentinel_data = pd.DataFrame(
    city_data,
    columns=['City', sentinel_column],
    index=station_data.index,
)

# Attach the Sentinel readings to the station rows by row index. Joining on
# 'City' would be a many-to-many merge whenever a city name repeats (several
# stations in one city), duplicating rows and biasing the correlation.
merged_data = station_data.join(sentinel_data[sentinel_column])

# Calculate correlation between air quality station NO2 readings and Sentinel NO2 readings
correlation = merged_data['NO2 Reading_2019 (µg/m³)'].corr(merged_data[sentinel_column])

# Print correlation
print(f"Correlation between air quality station data and Sentinel data: {correlation}")


Correlation between air quality station data and Sentinel data: 0.5489001046938279


In [8]:
import pandas as pd
import rasterio

# Average each station's NO2 reading with the Sentinel-derived value sampled at
# the station's coordinates, and write the result to an Excel file.

no2_column = 'NO2 Reading_2019 (µg/m³)'

# Load air quality station data
station_data = pd.read_csv('Ind_SW_NO2.csv', encoding='unicode_escape')
# Explicit copy so the column assignment below writes to an independent frame.
station_data = station_data[['City', no2_column, 'Latitude', 'Longitude']].copy()

M = 46.0055  # Molar mass of NO2 in g/mol
# NOTE(review): the original author describes V as "vertical column density of
# NO2 in mol/m2", yet it is applied as a fixed divisor below -- confirm the
# intended conversion.
V = 0.0002

# Load Sentinel data and sample it at every station location
file_path = 'S5P_NO2_India_2019.tif'
sentinel_values = []
with rasterio.open(file_path) as src:
    raster_data = src.read(1)
    n_rows, n_cols = raster_data.shape

    # Sample while the dataset is still open, instead of relying on the handle
    # remaining usable after the `with` block has closed it.
    for _, station in station_data.iterrows():
        latitude = station['Latitude']
        longitude = station['Longitude']

        NO2_concentration = float('nan')
        if pd.notna(longitude) and pd.notna(latitude):
            pixel_row, pixel_col = src.index(longitude, latitude)
            # A station outside the raster extent can yield negative indices,
            # which NumPy would silently wrap around to an unrelated pixel,
            # or indices past the edge, which would raise IndexError.
            if 0 <= pixel_row < n_rows and 0 <= pixel_col < n_cols:
                pixel_value = raster_data[pixel_row, pixel_col]
                NO2_concentration = (pixel_value * M) / V

        sentinel_values.append(NO2_concentration)

# Combine air quality station data with Sentinel data.
# The whole column is replaced in one assignment so that float averages never
# have to be written cell-by-cell into a possibly integer-typed column.
# Rows without a valid satellite sample become NaN rather than a wrong value.
sentinel_no2 = pd.Series(sentinel_values, index=station_data.index, dtype='float64')
station_data[no2_column] = (station_data[no2_column] + sentinel_no2) / 2

# Save the combined data to a new file
station_data.to_excel('Combined_NO2_Data_2019.xlsx', index=False)