In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import requests
import json
import calendar
import concurrent.futures
import json

### Extract sensor ids of interest

In [2]:
# Query the sensor.community API for records filtered to country DE/AT and sensor type SDS011.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports HTTP errors directly instead of letting them show up
# later as a confusing JSON decoding failure.
response = requests.get(
    "https://data.sensor.community/airrohr/v1/filter/country=DE,AT&type=SDS011",
    timeout=60,
)
response.raise_for_status()
sensors_dict = json.loads(response.content.decode('utf-8'))

In [4]:
# Reduce every API record to the three fields needed downstream:
# the sensor id and the longitude/latitude of its location.
sensors_of_interest = [
    dict(
        sensor_id=record["sensor"]["id"],
        lon=record["location"]["longitude"],
        lat=record["location"]["latitude"],
    )
    for record in sensors_dict
]
sensors_df = pd.DataFrame(sensors_of_interest)

In [5]:
# Export the sensor table to CSV.
# Clipping to the area of interest (AOI) is done in QGIS, not in this notebook.
sensors_df.to_csv(path_or_buf="/mnt/data/processed/sensor_community_ids_de_at_sds011.csv")

### Extracting archive values for sensors of interest

In [4]:
def generate_urls(year: int, sensor_id: int, sensor_type: str = "sds011"):
    """Build one archive URL per calendar day of ``year`` for a single sensor.

    Parameters
    ----------
    year : int
        Calendar year to cover; leap years yield 366 URLs.
    sensor_id : int
        Sensor id inserted into the file name.
    sensor_type : str
        Sensor type inserted into the file name (default ``"sds011"``).

    Returns
    -------
    list of str
        URLs in chronological order, January 1st first.
    """
    # Files for 2021 and 2022 use a plain ".csv" extension; every other year uses ".csv.gz".
    extension = "csv.gz" if year not in (2021, 2022) else "csv"
    year_root = f"http://archive.sensor.community/{year}"
    day_stamps = (
        f"{year}-{month:02d}-{day:02d}"
        for month in range(1, 13)
        for day in range(1, calendar.monthrange(year, month)[1] + 1)
    )
    return [
        f"{year_root}/{stamp}/{stamp}_{sensor_type}_sensor_{sensor_id}.{extension}"
        for stamp in day_stamps
    ]

def process_sensor_data(url, sid):
    """Read one daily sensor file and reduce it to its daily mean values.

    Parameters
    ----------
    url : str
        Location of a semicolon-separated file with ``lat``, ``lon``, ``P1``
        and ``P2`` columns. The second-to-last "/"-separated component of
        ``url`` is returned as the date label.
    sid : int
        Sensor id. Not used by this function; kept so existing callers keep working.

    Returns
    -------
    tuple or None
        ``(date, p1_mean, p2_mean, lon, lat)`` where the four numeric fields are
        built-in floats, or ``None`` when the file cannot be read or processed.
    """
    try:
        df = pd.read_csv(url, sep=";")
        # Coordinates are taken from the first row of the file. Converting to
        # built-in float keeps the result JSON-serializable even when pandas
        # parses an integer-valued column as numpy.int64.
        lat = float(df.loc[0, "lat"])
        lon = float(df.loc[0, "lon"])
        p1_value = float(df["P1"].mean())
        p2_value = float(df["P2"].mean())
        date = url.split("/")[-2]
        return (date, p1_value, p2_value, lon, lat)
    except Exception:
        # Deliberately best-effort: a missing file, an empty or malformed file,
        # or missing columns all mean "no data for this day", reported as None.
        return None

In [5]:
# Load the sensor table from the processed AOI CSV file.
sensors_of_interest_df = pd.read_csv(
    filepath_or_buffer="/mnt/data/processed/Sensor_Community/sensor_community_ids_aoi_sds011.csv"
)
# Draw a single sensor; the fixed random_state makes the draw reproducible.
sensors_of_interest_sample_df = sensors_of_interest_df.sample(n=1, random_state=0)

In [6]:
# Build a GeoJSON FeatureCollection with one Point feature per sampled sensor.
# Each feature carries the sensor id and a list of (date, P1 mean, P2 mean) entries
# for every day of 2022 whose archive file could be processed.
geojson_data = {
    "type": "FeatureCollection",
    "features": []
}


def _json_safe_number(value):
    """Return ``value`` as a built-in float, or None when it is missing (NaN).

    json.dumps writes NaN as a bare ``NaN`` token, which is not valid JSON and is
    rejected by strict parsers; None is written as ``null`` instead.
    """
    return None if pd.isna(value) else float(value)


for i, item in sensors_of_interest_sample_df.iterrows():
    sid = int(item["sensor_id"])
    url_list = generate_urls(2022, sid)
    feature = {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": []
        },
        "properties": {
            "sensor_id": sid,
            "values": []
        }
    }

    for url in url_list:
        result = process_sensor_data(url, sid)
        if result is None:
            # No usable data for this day; skip it.
            continue
        date, p1_value, p2_value, lon, lat = result
        feature["properties"]["values"].append(
            (date, _json_safe_number(p1_value), _json_safe_number(p2_value))
        )
        # The coordinates of the most recent day with valid values win. Missing
        # coordinates never overwrite previously found ones.
        if not (pd.isna(lon) or pd.isna(lat)):
            feature["geometry"]["coordinates"] = [float(lon), float(lat)]

    geojson_data["features"].append(feature)

# Convert the GeoJSON data to a JSON string
geojson_string = json.dumps(geojson_data, indent=2)

In [9]:
# Write the serialized GeoJSON string to disk.
with open(
    "/mnt/data/processed/Sensor_Community/sensor_sample_values.geojson", mode="w"
) as geojson_file:
    geojson_file.write(geojson_string)