In [None]:
import os 
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from src.download.patients import PatientsDownload
from src.download.airdata_downloader import AirdataDownloader
from src.utils.graph_configurations import get_air_data_index_timeline_plot, get_air_data_component_timeline_plot

### Use Case: Von einer Datei alle Patient:innen extrahieren und für einzelne Patient:innen Luftdaten anschauen

In [None]:
# User input: query period for the air data request.
# Both values are kept as the raw strings typed by the user.
START_DATE = input(
    "Bitte geben Sie den Startzeitpunkt der Datenabfrage ein (Format: YYYY-MM-DD). Es sind Daten ab dem 01.01.2002 verfügbar:  "
)
END_DATE = input(
    "Bitte geben Sie den Endzeitpunkt der Datenabfrage ein (Format: YYYY-MM-DD): "
)

In [None]:
# Load patient data (small test data set).
# JSON is read explicitly as UTF-8 so that non-ASCII characters are decoded
# the same way on every platform instead of depending on the locale default.
with open("data/raw/2024-12-02_mii_testdaten_small.json", "r", encoding="utf-8") as file:
    data = json.load(file)

In [None]:
# Build the patient collection from the loaded JSON data.
patients = PatientsDownload()
patients.extract_patients(data=data)

# Pick the patient with id "595" as the example for the following cells.
interesting_patient = patients.get_patient_by_id("595")


In [None]:
# Download air quality data for the selected patient's coordinates
# within the period entered by the user.
air_data = AirdataDownloader()

# Air quality index
index_data = air_data.get_luftdaten_index_patient(
    longitude=interesting_patient.address.longitude,
    latitude=interesting_patient.address.latitude,
    start_date=START_DATE,
    end_date=END_DATE,
)

# Individual pollutant components ("Schadstoffe")
schadstoffe_data = air_data.get_luftdaten_schadstoffe_patient(
    longitude=interesting_patient.address.longitude,
    latitude=interesting_patient.address.latitude,
    start_date=START_DATE,
    end_date=END_DATE,
)

In [None]:
# Visualization of the acquired data: air quality index.
# Passes the index data fetched for the selected patient to the timeline plot helper;
# as the last expression of the cell, its result is what the notebook displays.
get_air_data_index_timeline_plot(index_data)

In [None]:
# Timeline of a single pollutant component: particulate matter (PM10).
get_air_data_component_timeline_plot(
    data=schadstoffe_data,
    component_name="Feinstaub (PM10)",
)

In [None]:
# Timeline of a single pollutant component: nitrogen dioxide.
get_air_data_component_timeline_plot(
    data=schadstoffe_data,
    component_name="Stickstoffdioxid",
)

### Beispiel: Unvollständige Postleitzahl -> was passiert bei unvollständiger Postleitzahl?

In [None]:
# Fresh downloader instance for this example.
air_data = AirdataDownloader()

# Patient "999" is the example with an incomplete postal code.
patient_incomplete_address = patients.get_patient_by_id("999")

# Show the stored postal code together with the coordinates attached to the address.
print(
    f"The patient with id {patient_incomplete_address.id} has the following incomplete postal code: "
    f"{patient_incomplete_address.address.postal_code}. Still, with the beginning of the postal code "
    f"a longitude and latitude was calculated: {patient_incomplete_address.address.longitude} "
    f"and {patient_incomplete_address.address.latitude}"
)


In [None]:
# Query the air quality index for the coordinates of the incomplete address
# over the fixed period 2019-01-01 .. 2019-12-31 and plot the result.
airdata_index = air_data.get_luftdaten_index_patient(
    longitude=patient_incomplete_address.address.longitude,
    latitude=patient_incomplete_address.address.latitude,
    start_date="2019-01-01",
    end_date="2019-12-31",
)
get_air_data_index_timeline_plot(airdata_index)

### Beispiel: Keine Antwort von Luftdaten-API

In [None]:
# Request with an invalid end date ('2019-13').
# Per the original author's note, the time range is then filled in automatically.
airdata_index = air_data.get_luftdaten_index_patient(
    longitude=interesting_patient.address.longitude,
    latitude=interesting_patient.address.latitude,
    start_date="2019-11-01",
    end_date="2019-13",
)
# Original note: the period is adjusted to run from the available start date to the current date.
len(airdata_index)

### Use Case: Über mehrere Patient:innen loopen und einen gemeinsamen Datensatz für alle erstellen

In [None]:
# User input: query period for the multi-patient request.
# Both values are kept as the raw strings typed by the user.
START_DATE = input(
    "Bitte geben Sie den Startzeitpunkt der Datenabfrage ein (Format: YYYY-MM-DD). Es sind Daten ab dem 01.01.2002 verfügbar: "
)
END_DATE = input(
    "Bitte geben Sie den Endzeitpunkt der Datenabfrage ein (Format: YYYY-MM-DD): "
)

In [None]:
# Load patient data (large test data set).
# JSON is read explicitly as UTF-8 so that non-ASCII characters are decoded
# the same way on every platform instead of depending on the locale default.
with open("data/raw/2024-12-02_mii_testdaten_large.json", "r", encoding="utf-8") as file:
    data = json.load(file)

In [None]:
# Build the patient collection from the loaded JSON data.
patients = PatientsDownload()
patients.extract_patients(data=data)

# Downloader for the air data queries.
air_data = AirdataDownloader()

# Air quality index data for the whole patient collection within the requested period.
index_data = air_data.get_luftdaten_index_patient_collection(
    patients=patients.patients,
    start_date=START_DATE,
    end_date=END_DATE,
)

In [None]:
# Simple table: number of patients per `standort`.
# Keep one row per (standort, patient_id) pair so each patient is counted once per standort.
index_data_deduplicated = index_data.drop_duplicates(subset=['standort', 'patient_id'])

# Count the remaining patient ids per standort and name the count column directly.
grouped = (
    index_data_deduplicated
    .groupby('standort')['patient_id']
    .count()
    .reset_index(name='num_patients')
)

print(grouped)

In [None]:
# Individual pollutant component data for the whole patient collection
# within the requested period.
schadstoffe_data = air_data.get_luftdaten_schadstoffe_patient_collection(
    patients=patients.patients,
    start_date=START_DATE,
    end_date=END_DATE,
)

In [None]:
# Simple table: number of patients per `standort` in the pollutant data.
# Keep one row per (standort, patient_id) pair so each patient is counted once per standort.
schadstoffe_data_deduplicated = schadstoffe_data.drop_duplicates(subset=['standort', 'patient_id'])

# Group the de-duplicated frame (not the raw one): grouping the raw data would count
# every data row of a patient instead of counting the patient once.
grouped = schadstoffe_data_deduplicated.groupby('standort')['patient_id'].count().reset_index()
grouped.columns = ['standort', 'num_patients']

print(grouped)