# Test to get "/ListSensors" data

In [1]:
# Smoke test for the "/ListSensors" helper: fetch the sensor list, save it to a
# CSV file and log the first few site codes.
import logging
import os

from dotenv import load_dotenv

from breathe_london_api_list_sensors import main, logger

# Attach a plain console handler to the root logger only when it has none.
# Adding one unconditionally stacks a new handler on every re-run of this cell,
# which multiplies each log line in the output.
root_logger = logging.getLogger()
if not root_logger.handlers:
    root_logger.addHandler(logging.StreamHandler())

# Load variables from a local .env file so API_KEY is available via os.getenv.
load_dotenv()

df, site_codes, output_file = main(
    api_key=os.getenv("API_KEY"), output_file="my_output.csv", site_code_limit=10
)

# main() signals failure by returning None in place of the DataFrame.
if df is None:
    logger.error("Failed to fetch data. Please check your API key and try again later.")
else:
    logger.info(f"Data saved to: {output_file}")
    logger.info(f"First few site codes: {site_codes[:5]}")

2024-09-05 17:05:49,617 - INFO - Starting main process
Starting main process
2024-09-05 17:05:49,618 - INFO - Using provided API key
Using provided API key
2024-09-05 17:05:49,618 - INFO - Initiating API request to fetch sensor data
Initiating API request to fetch sensor data
2024-09-05 17:05:49,619 - INFO - Attempt 1 to fetch data
Attempt 1 to fetch data
2024-09-05 17:05:55,943 - INFO - Data fetched successfully
Data fetched successfully
2024-09-05 17:05:55,967 - INFO - Processing fetched sensor data
Processing fetched sensor data
2024-09-05 17:05:55,986 - INFO - Processed data into DataFrame with 591 rows and 54 columns
Processed data into DataFrame with 591 rows and 54 columns
2024-09-05 17:05:55,986 - INFO - Saving data to my_output.csv
Saving data to my_output.csv
2024-09-05 17:05:56,014 - INFO - Full dataset saved to my_output.csv
Full dataset saved to my_output.csv
2024-09-05 17:05:56,015 - INFO - Extracting site codes from DataFrame
Extracting site codes from DataFrame
2024-09-

# Test to get hourly "/getClarityData" data

In [4]:
# Smoke test for the hourly "/getClarityData" helpers in the project module.
# datetime/timedelta are imported here so the cell runs on a fresh kernel
# instead of relying on names left behind by another cell.
from datetime import datetime, timedelta

# Import the necessary functions and variables from your script
from breathe_london_api_clarity_data import main, get_clarity_data, API_KEY

# --- Test the get_clarity_data function: one site, one species, last 7 days ---
siteCode = "CLDP0001"
species = "IPM25"
endTime = datetime.now()
startTime = endTime - timedelta(days=7)
# Timestamps are passed as strings such as "Tue 03 Sep 2024 22:00:57".
endTime_str = endTime.strftime("%a %d %b %Y %H:%M:%S")
startTime_str = startTime.strftime("%a %d %b %Y %H:%M:%S")
averaging = "Hourly"

data = get_clarity_data(siteCode, species, startTime_str, endTime_str, averaging)
if data is not None:
    print(data.head())
else:
    print("No data received")

# --- Test the main function: several sites and species over the last 30 days ---
siteCodes = ["CLDP0001", "CLDP0002"]
species_list = ["IPM25", "INO2"]
days = 30

df = main(siteCodes, species_list, averaging, days)
if df is not None:
    print(df.head())
    print(f"Total data shape: {df.shape}")
    print(f"Latest data timestamp: {df.mod_datetime.max()}")
    print(f"Number of unique sites: {df.SiteCode.nunique()}")
else:
    print("No data received")

2024-09-03 22:00:56,962 - INFO - Fetching data for CLDP0001 and IPM25
Fetching data for CLDP0001 and IPM25
2024-09-03 22:00:57,107 - INFO - Successfully received data for CLDP0001 and IPM25
Successfully received data for CLDP0001 and IPM25
2024-09-03 22:00:57,122 - INFO - Fetching data for CLDP0001 and IPM25
Fetching data for CLDP0001 and IPM25
2024-09-03 22:00:57,266 - INFO - Successfully received data for CLDP0001 and IPM25
Successfully received data for CLDP0001 and IPM25
2024-09-03 22:00:57,282 - INFO - Fetching data for CLDP0001 and INO2
Fetching data for CLDP0001 and INO2


   SiteCode                  DateTime  DurationNS  ScaledValue
0  CLDP0001  2024-08-27T21:00:00.000Z  3600000000     8.680085
1  CLDP0001  2024-08-27T22:00:00.000Z  3600000000     9.417954
2  CLDP0001  2024-08-27T23:00:00.000Z  3600000000     8.936367
3  CLDP0001  2024-08-28T00:00:00.000Z  3600000000     8.964393
4  CLDP0001  2024-08-28T01:00:00.000Z  3600000000    10.191686


2024-09-03 22:00:57,407 - INFO - Successfully received data for CLDP0001 and INO2
Successfully received data for CLDP0001 and INO2
2024-09-03 22:00:57,410 - INFO - Fetching data for CLDP0002 and IPM25
Fetching data for CLDP0002 and IPM25
2024-09-03 22:00:57,562 - INFO - Successfully received data for CLDP0002 and IPM25
Successfully received data for CLDP0002 and IPM25
2024-09-03 22:00:57,566 - INFO - Fetching data for CLDP0002 and INO2
Fetching data for CLDP0002 and INO2
2024-09-03 22:00:57,717 - INFO - Successfully received data for CLDP0002 and INO2
Successfully received data for CLDP0002 and INO2
2024-09-03 22:00:57,729 - INFO - Total data shape: (2878, 6)
Total data shape: (2878, 6)
2024-09-03 22:00:57,732 - INFO - Latest data timestamp: 2024-09-03 21:00:00+00:00
Latest data timestamp: 2024-09-03 21:00:00+00:00
2024-09-03 22:00:57,734 - INFO - Number of unique sites: 2
Number of unique sites: 2
2024-09-03 22:00:57,777 - INFO - Data saved to clarity_data_2024-09-03-22-00-57.csv
Data

   SiteCode                  DateTime  DurationNS  ScaledValue  \
0  CLDP0001 2024-08-04 21:00:00+00:00  3600000000     8.118612   
1  CLDP0001 2024-08-04 22:00:00+00:00  3600000000     7.790823   
2  CLDP0001 2024-08-04 23:00:00+00:00  3600000000     8.156034   
3  CLDP0001 2024-08-05 00:00:00+00:00  3600000000     5.865398   
4  CLDP0001 2024-08-05 01:00:00+00:00  3600000000     5.976015   

               mod_datetime species  
0 2024-08-04 22:00:00+00:00   IPM25  
1 2024-08-04 23:00:00+00:00   IPM25  
2 2024-08-05 00:00:00+00:00   IPM25  
3 2024-08-05 01:00:00+00:00   IPM25  
4 2024-08-05 02:00:00+00:00   IPM25  
Total data shape: (2878, 6)
Latest data timestamp: 2024-09-03 21:00:00+00:00
Number of unique sites: 2


In [3]:
# Test fetching hourly "/getClarityData" data
import requests
import os
import pandas as pd
from datetime import datetime, timedelta
import pandas as pd
import time
from dotenv import load_dotenv
load_dotenv()
# --- Notebook configuration ---

# URL template for the hourly Clarity endpoint; the placeholders are filled in
# with str.format() when a request is built.
API_CLARITY_HOURLY_URL = (
    "https://api.breathelondon.org/api/getClarityData/"
    "{siteCode}/{species}/{startTime}/{endTime}/{averaging}?key={apiKey}"
)

# The API key is read from the environment (load_dotenv() above pulls in .env);
# it is None when the variable is not set.
API_KEY = os.environ.get('API_KEY')

# Show every column when a DataFrame is displayed.
pd.options.display.max_columns = None
# Turn off the chained-assignment warning (pandas' default setting is 'warn').
pd.options.mode.chained_assignment = None

# Timestamp string (YYYY-MM-DD-HH-MM-SS) captured when this cell runs.
today = pd.Timestamp.today().strftime('%Y-%m-%d-%H-%M-%S')

def get_clarity_data(siteCode, species, startTime, endTime, averaging, timeout=30):
    """Fetch Clarity readings from the Breathe London API as a DataFrame.

    Args:
        siteCode: Site identifier inserted into the URL path (e.g. "CLDP0001").
        species: Species code inserted into the URL path (e.g. "IPM25").
        startTime: Start of the query window as a string; percent-encoded
            before being inserted into the URL.
        endTime: End of the query window as a string; percent-encoded before
            being inserted into the URL.
        averaging: Averaging period inserted into the URL path (e.g. "Hourly").
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        A pandas DataFrame built from the JSON payload, or None when the
        request fails, the status code is not 200, or the body cannot be
        turned into a DataFrame.
    """
    from urllib.parse import quote  # local import keeps this definition self-contained

    # Percent-encode spaces and other unsafe characters. ":" and "%" are left
    # untouched so the URLs match what the previous space-only replacement
    # produced, including for callers that pass already-encoded strings.
    url = API_CLARITY_HOURLY_URL.format(
        siteCode=siteCode,
        species=species,
        startTime=quote(startTime, safe=":%"),
        endTime=quote(endTime, safe=":%"),
        averaging=averaging,
        apiKey=API_KEY,
    )

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can contain the
        # request URL, which embeds the API key.
        print(f"Request failed: {type(exc).__name__}")
        return None

    # Check if request was successful
    if response.status_code != 200:
        print(f"Error with status code: {response.status_code}")
        print(response.text)
        return None

    try:
        return pd.DataFrame.from_dict(response.json())
    except ValueError:
        # Raised when the body is not valid JSON or cannot be laid out as a table.
        print("Received an unexpected response:")
        print(response.text)
        return None

# Site codes and species to download; add more site codes here as needed.
siteCodes = ["CLDP0001", "CLDP0002"]
species1 = "IPM25"
species2 = "INO2"
averaging = "Hourly"

# Query window: the 365 days ending at the current time.
endTime = datetime.now()
startTime = endTime - timedelta(days=365)

# Convert datetime objects to strings such as "Tue 03 Sep 2024 22:00:57".
endTime_str = endTime.strftime("%a %d %b %Y %H:%M:%S")
startTime_str = startTime.strftime("%a %d %b %Y %H:%M:%S")

df_list = []  # One DataFrame per (site, species) pair that returned data

for siteCode in siteCodes:
    for species_code in (species1, species2):
        site_data = get_clarity_data(siteCode, species_code, startTime_str, endTime_str, averaging)

        # Skip failed requests and empty payloads; an empty frame has no
        # 'DateTime' column to convert.
        if not isinstance(site_data, pd.DataFrame) or site_data.empty:
            print(f"No data received for {siteCode} and {species_code}")
            continue

        print(f"Received data for {siteCode} and {species_code}")
        site_data['DateTime'] = pd.to_datetime(site_data['DateTime'])
        # mod_datetime is DateTime shifted one hour later.
        site_data['mod_datetime'] = site_data['DateTime'] + pd.Timedelta(hours=1)
        site_data['species'] = species_code
        df_list.append(site_data)

# Concatenate all DataFrames in the list. df_all is always defined, so the
# final display below works even when nothing was downloaded.
if df_list:
    # ignore_index gives the combined frame one continuous row index instead
    # of repeating 0..n for every (site, species) chunk.
    df_all = pd.concat(df_list, ignore_index=True)
    print(df_all.mod_datetime.max())
    print(df_all.SiteCode.nunique())
    print(df_all.shape)
else:
    df_all = pd.DataFrame()
    print("No data received for all site codes and species.")

df_all.head()


Received data for CLDP0001 and IPM25
Received data for CLDP0001 and INO2
Received data for CLDP0002 and IPM25
Received data for CLDP0002 and INO2
2024-09-03 21:00:00+00:00
(34144, 6)
2 None


Unnamed: 0,SiteCode,DateTime,DurationNS,ScaledValue,mod_datetime,species
0,CLDP0001,2023-09-04 20:00:00+00:00,3600000000,9.13294,2023-09-04 21:00:00+00:00,IPM25
1,CLDP0001,2023-09-04 21:00:00+00:00,3600000000,12.687843,2023-09-04 22:00:00+00:00,IPM25
2,CLDP0001,2023-09-04 22:00:00+00:00,3600000000,19.364695,2023-09-04 23:00:00+00:00,IPM25
3,CLDP0001,2023-09-04 23:00:00+00:00,3600000000,20.716615,2023-09-05 00:00:00+00:00,IPM25
4,CLDP0001,2023-09-05 00:00:00+00:00,3600000000,19.515351,2023-09-05 01:00:00+00:00,IPM25
