## UPDATE: The second time, I also scraped flood extent data from the Copernicus API, since we couldn't use the summary tables we had scraped initially due to errors that were later confirmed by Copernicus. It worked, but I later decided to **manually extract flood extents** from the Copernicus Viewer pages. This allowed me to **select the highest recorded extent** across all available products (observations), regardless of which product was used for the rest of the statistical data. This ensures that the true extent of each flood is captured.

In [6]:
import pandas as pd
import requests
import time
import numpy as np
from tqdm import tqdm
import json 
from datetime import datetime

# Copernicus EMS rapid-mapping activation codes to scrape.
events = ['EMSR775', 'EMSR773', 'EMSR771', 'EMSR770', 'EMSR768', 'EMSR766', 'EMSR764', 'EMSR763', 'EMSR762', 'EMSR761', 'EMSR759',
          'EMSR758', 'EMSR757', 'EMSR756', 'EMSR755', 'EMSR728', 'EMSR725', 'EMSR722', 'EMSR721', 'EMSR718', 'EMSR713', 'EMSR712',
          'EMSR711', 'EMSR710', 'EMSR708', 'EMSR706', 'EMSR705', 'EMSR701', 'EMSR697', 'EMSR694', 'EMSR693', 'EMSR692', 'EMSR684',
          'EMSR683', 'EMSR680', 'EMSR668', 'EMSR665', 'EMSR664', 'EMSR659', 'EMSR657']

REQUEST_TIMEOUT_S = 30  # give up on a stalled connection instead of hanging forever
REQUEST_DELAY_S = 5  # IMPORTANT: pause after every request (presumably to be gentle on the API)


def _product_rows(aoi, product):
    """Yield the flat output rows for one product of one area of interest.

    Args:
        aoi: dict describing an area of interest (reads "name", "number",
            "activationCode").
        product: dict describing one product of that AOI (reads
            "activationCode", "aoiName", "aoiNumber", "type",
            "monitoringNumber", "version" and "stats").

    Yields:
        One dict per (category, subcategory) entry found in
        ``product["stats"]``. When the product has no stats, a single row
        with NaN in "subcategory_unit", "affected" and "total" is yielded so
        the product still appears in the output.
    """
    version = product.get("version")
    # Columns shared by every row of this product; key order defines the
    # DataFrame column order.
    base = {
        "name": aoi.get("name", np.nan),
        "number": aoi.get("number", np.nan),
        "activationCode": aoi.get("activationCode", np.nan),
        "activation_code": product.get("activationCode", np.nan),
        "aoiName": product.get("aoiName", np.nan),
        "aoiNumber": product.get("aoiNumber", np.nan),
        "type": product.get("type", np.nan),
        "monitoring": product.get("monitoringNumber", np.nan),
        "deliveryTime": version.get("deliveryTime", np.nan) if version else np.nan,
    }

    stats = product.get("stats")
    if not stats:
        # No stats available: keep the product as one row with empty stat columns.
        yield {**base, "subcategory_unit": np.nan, "affected": np.nan, "total": np.nan}
        return

    for category, subcategories in stats.items():
        if not isinstance(subcategories, dict):
            print(f"  Subcategory info is not a dict, found: {type(subcategories)}")
            continue
        for subcategory, values in subcategories.items():
            if not isinstance(values, dict):
                # Guard the innermost level too, so one malformed entry
                # cannot abort the whole scrape with an AttributeError.
                print(f"  Stat values are not a dict, found: {type(values)}")
                continue
            yield {
                **base,
                "subcategory_unit": f"{category}_{subcategory}_({values.get('unit', np.nan)})",
                "affected": values.get("affected", np.nan),
                "total": values.get("total", np.nan),
            }


products_data = []

for event in tqdm(events):
    url = f'https://rapidmapping.emergency.copernicus.eu/backend/dashboard-api/public-activations/?format=json&code={event}'
    response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on an HTTP error instead of on a confusing JSON/Key error later.
    response.raise_for_status()
    time.sleep(REQUEST_DELAY_S)  # IMPORTANT

    results = response.json().get('results') or []
    if not results:
        # An unknown activation code returns no results; skip it rather than
        # crash with IndexError and lose the run.
        print(f"  No activation found for {event}, skipping")
        continue

    # Only the first result is used, one row set per product of each AOI.
    for aoi in results[0].get('aois') or []:
        for product in aoi.get('products') or []:
            products_data.extend(_product_rows(aoi, product))

# Convert the data to a DataFrame for further use
aois_df_2 = pd.DataFrame(products_data)
aois_df_2

100%|██████████████████| 40/40 [04:24<00:00,  6.62s/it]


Unnamed: 0,name,number,activationCode,activation_code,aoiName,aoiNumber,type,monitoring,deliveryTime,subcategory_unit,affected,total
0,Olden,1,EMSR775,EMSR775,Olden,1,DEL,0,2024-11-01T14:12:00,,,
1,Oyane,2,EMSR775,EMSR775,Oyane,2,DEL,0,2024-11-01T18:30:00,,,
2,Oyane,2,EMSR775,EMSR775,Oyane,2,DEL,1,,,,
3,Aurlandsvangen,3,EMSR775,EMSR775,Aurlandsvangen,3,DEL,0,2024-11-01T17:25:00,,,
4,Over Ardal,4,EMSR775,EMSR775,Over Ardal,4,DEL,0,2024-11-01T20:43:00,,,
5,Voss,5,EMSR775,EMSR775,Voss,5,DEL,0,2024-11-02T02:13:00,,,
6,Sauda,6,EMSR775,EMSR775,Sauda,6,DEL,0,2024-11-01T12:40:00,,,
7,Ardalstangen,7,EMSR775,EMSR775,Ardalstangen,7,DEL,0,,,,
8,Gaupne,8,EMSR775,EMSR775,Gaupne,8,DEL,0,2024-11-07T16:00:00,,,
9,Fardal,9,EMSR775,EMSR775,Fardal,9,DEL,0,2024-11-07T16:00:00,,,


In [3]:
# Show every row when displaying a DataFrame (None removes the row limit).
# Use the canonical option name; "display.max.rows" only worked because
# pandas treats option keys as regular-expression patterns.
pd.set_option("display.max_rows", None)
aois_df_2

Unnamed: 0,name,number,activationCode,activation_code,aoiName,aoiNumber,type,monitoring,deliveryTime,subcategory_unit,affected,total
0,Olden,1,EMSR775,EMSR775,Olden,1,DEL,0,2024-11-01T14:12:00,,,
1,Oyane,2,EMSR775,EMSR775,Oyane,2,DEL,0,2024-11-01T18:30:00,,,
2,Oyane,2,EMSR775,EMSR775,Oyane,2,DEL,1,,,,
3,Aurlandsvangen,3,EMSR775,EMSR775,Aurlandsvangen,3,DEL,0,2024-11-01T17:25:00,,,
4,Over Ardal,4,EMSR775,EMSR775,Over Ardal,4,DEL,0,2024-11-01T20:43:00,,,
5,Voss,5,EMSR775,EMSR775,Voss,5,DEL,0,2024-11-02T02:13:00,,,
6,Sauda,6,EMSR775,EMSR775,Sauda,6,DEL,0,2024-11-01T12:40:00,,,
7,Ardalstangen,7,EMSR775,EMSR775,Ardalstangen,7,DEL,0,,,,
8,Gaupne,8,EMSR775,EMSR775,Gaupne,8,DEL,0,2024-11-07T16:00:00,,,
9,Fardal,9,EMSR775,EMSR775,Fardal,9,DEL,0,2024-11-07T16:00:00,,,


In [7]:
# Save the scraped table to an Excel file, omitting the DataFrame index column.
aois_df_2.to_excel("new_aois_df_2.xlsx", index=False)

# ANOTHER SCRAPING AFTER COPERNICUS CORRECTED THEIR DATA FOLLOWING OUR COMMUNICATION

#### 15 February 2025

In [1]:
import pandas as pd
import requests
import time
import numpy as np
from tqdm import tqdm
import json 
from datetime import datetime

# Copernicus EMS rapid-mapping activation codes to scrape.
events = ['EMSR775', 'EMSR773', 'EMSR771', 'EMSR770', 'EMSR768', 'EMSR766', 'EMSR764', 'EMSR763', 'EMSR762', 'EMSR761', 'EMSR759',
          'EMSR758', 'EMSR757', 'EMSR756', 'EMSR755', 'EMSR728', 'EMSR725', 'EMSR722', 'EMSR721', 'EMSR718', 'EMSR713', 'EMSR712',
          'EMSR711', 'EMSR710', 'EMSR708', 'EMSR706', 'EMSR705', 'EMSR701', 'EMSR697', 'EMSR694', 'EMSR693', 'EMSR692', 'EMSR684',
          'EMSR683', 'EMSR680', 'EMSR668', 'EMSR665', 'EMSR664', 'EMSR659', 'EMSR657']

REQUEST_TIMEOUT_S = 30  # give up on a stalled connection instead of hanging forever
REQUEST_DELAY_S = 5  # IMPORTANT: pause after every request (presumably to be gentle on the API)


def _product_rows(aoi, product):
    """Yield the flat output rows for one product of one area of interest.

    Args:
        aoi: dict describing an area of interest (reads "name", "number",
            "activationCode").
        product: dict describing one product of that AOI (reads
            "activationCode", "aoiName", "aoiNumber", "type",
            "monitoringNumber", "version" and "stats").

    Yields:
        One dict per (category, subcategory) entry found in
        ``product["stats"]``. When the product has no stats, a single row
        with NaN in "subcategory_unit", "affected" and "total" is yielded so
        the product still appears in the output.
    """
    version = product.get("version")
    # Columns shared by every row of this product; key order defines the
    # DataFrame column order.
    base = {
        "name": aoi.get("name", np.nan),
        "number": aoi.get("number", np.nan),
        "activationCode": aoi.get("activationCode", np.nan),
        "activation_code": product.get("activationCode", np.nan),
        "aoiName": product.get("aoiName", np.nan),
        "aoiNumber": product.get("aoiNumber", np.nan),
        "type": product.get("type", np.nan),
        "monitoring": product.get("monitoringNumber", np.nan),
        "deliveryTime": version.get("deliveryTime", np.nan) if version else np.nan,
    }

    stats = product.get("stats")
    if not stats:
        # No stats available: keep the product as one row with empty stat columns.
        yield {**base, "subcategory_unit": np.nan, "affected": np.nan, "total": np.nan}
        return

    for category, subcategories in stats.items():
        if not isinstance(subcategories, dict):
            print(f"  Subcategory info is not a dict, found: {type(subcategories)}")
            continue
        for subcategory, values in subcategories.items():
            if not isinstance(values, dict):
                # Guard the innermost level too, so one malformed entry
                # cannot abort the whole scrape with an AttributeError.
                print(f"  Stat values are not a dict, found: {type(values)}")
                continue
            yield {
                **base,
                "subcategory_unit": f"{category}_{subcategory}_({values.get('unit', np.nan)})",
                "affected": values.get("affected", np.nan),
                "total": values.get("total", np.nan),
            }


products_data = []

for event in tqdm(events):
    url = f'https://rapidmapping.emergency.copernicus.eu/backend/dashboard-api/public-activations/?format=json&code={event}'
    response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on an HTTP error instead of on a confusing JSON/Key error later.
    response.raise_for_status()
    time.sleep(REQUEST_DELAY_S)  # IMPORTANT

    results = response.json().get('results') or []
    if not results:
        # An unknown activation code returns no results; skip it rather than
        # crash with IndexError and lose the run.
        print(f"  No activation found for {event}, skipping")
        continue

    # Only the first result is used, one row set per product of each AOI.
    for aoi in results[0].get('aois') or []:
        for product in aoi.get('products') or []:
            products_data.extend(_product_rows(aoi, product))

# Convert the data to a DataFrame for further use
aois_df_3 = pd.DataFrame(products_data)
aois_df_3

100%|██████████████████| 40/40 [04:21<00:00,  6.54s/it]


Unnamed: 0,name,number,activationCode,activation_code,aoiName,aoiNumber,type,monitoring,deliveryTime,subcategory_unit,affected,total
0,Olden,1,EMSR775,EMSR775,Olden,1,DEL,0,2024-11-01T14:12:00,,,
1,Oyane,2,EMSR775,EMSR775,Oyane,2,DEL,0,2024-11-01T18:30:00,,,
2,Oyane,2,EMSR775,EMSR775,Oyane,2,DEL,1,,,,
3,Aurlandsvangen,3,EMSR775,EMSR775,Aurlandsvangen,3,DEL,0,2024-11-01T17:25:00,,,
4,Over Ardal,4,EMSR775,EMSR775,Over Ardal,4,DEL,0,2024-11-01T20:43:00,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
7666,DaugavaRiver,1,EMSR657,EMSR657,DaugavaRiver,1,DEL,5,2023-04-06T18:59:00,Transportation_Secondary Road_(km),8.0,207.7
7667,DaugavaRiver,1,EMSR657,EMSR657,DaugavaRiver,1,DEL,5,2023-04-06T18:59:00,Transportation_Airfield runways_(km),0.9,12.4
7668,DaugavaRiver,1,EMSR657,EMSR657,DaugavaRiver,1,DEL,5,2023-04-06T18:59:00,Transportation_Long-distance railways_(km),0.5,316.6
7669,DaugavaRiver,1,EMSR657,EMSR657,DaugavaRiver,1,DEL,5,2023-04-06T18:59:00,Estimated population_None_(),300.0,83000.0


In [2]:
# Save the re-scraped table to an Excel file, omitting the DataFrame index column.
aois_df_3.to_excel("final_scraped_data.xlsx", index=False)