In [6]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import os
from tqdm import tqdm


def fetch_ooni_data_by_day(probe_cc, category_code, year, url="https://api.ooni.io/api/v1/measurements"):
    
    start_date = datetime(year, 1, 1)
    end_date = datetime(year, 12, 31)
    
    
    data = []
    day = start_date
    
    
    while day <= end_date:
        
        params = {
            "category_code": category_code,
            "since": day.strftime("%Y-%m-%d") + "T00:00:00",
            "until": day.strftime("%Y-%m-%d") + "T23:59:59",
            "probe_cc": probe_cc,
            "test_name": "web_connectivity",
            "limit": 10000,
        }
        
        
        response = requests.get(url, params=params)
        response.raise_for_status()  
        
        
        results = response.json().get('results', [])
        
        
        data.extend(results)
        
        
        print(f"## Getting data for {day.strftime('%Y-%m-%d')}")
        print(f"Fetched {len(results)} records. Total: {len(data)}")
        print(datetime.now().strftime("%H:%M:%S"))
        
       
        day += timedelta(days=1)
    
    
    df = pd.DataFrame(data)
    
    
    return df

In [7]:
EU_countries = ["DE","FR",'AT','BE','BG','HR','CY','DK', 'EE','FI','GR','HU','IE','IT','LT','LU','MT','NL','PL','PT','RO','SK','SI','ES','SE']
years = [2024]


def extract_blocking_type(row):
    if 'scores' in row and row['scores'] is not None:
        if 'analysis' in row['scores'] and 'blocking_type' in row['scores']['analysis']:
            return row['scores']['analysis']['blocking_type']
    return None  
        



for country in tqdm(EU_countries, desc="Fetching data per country"):
    print(f"### Fetching data for {country}")
    for year in years:
        
        directory = 'OONI_Project'
        file_name = f'ooni_data_{country}_GMB_{str(year)}.csv'
        file_path = os.path.join(directory, file_name)
        
        if os.path.exists(file_path):
            print(f"Country: {country} Year: {year} Status: Fetched")
            continue
        else: 
            data = fetch_ooni_data_by_day(country,'GMB',year, url="https://api.ooni.io/api/v1/measurements")
            data['blocking_type'] = data.apply(extract_blocking_type, axis=1)
            if not data.empty:
            
                data['measurement_start_time'] = pd.to_datetime(data['measurement_start_time'])
            
                data['year'] = data['measurement_start_time'].dt.year
                data['month'] = data['measurement_start_time'].dt.month
                data['day'] = data['measurement_start_time'].dt.day
            data.drop(['measurement_start_time','scores'],inplace=True,axis=1)
            data['valid'] = ~(data['anomaly'] | data['confirmed'] | data['failure'])
        
            
            if not os.path.exists(directory):
                os.makedirs(directory)
                print(f"Directory '{directory}' created.")
            else:
                print(f"Directory '{directory}' already exists.")
            
            data.to_csv(file_path, index=False)
            print(f"DataFrame saved to '{file_path}'")

Fetching data per country: 100%|██████████████| 25/25 [00:00<00:00, 8603.35it/s]

### Fetching data for DE
Country: DE Year: 2024 Status: Fetched
### Fetching data for FR
Country: FR Year: 2024 Status: Fetched
### Fetching data for AT
Country: AT Year: 2024 Status: Fetched
### Fetching data for BE
Country: BE Year: 2024 Status: Fetched
### Fetching data for BG
Country: BG Year: 2024 Status: Fetched
### Fetching data for HR
Country: HR Year: 2024 Status: Fetched
### Fetching data for CY
Country: CY Year: 2024 Status: Fetched
### Fetching data for DK
Country: DK Year: 2024 Status: Fetched
### Fetching data for EE
Country: EE Year: 2024 Status: Fetched
### Fetching data for FI
Country: FI Year: 2024 Status: Fetched
### Fetching data for GR
Country: GR Year: 2024 Status: Fetched
### Fetching data for HU
Country: HU Year: 2024 Status: Fetched
### Fetching data for IE
Country: IE Year: 2024 Status: Fetched
### Fetching data for IT
Country: IT Year: 2024 Status: Fetched
### Fetching data for LT
Country: LT Year: 2024 Status: Fetched
### Fetching data for LU
Country: LU Yea


