In [None]:
%pip install selenium webdriver_manager streamlit pandas requests plotly

In [1]:
import requests
from bs4 import BeautifulSoup
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

In [2]:
# Example 1: Setting up the environment
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Setup the webdriver: the path returned by ChromeDriverManager().install()
# is handed to the Service that launches Chrome.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

# Basic usage: load the Eurostat database page and print its title.
# try/finally guarantees the browser is closed even when navigation or the
# title lookup raises, so no Chrome process is left behind.
try:
    driver.get("https://ec.europa.eu/eurostat/en/web/main/data/database")
    print(driver.title)
finally:
    driver.quit()

Database - Eurostat


In [2]:
import streamlit as st
import pandas as pd
import requests
import plotly.express as px

# Eurostat API base URL; fetch_eurostat_data appends the dataset code to it
BASE_URL = "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/"

def fetch_eurostat_data(dataset_code, params, timeout=30):
    """Fetch one dataset from the Eurostat API and return the decoded JSON body.

    Every failure is reported through ``st.error`` and turned into a ``None``
    return value, so callers only need a truthiness check.

    Args:
        dataset_code: Dataset identifier appended to ``BASE_URL``.
        params: Query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        The parsed JSON response, or ``None`` when the request fails, the
        status code is not 200, or the body is not valid JSON.
    """
    url = f"{BASE_URL}{dataset_code}"
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, DNS failures and timeouts all end up here.
        st.error(f"Error fetching data: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"Error fetching data: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # JSON decoding errors raised by response.json() derive from ValueError.
        st.error(f"Error fetching data: {exc}")
        return None

def process_data(data):
    """Turn a JSON-stat response into a DataFrame with one row per observation.

    In JSON-stat the keys of ``value`` are flat, row-major positions in the
    data cube (the last dimension varies fastest), not comma-separated
    category codes. Each position is decoded into one category per dimension
    using the ``size`` array, then translated to its human-readable label.

    Args:
        data: Decoded JSON-stat response (dict), or ``None``.

    Returns:
        A DataFrame with columns ``id`` (the flat index as a string),
        ``value`` and one column per dimension holding the category label
        (or the category code when no label is provided). An empty DataFrame
        is returned when ``data`` is falsy or has no ``value`` key. When the
        response carries no usable ``id``/``size`` arrays only the ``id`` and
        ``value`` columns are returned.
    """
    if not data or 'value' not in data:
        return pd.DataFrame()

    values = data['value']
    if isinstance(values, dict):
        items = list(values.items())
    else:
        # Dense array form: the list position is the flat index and null
        # marks a missing cell, which the sparse object form simply omits.
        items = [(str(pos), val) for pos, val in enumerate(values) if val is not None]

    df = pd.DataFrame(items, columns=['id', 'value'])

    dimension = data.get('dimension') or {}
    # JSON-stat 2.0 keeps the ordered dimension ids and sizes at the top
    # level; JSON-stat 1.x nests them inside 'dimension'.
    id_cols = list(data.get('id') or dimension.get('id') or [])
    sizes = list(data.get('size') or dimension.get('size') or [])
    if not id_cols or len(id_cols) != len(sizes):
        return df

    # Peel the per-dimension positions off the flat index, starting with the
    # fastest-varying (last) dimension.
    remainder = df['id'].astype('int64')
    positions = {}
    for col, size in zip(reversed(id_cols), reversed(sizes)):
        positions[col] = remainder % size
        remainder = remainder // size

    # Replace positions with labels
    for col in id_cols:
        category = (dimension.get(col) or {}).get('category') or {}
        labels = category.get('label') or {}
        index = category.get('index')
        if isinstance(index, dict):
            code_at = {pos: code for code, pos in index.items()}
        elif isinstance(index, list):
            code_at = dict(enumerate(index))
        else:
            # 'index' may be omitted; fall back to the order of the labels.
            code_at = dict(enumerate(labels))
        label_at = {pos: labels.get(code, code) for pos, code in code_at.items()}
        df[col] = positions[col].map(label_at)

    return df

st.title("Eurostat Data Explorer")

# Sidebar for user input
st.sidebar.header("Data Selection")
dataset_code = st.sidebar.text_input("Enter Eurostat dataset code", "nama_10_gdp")

# st.button is True only during the rerun triggered by the click. Any later
# widget interaction (for example changing an axis selectbox) reruns the
# script with the button False again, so the fetched frame is stored in
# st.session_state instead of living only inside the button branch.
if st.sidebar.button("Fetch Data"):
    with st.spinner("Fetching data..."):
        data = fetch_eurostat_data(dataset_code, params={'format': 'JSON'})
        st.session_state["eurostat_df"] = process_data(data) if data else None

# Display the most recently fetched data, if any
df = st.session_state.get("eurostat_df")
if df is not None:
    st.write(df)

    # Simple visualization
    if not df.empty:
        st.subheader("Data Visualization")
        x_axis = st.selectbox("Select X-axis", df.columns)
        y_axis = st.selectbox("Select Y-axis", df.columns)

        fig = px.scatter(df, x=x_axis, y=y_axis, hover_data=df.columns)
        st.plotly_chart(fig)

# Sidebar footer: a divider followed by two short usage hints
st.sidebar.markdown("---")
for hint in (
    "This application uses the Eurostat API to fetch and visualize statistical data.",
    "Enter a dataset code in the input box above and click 'Fetch Data' to start.",
):
    st.sidebar.info(hint)

# Debug aid: on click, fetch the dataset again and show the first 500
# characters of the response's string form
show_raw = st.sidebar.button("Debug: Show Raw API Response")
if show_raw:
    debug_data = fetch_eurostat_data(dataset_code, params={'format': 'JSON'})
    if debug_data:
        preview = str(debug_data)[:500]
        st.code(preview, language='json')

In [None]:
import pandas as pd
import numpy as np
import requests
import plotly.express as px
from ipywidgets import widgets
from IPython.display import display, clear_output

# Eurostat API base URL; fetch_eurostat_data appends the dataset code to it
BASE_URL = "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/"

def fetch_eurostat_data(dataset_code, params, timeout=30):
    """Fetch one dataset from the Eurostat API and return the decoded JSON body.

    Every failure is printed and turned into a ``None`` return value, so
    callers only need a truthiness check.

    Args:
        dataset_code: Dataset identifier appended to ``BASE_URL``.
        params: Query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        The parsed JSON response, or ``None`` when the request fails, the
        status code is not 200, or the body is not valid JSON.
    """
    url = f"{BASE_URL}{dataset_code}"
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, DNS failures and timeouts all end up here.
        print(f"Error fetching data: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # JSON decoding errors raised by response.json() derive from ValueError.
        print(f"Error fetching data: {exc}")
        return None

def process_data(data):
    """Turn a JSON-stat response into a DataFrame with one row per observation.

    In JSON-stat the keys of ``value`` are flat, row-major positions in the
    data cube (the last dimension varies fastest), not comma-separated
    category codes. Each position is decoded into one category per dimension
    using the ``size`` array, then translated to its human-readable label.

    Args:
        data: Decoded JSON-stat response (dict), or ``None``.

    Returns:
        A DataFrame with one column per dimension (category label, or the
        category code when no label is provided) followed by a ``value``
        column. An empty DataFrame is returned, after printing the reason,
        when ``value``, ``dimension`` or the ``id``/``size`` arrays are
        missing.
    """
    if not data or 'value' not in data:
        print("No data or 'value' key not found in the response.")
        return pd.DataFrame()

    if 'dimension' not in data:
        print("'dimension' key not found in the response.")
        return pd.DataFrame()

    dimension = data['dimension']
    # JSON-stat 2.0 keeps the ordered dimension ids and sizes at the top
    # level; JSON-stat 1.x nests them inside 'dimension'. Dict key order is
    # not used because only 'id' defines the order the flat index refers to.
    dimensions = list(data.get('id') or dimension.get('id') or [])
    sizes = list(data.get('size') or dimension.get('size') or [])
    print(f"Dimensions found: {dimensions}")

    if not dimensions or len(dimensions) != len(sizes):
        print("'id'/'size' arrays not found in the response.")
        return pd.DataFrame()

    values = data['value']
    if isinstance(values, dict):
        flat_index = [int(key) for key in values]
        observations = list(values.values())
    else:
        # Dense array form: the list position is the flat index and null
        # marks a missing cell, which the sparse object form simply omits.
        present = [(pos, val) for pos, val in enumerate(values) if val is not None]
        flat_index = [pos for pos, _ in present]
        observations = [val for _, val in present]

    # Peel the per-dimension positions off the flat index, starting with the
    # fastest-varying (last) dimension.
    remainder = pd.Series(flat_index, dtype='int64')
    positions = {}
    for dim, size in zip(reversed(dimensions), reversed(sizes)):
        positions[dim] = remainder % size
        remainder = remainder // size

    df = pd.DataFrame(index=remainder.index)
    for dim in dimensions:
        category = (dimension.get(dim) or {}).get('category') or {}
        labels = category.get('label') or {}
        index = category.get('index')
        if isinstance(index, dict):
            code_at = {pos: code for code, pos in index.items()}
        elif isinstance(index, list):
            code_at = dict(enumerate(index))
        else:
            # 'index' may be omitted; fall back to the order of the labels.
            code_at = dict(enumerate(labels))
        label_at = {pos: labels.get(code, code) for pos, code in code_at.items()}
        df[dim] = positions[dim].map(label_at)

    df['value'] = observations
    return df

def on_fetch_click(b):
    """Button callback: fetch the dataset named in the text box and render it.

    Clears the current output, shows the input widgets again, then fetches
    and processes the dataset. On success it prints the frame's shape and
    columns, displays the first rows and sets up an interactive scatter
    plot; otherwise it prints a short failure message.

    Args:
        b: Argument supplied to the on_click callback (unused).
    """
    clear_output()
    for widget in (dataset_input, fetch_button):
        display(widget)

    print("Fetching data...")
    payload = fetch_eurostat_data(dataset_input.value, params={'format': 'JSON'})
    if not payload:
        print("Failed to fetch data.")
        return

    frame = process_data(payload)
    if frame.empty:
        print("No data to display.")
        return

    print(f"Data shape: {frame.shape}")
    print(f"Columns: {frame.columns.tolist()}")
    display(frame.head())

    # Only numeric columns are offered for the Y axis
    numeric_cols = frame.select_dtypes(include=[np.number]).columns.tolist()
    x_axis = widgets.Dropdown(options=frame.columns, description='X-axis:')
    y_axis = widgets.Dropdown(options=numeric_cols, description='Y-axis:')

    def update_plot(x, y):
        """Draw a scatter plot of column ``y`` against column ``x``."""
        figure = px.scatter(frame, x=x, y=y, hover_data=frame.columns)
        figure.show()

    widgets.interact(update_plot, x=x_axis, y=y_axis)

# Input widgets: a text box for the dataset code and a button whose click
# handler is on_fetch_click
dataset_input = widgets.Text(description='Dataset Code:', value='nama_10_gdp')
fetch_button = widgets.Button(description='Fetch Data')
fetch_button.on_click(on_fetch_click)

# Show both widgets below the cell
for widget in (dataset_input, fetch_button):
    display(widget)

def show_raw_data(dataset_code):
    """Print a short structural summary of the raw API response.

    Fetches the dataset and prints the dimension keys, the first five
    entries of ``value`` and the full content of the first dimension.
    Prints a failure message when nothing could be fetched.

    Args:
        dataset_code: Dataset identifier passed to fetch_eurostat_data.
    """
    data = fetch_eurostat_data(dataset_code, params={'format': 'JSON'})
    if not data:
        print("Failed to fetch data.")
        return

    print("Dimensions:")
    print(data.get('dimension', {}).keys())

    print("\nFirst few values:")
    leading_values = list(data.get('value', {}).items())[:5]
    print(leading_values)

    print("\nExample of dimension structure:")
    # Only the first dimension is printed as an example
    for first_dim in list(data.get('dimension', {}))[:1]:
        print(f"\n{first_dim}:")
        print(data['dimension'][first_dim])

# Debug aid: dump the raw response structure for the sample dataset
# (comment this call out to skip the extra network request)
show_raw_data('nama_10_gdp')