In [1]:
import pandas as pd
import plotly.express as px
import json
import numpy as np
import os

def load_json(filepath):
    """Read a JSON file and return the decoded object.

    The file is opened explicitly as UTF-8 so that non-ASCII text (for
    example accented country names) is decoded the same way on every
    platform instead of depending on the locale's default encoding.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        return json.load(file)

def normalize_country_name(country, standardization):
    """Map a raw country label to its standard name, ignoring case.

    ``standardization['countries']`` maps each standard name to a list of
    accepted variants.  The key of the first entry whose variants contain
    ``country`` (compared in lowercase) is returned; when no entry matches,
    the lowercased input is returned instead.
    """
    entries = standardization['countries'].items()
    needle = country.lower()
    matches = (
        canonical
        for canonical, spellings in entries
        if needle in {spelling.lower() for spelling in spellings}
    )
    return next(matches, needle)

def load_and_prepare_data(metadata_path, data_path, indicators_path):
    """Load every available indicator CSV into a dict of DataFrames.

    Both JSON files are read with ``load_json``.  For each indicator listed
    under each category of the indicators file, the CSV at
    ``{data_path}/{category}/{indicator}_world_bank.csv`` is read if it
    exists (indicators without a file are skipped).  Each frame is indexed
    by its ``date`` column, its column labels are passed through
    ``normalize_country_name``, and gaps are filled with
    ``interpolate(method='linear', axis=0)``.

    Returns:
        A ``(data_dict, indicators)`` tuple: the prepared frames keyed by
        indicator name, and the decoded indicators JSON.
    """
    standardization = load_json(metadata_path)
    indicators = load_json(indicators_path)
    frames = {}

    for category, indicator_dict in indicators.items():
        for indicator, _details in indicator_dict.items():
            csv_path = f"{data_path}/{category}/{indicator}_world_bank.csv"
            if not os.path.exists(csv_path):
                continue  # no data file for this indicator

            frame = pd.read_csv(csv_path, index_col='date', parse_dates=True)
            frame.columns = [
                normalize_country_name(label, standardization)
                for label in frame.columns
            ]
            frames[indicator] = frame.interpolate(method='linear', axis=0)

    return frames, indicators

def format_value(value):
    """Format a number compactly with a K/M/B/T magnitude suffix.

    The suffix is chosen from the magnitude of ``value``, so negative
    numbers are abbreviated the same way as positive ones
    (``-2_500_000_000`` -> ``"-2.5B"``).  Scaled values are rounded to one
    decimal place; values with magnitude below 1,000 are shown without
    decimals.  ``NaN`` compares false against every threshold and is
    rendered by the final branch as ``"nan"``.

    Args:
        value: Number to format.

    Returns:
        The formatted string.
    """
    magnitude = abs(value)
    scales = (
        (1_000_000_000_000, "T"),
        (1_000_000_000, "B"),
        (1_000_000, "M"),
        (1_000, "K"),
    )
    for threshold, suffix in scales:
        if magnitude >= threshold:
            return f"{round(value / threshold, 1)}{suffix}"
    # Small values: round to one decimal first, then print with no decimals.
    return f"{round(value, 1):.0f}"

def plot_indicator_map(data_dict, indicators, indicator, date, log=False, unit_measure=""):
    """Show a world choropleth of one indicator at a single date.

    Args:
        data_dict: Mapping of indicator name to a DataFrame whose index holds
            the dates and whose columns are country names.
        indicators: Nested mapping ``{category: {indicator: {'title': ...}}}``
            used to look up the display title; the capitalized indicator
            name is used when no title is found.
        indicator: Key into ``data_dict``.
        date: Index label selecting the row to plot.
        log: When true, colour countries by ``log1p(value)`` and label the
            colour bar at powers of ten spanning the positive values.
        unit_measure: Optional unit appended to the title and hover text.

    Returns:
        None.  The figure is displayed with ``fig.show()``.  If the
        indicator or the date is missing, a message is printed and nothing
        is drawn.
    """
    if indicator not in data_dict:
        print(f"Indicator {indicator} not found in the data dictionary.")
        return

    df = data_dict[indicator]
    if date not in df.index:
        print(f"Date {date} not found in the data for indicator {indicator}.")
        return

    data_at_date = df.loc[date].reset_index()
    data_at_date.columns = ['country', 'value']

    colorbar = dict(
        thickness=15,
        len=0.5,
        yanchor="middle",
        y=0.5,
    )

    if log:
        # NOTE(review): log1p yields -inf/NaN for values <= -1; confirm the
        # indicators plotted with log=True are non-negative.
        data_at_date['log_value'] = np.log1p(data_at_date['value'])
        value_column = 'log_value'

        # Only strictly positive, finite values have a usable log10.  Zero,
        # negative or all-NaN data would otherwise make int(log10(...)) raise.
        values = data_at_date['value']
        positive = values[(values > 0) & np.isfinite(values)]
        if not positive.empty:
            exponents = range(
                int(np.log10(positive.min())),
                int(np.log10(positive.max())) + 1,
            )
            colorbar['tickvals'] = [np.log1p(10**i) for i in exponents]
            colorbar['ticktext'] = [format_value(10**i) for i in exponents]
        # With no positive values the tick placement is left to plotly.
    else:
        value_column = 'value'
        # NOTE(review): this puts one colour-bar tick at every country's
        # value; confirm that is intended.
        colorbar['tickvals'] = data_at_date['value']
        colorbar['ticktext'] = data_at_date['value'].apply(format_value)

    # Take the title from the first category that lists this indicator.
    title = next(
        (group[indicator]['title'] for group in indicators.values() if indicator in group),
        None,
    )
    if not title:
        title = indicator.capitalize()

    # Human-readable values for the hover box (always the untransformed value).
    data_at_date['formatted_value'] = data_at_date['value'].apply(format_value)

    fig = px.choropleth(
        data_at_date,
        locations='country',
        locationmode='country names',
        color=value_column,
        custom_data=['formatted_value'],
        color_continuous_scale=px.colors.sequential.Plasma,
        labels={value_column: ''},  # no label on the colour bar
        title=f'{title} ({unit_measure})' if unit_measure else f'{title}'
    )

    fig.update_layout(
        geo=dict(
            showframe=False,
            showcoastlines=False,
            projection_type='equirectangular'
        ),
        margin={"r":5,"t":50,"l":5,"b":0},
        height=400,
        width=600,
        coloraxis_colorbar=colorbar
    )

    # Hover shows the formatted absolute value followed by the unit.
    fig.update_traces(
        hovertemplate="<b>%{location}</b><br>Value: %{customdata[0]} " + unit_measure + "<extra></extra>"
    )

    fig.show()

if __name__ == "__main__":
    # Input locations: standardization metadata, indicator catalogue, CSV root.
    metadata_path = 'standardization.json'
    indicators_path = 'indicators.json'
    data_path = 'data'

    data_dict, indicators = load_and_prepare_data(
        metadata_path=metadata_path,
        data_path=data_path,
        indicators_path=indicators_path,
    )

    # Map 'gdp_current_usd' at 2020-01-01 on a logarithmic colour scale.
    plot_indicator_map(
        data_dict=data_dict,
        indicators=indicators,
        indicator='gdp_current_usd',
        date='2020-01-01',
        log=True,
        unit_measure="USD",
    )


In [5]:
import pandas as pd
import plotly.express as px
import json
import os

def load_json(filepath):
    """Read a JSON file and return the decoded object.

    The file is opened explicitly as UTF-8 so that non-ASCII text (for
    example accented country aliases) is decoded the same way on every
    platform instead of depending on the locale's default encoding.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        return json.load(file)

def normalize_country_name(country, standardization):
    """Map a raw country label to its standard name, ignoring case.

    Each entry of ``standardization['countries']`` carries an ``'aliases'``
    list.  The key of the first entry whose aliases contain ``country``
    (compared in lowercase) is returned; when no entry matches, the
    lowercased input is returned instead.
    """
    entries = standardization['countries'].items()
    needle = country.lower()
    matches = (
        canonical
        for canonical, entry in entries
        if needle in {alias.lower() for alias in entry['aliases']}
    )
    return next(matches, needle)

def load_and_prepare_data(metadata_path, data_path, indicators_path):
    """Load every available indicator CSV into a dict of DataFrames.

    Both JSON files are read with ``load_json``.  For each indicator listed
    under each category of the indicators file, the CSV at
    ``{data_path}/{category}/{indicator}_world_bank.csv`` is read if it
    exists (indicators without a file are skipped).  Each frame is indexed
    by its ``date`` column, its column labels are passed through
    ``normalize_country_name``, and gaps are filled with
    ``interpolate(method='linear', axis=0)``.

    Returns:
        A ``(data_dict, indicators, standardization)`` tuple: the prepared
        frames keyed by indicator name, the decoded indicators JSON, and the
        decoded standardization JSON.
    """
    standardization = load_json(metadata_path)
    indicators = load_json(indicators_path)
    frames = {}

    for category, indicator_dict in indicators.items():
        for indicator, _details in indicator_dict.items():
            csv_path = f"{data_path}/{category}/{indicator}_world_bank.csv"
            if not os.path.exists(csv_path):
                continue  # no data file for this indicator

            frame = pd.read_csv(csv_path, index_col='date', parse_dates=True)
            frame.columns = [
                normalize_country_name(label, standardization)
                for label in frame.columns
            ]
            frames[indicator] = frame.interpolate(method='linear', axis=0)

    return frames, indicators, standardization

def format_value(value):
    """Format a number compactly with a K/M/B/T magnitude suffix.

    The suffix is chosen from the magnitude of ``value``, so negative
    numbers are abbreviated the same way as positive ones
    (``-1500`` -> ``"-1.5K"``).  Scaled values are rounded to one decimal
    place; values with magnitude below 1,000 are shown without decimals.
    ``NaN`` compares false against every threshold and is rendered by the
    final branch as ``"nan"``.

    Args:
        value: Number to format.

    Returns:
        The formatted string.
    """
    # Compare on the magnitude so the sign does not defeat the thresholds.
    magnitude = abs(value)
    if magnitude >= 1_000_000_000_000:
        return f"{round(value / 1_000_000_000_000, 1)}T"
    if magnitude >= 1_000_000_000:
        return f"{round(value / 1_000_000_000, 1)}B"
    if magnitude >= 1_000_000:
        return f"{round(value / 1_000_000, 1)}M"
    if magnitude >= 1_000:
        return f"{round(value / 1_000, 1)}K"
    # Small values: round to one decimal first, then print with no decimals.
    return f"{round(value, 1):.0f}"

def plot_time_series(data_dict, indicators, indicator, countries=None, unit_measure="", category_title=None, standardization=None):
    """Show a line chart of one indicator over time, one line per country.

    Args:
        data_dict: Mapping of indicator name to a DataFrame indexed by
            ``date`` with one column per country.
        indicators: Nested mapping ``{category: {indicator: {'title': ...}}}``
            used to look up the display title; the capitalized indicator
            name is used when no title is found.
        indicator: Key into ``data_dict``.
        countries: Optional column selection applied to the frame; all
            columns are plotted when omitted.
        unit_measure: Optional unit appended to the title, the y-axis label
            and the hover text.
        category_title: ``None`` or ``"none"`` draws a plain chart.
            ``"demographic"`` groups by the first item of each country's
            ``'categorization'`` list and any other value by the second;
            the group sets the line dash style.  Countries missing from the
            standardization data are labelled ``'Unknown'``.
        standardization: Optional, already-loaded standardization mapping.
            When omitted and a category is requested,
            ``'standardization.json'`` is read from the working directory.

    Returns:
        None.  The figure is displayed with ``fig.show()``.  If the
        indicator is missing, a message is printed and nothing is drawn.
    """
    if indicator not in data_dict:
        print(f"Indicator {indicator} not found in the data dictionary.")
        return

    df = data_dict[indicator]

    if countries is not None:
        df = df[countries]

    # Long format: one row per (date, country) pair.
    df_melted = df.reset_index().melt(id_vars='date', var_name='country', value_name='value')
    df_melted['formatted_value'] = df_melted['value'].apply(format_value)

    # Take the title from the first category that lists this indicator.
    title = next(
        (group[indicator]['title'] for group in indicators.values() if indicator in group),
        None,
    )
    if not title:
        title = indicator.capitalize()

    # An empty unit must not leave a dangling "()" in the title or axis label.
    unit_suffix = f' ({unit_measure})' if unit_measure else ''

    line_kwargs = dict(
        x='date',
        y='value',
        color='country',
        custom_data=['formatted_value'],  # formatted value for the hover box
        labels={'value': f'Value{unit_suffix}'},
        title=f'{title} Time Series{unit_suffix}',
    )

    if category_title and category_title != "none":
        # The standardization data is only needed for categorized charts, so
        # it is loaded lazily and only when the caller did not supply it.
        if standardization is None:
            standardization = load_json('standardization.json')

        category_index = 0 if category_title == "demographic" else 1
        country_category = {country: data['categorization'][category_index]
                            for country, data in standardization['countries'].items()}

        df_melted['category'] = df_melted['country'].apply(lambda x: country_category.get(x, 'Unknown'))

        line_kwargs['line_dash'] = 'category'  # dash style per category
        line_kwargs['labels']['category'] = 'Category'
        line_kwargs['title'] = f'{title} Time Series by {category_title.capitalize()}{unit_suffix}'

    fig = px.line(df_melted, **line_kwargs)

    # Hover shows the formatted absolute value followed by the unit.
    fig.update_traces(
        hovertemplate="<b>%{fullData.name}</b><br>Date: %{x}<br>Value: %{customdata[0]} " + unit_measure + "<extra></extra>"
    )

    fig.show()

if __name__ == "__main__":
    # Input locations: standardization metadata, indicator catalogue, CSV root.
    metadata_path = 'standardization.json'
    indicators_path = 'indicators.json'
    data_path = 'data'

    data_dict, indicators, standardization = load_and_prepare_data(
        metadata_path=metadata_path,
        data_path=data_path,
        indicators_path=indicators_path,
    )

    # Chart 'gdp_current_usd' for three countries, dashed by demographic category.
    plot_time_series(
        data_dict=data_dict,
        indicators=indicators,
        indicator='gdp_current_usd',
        countries=['brazil', 'china', 'united_states'],
        unit_measure="USD",
        category_title="demographic",
    )
