# Read the data

In [1]:
import os
import pandas as pd

from fastapi import FastAPI
from typing import List
from models import CountryData, Pillar, Area, Indicator, Metric
import re

  from pandas.core import (


In [2]:
filepath = "../data/TPI ASCOR data - 13012025/ASCOR_assessments_results.xlsx" # Specify the correct path to the file
df_assessments = pd.read_excel(filepath)

# Convert the date columns to datetime type so we can filter by year later
df_assessments['Assessment date'] = pd.to_datetime(df_assessments['Assessment date'])
df_assessments['Publication date'] = pd.to_datetime(df_assessments['Publication date'])

  df_assessments['Assessment date'] = pd.to_datetime(df_assessments['Assessment date'])


# Define the function

In [3]:
country = 'Italy'
assessment_year = 2024

data = df_assessments[(df_assessments["Country"] == country) & (df_assessments["Assessment date"].dt.year == assessment_year)]

#remember which columns are area, indicator, metric
area_cols = [re.sub(".*?\s", "", col) for col in data.columns if col.startswith("area")]
indicator_cols = [re.sub(".*?\s", "", col) for col in data.columns if col.startswith("indicator")]
metric_cols =  [re.sub(".*?\s", "", col) for col in data.columns if col.startswith("metric")] 

#remove unecessary columns
data = data[[col for col in data.columns if col.startswith(("area", "indicator", "metric"))]]

#rename columns so they align with output
remap_column_names = {col: re.sub(".*?\s", "", col) for col in data.columns}
data = data.rename(columns=remap_column_names)


#get flat Pandas series of country data
data = data.iloc[0]
data = data.fillna("")

data

EP.1                         Partial
EP.1.a                           Yes
EP.1.b                            No
EP.1.c                            No
EP.2                         Partial
                      ...           
CF.4                                
CF.4.i       2.24 MW/US$ billion GDP
CF.4.ii     20.56 MW/US$ billion GDP
CF.4.iii     0.00 MW/US$ billion GDP
CF.4.iv      0.12 MW/US$ billion GDP
Name: 55, Length: 74, dtype: object

In [4]:
#get metric
metrics = [{'name': metric, 'value': data[f'{metric}']} for metric in metric_cols]

#get indicator
indicators = [{'name': indicator, 'assessment': data[f"{indicator}"],
                'metrics': next((met for met in metrics if met["name"].startswith(indicator)), "")} for indicator in indicator_cols]   

In [5]:
def get_country_data(country: str, assessment_year: int):
    data = df_assessments[(df_assessments["Country"] == country) & (df_assessments["Assessment date"].dt.year == assessment_year)]

    #remember which columns are area, indicator, metric
    area_cols = [re.sub(".*?\s", "", col) for col in data.columns if col.startswith("area")]
    indicator_cols = [re.sub(".*?\s", "", col) for col in data.columns if col.startswith("indicator")]
    metric_cols =  [re.sub(".*?\s", "", col) for col in data.columns if col.startswith("metric")] 

    #remove unecessary columns
    data = data[[col for col in data.columns if col.startswith(("area", "indicator", "metric"))]]

    #rename columns so they align with output
    remap_column_names = {col: re.sub(".*?\s", "", col) for col in data.columns}
    data = data.rename(columns=remap_column_names)

    #get flat Pandas series of country data
    data = data.iloc[0]
    data = data.fillna("")

    #get metric
    metrics = [{'name': metric, 'value': data[f'{metric}']} for metric in metric_cols]

    #get indicator
    indicators = [{'name': indicator, 'assessment': data[f"{indicator}"],
                   'metrics': next((met for met in metrics if met["name"].startswith(indicator)), "")} for indicator in indicator_cols]   

    #get area
    areas = [{'name': area, 'assessment': data[f"{area}"],
              'indicators': [ind for ind in indicators if ind["name"].startswith(area)]} for area in area_cols]

    #get pillar
    pillars = [{'name': pillar, 'areas': areas} for pillar in ["EP","CP","CF"]]

    output_dict = {'pillars': [pillar for pillar in pillars]}

    output = CountryData(**output_dict)

    return output

In [6]:
unique_values = [df_assessments['Country'].unique().tolist()]

In [7]:
countries = list((set(sum(unique_values, []))))

# Test how the function behaves

In [8]:
get_country_data("Italy", 2024)

CountryData(pillars=[Pillar(name='EP', areas=[Area(name='EP.1', assessment='Partial', indicators=[Indicator(name='EP.1.a', assessment='Yes', metrics=''), Indicator(name='EP.1.b', assessment='No', metrics=''), Indicator(name='EP.1.c', assessment='No', metrics='')]), Area(name='EP.2', assessment='Partial', indicators=[Indicator(name='EP.2.a', assessment='Yes', metrics=Metric(name='EP.2.a.i', value='-30%')), Indicator(name='EP.2.b', assessment='Yes', metrics=Metric(name='EP.2.b.i', value='0%')), Indicator(name='EP.2.c', assessment='No', metrics=Metric(name='EP.2.c.i', value='86%')), Indicator(name='EP.2.d', assessment='No', metrics=Metric(name='EP.2.d.i', value='317%'))]), Area(name='EP.3', assessment='Partial', indicators=[Indicator(name='EP.3.a', assessment='Yes', metrics=Metric(name='EP.3.a.i', value='2050')), Indicator(name='EP.3.b', assessment='Yes', metrics=''), Indicator(name='EP.3.c', assessment='No', metrics='')]), Area(name='CP.1', assessment='No', indicators=[Indicator(name='CP

# Testing out Pydantic Models

In [9]:
from pydantic import BaseModel, Field
from typing import Literal