**Objective:** Document my thought process as I develop the function for the `/v1/country-data/` endpoint.

In [10]:
import pandas as pd
import os


# Read the data

In [13]:
# Folder that holds the ASCOR spreadsheets. Set the ASCOR_DATA_DIR environment
# variable to run this notebook on another machine; the fallback is the path
# this notebook was originally developed against.
ASCOR_DATA_DIR = os.environ.get(
    "ASCOR_DATA_DIR",
    r"C:\Users\kbhatia2\Desktop\DS205\ascor-api\data\TPI ASCOR data - 13012025",
)

df_assessments = pd.read_excel(os.path.join(ASCOR_DATA_DIR, "ASCOR_assessments_results.xlsx"))

# Convert both date columns to datetimes so the `.dt` accessor works downstream.
# NOTE(review): the recorded output of this cell echoes the 'Assessment date'
# conversion line, which suggests pandas raised a warning here (presumably about
# an ambiguous date format) -- confirm, and pass an explicit `format=` if so.
for date_column in ("Assessment date", "Publication date"):
    df_assessments[date_column] = pd.to_datetime(df_assessments[date_column])


  df_assessments['Assessment date'] = pd.to_datetime(df_assessments['Assessment date'])


# The function I am designing

In [25]:
def get_country_data(country: str, assessment_year: int) -> dict:
    """Return the area-level assessment results for one country and year.

    Filters the notebook-level ``df_assessments`` frame down to the rows whose
    ``Country`` equals ``country`` and whose ``Assessment date`` falls in
    ``assessment_year``, keeps only the columns whose name starts with
    ``"area"``, and flattens the first matching row into a dictionary.

    Args:
        country: Country name, compared for equality with the ``Country`` column.
        assessment_year: Calendar year, compared with the year of
            ``Assessment date``.

    Returns:
        A dict with one entry per area column (``"area "`` removed from the
        key, missing values replaced by ``""``) plus the requested ``country``
        and ``assessment_year``.

    Raises:
        ValueError: If no row matches the country/year combination.
    """
    selected_row = (
        (df_assessments["Country"] == country)
        & (df_assessments["Assessment date"].dt.year == assessment_year)
    )

    # Filter the data
    data = df_assessments[selected_row]

    if data.empty:
        # Must be an f-string, otherwise the placeholders are emitted literally
        # and the caller cannot tell which combination was missing.
        raise ValueError(
            f"There are no data points for the combination of: {country}, {assessment_year}"
        )

    # Select only the area-level columns
    area_columns = [col for col in df_assessments.columns if col.startswith("area")]

    # Drop the "area " prefix so the keys read "EP.1", "CP.2", ...
    remap_area_column_names = {col: col.replace('area ', '') for col in area_columns}

    data = (
        data[area_columns]
        # JSON does not allow for NaN, so leave an empty string instead
        .fillna('')
        .rename(columns=remap_area_column_names)
        # Echo the request parameters back alongside the results
        .assign(country=country, assessment_year=assessment_year)
    )

    # Grab just the first row (there should only be one anyway)
    # and return it as a dictionary
    return data.iloc[0].to_dict()

Test how the function behaves

In [27]:
# Smoke test: expect a flat dict of area results plus the two request parameters
get_country_data('Italy', 2024)

{'EP.1': 'Partial',
 'EP.2': 'Partial',
 'EP.3': 'Partial',
 'CP.1': 'No',
 'CP.2': 'Partial',
 'CP.3': 'Partial',
 'CP.4': 'Partial',
 'CP.5': 'Partial',
 'CP.6': 'Partial',
 'CF.1': 'No',
 'CF.2': 'Exempt',
 'CF.3': 'Partial',
 'CF.4': '',
 'country': 'Italy',
 'assessment_year': 2024}

In [28]:
# Testing out pydantic models

In [29]:
from pydantic import BaseModel
class CountryData(BaseModel):
    """Minimal pydantic model used to experiment with response objects.

    Carries only the country name and the assessment year.
    """
    country: str  # country name, e.g. "United Kingdom"
    assessment_year: int  # calendar year of the assessment, e.g. 2024

In the end, we want our API to produce an **instance** of the `CountryData` class, like this:

In [31]:
# Build the model directly from explicit keyword arguments
output = CountryData(
    country="United Kingdom",
    assessment_year=2024,
)

output

CountryData(country='United Kingdom', assessment_year=2024)

In reality, all I (usually) have is a dict or list that looks like this:

In [35]:
# The same two fields as above, but as a plain dictionary
output_dict = {
    "country": "United Kingdom",
    "assessment_year": 2024,
}
output_dict

{'country': 'United Kingdom', 'assessment_year': 2024}

The `**` operator unpacks a dictionary into keyword arguments when calling a class or a function, so that each key becomes a parameter name and each value becomes the corresponding argument.

In [37]:
# Each key of output_dict is passed as a keyword argument to CountryData
CountryData(**output_dict)

CountryData(country='United Kingdom', assessment_year=2024)

# Start working on the deeply hierarchical structure

Eventually, I want to serve this:
{
    "pillars": [
    {
        "name": "EP",
        "areas": [
            {
                "name": "EP.1",
                "assessment": "Partial",
                "indicators": [
                    {
                        "name": "EP.1.a",
                        "assessment": "Yes",
                        "metrics": ""
                    },
                    ...
                    {
                        "name": "EP.2.a",
                        "assessment": "Yes",
                        "metrics": {
                            "name": "EP.2.a.i",
                            "value": "-25%"
                        }
                    }
                ]
            }
        ]
    },
    {
        "name": "CP",
        "areas": [
            {
                "name": "CP.1",
                "assessment": "Partial",
                "indicators": [
                    ...
                ]
            },
            {
                ...
            }
        ]
    },
    {
        "name": "CF",
        "areas": [
            {
                "name": "CF.1",
                "assessment": "Partial",
                "indicators": [
                    ...
                ]
            },
            {
                ...
            }
        ]
    }
    ]
}

For now, I will focus on the metric part.

In [38]:
# This is the data model for metrics
class Metric(BaseModel):
    """A single metric, stored as a name and its reported value as text."""
    name: str  # metric identifier, e.g. 'metric EP.2.a.i'
    value: str  # reported value kept as text, e.g. '-25%'

Before I actually write the code for the API, let me see how I'd have to filter the data.

In [43]:
# Keep only the column names that carry the 'metric' prefix
selected_columns = list(
    filter(lambda column_name: column_name.startswith('metric'), df_assessments.columns)
)
selected_columns

['metric EP.2.a.i',
 'metric EP.2.b.i',
 'metric EP.2.c.i',
 'metric EP.2.d.i',
 'metric EP.3.a.i',
 'metric CP.2.b.i',
 'metric CP.2.c.i',
 'metric CP.3.a.i',
 'metric CP.3.b.i',
 'metric CP.3.c.i',
 'metric CP.3.d.i',
 'metric CP.3.d.ii',
 'metric CP.4.b.i',
 'metric CP.4.d.i',
 'metric CP.4.e.i',
 'metric CP.6.a.i',
 'metric CF.1.a.i',
 'metric CF.1.b.i',
 'metric CF.4.i',
 'metric CF.4.ii',
 'metric CF.4.iii',
 'metric CF.4.iv']

Filter the DataFrame to include only those columns.

In [47]:
# Take the first row as an example data point (iloc[0] is the first row, not a random one)
data = df_assessments.iloc[0]
data[selected_columns]

metric EP.2.a.i                            '-25%
metric EP.2.b.i      No or unsuitable disclosure
metric EP.2.c.i                              62%
metric EP.2.d.i                             822%
metric EP.3.a.i                             2050
metric CP.2.b.i                              28%
metric CP.2.c.i                  US$ 10.64/tCO₂e
metric CP.3.a.i      No or unsuitable disclosure
metric CP.3.b.i                            0.34%
metric CP.3.c.i                            0.79%
metric CP.3.d.i                            0.26%
metric CP.3.d.ii                           1.72%
metric CP.4.b.i     4.28 MJ/US$ PPP-adjusted GDP
metric CP.4.d.i                              27%
metric CP.4.e.i                              22%
metric CP.6.a.i                             0.94
metric CF.1.a.i                            0.02%
metric CF.1.b.i                            0.02%
metric CF.4.i            43.5 MW/US$ billion GDP
metric CF.4.ii          88.42 MW/US$ billion GDP
metric CF.4.iii     

How would the `Metric` object be constructed?

In [49]:
# Hand-written example of a single metric
Metric(name='metric EP.2.a.i', value='-25%')

Metric(name='metric EP.2.a.i', value='-25%')

In [55]:
# Convert the metric columns of the example row into a {column name: value}
# dict once, and reuse it below instead of recomputing it.
data_as_dict = data[selected_columns].to_dict()

# One Metric instance per metric column.
# NOTE(review): Metric.value is annotated as `str`; a missing (NaN) cell would
# presumably fail validation -- confirm against the full data set.
list_metrics = [Metric(name=name, value=value) for name, value in data_as_dict.items()]

list_metrics

[Metric(name='metric EP.2.a.i', value="'-25%"),
 Metric(name='metric EP.2.b.i', value='No or unsuitable disclosure'),
 Metric(name='metric EP.2.c.i', value='62%'),
 Metric(name='metric EP.2.d.i', value='822%'),
 Metric(name='metric EP.3.a.i', value='2050'),
 Metric(name='metric CP.2.b.i', value='28%'),
 Metric(name='metric CP.2.c.i', value='US$ 10.64/tCO₂e'),
 Metric(name='metric CP.3.a.i', value='No or unsuitable disclosure'),
 Metric(name='metric CP.3.b.i', value='0.34%'),
 Metric(name='metric CP.3.c.i', value='0.79%'),
 Metric(name='metric CP.3.d.i', value='0.26%'),
 Metric(name='metric CP.3.d.ii', value='1.72%'),
 Metric(name='metric CP.4.b.i', value='4.28 MJ/US$ PPP-adjusted GDP'),
 Metric(name='metric CP.4.d.i', value='27%'),
 Metric(name='metric CP.4.e.i', value='22%'),
 Metric(name='metric CP.6.a.i', value='0.94'),
 Metric(name='metric CF.1.a.i', value='0.02%'),
 Metric(name='metric CF.1.b.i', value='0.02%'),
 Metric(name='metric CF.4.i', value='43.5 MW/US$ billion GDP'),
 Metr

## How to group together metrics with their relevant indicators


In [None]:
indicator_columns = [col for col in df_assessments.columns if col.startswith('indicator')]
metric_columns = [col for col in df_assessments.columns if col.startswith('metric')]

# Maps each indicator column name to the list of metric columns that belong to it
metrics_by_indicator = {}
for indicator in indicator_columns:
   # Find the metrics associated with this specific indicator.
   # The full indicator column name still carries its 'indicator' prefix, so it can
   # never be a substring of a 'metric ...' column name; compare on the code only.
   # NOTE(review): assumes indicator columns look like 'indicator EP.2.a' and their
   # metrics like 'metric EP.2.a.i' -- confirm against the spreadsheet header.
   indicator_code = indicator[len('indicator'):].strip()
   metric = [col for col in metric_columns if col.startswith(f'metric {indicator_code}.')]
   metrics_by_indicator[indicator] = metric