In [1]:
import pandas as pd
import os
import numpy as np

In [79]:
# Load the ASCOR assessment results and reduce them to the Germany / 2024 rows.
# Everything is done in one non-mutating chain so that re-running the cell
# always starts again from the file on disk.
df = (
    pd.read_excel("../data/TPI ASCOR data - 13012025/ASCOR_assessments_results.xlsx")
    # Parse both date columns (day/month/year). "Assessment date" is then
    # reduced to its year, which turns the column entries into integers.
    .assign(**{
        'Assessment date': lambda frame: pd.to_datetime(
            frame['Assessment date'], format='%d/%m/%Y'
        ).dt.year,
        'Publication date': lambda frame: pd.to_datetime(
            frame['Publication date'], format='%d/%m/%Y'
        ),
    })
    # Keep only the relevant rows
    .loc[lambda frame: (frame["Country"] == "Germany") & (frame["Assessment date"] == 2024)]
    # Replace every NaN with an empty string (the rows themselves are kept)
    .replace(np.nan, "")
    # Column names use "." as a separator; switch it to "_"
    .rename(columns=lambda name: name.replace(".", "_"))
)


In [80]:
# Keep only the columns whose name contains one of the codes EP, CP or CF
df_filtered = df.filter(regex="EP|CP|CF")

# Columns with "source" or "year" in their name do not fit the
# "metrics" / "indicators" / "areas" / "pillars" logic, so they are removed
columns_to_drop = [
    col
    for col in df_filtered.columns
    if "source" in col or "year" in col
]
df_filtered = df_filtered.drop(columns=columns_to_drop)

In [102]:
# Move the EP/CP/CF code from the end of a column name to its beginning
def rename_columns(col_name):
    """Return ``col_name`` with a trailing EP/CP/CF code moved to the front.

    The name is split on whitespace. When it has more than one token and the
    last token starts with "EP", "CP" or "CF", the result is that last token
    followed by the remaining tokens joined with single spaces. In every
    other case the name is returned exactly as it was passed in.
    """
    tokens = col_name.split()

    # A single token (or an empty name) has nothing to reorder
    if len(tokens) < 2:
        return col_name

    *label_tokens, code = tokens

    # str.startswith accepts a tuple and checks each prefix in turn
    if not code.startswith(("EP", "CP", "CF")):
        return col_name

    return code + " " + " ".join(label_tokens)

# Run every column label through rename_columns and install the new labels
df_filtered.columns = list(map(rename_columns, df_filtered.columns))

In [103]:
# Take the first remaining row as a Series whose index holds the column labels.
# Fail with an explicit message when no row is left, instead of pandas'
# generic "out-of-bounds" IndexError from iloc[0].
if len(df_filtered) == 0:
    raise IndexError("df_filtered has no rows to read; check the upstream row filters")
data = df_filtered.iloc[0]
data.index

Index(['EP_1 area', 'EP_1_a indicator', 'EP_1_b indicator', 'EP_1_c indicator',
       'EP_2 area', 'EP_2_a indicator', 'EP_2_a_i metric', 'EP_2_b indicator',
       'EP_2_b_i metric', 'EP_2_c indicator', 'EP_2_c_i metric',
       'EP_2_d indicator', 'EP_2_d_i metric', 'EP_3 area', 'EP_3_a indicator',
       'EP_3_a_i metric', 'EP_3_b indicator', 'EP_3_c indicator', 'CP_1 area',
       'CP_1_a indicator', 'CP_1_b indicator', 'CP_2 area', 'CP_2_a indicator',
       'CP_2_b indicator', 'CP_2_b_i metric', 'CP_2_c indicator',
       'CP_2_c_i metric', 'CP_3 area', 'CP_3_a indicator', 'CP_3_a_i metric',
       'CP_3_b indicator', 'CP_3_b_i metric', 'CP_3_c indicator',
       'CP_3_c_i metric', 'CP_3_d indicator', 'CP_3_d_i metric',
       'CP_3_d_ii metric', 'CP_4 area', 'CP_4_a indicator', 'CP_4_b indicator',
       'CP_4_b_i metric', 'CP_4_c indicator', 'CP_4_d_i metric',
       'CP_4_d indicator', 'CP_4_e indicator', 'CP_4_e_i metric', 'CP_5 area',
       'CP_5_a indicator', 'CP_5_b indi

In [104]:
# Build a nested list of pillars -> areas -> indicators -> metrics from the
# labels of `data`. Each label is expected to look like "<code> <kind>", e.g.
# "EP_2_a_i metric"; the code is the first whitespace-separated token.
pillars = []
pillars_values = ["EP", "CP", "CF"]


def _labels_under(labels, parent_code, kind):
    """Return the labels of the given kind that belong to ``parent_code``.

    A label belongs to ``parent_code`` when it starts with the parent code
    followed by the "_" separator, and is of the requested kind when ``kind``
    is one of its whitespace-separated tokens. Including the separator in the
    prefix keeps a code such as "X_1" from also matching "X_10_a".
    """
    prefix = f"{parent_code}_"
    return [
        label
        for label in labels
        if label.startswith(prefix) and kind in label.split()
    ]


for pillar in pillars_values:
    areas = []
    for area in _labels_under(data.index, pillar, "area"):
        area_code = area.split()[0]
        indicators = []
        for indicator in _labels_under(data.index, area_code, "indicator"):
            indicator_code = indicator.split()[0]
            metrics = []
            for metric in _labels_under(data.index, indicator_code, "metric"):
                metrics.append({
                    "name": metric.split()[0],
                    "value": data[metric]
                })
            indicators.append({
                "name": indicator_code,
                "assessment": data[indicator],
                "metrics": metrics
            })
        areas.append({
            "name": area_code,
            "assessment": data[area],
            "indicators": indicators
        })
    pillars.append({
        "name": pillar,
        "areas": areas
    })

In [105]:
# Show the assembled nested `pillars` list as the cell output
pillars

[{'name': 'EP',
  'areas': [{'name': 'EP_1',
    'assessment': 'Partial',
    'indicators': [{'name': 'EP_1_a', 'assessment': 'Yes', 'metrics': []},
     {'name': 'EP_1_b', 'assessment': 'No', 'metrics': []},
     {'name': 'EP_1_c', 'assessment': 'No', 'metrics': []}]},
   {'name': 'EP_2',
    'assessment': 'Partial',
    'indicators': [{'name': 'EP_2_a',
      'assessment': 'Yes',
      'metrics': [{'name': 'EP_2_a_i', 'value': '-45%'}]},
     {'name': 'EP_2_b',
      'assessment': 'No',
      'metrics': [{'name': 'EP_2_b_i',
        'value': 'No or unsuitable disclosure'}]},
     {'name': 'EP_2_c',
      'assessment': 'No',
      'metrics': [{'name': 'EP_2_c_i', 'value': '31%'}]},
     {'name': 'EP_2_d',
      'assessment': 'No',
      'metrics': [{'name': 'EP_2_d_i', 'value': '394%'}]}]},
   {'name': 'EP_3',
    'assessment': 'Yes',
    'indicators': [{'name': 'EP_3_a',
      'assessment': 'Yes',
      'metrics': [{'name': 'EP_3_a_i', 'value': '2045'}]},
     {'name': 'EP_3_b', 'ass