In [1]:
import pandas as pd
from datetime import datetime
from pathlib import Path
from dateutil.relativedelta import relativedelta
import os

In [44]:
# Identifiers of the issuers under review, kept as strings so they compare
# equal to the ID columns, which are loaded as text.
# NOTE(review): "15765" is much shorter than the other entries and looks like a
# clarityid rather than a permid -- confirm the list is meant to mix both.
missing_permids = [
    "5080149298", "5064650117", "5046041361", "5034819615",
    "5000645180", "4296092115", "4295890178", "4295889397",
    "4295866913", "4295859326",
    "15765",
]

# Columns to load from the issuer-level datafeed.
target_columns = [
    # issuer identification
    "issuer_name", "permid", "clarityid",
    # inheritance information
    "company_inheriting", "parent_company",
    # strategy columns
    "str_001_s", "str_002_ec", "str_003_ec", "str_003b_ec",
    "str_004_asec", "str_005_ec",
    "cs_001_sec", "cs_003_sec", "cs_002_ec",
    "str_006_sec", "art_8_basicos",
]

In [3]:
# Load the issuer crossreference file.
# The CLARITY_AI column is read as text, the five columns are renamed by
# position, and the msci_id and sustainalytics_id columns are then dropped.
crossreference = (
    pd.read_csv(
        r"C:\Users\n740789\Documents\Projects_local\DataSets\crossreference\Aladdin_Clarity_Issuers_20250301.csv",
        dtype={"CLARITY_AI": str},
    )
    .set_axis(
        ["brs_id", "brs_issuer_name", "permid", "msci_id", "sustainalytics_id"],
        axis=1,
    )
    .drop(columns=["msci_id", "sustainalytics_id"])
)

In [18]:
# Crossreference rows whose permid is one of the identifiers under review.
cross_filtered = crossreference.loc[
    lambda frame: frame["permid"].isin(missing_permids)
]

In [20]:
cross_filtered

Unnamed: 0,brs_id,brs_issuer_name,permid
3989,H82067,ANHEUSER BUSCH INBEV SA,4295859326
35762,F31276,BP OIL ESPANA,4295889397
74308,H14063,RELX FINANCE BV,5046041361
79454,R32112,BOLIDEN AB,4295890178
90954,R66206,COMPAGNIE DE FINANCEMENT FONCIER SA,4296092115
116100,J63531,COMPASS GROUP FINANCE NETHERLANDS BV,5064650117
124475,J44454,EDP SERVICIOS FINANCIEROS ESPANA SA,5034819615
126526,J88119,ACCIONA ENERGIA FINANCIACION FILIALES SA,5080149298


In [4]:
# Reporting periods as "YYYYMM" strings: the current month (DATE) and the
# month before it (DATE_PREV).
DATE = f"{datetime.now():%Y%m}"
YEAR = DATE[:4]
# Midnight on the first day of the current month, rebuilt from DATE.
date_obj = datetime(year=int(YEAR), month=int(DATE[4:]), day=1)
prev_date_obj = date_obj - relativedelta(months=1)
DATE_PREV = f"{prev_date_obj:%Y%m}"

In [5]:
# Location of the previous month's issuer-level datafeed file (the "with_ovr" variant).
DATAFEED_DIR = Path(r"C:\Users\n740789\Documents\Projects_local\DataSets\DATAFEED")
DF_FILE_PATH = DATAFEED_DIR.joinpath(
    "datafeeds_with_ovr",
    f"{DATE_PREV}_df_issuer_level_with_ovr.csv",
)

In [6]:
# Load only the columns of interest; both ID columns are read as text so they
# can be compared with the string identifiers used elsewhere in the notebook.
df = pd.read_csv(
    DF_FILE_PATH,
    usecols=target_columns,
    dtype=dict.fromkeys(("permid", "clarityid"), str),
)

In [45]:
# Datafeed rows whose clarityid appears in missing_permids.
# Note that the match is made on the clarityid column, not on permid.
df_filtered = df.loc[lambda frame: frame["clarityid"].isin(missing_permids)]

In [46]:
# Add brs_id to df_filtered with a left join on permid; issuers without a
# crossreference entry keep a missing brs_id.
# A brs_id column left over from a previous run of this cell is dropped first:
# merging onto a frame that already has brs_id would create brs_id_x / brs_id_y
# instead, and the later column selection by name would then fail.
df_filtered = (
    df_filtered
    .drop(columns="brs_id", errors="ignore")
    .merge(crossreference[["permid", "brs_id"]], how="left", on="permid")
)


In [47]:
df_filtered

Unnamed: 0,issuer_name,company_inheriting,parent_company,str_001_s,str_002_ec,str_003_ec,str_004_asec,clarityid,str_005_ec,cs_001_sec,cs_003_sec,cs_002_ec,str_006_sec,art_8_basicos,permid,str_003b_ec,brs_id
0,BP France SAS,True,BP PLC,OK,EXCLUDED,OK,EXCLUDED,4295866913,OK,OK,OK,EXCLUDED,OK,OK,4295866913,OK,
1,BP Oil Espana SA,True,BP PLC,OK,EXCLUDED,OK,EXCLUDED,4295889397,OK,OK,OK,EXCLUDED,OK,OK,4295889397,OK,F31276
2,Fresenius SE & Co KGaA,False,-,OK,OK,OK,OK,15765,OK,OK,OK,OK,OK,OK,5001439044,OK,R65799
3,Aria Energy Operating LLC,True,BP PLC,OK,EXCLUDED,OK,EXCLUDED,5000645180,OK,OK,OK,EXCLUDED,OK,OK,5000645180,OK,


In [48]:
# Column order for the working frames, now including brs_id.
# NOTE: this rebinds target_columns, which the read_csv cell also uses as
# `usecols`; brs_id only exists after the crossreference merge, so the
# datafeed must be loaded before this cell runs.
target_columns = [
    # issuer identification
    "issuer_name", "permid", "clarityid", "brs_id",
    # inheritance information
    "company_inheriting", "parent_company",
    # strategy columns
    "str_001_s", "str_002_ec", "str_003_ec", "str_003b_ec",
    "str_004_asec", "str_005_ec",
    "cs_001_sec", "cs_003_sec", "cs_002_ec",
    "str_006_sec", "art_8_basicos",
]

# Reorder the columns of df_filtered to follow target_columns.
df_filtered = df_filtered.loc[:, target_columns]

In [49]:
# Keep only the issuers for which a brs_id was found; copy so that later
# assignments do not touch df_filtered.
final = df_filtered.loc[df_filtered["brs_id"].notna()].copy()

In [50]:
final

Unnamed: 0,issuer_name,permid,clarityid,brs_id,company_inheriting,parent_company,str_001_s,str_002_ec,str_003_ec,str_003b_ec,str_004_asec,str_005_ec,cs_001_sec,cs_003_sec,cs_002_ec,str_006_sec,art_8_basicos
1,BP Oil Espana SA,4295889397,4295889397,F31276,True,BP PLC,OK,EXCLUDED,OK,OK,EXCLUDED,OK,OK,OK,EXCLUDED,OK,OK
2,Fresenius SE & Co KGaA,5001439044,15765,R65799,False,-,OK,OK,OK,OK,OK,OK,OK,OK,OK,OK,OK


In [24]:
# Strategy columns to process: upload files are written per column and value.
columns_to_process = [
    "str_001_s",
    "str_002_ec",
    "str_003_ec",
    "str_003b_ec",
    "str_004_asec",
    "str_005_ec",
    "cs_001_sec",
    "cs_003_sec",
    "cs_002_ec",
    "str_006_sec",
    "art_8_basicos",
]


def process_csv_files(
    df: pd.DataFrame,
    id_column: str = "brs_id",
    columns_to_process: list[str] = columns_to_process,
    output_dir: str = r"C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\brs_lists_to_upload\strategies_upload_list",
):
    """Write one upload CSV per (column, value) pair found in ``df``.

    For every column in ``columns_to_process`` and every distinct non-missing
    value in that column, a file named ``<yymmdd>_<column>_<value>.csv`` is
    written to ``output_dir``. Each file lists the identifiers of the rows
    holding that value, using the columns "ID Type" (always "Issuer"), "ID",
    "Start Date" (always empty) and "Comment" (a constant text). The path of
    every file written is printed.

    Args:
        df: Frame containing ``id_column`` and every column to process.
        id_column: Name of the column whose values go into the "ID" field.
        columns_to_process: Columns to split into per-value files.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        None.

    Raises:
        KeyError: If ``id_column`` or one of ``columns_to_process`` is not a
            column of ``df``.
    """
    columns = list(columns_to_process)

    # Keep only the identifier column and the columns to process.
    df = df[[id_column] + columns]

    # Date prefix for the file names, formatted as yymmdd (e.g. "250320").
    date_str = datetime.now().strftime("%y%m%d")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Constant comment written on every row.
    COMMENT = "manual override upload"

    for column in columns:
        # Missing values are skipped: NaN never compares equal to itself, so it
        # would only produce an empty "<date>_<column>_nan.csv" file.
        for value in df[column].dropna().unique():
            # Identifiers come from id_column (not a hard-coded "brs_id").
            matching_ids = df.loc[df[column] == value, id_column]

            # Scalars are broadcast along the index of matching_ids.
            output_df = pd.DataFrame(
                {
                    "ID Type": "Issuer",
                    "ID": matching_ids,
                    "Start Date": "",
                    "Comment": COMMENT,
                }
            )

            filename = f"{date_str}_{column}_{value}.csv"
            filepath = os.path.join(output_dir, filename)

            output_df.to_csv(filepath, index=False)
            print(f"Created file: {filepath}")

In [30]:
# Datafeed rows whose issuer name contains "VOLKSWAGEN FINANCIAL SERVICES",
# compared in upper case; rows with a missing name are excluded.
df_inherit = df.loc[
    lambda frame: frame["issuer_name"]
    .str.upper()
    .str.contains("VOLKSWAGEN FINANCIAL SERVICES", na=False)
]

In [31]:
df_inherit

Unnamed: 0,issuer_name,company_inheriting,parent_company,str_001_s,str_002_ec,str_003_ec,str_004_asec,clarityid,str_005_ec,cs_001_sec,cs_003_sec,cs_002_ec,str_006_sec,art_8_basicos,permid,str_003b_ec
5335,Volkswagen Financial Services NV,True,Volkswagen AG,OK,EXCLUDED,EXCLUDED,OK,156287,EXCLUDED,EXCLUDED,EXCLUDED,EXCLUDED,EXCLUDED,OK,4296552157,OK
8311,Volkswagen Financial Services AG,True,Volkswagen AG,OK,EXCLUDED,EXCLUDED,OK,246450,EXCLUDED,EXCLUDED,EXCLUDED,EXCLUDED,EXCLUDED,OK,4296540381,OK
18192,Volkswagen Financial Services Compania Financi...,True,Volkswagen AG,OK,OK,OK,OK,173000877,OK,OK,OK,OK,OK,OK,5052540632,OK


In [32]:
# Add brs_id to df_inherit with a left join on permid.
# A brs_id column left over from a previous run of this cell is dropped first:
# merging again would otherwise create brs_id_x / brs_id_y and the column
# selection below would raise a KeyError.
df_inherit = (
    df_inherit
    .drop(columns="brs_id", errors="ignore")
    .merge(crossreference[["permid", "brs_id"]], how="left", on="permid")
)

# Reorder the columns to follow target_columns.
df_inherit = df_inherit[target_columns]

In [36]:
# Keep only the row mapped to BRS issuer R62247.
df_inherit = df_inherit.loc[df_inherit["brs_id"].eq("R62247")].copy()

In [37]:
df_inherit

Unnamed: 0,issuer_name,permid,clarityid,brs_id,company_inheriting,parent_company,str_001_s,str_002_ec,str_003_ec,str_003b_ec,str_004_asec,str_005_ec,cs_001_sec,cs_003_sec,cs_002_ec,str_006_sec,art_8_basicos
1,Volkswagen Financial Services AG,4296540381,246450,R62247,True,Volkswagen AG,OK,EXCLUDED,EXCLUDED,OK,OK,EXCLUDED,EXCLUDED,EXCLUDED,EXCLUDED,EXCLUDED,OK


In [38]:
# Stack final and df_inherit vertically.
# Rows of `final` whose brs_id is already present in df_inherit are removed
# first, so re-running this cell does not append the same issuers twice.
# ignore_index renumbers the rows: both inputs carry their own row labels,
# which can otherwise collide in the combined frame.
final = pd.concat(
    [final[~final["brs_id"].isin(df_inherit["brs_id"])], df_inherit],
    ignore_index=True,
)

In [39]:
final

Unnamed: 0,issuer_name,permid,clarityid,brs_id,company_inheriting,parent_company,str_001_s,str_002_ec,str_003_ec,str_003b_ec,str_004_asec,str_005_ec,cs_001_sec,cs_003_sec,cs_002_ec,str_006_sec,art_8_basicos
1,BP Oil Espana SA,4295889397,4295889397,F31276,True,BP PLC,OK,EXCLUDED,OK,OK,EXCLUDED,OK,OK,OK,EXCLUDED,OK,OK
1,Volkswagen Financial Services AG,4296540381,246450,R62247,True,Volkswagen AG,OK,EXCLUDED,EXCLUDED,OK,OK,EXCLUDED,EXCLUDED,EXCLUDED,EXCLUDED,EXCLUDED,OK


In [40]:
# Replace BRS issuer id F31276 with Z10102 wherever it appears in final.
final.loc[final["brs_id"].eq("F31276"), "brs_id"] = "Z10102"

In [52]:
# Manual override: set str_005_ec to EXCLUDED for BRS issuer R65799.
final.loc[final["brs_id"].eq("R65799"), "str_005_ec"] = "EXCLUDED"

In [53]:
final

Unnamed: 0,issuer_name,permid,clarityid,brs_id,company_inheriting,parent_company,str_001_s,str_002_ec,str_003_ec,str_003b_ec,str_004_asec,str_005_ec,cs_001_sec,cs_003_sec,cs_002_ec,str_006_sec,art_8_basicos
1,BP Oil Espana SA,4295889397,4295889397,F31276,True,BP PLC,OK,EXCLUDED,OK,OK,EXCLUDED,OK,OK,OK,EXCLUDED,OK,OK
2,Fresenius SE & Co KGaA,5001439044,15765,R65799,False,-,OK,OK,OK,OK,OK,EXCLUDED,OK,OK,OK,OK,OK


In [42]:
# Write the upload files for the issuers collected in `final`, using the
# default id column, strategy columns and output directory.
process_csv_files(df=final)

Created file: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\brs_lists_to_upload\strategies_upload_list\250320_str_001_s_OK.csv
Created file: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\brs_lists_to_upload\strategies_upload_list\250320_str_002_ec_EXCLUDED.csv
Created file: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\brs_lists_to_upload\strategies_upload_list\250320_str_003_ec_OK.csv
Created file: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\brs_lists_to_upload\strategies_upload_list\250320_str_003_ec_EXCLUDED.csv
Created file: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\brs_lists_to_upload\strategies_upload_list\250320_str_003b_ec_OK.csv
Created file: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\brs_lists_to_upload\strategies_upload_list\250320_str_004_asec_EXCLUDED.csv
Created file: C:\Users\n740789\Documents\clarity_data_quality_controls\exc