In [1]:
import sys
import os
import warnings
from pathlib import Path
from typing import List, Tuple
from itertools import chain

import numpy as np
import pandas as pd

from utils.dataloaders import (
    load_clarity_data,
    load_aladdin_data,
    load_crossreference,
    load_portfolios,
    load_overrides,
    save_excel
)
from utils.zombie_killer import main as zombie_killer

2025-03-25 21:23:00,274 - utils.get_date - INFO - Date format is valid. Date set to 202503.
Output directory is set to: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\sri_data\zombie_list


In [2]:
# Import the centralized configuration
from config import get_config

# Get the common configuration for the Pre-OVR-Analysis script.
config = get_config("pre-ovr-analysis", interactive=False)
logger = config["logger"]
DATE = config["DATE"]
YEAR = config["YEAR"]
DATE_PREV = config["DATE_PREV"]
REPO_DIR = config["REPO_DIR"]
DATAFEED_DIR = config["DATAFEED_DIR"]
SRI_DATA_DIR = config["SRI_DATA_DIR"]
paths = config["paths"]

# Use the paths from config
df_1_path = paths["PRE_DF_WOVR_PATH"]
df_2_path = paths["CURRENT_DF_WOUTOVR_PATH"]
CROSSREFERENCE_PATH = paths["CROSSREFERENCE_PATH"]
BMK_PORTF_STR_PATH = paths["BMK_PORTF_STR_PATH"]
OVR_PATH = paths["OVR_PATH"]
COMMITTEE_PATH = paths["COMMITTEE_PATH"]

# Define the output directory and file based on the configuration.
OUTPUT_DIR = config["OUTPUT_DIR"]
OUTPUT_FILE = OUTPUT_DIR / f"{DATE}_pre_ovr_analysis.xlsx"

2025-03-25 21:23:04,636 - utils.get_date - INFO - Date format is valid. Date set to 202503.
Output directory is set to: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\sri_data\pre-ovr-analysis


In [3]:
# Ignore workbook warnings
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")

In [4]:
# check that the date constants are set correctly
print(f"{DATE} and {YEAR} and {DATE_PREV}.")

202503 and 2025 and 202502.


In [5]:
# DEFINE TEST COLUMNS
# let's define necessary column lists

clarity_test_col = [
    "str_001_s",
    "str_002_ec",
    "str_003_ec",
    "str_003b_ec",
    "str_004_asec",
    "str_005_ec",
    "art_8_basicos",
    "str_006_sec",
    "str_007_sect",
    "cs_001_sec",
    "cs_003_sec",
    "cs_002_ec",
    "gp_esccp",
    "gp_esccp_22",
    "gp_esccp_25",
    "gp_esccp_30",
]
columns_to_read = ["permid", "isin", "issuer_name"] + clarity_test_col
brs_test_cols = [
    "str_001_s",
    "str_002_ec",
    "str_003_ec",
    "str_003b_ec",
    "str_004_asec",
    "str_005_ec",
    "str_006_sec",
    "str_007_sect",
    "str_008_sec",
    "scs_001_sec",
    "scs_002_ec",
    "scs_003_sec",
    "gp_essccp",
    "gp_esccp_22",
    "gp_esccp_25",
    "gp_esccp_30",
    "aladdin_id",
]

rename_dict = {
    "cs_001_sec": "scs_001_sec",
    "cs_002_ec": "scs_002_ec",
    "cs_003_sec": "scs_003_sec",
    "gp_esccp": "gp_essccp",
    "art_8_basicos" : "str_008_sec"
}

delta_test_cols = [
    'gp_esccp_22',
    'gp_esccp_25',
    'gp_esccp_30',
    'gp_essccp',
    'scs_001_sec',
    'scs_002_ec',
    'scs_003_sec',
    'str_001_s',
    'str_002_ec',
    'str_003_ec',
    'str_003b_ec',
    'str_004_asec',
    'str_005_ec',
    'str_006_sec',
    'str_007_sect',
    'str_008_sec'
    ]

Define Functions

In [6]:
def prepare_dataframes(
    base_df: pd.DataFrame, new_df: pd.DataFrame, target_index:str = "permid"
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Prepare DataFrames by setting the index and filtering for common indexes.
    Logs info about common, new, and missing indexes.
    """
    # Set index to 'permid' if it exists, otherwise assume it's already the index.
    logger.info(f"Setting index to {target_index}.")
    if target_index in base_df.columns:
        base_df = base_df.set_index(target_index)
    else:
        logger.warning("df1 does not contain a 'permid' column. Using current index.")

    if target_index in new_df.columns:
        new_df = new_df.set_index(target_index)
    else:
        logger.warning("df2 does not contain a 'permid' column. Using current index.")

    common_indexes = base_df.index.intersection(new_df.index)
    new_indexes = new_df.index.difference(base_df.index)
    missing_indexes = base_df.index.difference(new_df.index)

    logger.info(f"Number of common indexes: {len(common_indexes)}")

    return (
        base_df.loc[common_indexes],
        new_df.loc[common_indexes],
        new_df.loc[new_indexes],
        base_df.loc[missing_indexes],
    )

In [7]:
def compare_dataframes(
    df1: pd.DataFrame, df2: pd.DataFrame, test_col: List[str] = delta_test_cols
) -> pd.DataFrame:
    """Compare DataFrames and create a delta DataFrame."""
    delta = df2.copy()
    for col in test_col:
        if col in df1.columns and col in df2.columns:
            logger.info(f"Comparing column: {col}")
            # Create a mask for differences between the two DataFrames
            diff_mask = df1[col] != df2[col]
            # Update the delta DataFrame with the differences
            delta.loc[~diff_mask, col] = np.nan
    return delta


def get_exclusion_list(
    row: pd.Series,
    df1: pd.DataFrame,
    test_col: List[str] = delta_test_cols,
) -> List[str]:
    """Get list of columns that changed to EXCLUDED."""
    return [
        col
        for col in test_col
        if row[col] == "EXCLUDED" and df1.loc[row.name, col] != "EXCLUDED"
    ]


def get_inclusion_list(
    row: pd.Series,
    df1: pd.DataFrame,
    test_col: List[str] = delta_test_cols,
) -> List[str]:
    """Get list of columns that changed from EXCLUDED to any other value."""
    return [
        col
        for col in test_col
        if row[col] != "EXCLUDED" and df1.loc[row.name, col] == "EXCLUDED"
    ]


def check_new_exclusions(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    delta: pd.DataFrame,
    test_col: List[str] = delta_test_cols,
    suffix_level: str = "",
) -> pd.DataFrame:
    """Check for new exclusions and update the delta DataFrame."""
    delta["new_exclusion"] = False
    for col in test_col:
        if col in df1.columns and col in df2.columns:
            logger.info(f"Checking for new exclusions in column: {col}")
            mask = (df1[col] != "EXCLUDED") & (df2[col] == "EXCLUDED")
            delta.loc[mask, "new_exclusion"] = True
            logger.info(f"Number of new exclusions in {col}: {mask.sum()}")
    delta[f"exclusion_list{suffix_level}"] = delta.apply(
        lambda row: get_exclusion_list(row, df1, test_col), axis=1
    )
    return delta


def check_new_inclusions(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    delta: pd.DataFrame,
    test_col: List[str] = delta_test_cols,
    suffix_level: str = "",
) -> pd.DataFrame:
    """Check for new inclusions and update the delta DataFrame."""
    delta["new_inclusion"] = False
    for col in test_col:
        if col in df1.columns and col in df2.columns:
            logger.info(f"Checking for new inclusions in column: {col}")
            mask = (df1[col] == "EXCLUDED") & (df2[col] != "EXCLUDED")
            delta.loc[mask, "new_inclusion"] = True
            logger.info(f"Number of new inclusions in {col}: {mask.sum()}")
    delta[f"inclusion_list{suffix_level}"] = delta.apply(
        lambda row: get_inclusion_list(row, df1, test_col), axis=1
    )
    return delta


def finalize_delta(
    delta: pd.DataFrame,
    test_col: List[str] = delta_test_cols,
    target_index: str = "permid",
) -> pd.DataFrame:
    """Finalize the delta DataFrame by removing unchanged rows and resetting the index."""
    delta = delta.dropna(subset=test_col, how="all")
    delta.reset_index(inplace=True)
    delta[target_index] = delta[target_index].astype(str)
    logger.info(f"Final delta shape: {delta.shape}")
    return delta

In [8]:
# LOAD DATA
# clarity data
df_1 = load_clarity_data(df_1_path, columns_to_read)
df_2 = load_clarity_data(df_2_path, columns_to_read)

2025-03-25 21:23:04,717 - utils.dataloaders - INFO - Loading Clarity data from: C:\Users\n740789\Documents\Projects_local\DataSets\DATAFEED\datafeeds_with_ovr\202502_df_issuer_level_with_ovr.csv
2025-03-25 21:23:05,167 - utils.dataloaders - INFO - Successfully loaded Clarity data from: C:\Users\n740789\Documents\Projects_local\DataSets\DATAFEED\datafeeds_with_ovr\202502_df_issuer_level_with_ovr.csv
2025-03-25 21:23:05,169 - utils.dataloaders - INFO - Loading Clarity data from: C:\Users\n740789\Documents\Projects_local\DataSets\DATAFEED\ficheros_tratados\2025\20250301_Equities_feed_IssuerLevel_sinOVR.csv
2025-03-25 21:23:05,598 - utils.dataloaders - INFO - Successfully loaded Clarity data from: C:\Users\n740789\Documents\Projects_local\DataSets\DATAFEED\ficheros_tratados\2025\20250301_Equities_feed_IssuerLevel_sinOVR.csv


In [9]:
# let's rename columns in df_1 and df_2 using the rename_dict
df_1.rename(columns=rename_dict, inplace=True)
df_2.rename(columns=rename_dict, inplace=True)


In [10]:
# aladdin /brs data / perimetros
brs_carteras = load_aladdin_data(BMK_PORTF_STR_PATH, "portfolio_carteras")    
brs_benchmarks = load_aladdin_data(BMK_PORTF_STR_PATH, "portfolio_benchmarks")
crosreference = load_crossreference(CROSSREFERENCE_PATH)

2025-03-25 21:23:05,625 - utils.dataloaders - INFO - Loading portfolio_carteras data from C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\aladdin_data\bmk_portf_str\202503_strategies_snt world_portf_bmks.xlsx
2025-03-25 21:23:24,599 - utils.dataloaders - INFO - Cleaning columns and converting data types for portfolio_carteras
2025-03-25 21:23:24,601 - utils.dataloaders - INFO - Converting column 'aladdin_id' to string.
2025-03-25 21:23:24,604 - utils.dataloaders - INFO - Converting column 'portfolio_id' to string.
2025-03-25 21:23:24,607 - utils.dataloaders - INFO - Successfully loaded Aladdin data from: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\aladdin_data\bmk_portf_str\202503_strategies_snt world_portf_bmks.xlsx
2025-03-25 21:23:24,608 - utils.dataloaders - INFO - Loading portfolio_benchmarks data from C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\aladdin_data\bmk_portf_str\202503_strategies_snt world_portf_bmks.

In [11]:
# add aladdin_id to df_1 and df_2
logger.info("Adding aladdin_id to clarity dfs")
df_1 = df_1.merge(crosreference[["permid", "aladdin_id"]], on="permid", how="left")
df_2 = df_2.merge(crosreference[["permid", "aladdin_id"]], on="permid", how="left")

2025-03-25 21:24:21,648 - pre-ovr-analysis - INFO - Adding aladdin_id to clarity dfs


In [12]:
# get BRS data at issuer level
brs_carteras_issuerlevel = brs_carteras.drop_duplicates(subset=["aladdin_id"]).copy()
# drop row with empty aladdin_id
brs_carteras_issuerlevel = brs_carteras_issuerlevel[brs_carteras_issuerlevel["aladdin_id"].notnull()]

In [13]:
brs_carteras.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76092 entries, 0 to 76091
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   issuer_name               60605 non-null  object
 1   aladdin_id                76092 non-null  object
 2   security_description      76088 non-null  object
 3   portfolio_full_name       76088 non-null  object
 4   portfolio_id              76092 non-null  object
 5   str_001_s                 34743 non-null  object
 6   str_002_ec                34743 non-null  object
 7   str_003b_ec               33375 non-null  object
 8   str_003_ec                34743 non-null  object
 9   str_004_asec              33566 non-null  object
 10  str_004_asec_sust._bonds  824 non-null    object
 11  str_005_ec                34731 non-null  object
 12  str_006_sec               34731 non-null  object
 13  str_007_sect              34731 non-null  object
 14  str_008_sec           

In [14]:
# sri/ESG Team data
overrides = load_overrides(OVR_PATH)
# rename column brs_id to aladdin_id
overrides.rename(columns={"brs_id": "aladdin_id"}, inplace=True)

loading overrides columns ['clarityid', 'permid', 'brs_id', 'ovr_target', 'ovr_value']
2025-03-25 21:24:21,998 - utils.dataloaders - INFO - Loading overrides from: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\sri_data\overrides\overrides_db.xlsx


In [15]:
# Load portfolios & benchmarks dicts
(
    portfolio_dict,
    benchmark_dict,
) = load_portfolios(path_pb=BMK_PORTF_STR_PATH, path_committe=COMMITTEE_PATH)
logger.info(f"df_1 shape: {df_1.shape}, df_2 shape: {df_2.shape}")

2025-03-25 21:24:22,598 - utils.dataloaders - INFO - Loading portfolios from: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\aladdin_data\bmk_portf_str\202503_strategies_snt world_portf_bmks.xlsx
2025-03-25 21:24:39,136 - utils.dataloaders - INFO - Loading benchmarks from: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\aladdin_data\bmk_portf_str\202503_strategies_snt world_portf_bmks.xlsx
2025-03-25 21:25:36,921 - utils.dataloaders - INFO - Loading strategy data for portfolios from: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\sri_data\portfolios_committees\portfolio_lists.xlsx
2025-03-25 21:25:37,029 - utils.dataloaders - INFO - Loading strategy data for benchmarks from: C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\sri_data\portfolios_committees\portfolio_lists.xlsx
2025-03-25 21:25:37,220 - pre-ovr-analysis - INFO - df_1 shape: (69264, 20), df_2 shape: (69278, 20)


In [16]:
common_cols = set(df_1.columns) & set(df_2.columns) & set(brs_carteras_issuerlevel.columns)
common_cols = sorted(list(common_cols))

START PRE-OVR ANALISIS

In [17]:
# PREPARE DATA CLARITY LEVEL
(
    df_1, 
    df_2,
    new_issuers_clarity,
    out_issuer_clarity,
) = prepare_dataframes(df_1, df_2)

# log size of new and missing issuers
logger.info(f"Number of new issuers: {new_issuers_clarity.shape[0]}")
logger.info(f"Number of missing issuers: {out_issuer_clarity.shape[0]}")

2025-03-25 21:25:37,240 - pre-ovr-analysis - INFO - Setting index to permid.
2025-03-25 21:25:37,344 - pre-ovr-analysis - INFO - Number of common indexes: 69213
2025-03-25 21:25:37,413 - pre-ovr-analysis - INFO - Number of new issuers: 65
2025-03-25 21:25:37,415 - pre-ovr-analysis - INFO - Number of missing issuers: 51


In [18]:
# PREPARE DATA BRS LEVEL
(
    brs_df, 
    clarity_df,
    in_clarity_but_not_in_brs,
    in_brs_but_not_in_clarity,
) = prepare_dataframes(brs_carteras_issuerlevel, df_2, target_index="aladdin_id")

# log size of new and missing issuers
logger.info(f"Number issuers in clarity but not Aladdin: {in_clarity_but_not_in_brs.shape[0]}")
logger.info(f"Number issuers in Aladdin but not Clarity: {in_brs_but_not_in_clarity.shape[0]}")

2025-03-25 21:25:37,423 - pre-ovr-analysis - INFO - Setting index to aladdin_id.
2025-03-25 21:25:37,538 - pre-ovr-analysis - INFO - Number of common indexes: 2197
2025-03-25 21:25:37,605 - pre-ovr-analysis - INFO - Number issuers in clarity but not Aladdin: 67016
2025-03-25 21:25:37,607 - pre-ovr-analysis - INFO - Number issuers in Aladdin but not Clarity: 1079


In [19]:
# COMPARE DATA
logger.info("comparing clarity dataframes")
delta_clarity = compare_dataframes(df_1, df_2)
delta_clarity = check_new_exclusions(df_1, df_2, delta_clarity)
delta_clarity = check_new_inclusions(df_1, df_2, delta_clarity)
delta_clarity = finalize_delta(delta_clarity)
logger.info("checking impact compared to BRS data")
delta_brs = compare_dataframes(brs_df, clarity_df)
delta_brs = check_new_exclusions(brs_df, clarity_df, delta_brs, suffix_level="_brs")
delta_brs = check_new_inclusions(brs_df, clarity_df, delta_brs, suffix_level="_brs")
delta_brs = finalize_delta(delta_brs, target_index="aladdin_id")


2025-03-25 21:25:37,616 - pre-ovr-analysis - INFO - comparing clarity dataframes
2025-03-25 21:25:37,648 - pre-ovr-analysis - INFO - Comparing column: gp_esccp_22
2025-03-25 21:25:37,662 - pre-ovr-analysis - INFO - Comparing column: gp_esccp_25
2025-03-25 21:25:37,672 - pre-ovr-analysis - INFO - Comparing column: gp_esccp_30
2025-03-25 21:25:37,681 - pre-ovr-analysis - INFO - Comparing column: gp_essccp
2025-03-25 21:25:37,690 - pre-ovr-analysis - INFO - Comparing column: scs_001_sec
2025-03-25 21:25:37,700 - pre-ovr-analysis - INFO - Comparing column: scs_002_ec
2025-03-25 21:25:37,710 - pre-ovr-analysis - INFO - Comparing column: scs_003_sec
2025-03-25 21:25:37,720 - pre-ovr-analysis - INFO - Comparing column: str_001_s
2025-03-25 21:25:37,729 - pre-ovr-analysis - INFO - Comparing column: str_002_ec
2025-03-25 21:25:37,738 - pre-ovr-analysis - INFO - Comparing column: str_003_ec
2025-03-25 21:25:37,747 - pre-ovr-analysis - INFO - Comparing column: str_003b_ec
2025-03-25 21:25:37,758 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  delta[target_index] = delta[target_index].astype(str)


2025-03-25 21:25:45,575 - pre-ovr-analysis - INFO - Checking for new inclusions in column: str_005_ec
2025-03-25 21:25:45,577 - pre-ovr-analysis - INFO - Number of new inclusions in str_005_ec: 0
2025-03-25 21:25:45,579 - pre-ovr-analysis - INFO - Checking for new inclusions in column: str_006_sec
2025-03-25 21:25:45,580 - pre-ovr-analysis - INFO - Number of new inclusions in str_006_sec: 0
2025-03-25 21:25:45,581 - pre-ovr-analysis - INFO - Checking for new inclusions in column: str_007_sect
2025-03-25 21:25:45,583 - pre-ovr-analysis - INFO - Number of new inclusions in str_007_sect: 0
2025-03-25 21:25:45,583 - pre-ovr-analysis - INFO - Checking for new inclusions in column: str_008_sec
2025-03-25 21:25:45,586 - pre-ovr-analysis - INFO - Number of new inclusions in str_008_sec: 0
2025-03-25 21:25:45,749 - pre-ovr-analysis - INFO - Final delta shape: (2197, 23)


In [20]:
logger.info("Getting zombie analysis df")
zombie_df = zombie_killer()

2025-03-25 21:25:45,758 - pre-ovr-analysis - INFO - Getting zombie analysis df
2025-03-25 21:25:45,760 - utils.dataloaders - INFO - Loading Clarity data from: C:\Users\n740789\Documents\Projects_local\DataSets\DATAFEED\ficheros_tratados\2025\20250301_Equities_feed_IssuerLevel_sinOVR.csv
2025-03-25 21:25:46,196 - utils.dataloaders - INFO - Successfully loaded Clarity data from: C:\Users\n740789\Documents\Projects_local\DataSets\DATAFEED\ficheros_tratados\2025\20250301_Equities_feed_IssuerLevel_sinOVR.csv
2025-03-25 21:25:46,197 - utils.dataloaders - INFO - Loading portfolio_carteras data from C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\aladdin_data\bmk_portf_str\202503_strategies_snt world_portf_bmks.xlsx
2025-03-25 21:26:06,101 - utils.dataloaders - INFO - Cleaning columns and converting data types for portfolio_carteras
2025-03-25 21:26:06,102 - utils.dataloaders - INFO - Converting column 'aladdin_id' to string.
2025-03-25 21:26:06,105 - utils.dataloaders - I

In [24]:
# print first 10 k v in portfolios_dict
import json

filtered_entries = [(k, v) for k, v in portfolio_dict.items() if v.get("strategy_name")]

# Print the first 10 filtered entries
for k, v in filtered_entries[:10]:
    print(k, json.dumps(v, indent=2))

CL19778 {
  "aladdin_id": [
    "C05702",
    "C05702"
  ],
  "strategy_name": "str006"
}
CPE00035 {
  "aladdin_id": [
    "I66408",
    "E99416",
    "788350",
    "059456",
    "066050",
    "072730",
    "J54377",
    "R73574",
    "D43041",
    "D43041",
    "172967",
    "202597",
    "202597",
    "128005",
    "G37009",
    "R71411",
    "36827E",
    "C05702",
    "C05702",
    "C05702",
    "R44788",
    "R66333",
    "G32459",
    "J95157",
    "R79669",
    "C05332",
    "R74554",
    "H92117",
    "I03819",
    "R57886",
    "R94740",
    "R93558",
    "G88159",
    "C05702",
    "G68355"
  ],
  "strategy_name": "str003b"
}
CPE00169 {
  "aladdin_id": [
    "I45020",
    "G16611",
    "E94490",
    "K10973",
    "C05702",
    "C05702",
    "F79320",
    "G18629",
    "G36494",
    "G05261",
    "I69133",
    "G21293",
    "128009",
    "R57882",
    "H92117",
    "I03819",
    "G66335",
    "G34089"
  ],
  "strategy_name": "str003b"
}
CPE00264 {
  "aladdin_id": [
    "G06219

In [25]:
overrides.head()

Unnamed: 0,clarityid,permid,aladdin_id,ovr_target,ovr_value
0,150114,5000014265,R65372,str_001_s,OK
1,66251,4295884955,R63005,str_001_s,OK
2,262152,4297202523,R89955,str_001_s,OK
3,15657,4295869482,251525,str_001_s,FLAG
4,171531900,5044066787,G88795,str_001_s,FLAG


In [None]:
def override_dict(
        df:pd.DataFrame=overrides,
        id_col:str="aladdin_id",
        str_col:str="ovr_target",
        ovr_col:str="ovr_value",
        )

In [26]:
delta_brs.head()

Unnamed: 0,aladdin_id,isin,issuer_name,str_001_s,str_002_ec,str_003_ec,str_004_asec,str_005_ec,scs_001_sec,gp_essccp,...,str_007_sect,gp_esccp_22,gp_esccp_25,gp_esccp_30,str_008_sec,str_003b_ec,new_exclusion,exclusion_list_brs,new_inclusion,inclusion_list_brs
0,F79892,PL11BTS00015,11 Bit Studios SA,OK,OK,OK,OK,,,,...,,,OK,OK,OK,OK,False,[],False,[]
1,G70839,XS1571982468,2I Rete Gas SpA,OK,OK,OK,EXCLUDED,,,,...,EXCLUDED,,OK,OK,OK,OK,True,"[str_004_asec, str_007_sect]",False,[]
2,R58161,GB00B16PRC61,3i Group PLC,OK,OK,OK,OK,,,,...,,,OK,OK,OK,OK,True,[scs_003_sec],False,[]
3,604059,US88579YBF79,3M Co,FLAG,OK,FLAG,FLAG,,,,...,FLAG,,OK,OK,FLAG,OK,True,[str_006_sec],False,[]
4,J46523,BRRRRPDBS031,3R Petroleum Oleo e Gas SA,OK,OK,OK,EXCLUDED,,,,...,EXCLUDED,,OK,OK,OK,OK,True,"[str_004_asec, str_007_sect]",False,[]


In [22]:
# SAVE RESULTS
date_analysis = datetime.now().strftime("%Y%m%d")


NameError: name 'datetime' is not defined

In [None]:
# create dict of df and df name
dfs_dict = {
    "zombie_analysis": zombie_df,
    "preovr_analysis_brs": delta_brs,
    "preovr_analysis_clarity": delta_clarity,
}
# save to excel
save_excel(dfs_dict, OUTPUT_DIR, file_name="pre_ovr_analysis")
