In [22]:
import pandas as pd
import numpy as np

import sys
import os

# Make the project root (one level up from notebooks/) importable so that
# `scripts.*` resolves. Guarded so that re-running this cell does not keep
# appending the same entry to sys.path.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [23]:
from scripts.utils.dataloaders import (
    load_clarity_data,
    load_crossreference,
    load_overrides,
    load_aladdin_data
)
from scripts.utils.config import get_config
from scripts.utils.clarity_data_quality_control_functions import get_issuer_level_df

In [24]:
# Shared configuration for this datafeed-explorer notebook: non-interactive,
# automatic date detection off, date fixed to "202505".
config = get_config(
    "notebook-datafeeds-explorer",
    interactive=False,
    auto_date=False,
    fixed_date="202505",
)

# Logger and date values exposed by the config.
logger, DATE, YEAR, DATE_PREV = (
    config[key] for key in ("logger", "DATE", "YEAR", "DATE_PREV")
)

# Directories exposed by the config.
REPO_DIR, DATAFEED_DIR, SRI_DATA_DIR = (
    config[key] for key in ("REPO_DIR", "DATAFEED_DIR", "SRI_DATA_DIR")
)

# Input file locations, all taken from the config's "paths" mapping.
paths = config["paths"]
DF_PREV_PATH = paths["PRE_DF_WOVR_PATH"]
DF_NEW_PATH = paths["CURRENT_DF_WOUTOVR_PATH"]
CROSSREFERENCE_PATH = paths["CROSSREFERENCE_PATH"]
BMK_PORTF_STR_PATH = paths["BMK_PORTF_STR_PATH"]
OVR_PATH = paths["OVR_PATH"]

# Disabled in this notebook (committee path and output file are not used here):
# COMMITTEE_PATH = paths["COMMITTEE_PATH"]
# OUTPUT_DIR = config["OUTPUT_DIR"]
# OUTPUT_FILE = OUTPUT_DIR / f"{DATE}_pre_ovr_analysis.xlsx"

In [40]:
# Issuers under investigation, identified by permid (kept as strings).
target_permid = [
    "5073622246",
    "4295875200",
    "4296393129",
    "4296978549",
    "5041079662",
]

# Columns to keep when slicing the Aladdin (BRS) frames.
# NOTE: the name is misspelled ("targat") but later cells reference it as-is.
targat_cols_brs = ["aladdin_id", "issuer_name"]

# Columns to keep when slicing the datafeed: permid plus the BRS columns.
target_cols = ["permid"] + targat_cols_brs

# Issuers under investigation, identified by Aladdin id. These stay strings
# because several have leading zeros or letters.
target_aladdin_id = [
    "000375", "003001", "007699", "010199", "055262",
    "059456", "072730", "M58534", "R48483", "F05671",
]

In [None]:
# Load the Clarity datafeed found at DF_NEW_PATH.
datafeed = load_clarity_data(DF_NEW_PATH)

# Normalise permid to text so it compares equal to the string ids used in the
# filters and in the crossreference merge.
# - Plain column assignment replaces the column (and its dtype). The previous
#   `datafeed.loc[:, "permid"] = ...` form is an in-place set, which pandas
#   deprecates when the new values do not fit the existing dtype.
# - Missing permids stay missing instead of becoming the literal string "nan",
#   which could otherwise be matched as if it were a real identifier.
raw_permid = datafeed["permid"]
datafeed["permid"] = raw_permid.astype(str).where(raw_permid.notna())

In [None]:
# Show which of the two identifier columns the datafeed currently has.
for column_name in datafeed.columns:
    if column_name not in ("aladdin_id", "permid"):
        continue
    print(column_name)

In [None]:
# Load the two Aladdin (BRS) datasets from the same source path and pass each
# through get_issuer_level_df with "aladdin_id" as the key argument.
brs_carteras, brs_benchmarks = (
    get_issuer_level_df(load_aladdin_data(BMK_PORTF_STR_PATH, dataset_key), "aladdin_id")
    for dataset_key in ("portfolio_carteras", "portfolio_benchmarks")
)

In [28]:
# Cast aladdin_id to str on both BRS frames so it can be matched against the
# string ids in target_aladdin_id.
# Plain column assignment replaces the column together with its dtype; the
# previous `df.loc[:, "aladdin_id"] = ...` form is an in-place set that pandas
# deprecates when the new values do not fit the existing dtype.
for df in [brs_carteras, brs_benchmarks]:
    df["aladdin_id"] = df["aladdin_id"].astype(str)

In [29]:
# Keep only the target issuers, and only the BRS columns of interest, in each frame.
carteras_is_target = brs_carteras["aladdin_id"].isin(target_aladdin_id)
benchmarks_is_target = brs_benchmarks["aladdin_id"].isin(target_aladdin_id)

brs_carteras_filtered = brs_carteras.loc[carteras_is_target, targat_cols_brs]
brs_benchmarks_filtered = brs_benchmarks.loc[benchmarks_is_target, targat_cols_brs]

In [None]:
# Print a labelled summary of each filtered BRS frame.
# DataFrame.info() writes its report to stdout and returns None, so it must be
# called as a statement. Embedding it in the f-string printed the report first
# and then a header ending in "None".
for df_name, df in zip(["carteras", "benchmarks"], [brs_carteras_filtered, brs_benchmarks_filtered]):
    print(f"Dataframe: {df_name}'s columns:")
    df.info()
    print("\n")

In [None]:
# Load the Aladdin <-> Clarity crossreference found at CROSSREFERENCE_PATH.
raw_crossreference = load_crossreference(CROSSREFERENCE_PATH)

# Normalise both identifier columns to text.
# - Plain column assignment replaces the column (and its dtype). The previous
#   `.loc[:, col] = ...` form is an in-place set that pandas deprecates when
#   the new values do not fit the existing dtype.
# - Missing ids are kept as missing. A bare astype(str) turns them into the
#   literal "nan", which a later dropna(subset=["permid"]) cannot remove.
# NOTE(review): if the loader returns an id column as float, str() produces
# values like "123.0" -- confirm the loader's dtypes.
for id_column in ("aladdin_id", "permid"):
    raw_ids = raw_crossreference[id_column]
    raw_crossreference[id_column] = raw_ids.astype(str).where(raw_ids.notna())

In [None]:
# Slice the crossreference down to the target Aladdin ids and summarise it.
is_target_issuer = raw_crossreference["aladdin_id"].isin(target_aladdin_id)
cross_filtered = raw_crossreference.loc[is_target_issuer]
cross_filtered.info()

In [None]:
# Preview the first rows of the crossreference slice for the target Aladdin ids.
cross_filtered.head()

In [45]:
# NOTE(review): absolute, user-specific path -- this cell only runs on the
# original author's machine. Consider deriving it from the config paths.
cross_alddin = r"C:\Users\n740789\Documents\clarity_data_quality_controls\excel_books\aladdin_data\crossreference\Aladdin_Clarity_Issuers_20250501_aladdin.csv"

# Read every column as text. With inferred dtypes, identifiers such as
# "000375" can be parsed as integers, losing their leading zeros and never
# matching the string ids in target_aladdin_id. Empty cells remain missing.
cross_mayo = pd.read_csv(cross_alddin, dtype=str)

In [46]:
# Lower-case every column label, then map the CSV's identifier headers onto
# the names used in the rest of the notebook.
cross_mayo = cross_mayo.rename(columns=str.lower).rename(
    columns={"aladdin_issuer": "aladdin_id", "clarity_ai": "permid"}
)

In [47]:
# Rows of cross_mayo whose aladdin_id is one of the target issuers.
mayo_is_target = cross_mayo["aladdin_id"].isin(target_aladdin_id)
cross_mayo_filtered = cross_mayo.loc[mayo_is_target]

In [None]:
# Display the cross_mayo rows that matched target_aladdin_id.
cross_mayo_filtered

In [None]:
# Preview the first rows of the full cross_mayo table.
cross_mayo.head()

In [14]:
# One row per permid (first occurrence wins), then discard rows with no permid.
# NOTE(review): dropna only removes real missing values. If permid was cast
# with a bare astype(str) beforehand, missing ids are the literal "nan" and
# survive this step.
one_row_per_permid = raw_crossreference.drop_duplicates(subset=["permid"])
crossreference = one_row_per_permid.dropna(subset=["permid"])


In [None]:
# Summarise the deduplicated crossreference (columns, dtypes, non-null counts).
crossreference.info()

In [16]:
# Attach the Aladdin id to every datafeed row through its permid. The left
# join keeps all datafeed rows; unmatched rows get a missing aladdin_id.
# Any existing "aladdin_id" column is dropped first so the cell can be re-run.
# Otherwise a second run produces aladdin_id_x / aladdin_id_y and the cast
# below fails with a KeyError.
datafeed = datafeed.drop(columns="aladdin_id", errors="ignore").merge(
    crossreference[["aladdin_id", "permid"]],
    how="left",
    on="permid",
)

# Cast matched ids to str with a plain column assignment. The previous
# `.loc[:, col] = ...` form is an in-place set that pandas deprecates on a
# dtype change. Unmatched rows stay missing instead of becoming "nan".
merged_ids = datafeed["aladdin_id"]
datafeed["aladdin_id"] = merged_ids.astype(str).where(merged_ids.notna())

In [None]:
# Print the names of the first five datafeed columns.
for leading_column in datafeed.columns[:5]:
    print(leading_column)

In [None]:
# Confirm which identifier columns the datafeed has after the merge.
id_columns_present = [col for col in datafeed.columns if col in ("aladdin_id", "permid")]
for col in id_columns_present:
    print(col)

In [None]:
# List every datafeed column whose name contains "str_00".
str_00_columns = (col for col in datafeed.columns if "str_00" in col)
for col in str_00_columns:
    print(col)

In [20]:
# Datafeed rows that match a target issuer by either identifier, reduced to
# the identifier and name columns.
matches_aladdin_id = datafeed["aladdin_id"].isin(target_aladdin_id)
matches_permid = datafeed["permid"].isin(target_permid)
filtered_df = datafeed.loc[matches_aladdin_id | matches_permid, target_cols]

In [None]:
# Summarise the matched datafeed rows (columns, dtypes, non-null counts).
filtered_df.info()