In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
## Read in files to get clean base dataframe
directory = "Data"
df_list = []

for root, dirs, files in os.walk(directory):
    if "INFOTABLE.tsv" in files and "OTHERMANAGER2.tsv" in files and "OTHERMANAGER.tsv" in files:
        intotable_path = os.path.join(root, "INFOTABLE.tsv")
        manager1_path = os.path.join(root, "OTHERMANAGER.tsv")
        manager2_path = os.path.join(root, "OTHERMANAGER2.tsv")
        manager1_df = pd.read_csv(manager1_path, sep="\t", usecols=["ACCESSION_NUMBER", "NAME"])
        manager2_df = pd.read_csv(manager2_path, sep="\t", usecols=["ACCESSION_NUMBER", "NAME"])

        manager_df = pd.concat([manager1_df, manager2_df], ignore_index=True)
        manager_df.rename(columns={'NAME':'FUND'}, inplace=True)

        infotable_df = pd.read_csv(intotable_path, sep = "\t", 
                                   usecols=["ACCESSION_NUMBER","NAMEOFISSUER","TITLEOFCLASS",
                                            "CUSIP", "VALUE", "SSHPRNAMT"])
        # Add date
        sub_dir = os.path.basename(root)
        datetime = sub_dir.split("-")[1].split("_")[0]
        infotable_df.insert(0, "Datetime", datetime)
        infotable_df["Datetime"] = pd.to_datetime(infotable_df["Datetime"])
        
        merged_df = pd.merge(infotable_df, manager_df, on="ACCESSION_NUMBER", how="inner") #fund name non-optional
        merged_df.drop(columns=["ACCESSION_NUMBER"], inplace=True)
        df_list.append(merged_df)


df_full = pd.concat(df_list, ignore_index=True)

df_full.set_index(["FUND", "Datetime"], inplace=True)
## 

In [13]:
df_full

Unnamed: 0_level_0,Unnamed: 1_level_0,NAMEOFISSUER,TITLEOFCLASS,CUSIP,VALUE,SSHPRNAMT,RANK
FUND,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
METLIFE INC,2024-05-31,CREDICORP LTD,COM,G2519Y108,757896,5055,21823.0
METLIFE INC,2024-05-31,PAGSEGURO DIGITAL LTD,COM CL A,G68707101,119700,9599,39409.0
METLIFE INC,2024-05-31,XP INC,CL A,G98239109,48438,1858,42606.0
METLIFE INC,2024-05-31,INTERCORP FINL SVCS INC,SHS,P5626F128,1336645,60895,17103.0
METLIFE INC,2024-05-31,COPA HOLDINGS SA,CL A,P31076105,1680442,15807,15493.0
...,...,...,...,...,...,...,...
BANK VONTOBEL AG,2024-08-31,THERMO FISHER SCIENTIFIC INC,COM,883556102,207539,391,8885.0
Vontobel Swiss Financial Advisers AG,2024-08-31,THERMO FISHER SCIENTIFIC INC,COM,883556102,207539,391,8914.0
Bank Vontobel Europe AG,2024-08-31,THERMO FISHER SCIENTIFIC INC,COM,883556102,207539,391,8885.0
Vontobel Asset Management Ltd,2024-08-31,THERMO FISHER SCIENTIFIC INC,COM,883556102,207539,391,8885.0


In [6]:
# ADD RANK COLUMN
df_full['RANK'] = df_full.groupby(['FUND', 'Datetime'])['VALUE'].rank(ascending=False, method = 'dense')

In [8]:
date = '2024-08-31'

In [12]:
df_full.loc["METLIFE INC"].sort_values("RANK")

Unnamed: 0_level_0,NAMEOFISSUER,TITLEOFCLASS,CUSIP,VALUE,SSHPRNAMT,RANK
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-05-31,ISHARES TR,IBOXX INV CP ETF,464287242,921469348,8406800,1.0
2024-08-31,MICROSOFT CORP,COM,594918104,1040306071,2327567,1.0
2024-08-31,APPLE INC,COM,037833100,952286948,4521351,2.0
2024-05-31,ISHARES TR,IBOXX INV CP ETF,464287242,893382191,8473700,2.0
2024-08-31,NVIDIA CORP,COM,67066G104,945075811,7649958,3.0
...,...,...,...,...,...,...
2024-05-31,GUARDANT HEALTH INC,COM,40131M109,0,0,44113.0
2024-05-31,MULLEN AUTOMOTIVE INC,COM,62526P208,0,1,44113.0
2024-05-31,0,0,000000000,0,0,44113.0
2024-05-31,NATIONAL GEN HLDGS CORP,COM,636220303,0,0,44113.0


In [15]:
df_full['TotalHoldingsMarketValue'] = df_full.groupby(['FUND', 'Datetime'])['VALUE'].transform('sum')
df_full['Percentage'] = (df_full['VALUE'] / df_full['TotalHoldingsMarketValue']) * 100

In [21]:
df_full.loc["METLIFE INC"].sort_values("Percentage",ascending=False)

Unnamed: 0_level_0,NAMEOFISSUER,TITLEOFCLASS,CUSIP,VALUE,SSHPRNAMT,RANK,TotalHoldingsMarketValue,Percentage,Previous Percentage
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-08-31,MICROSOFT CORP,COM,594918104,1040306071,2327567,1.0,2.111874e+10,4.925985,0.037682
2024-08-31,APPLE INC,COM,037833100,952286948,4521351,2.0,2.111874e+10,4.509203,0.003477
2024-08-31,NVIDIA CORP,COM,67066G104,945075811,7649958,3.0,2.111874e+10,4.475057,0.002031
2024-08-31,AMAZON COM INC,COM,023135106,555798595,2876060,4.0,2.111874e+10,2.631779,0.001350
2024-08-31,SPDR S&P 500 ETF TR TR UNIT,TR UNIT,78462F103,506156511,966890,5.0,2.111874e+10,2.396717,0.000648
...,...,...,...,...,...,...,...,...,...
2024-05-31,MULLEN AUTOMOTIVE INC,COM,62526P208,0,1,44113.0,2.184377e+11,0.000000,0.000816
2024-08-31,ENCORE WIRE CORP,COM,292562105,0,0,2951.0,2.111874e+10,0.000000,0.002645
2024-05-31,GUARDANT HEALTH INC,COM,40131M109,0,0,44113.0,2.184377e+11,0.000000,0.000051
2024-08-31,C AND F FINANCIAL CORP,COM,12466Q104,0,0,2951.0,2.111874e+10,0.000000,0.000222


In [20]:
df_full['Previous Percentage'] = df_full.groupby('FUND')['Percentage'].shift(1)


In [29]:
df_full.index.value_counts()

FUND                                 Datetime  
PARAMETRIC PORTFOLIO ASSOCIATES LLC  2024-08-31    184147
EATON VANCE MANAGEMENT               2024-08-31    149761
MORGAN STANLEY & CO. LLC             2024-08-31    145843
ATLANTA CAPITAL MANAGEMENT CO L L C  2024-08-31    144173
Morgan Stanley Bank, N.A.            2024-08-31    138909
                                                    ...  
EDGAR LOMAX CO/VA                    2024-08-31         1
Oskie Capital Management, LLC        2024-08-31         1
                                     2024-05-31         1
OpenView Advisors, LLC               2024-08-31         1
 Burgundy Asset Management Ltd.      2024-08-31         1
Name: count, Length: 6862, dtype: int64