# 13F InfoTable with YearQuarter
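This notebook reads the Excel workbooks in `../data/extracted_13F_HR/blackrock` (path relative to the notebook), loads only the `InfoTable` sheet from each (falling back to a sheet named `Information Table` when `InfoTable` is absent), and adds a `YearQuarter` column derived from the `YYYYMMDD` date in the filename (e.g., `20240930.xlsx` → `2024Q3`).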


In [23]:
from pathlib import Path
import pandas as pd
import re

# Folder holding the extracted 13F-HR workbooks; the path is relative to the
# notebook's working directory.
DATA_DIR = Path('../data/extracted_13F_HR/blackrock')

# Sanity check before loading: does the folder resolve, and what do the first
# few workbook names look like?
print('Data dir exists:', DATA_DIR.exists())
print('Sample files:', [xlsx.name for xlsx in sorted(DATA_DIR.glob('*.xlsx'))[:3]])


Data dir exists: True
Sample files: ['20240930.xlsx', '20241231.xlsx', '20250331.xlsx']


In [24]:
def year_quarter_from_filename(name: str):
    """Derive a ``YYYYQn`` label from a filename containing a ``YYYYMMDD`` date.

    Parameters
    ----------
    name : str
        Filename such as ``'20240930.xlsx'``. The first run of eight digits is
        read as year (4), month (2), day (2).

    Returns
    -------
    str or None
        ``'2024Q3'``-style label, or ``None`` when the name holds no eight-digit
        run or the month part is not in 1..12.
    """
    m = re.search(r'(\d{4})(\d{2})(\d{2})', name)
    if not m:
        return None
    year = int(m.group(1))
    month = int(m.group(2))
    # Reject impossible months; otherwise '00' would yield Q0 and '13'+ Q5 or higher.
    if not 1 <= month <= 12:
        return None
    q = (month - 1) // 3 + 1
    return f'{year}Q{q}'

# Sheet names that may hold the information table, in order of preference.
INFO_SHEET_NAMES = ('InfoTable', 'Information Table')

files = sorted(DATA_DIR.glob('*.xlsx'))
frames = []
for f in files:
    yq = year_quarter_from_filename(f.name)
    if yq is None:
        # Keep the rows, but make the missing label visible instead of silent.
        print(f'Warning: no YYYYMMDD date found in {f.name}; YearQuarter will be empty')
    try:
        # Open the workbook once and choose the sheet by name, rather than
        # using exceptions (and a second full read) to probe for the fallback.
        with pd.ExcelFile(f, engine='openpyxl') as xls:
            sheet = next((s for s in INFO_SHEET_NAMES if s in xls.sheet_names), None)
            if sheet is None:
                print(f'Skipping {f.name}: none of {INFO_SHEET_NAMES} in sheets {xls.sheet_names}')
                continue
            df = xls.parse(sheet)
    except Exception as e:
        # Best-effort load: one unreadable workbook must not abort the whole run.
        print(f'Skipping {f.name}: {e}')
        continue
    df['YearQuarter'] = yq
    frames.append(df)

# A single concat at the end; an empty frame keeps downstream cells runnable
# when nothing could be loaded.
result = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print('Rows:', len(result))
result.head()


Rows: 199316


Unnamed: 0,issuer_name,class_title,cusip,value_usd_quarter_end,shares_or_principal,shares_type,discretion,other_manager_seq,vote_sole,vote_shared,vote_none,YearQuarter
0,1 800 FLOWERS COM INC,CL A,68243Q106,1769262,223110,SH,SOLE,2.0,124034,0,99076,2024Q3
1,1 800 FLOWERS COM INC,CL A,68243Q106,761,96,SH,SOLE,4.0,96,0,0,2024Q3
2,1 800 FLOWERS COM INC,CL A,68243Q106,2990,377,SH,SOLE,7.0,377,0,0,2024Q3
3,1 800 FLOWERS COM INC,CL A,68243Q106,793222,100028,SH,SOLE,18.0,74838,0,25190,2024Q3
4,1 800 FLOWERS COM INC,CL A,68243Q106,5000087,630528,SH,SOLE,19.0,630528,0,0,2024Q3


In [28]:
# Rows whose class title is exactly "ORD" (ordinary shares).
result.loc[result['class_title'] == "ORD"]

Unnamed: 0,issuer_name,class_title,cusip,value_usd_quarter_end,shares_or_principal,shares_type,discretion,other_manager_seq,vote_sole,vote_shared,vote_none,YearQuarter
2208,AMCOR PLC,ORD,G0250X107,18175892,1604227,SH,SOLE,2.0,747009,0,857218,2024Q3
2209,AMCOR PLC,ORD,G0250X107,19248140,1698865,SH,SOLE,4.0,470617,0,1228248,2024Q3
2210,AMCOR PLC,ORD,G0250X107,14588780,1287624,SH,SOLE,7.0,1179290,0,108334,2024Q3
2211,AMCOR PLC,ORD,G0250X107,501013,44220,SH,SOLE,13.0,44220,0,0,2024Q3
2212,AMCOR PLC,ORD,G0250X107,2076415,183267,SH,SOLE,15.0,183267,0,0,2024Q3
...,...,...,...,...,...,...,...,...,...,...,...,...
186606,RADWARE LTD,ORD,M81873107,242203,8227,SH,SOLE,34.0,8227,0,0,2025Q2
186607,RADWARE LTD,ORD,M81873107,174285,5920,SH,SOLE,40.0,5920,0,0,2025Q2
186608,RADWARE LTD,ORD,M81873107,348393,11834,SH,SOLE,43.0,11725,0,109,2025Q2
189871,SILICOM LTD,ORD,M84116108,256596,16591,SH,SOLE,2.0,16591,0,0,2025Q2


In [25]:
# Build JSON mapping of unique class_title -> definition
import json

def parse_series(title):
    """Extract a series designator (e.g. ``'A'`` from ``'PFD SER A'``) from a class title.

    The keyword (``SERIES``, ``SER`` or ``SR``) must start a word and be followed
    by a dot, whitespace, or a digit, so that words such as ``SERVICES`` or a bare
    ``SERIES`` do not yield a bogus designator. ``SR`` followed by a debt word
    (``SR NT`` = senior notes) is not treated as a series.

    Parameters
    ----------
    title : str
        Raw class title; matching is case-insensitive.

    Returns
    -------
    str or None
        Upper-cased designator, or ``None`` when no series marker is found.
    """
    m = re.search(
        r'\b(?:SERIES|SER|SR)'                              # keyword at a word start
        r'(?:\.\s*|\s+|(?=\d))'                             # separator, or a digit directly after
        r'(?!(?:NT|NTS|NOTE|NOTES|DEB|SUB|SECD|UNSECD)\b)'  # "SR NT" etc. means senior debt
        r'([A-Z0-9]+)',
        title,
        flags=re.I,
    )
    return m.group(1).upper() if m else None

def parse_class(title):
    """Extract a share-class letter (e.g. ``'A'`` from ``'CL A'``) from a class title.

    The keyword (``CLASS`` or ``CL``) must start a word and the captured letter
    must end one, so a bare ``CLASS`` (previously read as ``CL`` + ``A``) and
    words such as ``CLOSED`` no longer produce a class letter.

    Parameters
    ----------
    title : str
        Raw class title; matching is case-insensitive.

    Returns
    -------
    str or None
        Upper-cased single class letter, or ``None`` when none is found.
    """
    m = re.search(r'\b(?:CLASS|CL)\s*([A-Z])\b', title, flags=re.I)
    return m.group(1).upper() if m else None

# Token tables used by pick_base. Entries may be a single word or a short
# multi-word phrase; both are matched on whole-word boundaries.
COMMON_TOKENS = {
    'COM': 'Common Stock',
    'COMMON': 'Common Stock',
    'ORD': 'Ordinary Shares (non-U.S. common)',
    'ORDINARY': 'Ordinary Shares (non-U.S. common)',
    'CAP STK': 'Capital Stock (equity)',
}
PREF_TOKENS = {'PFD','PREF','PREFERRED','DEP SHS','DEPOSITARY SHS'}
DEBT_TOKENS = {'NOTE','NOTES','NT','BOND','DEB','DEBENTURE','SR NT','SNR NT','SENIOR NOTES'}
DR_TOKENS = {'ADR','ADS','DEPOSITARY','DEP SHS','DEPOSITARY SHS','GDR','CDI'}
UNIT_TOKENS = {'UNIT','UNITS'}
WARRANT_TOKENS = {'WARRANT','WTS'}
RIGHT_TOKENS = {'RIGHT','RIGHTS','RT','RTS'}
ETF_TOKENS = {'ETF','ETN','INDEX','TRACKER'}
TRUST_TOKENS = {'TR','TRUST','BEN INT','BENEFICIAL','FUND'}
LP_TOKENS = {'LP','L P'}
REIT_TOKENS = {'REIT'}

def pick_base(title):
    """Classify a class title into a coarse security-type description.

    The title is upper-cased and reduced to space-separated alphanumeric words;
    each token table is then tested in a fixed priority order and the first hit
    wins. Tokens match whole words, and multi-word tokens (``'CAP STK'``,
    ``'DEP SHS'``, ``'BEN INT'``, ...) match as consecutive words. Previously
    the title was split into single words only, so those phrases never matched.

    Because depositary tokens are tested before preferred tokens, ``'DEP SHS'``
    now classifies the same way the spelled-out ``'DEPOSITARY SHS'`` already did.

    Parameters
    ----------
    title : str
        Raw class title.

    Returns
    -------
    str
        Description of the security type; ``'Security (unspecified class)'``
        when no token matches.
    """
    t = title.upper()
    # Normalise to ' WORD WORD ... ' so that ' TOKEN ' containment is an exact
    # whole-word (or whole-phrase) test regardless of the original punctuation.
    padded = ' ' + ' '.join(w for w in re.split(r'[^A-Z0-9]+', t) if w) + ' '

    def has_any(tokens):
        return any(f' {tok} ' in padded for tok in tokens)

    if has_any(ETF_TOKENS):
        return 'Exchange-Traded Fund/Note'
    # Titles of the form '*W EXP mm/dd/yyy' are warrants with an expiry date.
    if t.startswith('*W EXP') or has_any(WARRANT_TOKENS):
        return 'Warrant (right to buy equity before expiry)'
    if has_any(UNIT_TOKENS):
        return 'Unit (bundle of securities, often share + warrant)'
    if has_any(DR_TOKENS):
        return 'Depositary Receipts/Shares (US-traded representation of foreign equity)'
    if has_any(DEBT_TOKENS):
        return 'Corporate Debt Security (notes/debentures/bonds)'
    if has_any(PREF_TOKENS):
        return 'Preferred Stock (may be depositary shares)'
    if has_any(TRUST_TOKENS):
        return 'Trust/Fund/Beneficial Interest units'
    if has_any(REIT_TOKENS):
        return 'REIT equity/units'
    if has_any(LP_TOKENS):
        return 'Limited Partnership units'
    if has_any(COMMON_TOKENS):
        return 'Common/Ordinary Equity'
    return 'Security (unspecified class)'

def describe(title):
    """Build a comma-separated, human-readable description of a class title.

    The description starts with the base type from ``pick_base`` and is followed
    by any class letter (``parse_class``), series (``parse_series``) and feature
    flags found in the title.

    Feature flags are matched at word starts instead of as raw substrings, so
    that e.g. ``DEMAND`` is not read as mandatory, ``RENEWABLE`` as new, or
    ``CVR`` as convertible. Prefix matching keeps truncated words working
    (``CONV``, ``CUMUL``, ``PERP``). "Non-cumulative" requires ``NON`` directly
    in front of ``CUM`` (``NON CUM``, ``NON-CUM``, ``NONCUM``) rather than any
    ``NON`` anywhere in the title.

    Parameters
    ----------
    title : str
        Raw class title.

    Returns
    -------
    str
        Parts joined with ``', '``; always contains at least the base type.
    """
    base = pick_base(title)
    parts = [base]
    cls = parse_class(title)
    if cls:
        parts.append(f'Class {cls}')
    ser = parse_series(title)
    if ser:
        parts.append(f'Series {ser}')
    t = title.upper()

    def has(pattern):
        return re.search(pattern, t) is not None

    if has(r'\bCV\b|\bCONV'):
        parts.append('Convertible')
    if has(r'\bMAND'):
        parts.append('Mandatory convertible')
    non_cumulative = has(r'\bNON[\s\-]*CUM')
    if has(r'\bCUM') and not non_cumulative:
        parts.append('Cumulative')
    if non_cumulative:
        parts.append('Non-cumulative')
    if has(r'\bPERP'):
        parts.append('Perpetual')
    if has(r'\bNEW\b'):
        parts.append('New issue / post corporate action')
    if has(r'\bDEP(?:OSITARY)? +SHS\b'):
        parts.append('Depositary shares (fractional interest)')
    return ', '.join(parts)

# Distinct, whitespace-trimmed class titles, sorted so the JSON output is stable.
unique_titles = sorted({str(v).strip() for v in result['class_title'].dropna()})
mapping = {u: describe(u) for u in unique_titles}

# NOTE(review): this path is relative to the current working directory, whereas
# the input workbooks are read from '../data/...'. Confirm the file is meant to
# land next to the notebook rather than under '../data/metadata'.
out_dir = Path('data/metadata'); out_dir.mkdir(parents=True, exist_ok=True)
out_path = out_dir / 'class_title_definitions.json'
# ensure_ascii=False can emit non-ASCII characters, so pin the encoding instead
# of relying on the platform default. The handle is not named `f` so the loader
# cell's loop variable is not overwritten.
with open(out_path, 'w', encoding='utf-8') as fh:
    json.dump(mapping, fh, indent=2, ensure_ascii=False)
print('Wrote', out_path, 'entries:', len(mapping))
list(mapping.items())[:10]


Wrote data/metadata/class_title_definitions.json entries: 1255


[('*W EXP 02/10/202', 'Warrant (right to buy equity before expiry)'),
 ('*W EXP 02/27/202', 'Warrant (right to buy equity before expiry)'),
 ('*W EXP 04/02/202', 'Warrant (right to buy equity before expiry)'),
 ('*W EXP 04/25/202', 'Warrant (right to buy equity before expiry)'),
 ('*W EXP 04/30/203', 'Warrant (right to buy equity before expiry)'),
 ('*W EXP 05/07/202', 'Warrant (right to buy equity before expiry)'),
 ('*W EXP 06/05/203', 'Warrant (right to buy equity before expiry)'),
 ('*W EXP 06/08/202', 'Warrant (right to buy equity before expiry)'),
 ('*W EXP 07/01/202', 'Warrant (right to buy equity before expiry)'),
 ('*W EXP 07/07/202', 'Warrant (right to buy equity before expiry)')]