In [2]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup


In [3]:
def parse_13f_xml(xml_url, user_agent="Your Name (your.email@example.com)", timeout=30):
    """Download a 13F information-table XML file and return its rows as a DataFrame.

    Parameters
    ----------
    xml_url : str
        URL of the information-table XML document.
    user_agent : str, optional
        Value sent in the ``User-Agent`` request header. The default is the
        placeholder this notebook originally hard-coded; pass a real contact
        string when fetching from sec.gov.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Without it a stalled connection would
        block the kernel indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per ``infoTable`` element found directly under the document
        root. Every value is the raw element text (``""`` when the tag is
        missing). The six columns are always present, even when the document
        contains no ``infoTable`` elements.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    xml.etree.ElementTree.ParseError
        If the response body is not well-formed XML.
    """
    # SEC 13F XML namespace
    ns = {'ns': 'http://www.sec.gov/edgar/document/thirteenf/informationtable'}

    # Output column -> element path, relative to each <infoTable>.
    # NOTE(review): the key says "$1000s", but the figures in this notebook's
    # stored output look like whole dollars (458035497 for 12719675 shares).
    # The key is kept unchanged because later cells index by it -- confirm the
    # unit against the Form 13F instructions for the filing period.
    field_paths = {
        "nameOfIssuer": "ns:nameOfIssuer",
        "cusip": "ns:cusip",
        "value ($1000s)": "ns:value",
        "shares": "ns:shrsOrPrnAmt/ns:sshPrnamt",
        "investmentDiscretion": "ns:investmentDiscretion",
        "votingAuthority (Sole)": "ns:votingAuthority/ns:Sole",
    }

    response = requests.get(
        xml_url,
        headers={"User-Agent": user_agent},
        timeout=timeout,
    )
    response.raise_for_status()

    root = ET.fromstring(response.content)

    data = [
        {
            column: info.findtext(path, default="", namespaces=ns)
            for column, path in field_paths.items()
        }
        for info in root.findall("ns:infoTable", ns)
    ]

    # Pin the column list so an empty filing still yields the expected schema
    # instead of a column-less frame that breaks df["..."] lookups downstream.
    return pd.DataFrame(data, columns=list(field_paths))


In [4]:
# Information-table XML for the filing analysed in this notebook.
xml_url = "https://www.sec.gov/Archives/edgar/data/1067983/000095012325002701/39042.xml"
df = parse_13f_xml(xml_url)

# Clean up types before previewing so the preview and every later cell see
# numeric columns. The parser returns raw strings; anything non-numeric
# (including "" for a missing tag) becomes NaN.
df["value ($1000s)"] = pd.to_numeric(df["value ($1000s)"], errors="coerce")
df["shares"] = pd.to_numeric(df["shares"], errors="coerce")

# Preview
print("✅ Berkshire Hathaway Holdings:")
print(df.head())


✅ Berkshire Hathaway Holdings:
    nameOfIssuer      cusip value ($1000s)    shares investmentDiscretion  \
0  ALLY FINL INC  02005N100      458035497  12719675                 DFND   
1  ALLY FINL INC  02005N100      100967539   2803875                 DFND   
2  ALLY FINL INC  02005N100      152257482   4228200                 DFND   
3  ALLY FINL INC  02005N100      112963370   3137000                 DFND   
4  ALLY FINL INC  02005N100      174153363   4836250                 DFND   

  votingAuthority (Sole)  
0               12719675  
1                2803875  
2                4228200  
3                3137000  
4                4836250  


In [5]:
# Top 10 rows by reported value.
# Each row is a single information-table line, so one issuer can appear more
# than once (the stored output lists APPLE INC three times). Group by
# issuer/cusip and sum first if per-company totals are what you want.
top = df.sort_values(by="value ($1000s)", ascending=False).head(10)
print("\n🏆 Top 10 Holdings:")
print(top[["nameOfIssuer", "value ($1000s)", "shares"]])



🏆 Top 10 Holdings:
             nameOfIssuer  value ($1000s)     shares
9     AMERICAN EXPRESS CO     44239827546  149061045
20              APPLE INC     38250906745  152747012
58           COCA COLA CO     17602317108  282722729
17              APPLE INC     15411595055   61542988
34         BANK AMER CORP     13659660000  310800000
94   OCCIDENTAL PETE CORP     13053055436  264178414
68         KRAFT HEINZ CO     10000245261  325634818
48       CHEVRON CORP NEW      8919178401   61579525
14              APPLE INC      8695584080   34724000
111         CHUBB LIMITED      7469434519   27033784
