In [0]:
# %sql
# -- Drop the table if it exists (the name must match `bronze_table` in the next cell)
# DROP TABLE IF EXISTS stocks_ai.stocks_holders_data.stock_institutional_holders;

# -- Drop the schema if it exists
# DROP SCHEMA IF EXISTS stocks_ai.stocks_holders_data;
!pip install yfinance



In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, FloatType, LongType, TimestampType
from datetime import datetime
import yfinance as yf
import pandas as pd

spark = SparkSession.builder.getOrCreate()

# Fully qualified name of the Delta table that this cell overwrites on every run.
bronze_table = "stocks_ai.stocks_holders_data.stock_institutional_holders"

# Create schema if not exists
spark.sql("CREATE SCHEMA IF NOT EXISTS stocks_ai.stocks_holders_data")

# Target schema of the bronze table. The pandas data is aligned to these column
# names (in this order) before it is handed to Spark.
schema = StructType([
    StructField("Date_Reported", TimestampType(), True),
    StructField("Holder", StringType(), True),
    StructField("pctHeld", FloatType(), True),
    StructField("Shares", LongType(), True),
    StructField("Value", LongType(), True),
    StructField("pctChange", FloatType(), True),
    StructField("ticker", StringType(), True)  # add ticker for multi-ticker support
])
expected_columns = [field.name for field in schema.fields]

stock_names_df = spark.sql("SELECT * FROM stocks_ai.stocks_name_ticker.stock_names")

# Only the ticker column is needed on the driver, so project before collecting.
raw_tickers = [row["ticker"] for row in stock_names_df.select("ticker").collect()]

# Normalise symbols (trim + upper-case), skip NULL/blank entries and de-duplicate
# while keeping first-seen order. De-duplication happens on the normalised symbol
# because that is the value stored in the `ticker` column; a repeated symbol would
# otherwise be fetched twice and written as duplicate rows.
stock_list = list(dict.fromkeys(
    symbol.strip().upper()
    for symbol in raw_tickers
    if symbol and symbol.strip()
))


all_data = []

for ticker in stock_list:
    holders = yf.Ticker(ticker).institutional_holders

    # Nothing to record for this ticker.
    if holders is None or holders.empty:
        continue

    # Build a new frame instead of adding a column to the object returned by
    # yfinance, and normalise header spelling so that names containing spaces
    # line up with the underscore-style names used in `schema`.
    holders = (
        holders
        .rename(columns=lambda name: str(name).strip().replace(" ", "_"))
        .assign(ticker=ticker)
    )

    # createDataFrame(pandas_df, schema=...) pairs columns with schema fields by
    # position, so a missing or reordered upstream column would either fail with
    # an opaque error or land values in the wrong field. Fail loudly instead.
    missing = [name for name in expected_columns if name not in holders.columns]
    if missing:
        raise ValueError(
            f"institutional_holders for {ticker} is missing expected columns {missing}; "
            f"got {list(holders.columns)}"
        )

    # Select by name, in schema order; any extra upstream columns are dropped.
    all_data.append(holders[expected_columns])


if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Convert to Spark DF (columns are already in schema order)
    spark_df = spark.createDataFrame(combined_df, schema=schema)

    # Overwrite the table with the freshly fetched snapshot
    spark_df.write.format("delta").mode("overwrite").saveAsTable(bronze_table)
    print("Data written successfully.")
else:
    # Leave the existing table untouched when nothing was fetched.
    print("No institutional holder data to write.")


Data written successfully.


Date_Reported,Holder,pctHeld,Shares,Value,pctChange,ticker
2025-03-31T00:00:00.000Z,Vanguard Group Inc,0.0938,1400790809,286370238130,0.0036000002,AAPL
2025-03-31T00:00:00.000Z,Blackrock Inc.,0.0763,1140202870,233097022982,0.0149,AAPL
2025-03-31T00:00:00.000Z,State Street Corporation,0.0399,596025766,121848344124,0.00090000004,AAPL
2025-03-31T00:00:00.000Z,"Geode Capital Management, LLC",0.0234,349807156,71512718330,0.028299998,AAPL
2025-03-31T00:00:00.000Z,"FMR, LLC",0.022,328100180,67075059369,-0.0396,AAPL
2025-03-31T00:00:00.000Z,"Berkshire Hathaway, Inc",0.0201,300000000,61330407714,0.0,AAPL
2025-03-31T00:00:00.000Z,Morgan Stanley,0.0162,241221800,49314104479,0.0124,AAPL
2025-03-31T00:00:00.000Z,Price (T.Rowe) Associates Inc,0.014400001,214703055,43892753002,-0.024600001,AAPL
2025-03-31T00:00:00.000Z,JPMORGAN CHASE & CO,0.013300001,198745598,40630495189,0.086,AAPL
2024-12-31T00:00:00.000Z,NORGES BANK,0.0125,187160162,38262030144,0.0542,AAPL
