In [1]:
pip install yfinance

Note: you may need to restart the kernel to use updated packages.


In [2]:
import yfinance as yf
import pandas as pd

# ---------------------------------------------------
# 1. Date Range
# ---------------------------------------------------
# Sample window handed to the download step as its start/end arguments,
# written as "YYYY-MM-DD" strings.
start, end = "2019-01-01", "2025-11-30"

# ---------------------------------------------------
# 2. Sub-Industry Classification
# ---------------------------------------------------
# Single source of truth for the universe: every symbol is written exactly
# once, next to its label, so the download list can never drift away from the
# classification (a ticker missing here would get a NaN SubIndustry later).
subindustry_map = {
    # Theater Chains
    "AMC":  "Theater",
    "CNK":  "Theater",
    "IMAX": "Theater",

    # Legacy Studios / Media
    "DIS":   "Studio",
    "CMCSA": "Studio",
    "PSKY":  "Studio",
    "WBD":   "Studio",
    "FOXA":  "Studio",

    # Diversified (Gaming/Electronics/Music/Media; not movie-driven)
    "SONY": "Diversified",

    # Streaming Pure-Plays
    "NFLX": "Streamer",
    "ROKU": "Streamer",

    # Benchmark
    "^GSPC": "Benchmark",
}

# ---------------------------------------------------
# 3. Tickers to Download
# ---------------------------------------------------
# Derived from the mapping. Dicts preserve insertion order, so the symbols
# come out in the order they are written above.
tickers = list(subindustry_map)

# ---------------------------------------------------
# 4. Download from Yahoo Finance
# ---------------------------------------------------
# One batched request for every symbol in `tickers` between `start` and `end`.
download_options = dict(
    start=start,
    end=end,
    group_by="ticker",   # the tidy step selects one ticker at a time via raw[<ticker>]
    auto_adjust=False,   # later steps read the "Adj Close" column
    progress=False,
)
raw = yf.download(tickers, **download_options)

# ---------------------------------------------------
# 5. Convert to Tidy Long Format
# ---------------------------------------------------
# For each symbol, take its sub-frame out of the download, tag every row with
# the symbol and carry the index over into a "Date" column (the index itself
# is discarded by ignore_index=True when the pieces are stacked).
frames = [
    raw[symbol].assign(Ticker=symbol, Date=raw[symbol].index)
    for symbol in tickers
]
df = pd.concat(frames, ignore_index=True)

# Standardize column names: only "Adj Close" needs changing (drop the space);
# Open/High/Low/Close/Volume keep their names.
df = df.rename(columns={"Adj Close": "Adj_Close"})

# ---------------------------------------------------
# 6. Compute Metrics (returns, vol, drawdown)
# ---------------------------------------------------
# Order rows by ticker, then date, and attach the sub-industry label.
df = df.sort_values(by=["Ticker", "Date"])
df["SubIndustry"] = df["Ticker"].map(subindustry_map)

# Daily returns: percent change of Adj_Close within each ticker, with no
# filling of missing prices beforehand (fill_method=None).
df["return"] = df.groupby("Ticker")["Adj_Close"].pct_change(fill_method=None)

# Rolling 21-day volatility: standard deviation of `return` over a 21-row
# window per ticker. The grouped result carries Ticker as an extra outer index
# level; dropping it lets the values align back onto df's own index.
rolling_std = df.groupby("Ticker")["return"].rolling(window=21).std()
df["vol_21d"] = rolling_std.reset_index(level=0, drop=True)

# Running peak and drawdown. Despite the column name, "rolling_max" is a
# cumulative max (cummax) per ticker, not a fixed-window one.
running_peak = df.groupby("Ticker")["Adj_Close"].cummax()
df["rolling_max"] = running_peak
df["drawdown"] = df["Adj_Close"] / running_peak - 1

# ---------------------------------------------------
# 7. Save Final CSV
# ---------------------------------------------------
# index=False: the row index is not written to the file.
df.to_csv("../data/movie_media_clean.csv", index=False)

In [3]:
import yfinance as yf
import pandas as pd

# 1. Tickers + sector names
# The mapping is the single source of truth: each symbol is written once with
# its display name, so the download list cannot fall out of sync with it.
sector_map = {
    "XLK": "Technology",
    "XLE": "Energy",
    "XLF": "Financials",
    "XLV": "Health Care",
    "XLP": "Consumer Staples",
    "XLI": "Industrials",
    "^GSPC": "S&P 500",
}
# Derived list; dict insertion order keeps the symbols in the order above.
sector_etfs = list(sector_map)

# 2. Download data
# One batched request for all symbols in `sector_etfs`.
download_options = {
    "start": "2019-01-01",
    "end": "2025-11-30",
    "group_by": "ticker",   # the tidy step selects raw[<ticker>]
    "auto_adjust": False,   # the metrics below read the "Adj Close" column
    "progress": False,
}
raw = yf.download(sector_etfs, **download_options)

# 3. Tidy to long format
# One sub-frame per symbol, tagged with the symbol and with the index copied
# into a "Date" column, then stacked with a fresh 0..n-1 index.
frames = [
    raw[symbol].assign(Ticker=symbol, Date=raw[symbol].index)
    for symbol in sector_etfs
]
df = pd.concat(frames, ignore_index=True)

# 4. Add sector + metrics
# NOTE: unlike movie_media_clean.csv, this output keeps the original
# "Adj Close" column name (with a space).
df["Sector"] = df["Ticker"].map(sector_map)

# Daily returns per ticker. fill_method=None is passed explicitly: the legacy
# default forward-fills missing prices before differencing and is deprecated
# in pandas; this also matches the movie/media cell's return calculation.
df["return"] = df.groupby("Ticker")["Adj Close"].pct_change(fill_method=None)

# Standard deviation of `return` over a 21-row window per ticker. The grouped
# result has Ticker as an extra outer index level; dropping it aligns the
# values back onto df's index.
df["vol_21d"] = (
    df.groupby("Ticker")["return"]
      .rolling(21)
      .std()
      .reset_index(0, drop=True)
)

# Running (cumulative) max per ticker and the drawdown relative to it.
df["rolling_max"] = df.groupby("Ticker")["Adj Close"].cummax()
df["drawdown"] = df["Adj Close"] / df["rolling_max"] - 1

# 5. Save
df.to_csv("../data/sector_etfs_clean.csv", index=False)