# **Top Canadian Entertainment Stocks**

### **Core Companies (primary focus is entertainment/media content creation & direct distribution)**

1. Cineplex (CGX.TO) – Cinema & entertainment venues

2. Corus Entertainment (CJR-B.TO) – Broadcasting & TV production

3. WildBrain (WILD.TO) – Animation & children’s media content

### **Extra Companies (related sectors, diversified operations, or indirect entertainment role)**

1. Rogers Communications (RCI-B.TO) – Telecom + media & sports

2. Quebecor (QBR-B.TO) – Telecom + media

3. Cogeco Communications (CCA.TO) – Cable & internet provider

4. IMAX Corporation (IMAX, NYSE-listed) – Cinema technology provider

5. Stingray Group (RAY-A.TO) – Music/media services

6. Spin Master (TOY.TO) – Toys & entertainment tie-ins

In [None]:
import yfinance as yf
import pandas as pd

# (1) Choose symbols
# Display name -> Yahoo Finance ticker, grouped by category so that each
# company is listed exactly once.

# Core: primary business is entertainment/media content.
_CORE_TICKERS = {
    "Cineplex": "CGX.TO",
    "Corus Entertainment (CJR.B)": "CJR-B.TO",
    "WildBrain": "WILD.TO",
}

# Extra: related sectors or diversified operations.
_EXTRA_TICKERS = {
    "Rogers Communications": "RCI-B.TO",
    "Quebecor": "QBR-B.TO",
    "Cogeco Communications": "CCA.TO",
    "IMAX Corporation": "IMAX",
    "Stingray Group": "RAY-A.TO",
    "Spin Master": "TOY.TO",
}

# Full download universe: core companies first, then the extras.
UNIVERSE = {**_CORE_TICKERS, **_EXTRA_TICKERS}

# Names (keys of UNIVERSE) that get tagged "Core"; everything else is "Extra".
CORE_COMPANIES = set(_CORE_TICKERS)

# Lookback window and bar size passed to yfinance. The original note records
# the window as 2020-08-13 → 2025-08-12 for the run it was written against.
PERIOD = "5y"
INTERVAL = "1d"

# (2) Robust single fetch via .history()
def fetch_history(symbol: str, period: str = PERIOD, interval: str = INTERVAL) -> pd.DataFrame:
    """
    Fetch OHLCV history for one symbol via yfinance.Ticker(...).history(...).

    Parameters
    ----------
    symbol : str
        Yahoo Finance ticker, e.g. "CGX.TO".
    period : str
        Lookback window forwarded to ``history`` (defaults to PERIOD).
    interval : str
        Bar size forwarded to ``history`` (defaults to INTERVAL).

    Returns
    -------
    pd.DataFrame
        The history with its former index exposed as a "Date" column and the
        columns "Open", "High", "Low", "Close", "Adj Close" and "Volume" all
        present (filled with pd.NA when yfinance did not supply them).
        An empty DataFrame is returned when yfinance yields no rows.
    """
    t = yf.Ticker(symbol)
    # auto_adjust=False keeps the raw prices alongside a separate "Adj Close".
    df = t.history(period=period, interval=interval, auto_adjust=False)
    if df is None or df.empty:
        return pd.DataFrame()
    df = df.reset_index()

    # Harmonize column names and guarantee presence of standard columns
    df = df.rename(columns={c: str(c).strip() for c in df.columns})

    # reset_index() puts the former index first. yfinance labels it "Datetime"
    # for intraday intervals (and an unnamed index becomes "index"), so rename
    # it to keep the documented "Date" column for every interval.
    if "Date" not in df.columns:
        df = df.rename(columns={df.columns[0]: "Date"})

    for c in ["Open", "High", "Low", "Close", "Adj Close", "Volume"]:
        if c not in df.columns:
            df[c] = pd.NA
    return df

# (3) Download all, build raw
# `frames` collects one DataFrame per ticker that returned usable prices;
# `status` collects one record per ticker (OK / EMPTY / ERROR) for step (6).
frames, status = [], []

for company, symbol in UNIVERSE.items():
    try:
        df = fetch_history(symbol)
        if not df.empty and (df["Close"].notna().any() or df["Adj Close"].notna().any()):
            # yfinance typically returns time-zone-aware dates in each exchange's
            # local zone. Concatenating tickers from different zones yields an
            # object-dtype column that pd.to_datetime(errors="coerce") may turn
            # into NaT (silently dropping rows) depending on the pandas version.
            # Strip the zone per ticker, keeping the local trading date.
            if "Date" in df.columns and isinstance(df["Date"].dtype, pd.DatetimeTZDtype):
                df["Date"] = df["Date"].dt.tz_localize(None)

            df["Ticker"]   = symbol
            df["Company"]  = company
            # Rough exchange detection
            if symbol.endswith(".TO"):
                exch = "TSX"
            elif symbol.endswith(".L"):
                exch = "LSE"
            else:
                exch = "NYSE/NASDAQ"
            df["Exchange"] = exch
            # Tag Core vs Extra
            df["Category"] = "Core" if company in CORE_COMPANIES else "Extra"

            frames.append(df)
            status.append({"Company": company, "Ticker": symbol, "Rows": int(len(df)), "Status": "OK"})
        else:
            status.append({"Company": company, "Ticker": symbol, "Rows": 0, "Status": "EMPTY"})
    except Exception as e:
        # Best-effort download: one failing ticker is recorded, not fatal.
        status.append({"Company": company, "Ticker": symbol, "Rows": 0, "Status": f"ERROR: {e}"})

if not frames:
    raise RuntimeError("No data downloaded. Check your network.")

raw = pd.concat(frames, ignore_index=True)

# (4) Clean / standardize
# Restrict to a fixed column set (creating any missing column as NA) so the
# output schema does not depend on what yfinance happened to return.
keep_cols = [
    "Date", "Open", "High", "Low", "Close", "Adj Close", "Volume",
    "Ticker", "Company", "Exchange", "Category"
]
for c in keep_cols:
    if c not in raw.columns:
        raw[c] = pd.NA
raw = raw[keep_cols].copy()

# Unparseable dates/numbers become NaT/NaN instead of raising.
raw["Date"] = pd.to_datetime(raw["Date"], errors="coerce")
for c in ["Open", "High", "Low", "Close", "Adj Close", "Volume"]:
    raw[c] = pd.to_numeric(raw[c], errors="coerce")

raw = raw.dropna(subset=["Date"]).sort_values(["Company", "Date"]).reset_index(drop=True)

# (5) Tidy + features
# Unified price for features: prefer Adj Close, fallback Close
raw["Price"] = raw["Adj Close"].combine_first(raw["Close"])

if not raw["Price"].notna().any():
    # No usable price anywhere: still write a header-only CSV with the full schema.
    cols = list(raw.columns) + ["Daily_Return", "MA5", "MA20", "Above_MA20"]
    pd.DataFrame(columns=cols).to_csv("entertainment_stocks.csv", index=False)
else:
    # Features are computed per ticker, so rows must be in date order within each ticker.
    tidy = raw.sort_values(["Ticker", "Date"]).reset_index(drop=True)

    # Features
    tidy["Daily_Return"] = tidy.groupby("Ticker")["Price"].pct_change()
    tidy["MA5"]  = tidy.groupby("Ticker")["Price"].transform(lambda s: s.rolling(5,  min_periods=5).mean())
    tidy["MA20"] = tidy.groupby("Ticker")["Price"].transform(lambda s: s.rolling(20, min_periods=20).mean())
    # 1.0 / 0.0 flag; left as NaN while MA20 is still undefined (warm-up rows),
    # because a comparison against NaN would otherwise report 0.0 ("below").
    above_ma20 = (tidy["Price"] > tidy["MA20"]).astype(float)
    tidy["Above_MA20"] = above_ma20.where(tidy["MA20"].notna())

    # Drop first-day NaNs from pct_change
    tidy = tidy.dropna(subset=["Daily_Return"]).reset_index(drop=True)

    # Save final file
    tidy.to_csv("entertainment_stocks.csv", index=False)

# (6) Print download status
print("\nDownload Status:")
print(pd.DataFrame(status).sort_values(["Status", "Company"]).to_string(index=False))
print("\nFile created: entertainment_stocks.csv")


Download Status:
                    Company   Ticker  Rows Status
                   Cineplex   CGX.TO  1255     OK
      Cogeco Communications   CCA.TO  1255     OK
Corus Entertainment (CJR.B) CJR-B.TO  1255     OK
           IMAX Corporation     IMAX  1255     OK
                   Quebecor QBR-B.TO  1255     OK
      Rogers Communications RCI-B.TO  1255     OK
                Spin Master   TOY.TO  1255     OK
             Stingray Group RAY-A.TO  1255     OK
                  WildBrain  WILD.TO  1255     OK

File created: entertainment_stocks.csv
