In [10]:
#PACKAGES

import pandas as pd
import yfinance as yf
import talib
import numpy as np
import requests
from io import StringIO
from datetime import date, datetime, timedelta
import praw
import emoji
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob
import matplotlib.pyplot as plt

In [11]:
#TICKERS

# Wikipedia page whose first HTML table is read as the S&P 500 constituents.
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
headers = {"User-Agent": "Mozilla/5.0"}

# Bound the request time and fail loudly on HTTP errors so an error page is
# never handed to the HTML table parser.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html = response.text

sp500 = pd.read_html(StringIO(html))[0]

# Explicit overrides from the Wikipedia symbol to the symbol used for the
# price download; they take precedence over the generic rule below.
ticker_map = {
    "BF.B": "BF-B",
    "BRK.B": "BRK-B"
}

# Any other dotted share-class symbol gets the same dot-to-dash conversion,
# so a newly listed one does not need a manual entry in ticker_map.
tickers = sorted(
    ticker_map.get(symbol, symbol.replace(".", "-"))
    for symbol in sp500['Symbol'].tolist()
)

In [12]:
#DATE

def one_year_before(day):
    """Return the calendar date one year before ``day``.

    29 February has no counterpart in the preceding year, so it maps to
    28 February instead of raising ``ValueError``.
    """
    try:
        return day.replace(year=day.year - 1)
    except ValueError:
        # Only reachable for 29 February (or a year below date.min.year,
        # in which case the second replace raises again).
        return day.replace(year=day.year - 1, day=28)


# NOTE: the names are reversed relative to chronology. start_date is today
# (the newest bound) and end_date is one year earlier (the oldest bound).
start_date = date.today()
end_date = one_year_before(start_date)

In [13]:
#MARKET

def calculate_market(tickers, start_date, end_date):
    """Download daily, auto-adjusted OHLCV data and return it in long format.

    Parameters
    ----------
    tickers : sequence of str
        Symbols to download.
    start_date : datetime.date
        The LATER bound of the window; it is passed to yfinance as ``end``.
    end_date : datetime.date
        The EARLIER bound of the window; it is passed to yfinance as ``start``.

    Returns
    -------
    pandas.DataFrame
        One row per (Date, Symbol) with the columns
        Date, Symbol, Open, High, Low, Close, Volume. Symbols without any
        usable rows are reported and skipped. The frame is empty (but has
        these columns) when nothing could be collected.
    """
    price_columns = ["Open", "High", "Low", "Close", "Volume"]
    columns = ["Date", "Symbol"] + price_columns

    # Nothing to download: skip the network call entirely.
    if len(tickers) == 0:
        print("Market variables calculated for 0 symbols.")
        return pd.DataFrame(columns=columns)

    data = yf.download(
        tickers,
        start=end_date,
        end=start_date,
        interval="1d",
        group_by="ticker",
        auto_adjust=True,
        threads=True
    )

    df_list = []
    for ticker in tickers:
        try:
            df_temp = data[ticker].copy()
        except KeyError:
            print(f"Missing data for {ticker}, skipping...")
            continue

        # A symbol can be present in the download yet carry only all-NaN rows
        # (e.g. dates on which it has no quote); such rows hold no information
        # and would poison rolling indicators computed later.
        # NOTE(review): assumes failed downloads show up as all-NaN columns -
        # confirm against the installed yfinance version.
        df_temp = df_temp.dropna(how="all", subset=price_columns)
        if df_temp.empty:
            print(f"Missing data for {ticker}, skipping...")
            continue

        df_temp["Symbol"] = ticker
        df_list.append(df_temp.reset_index())

    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame with the expected schema.
    if df_list:
        market = pd.concat(df_list, ignore_index=True)[columns]
    else:
        market = pd.DataFrame(columns=columns)

    print(f"Market variables calculated for {market['Symbol'].nunique()} symbols.")
    return market

# start_date is the newer bound and end_date the older one.
market = calculate_market(
    tickers=tickers,
    start_date=start_date,
    end_date=end_date,
)

[*********************100%***********************]  503 of 503 completed


Market variables calculated for 503 symbols.


In [14]:
#TECHNICAL

def calculate_technical(market):
    """Compute TA-Lib indicators separately for every symbol in ``market``.

    Parameters
    ----------
    market : pandas.DataFrame
        Long-format price data with at least the columns
        Date, Symbol, High, Low, Close and Volume.

    Returns
    -------
    pandas.DataFrame
        One row per input row with the columns
        Date, Symbol, SMA, EMA, MACD, ADX, RSI, ROC, OBV, MFI.
        Empty (but with these columns) when ``market`` has no rows.
    """
    columns = [
        "Date", "Symbol",
        "SMA", "EMA", "MACD", "ADX",
        "RSI", "ROC", "OBV", "MFI"
    ]
    tech_list = []

    for _, df in market.groupby("Symbol", group_keys=False):
        # Every indicator below depends on row order, so enforce chronological
        # order instead of relying on how the caller assembled the frame.
        df = df.sort_values("Date").copy()

        high, low, close, volume = df["High"], df["Low"], df["Close"], df["Volume"]

        #trend
        df["SMA"] = talib.SMA(close, timeperiod=200)
        df["EMA"] = talib.EMA(close, timeperiod=100)
        # Only the MACD line is kept; signal line and histogram are discarded.
        macd, _, _ = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
        df["MACD"] = macd
        df["ADX"] = talib.ADX(high, low, close, timeperiod=30)

        #momentum
        df["RSI"] = talib.RSI(close, timeperiod=60)
        df["ROC"] = talib.ROC(close, timeperiod=90)

        #volume
        df["OBV"] = talib.OBV(close, volume)
        df["MFI"] = talib.MFI(high, low, close, volume, timeperiod=30)

        tech_list.append(df[columns])

    # pd.concat raises ValueError on an empty list (no rows / no symbols).
    if tech_list:
        technical = pd.concat(tech_list, ignore_index=True)
    else:
        technical = pd.DataFrame(columns=columns)

    print(f"Technical indicators calculated for {technical['Symbol'].nunique()} symbols.")
    return technical

technical = calculate_technical(market)

# Reduce to the latest row of each symbol, then discard the date column so
# the result holds one indicator snapshot per symbol.
last_technical = (
    technical
    .sort_values(["Symbol", "Date"])
    .groupby("Symbol", as_index=False)
    .tail(1)
    .reset_index(drop=True)
    .drop(columns=["Date"])
)

Technical indicators calculated for 503 symbols.


In [15]:
#SAVE 

# Write the per-symbol indicator snapshot as CSV, without the row index.
# NOTE(review): "/Data" is an absolute path at the filesystem root - confirm
# this is intended rather than a directory relative to the notebook.
last_technical.to_csv(
    path_or_buf="/Data/technical.csv",
    index=False,
)