In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import requests
import fredapi
import os

def Fred_API_key():
    return '12e72fe8659b5ba78d8dd6f1bb8d97ee'

tickers = ['MMM', 'STX']
series_tickers = [
            'TB3MS',    # 01 3-Month Treasury Bill Secondary Market Rate, Discount Basis
           'DGS10']
quarterly_list = ['GDPC1']

DATA_DIR = "./Data/Sources/"

In [3]:
def extend_quarterly_to_monthly(df):
    """
    Extends quarterly data to monthly using forward-fill.

    Parameters:
        df (DataFrame): A Pandas DataFrame containing quarterly data. 
                        It must have a column named 'date' with datetime values.

    Returns:
        DataFrame: A DataFrame with monthly data, where the values from the
                   quarterly data are forward-filled to fill missing months.

    Example:
        If quarterly data includes rows for March, June, September, and December,
        this function will fill intermediate months (e.g., April and May) with
        the March data values.
    """
    # Ensure 'date' column is of datetime type
    df['date'] = pd.to_datetime(df['date'], errors='coerce')

    # Drop rows with invalid or missing 'date' values
    df = df.dropna(subset=['date'])

    # Generate a complete range of monthly start dates
    all_months = pd.date_range(start=df['date'].min(), end=df['date'].max(), freq='MS')

    # Reindex to include all months and forward-fill missing data
    df = (
        df.set_index("date")
        .reindex(all_months)
        .ffill()
        .infer_objects()
        .reset_index()
    )

    # Rename the index column to 'date'
    df.rename(columns={"index": "date"}, inplace=True)

    return df
def get_FredData(series_id, api_key, start,end, data_freq = 'm'):
    """
    Fetches data from the FRED (Federal Reserve Economic Data) API or a local CSV file.

    Parameters:
        series_id (str): The FRED series identifier (e.g., 'GDP' for Gross Domestic Product).
        api_key (str): Your API key for accessing the FRED API.
        start (str): The start date for the data in 'YYYY-MM-DD' format.
        end (str): The end date for the data in 'YYYY-MM-DD' format.
        data_freq (str): The frequency of data retrieval ('m' for monthly, 'q' for quarterly).
                         Defaults to 'm'.

    Returns:
        DataFrame: A DataFrame with the requested data containing two columns:
                   - 'date': The observation dates as datetime values.
                   - 'value': The series data for the corresponding dates.

                   If the series is quarterly, the data is extended to monthly using forward-fill.

    Notes:
        - If the request to the FRED API is successful, the data is fetched and converted into
          a Pandas DataFrame.
        - If the series is defined as quarterly, it will be transformed into monthly data.
        - Errors during API requests are printed to the console.

    Example:
        data = get_FredData("GDP", "your_api_key", "1950-01-01", "2020-12-31")
    # https://www.youtube.com/watch?v=M_jswxN3iwI&t=18s
    """
    base_url    = 'https://api.stlouisfed.org/fred/series/observations'
    if series_id in quarterly_list:
        data_freq = 'q'
    obs_params = {
        'series_id': series_id,
        'api_key': api_key,
        'file_type': 'json',
        'observation_start': start,
        'observation_end': end,
        'frequency': data_freq
        }
    # make request
    response = requests.get(base_url,
                            params=obs_params)
    # Check response
    df = None
    if response.status_code == 200:
        print(f'Lets download {series_id}')
        data = response.json()
        df = pd.DataFrame(data['observations'])
        #df['date'] = pd.to_datetime(df['date'])
        #df.value = df.value.astype('float')
        df = df[['date','value']]
        #df.to_csv(file_path, index=False)        
        if series_id in quarterly_list:
            df = extend_quarterly_to_monthly(df)         
    else:
        print(f"Error {response.status_code}: {response.text}")  # Print error message            
    if series_id in quarterly_list:
        df = extend_quarterly_to_monthly(df)
    return df

def get_economic_data(start_date  = '1957-03-01', end_date = '2020-12-31'):
    """
    Retrieves economic data for a specified date range either from a local CSV file
    or by downloading it from the FRED API.

    Returns:
        DataFrame: A DataFrame containing economic data for multiple series. 
                   Derived indicators like the credit spread ('csp') are also computed.

    Notes:
        - If a local file named 'EconomicData.csv' exists in the data directory, 
          the data is loaded from this file.
        - If the file does not exist, the function fetches data from the FRED API
          for a predefined list of series identifiers (`series_tickers`) and merges the results.

    Additional Computations:
        - The function computes the credit spread ('csp') if both 'BAA' and 'AAA' series
          are present in the data. The credit spread is calculated as:
            csp = BAA - AAA

    Example:
        df = get_economic_data()

    Dependencies:
        - Requires a valid FRED API key via the `Fred_API_key()` function.
        - Requires the list of series identifiers in `series_tickers`.
        - Requires a defined `DATA_DIR` for file operations.
    """
    fname = 'EconomicData.csv'
    file_path = os.path.join(DATA_DIR,fname)
    if os.path.exists(file_path):
        df = pd.read_csv(file_path, parse_dates=["date"])  # Load from CSV
        return df
    else:    
        API_KEY = Fred_API_key()
        df = None
        for series_id in series_tickers:
            temp = get_FredData(series_id,API_KEY,start_date,end_date)
            if temp is not None:
                temp = temp.rename(columns={'value': series_id})
                if df is None:
                    df = temp
                else:
                    df = df.merge(temp, on='date', how = 'inner')
    
    #   Computes derived indicators like credit spread."""
    if 'BAA' in df.columns and 'AAA' in df.columns:
        df['csp'] = df['BAA'] - df['AAA']  # 15 Credit Spread
    return df

In [None]:
start  = '2000-03-01'
end    = '2009-12-31'

GDPC1 = get_FredData(series_id='GDPC1', 
             api_key=Fred_API_key(),
            start=start,end=end, data_freq = 'q')
GDPC1.head()

Lets download GDPC1


Unnamed: 0,date,value
0,2000-01-01,13878.147
1,2000-02-01,13878.147
2,2000-03-01,13878.147
3,2000-04-01,14130.908
4,2000-05-01,14130.908


In [160]:
TB3MS = get_FredData(series_id='INDPRO', 
             api_key=Fred_API_key(),
            start=start,end=end)
TB3MS.head()

Lets download INDPRO


Unnamed: 0,date,value
0,2000-03-01,92.083
1,2000-04-01,92.6659
2,2000-05-01,92.9347
3,2000-06-01,93.0018
4,2000-07-01,92.8373


In [157]:
TB3MS['date'] = pd.to_datetime(TB3MS['date'], errors='coerce')
GDPC1['date'] = pd.to_datetime(GDPC1['date'], errors='coerce')
temp = TB3MS.merge(GDPC1, on='date', how = 'inner')
temp.head()

Unnamed: 0,date,value_x,value_y
0,2000-03-01,5.69,13878.147
1,2000-04-01,5.66,14130.908
2,2000-05-01,5.79,14130.908
3,2000-06-01,5.69,14130.908
4,2000-07-01,5.96,14145.312


In [42]:
def returns(df, col,lag=1):
    """
    Computes lagged returns for a specified column in the DataFrame.

    Parameters:
        df (DataFrame): A Pandas DataFrame containing the data.
        col (str): The column name for which returns are calculated.
        lag (int): The lag period for calculating returns (default is 1).

    Returns:
        Series: A Pandas Series containing the lagged returns, calculated as:
                (current_value / previous_value) - 1.
    """
    return df[col]/df[col].shift(lag) - 1

def rolling_var(data1,N):
    """
    Computes the rolling variance over a specified window size.

    Parameters:
        data1 (Series): A Pandas Series containing the data.
        N (int): The rolling window size.

    Returns:
        Series: A Pandas Series with the rolling variance over the window `N`.
    """
    return data1.rolling(N).var()

def rolling_cov(data1, data2, N):
    """
    Computes the rolling covariance between two data series over a specified window size.

    Parameters:
        data1 (Series): A Pandas Series containing the first data series.
        data2 (Series): A Pandas Series containing the second data series.
        N (int): The rolling window size.

    Returns:
        Series: A Pandas Series with the rolling covariance over the window `N`.
    """
    return data1.rolling(N).cov(data2)

import statsmodels.api as sm

def regress_on_mkt(data, snp500, ticker, window_size=60):
    """
    Perform rolling regression to compute the price delay ratio based on market return lags.

    Parameters:
        data (DataFrame): Input data containing stock and market returns.
        ticker (str): Stock ticker for which price delay is computed.
        window_size (int): Size of the rolling window for regression.

    Returns:
        DataFrame: Original data with added columns for price delay ratios.
    """

    snpTicker = '^GSPC'
    # Check for the required column availability
    if f'{snpTicker}.Ret' not in snp500.columns:
        raise ValueError(f"Column '{snpTicker}.Ret' is missing from the input data.")
    if f'{ticker}.Ret' not in data.columns:
        raise ValueError(f"Column '{ticker}.Ret' is missing from the input data.")
    if f"{ticker}.mom1m" not in data.columns:
        raise ValueError(f"Column '{ticker}.mom1m' is missing from the input data.")
    # Ensure data is properly copied to avoid modifying the original DataFrame
    data1 = data[['date', 'MMM.Ret', 'MMM.mom1m']].copy()
    data1 = data1.merge(snp500[['date',f'{snpTicker}.Ret']], 
                        on='date', how = 'inner')
    # Add market return lags
    for lg in range(1, 5):  # Updated to include lag 4, as it's used in the 4-lag regression
        data1[f'mkt.ret.lag{lg}'] = data1[f'{snpTicker}.Ret'].shift(lg)

    # Drop rows with NaN values
    data1 = data1.dropna()
    # Initialize result storage
    price_delay_series = []
    dates = list(data1['date'])

    # Apply rolling regression
    for i in range(window_size, len(data1)):
        # Select rolling window data
        window_df = data1.iloc[i - window_size:i]
        # 1-lag regression
        X1 = sm.add_constant(window_df["mkt.ret.lag1"])
        y = window_df[f"{ticker}.mom1m"]
        model1 = sm.OLS(y, X1).fit()
        R2_1 = model1.rsquared

        # 4-lag regression
        X4 = sm.add_constant(window_df[['mkt.ret.lag1', 'mkt.ret.lag2', 
                                        'mkt.ret.lag3', 'mkt.ret.lag4']])
        model4 = sm.OLS(y, X4).fit()
        R2_4 = model4.rsquared

        # Compute price delay ratio
        price_delay_ratio = 1 - (R2_1 / R2_4) if R2_4 > 0 else None
        price_delay_series.append((dates[i], price_delay_ratio))

    # Convert results to DataFrame
    price_delay_df = pd.DataFrame(price_delay_series, 
                                  columns=["date", f"{ticker}.pricedelay"])
    # set date as datetime
    price_delay_df['date'] = pd.to_datetime(price_delay_df['date'])
    # Ensure alignment based on dates
    price_delay_df.set_index("date", inplace=True)
    print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
    print(price_delay_df.head())
    data.set_index('date', inplace=True)
    print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
    print(data.head())
    # Perform a join instead of concat to match dates
    result = data.join(price_delay_df, how="left")

    # Reset index after the join
    result.reset_index(inplace=True)

    return result


def tech_indicators(data, ticker, snp500):
    """
    Calculates various technical indicators for a given ticker and S&P500 market data.

    Parameters:
        data (DataFrame): A DataFrame containing the stock data for the ticker.
        ticker (str): The stock ticker symbol (e.g., 'AAPL').
        snp500 (DataFrame): A DataFrame containing S&P500 data, including market returns.

    Returns:
        DataFrame: A DataFrame with the following additional calculated indicators:
            - {ticker}.M18: 18-period rolling mean of the closing price.
            - {ticker}.M36: 36-period rolling mean of the closing price.
            - {ticker}.S18: 18-period rolling standard deviation of the closing price.
            - {ticker}.S36: 36-period rolling standard deviation of the closing price.
            - {ticker}.E9 or {ticker}.E18: Exponential moving averages with spans of 9 or 18.
            - {ticker}BBU: Upper Bollinger Band (18-period mean + 2 std deviations).
            - {ticker}BBL: Lower Bollinger Band (18-period mean - 2 std deviations).
            - {ticker}.direction: Directional change in the closing price.
            - {ticker}.momXm: Momentum indicators (1, 6, 12, and 36-month lags).
            - {ticker}.chmom: Change in 12-month momentum.
            - {ticker}.dolvol: Dollar volume (Closing price * Volume).
            - {ticker}.var: Rolling variance of the 1-month momentum over 36 periods.
            - {ticker}.cov: Rolling covariance of 1-month momentum with S&P500 returns.
            - {ticker}.beta: Beta (covariance divided by variance).
            - {ticker}.betasq: Beta squared.

    Notes:
        - Bollinger Bands and other indicators are calculated using 18-period metrics.
        - The function calculates beta and its square over a 3-year (36-period) rolling window.

    Example:
        df = tech_indicators(stock_data, 'AAPL', sp500_data)
    """         
    # Compute additional metrics
    df = data.copy()
    close = ticker+'.Close'
    for m in [18,36]:
        df[f'{ticker}.M{m}'] = df[close].rolling(m).mean()
        df[f'{ticker}.S{m}'] = df[close].rolling(m).std()
        m = 9 if m == 18 else 18
        df[f'{ticker}.E{m}'] = df[close].ewm(span=m, adjust=False).mean()
    df[f'{ticker}BBU'] = df[f'{ticker}.M18'] + 2 * df[f'{ticker}.S18']
    df[f'{ticker}BBL'] = df[f'{ticker}.M18'] - 2 * df[f'{ticker}.S18']
    df[f'{ticker}.direction'] = df[close]-df[close].shift(1)
    for m in [1,6,12,36]:
        df[f'{ticker}.mom{m}m'] = returns(df,close,m)
    df[f'{ticker}.chmom'] = df[f'{ticker}.mom1m']-df[f'{ticker}.mom1m'].shift(12)
    df[f'{ticker}.dolvol'] = df[close]*df[f'{ticker}.Volume']
    df[f'{ticker}.var'] = rolling_var(df[f'{ticker}.mom1m'],36) # over 3 year peiod
    df[f'{ticker}.cov'] = rolling_cov(df[f'{ticker}.mom1m'], snp500['^GSPC.Ret'],36) # over 3 year peiod
    df[f'{ticker}.beta'] = df[f'{ticker}.cov']/df[f'{ticker}.var'] # over 3 year peiod
    df[f'{ticker}.betasq'] = df[f'{ticker}.beta']*df[f'{ticker}.beta']
    # ticker_df['xs_ret'] = ticker_df['mom1m'] - macro['DGS10']
    # ticker_df = pd.concat([ticker_df,snp500], axis=1)
    return regress_on_mkt(df,snp500, ticker, window_size=36)

In [5]:
def fetch_yahoo_data(ticker= 'MMM', START_DATE='2000-03-01', END_DATE = '2015-12-31',
                     technicals=False, snp500=None):
    """
    Fetch monthly stock market data from Yahoo Finance and compute additional metrics.

    Parameters:
        ticker (str): The stock ticker symbol (e.g., 'AAPL').
        START_DATE (str): Start date for the data in 'YYYY-MM-DD' format.
        END_DATE (str): End date for the data in 'YYYY-MM-DD' format.

    Returns:
        DataFrame: A Pandas DataFrame containing the following columns:
            - date: The date of the data point.
            - {ticker}: Adjusted closing price for the ticker.
            - {ticker}.M18: 18-month rolling mean of adjusted closing price.
            - {ticker}.M36: 36-month rolling mean of adjusted closing price.
            - {ticker}.S18: 18-month rolling standard deviation.
            - {ticker}.S36: 36-month rolling standard deviation.
            - {ticker}BBU: Upper Bollinger Band (mean + 2 standard deviations).
            - {ticker}BBL: Lower Bollinger Band (mean - 2 standard deviations).
            - {ticker}.E12: 12-period exponential moving average.
            - {ticker}.E26: 26-period exponential moving average.
            - {ticker}.Ret: Logarithmic returns.
            - {ticker}.Vol: Rolling annualized volatility (252 trading days).

    Returns None if no data is available.
    """
    try:
        # Download data from Yahoo Finance
        df = yf.download(ticker, start=START_DATE, end=END_DATE, interval="1mo")
        
        if not df.empty:
            # Prepare the DataFrame
            close = ticker+'.Close'
            df.columns = df.columns.droplevel('Ticker')
            new_colz = {old_col: ticker+'.'+old_col for old_col in list(df.columns)}
            df = df.rename(columns = new_colz)
            df =df.reset_index()
            df.rename(columns={"Date": "date"}, inplace=True) 
            df[f'{ticker}.Ret'] = np.log(df[close] / df[close].shift(1))
            df[f'{ticker}.Vol'] = df[f'{ticker}.Ret'].rolling(12).std() * np.sqrt(12) 
            if technicals:
                df = tech_indicators(data=df,ticker=ticker,snp500=snp500)   
            return df
        else:
            print("No data available for the given ticker and date range.")
            return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [10]:
start_date = '2000-03-01'
end_date = '2015-12-31'
snp500 = fetch_yahoo_data('^GSPC',start_date,end_date)
snp500.head()

[*********************100%***********************]  1 of 1 completed


Price,date,^GSPC.Close,^GSPC.High,^GSPC.Low,^GSPC.Open,^GSPC.Volume,^GSPC.Ret,^GSPC.Vol
0,2000-03-01,1498.579956,1552.869995,1346.619995,1366.420044,26156200000,,
1,2000-04-01,1452.430054,1527.189941,1339.400024,1498.579956,20106460000,-0.03128,
2,2000-05-01,1420.599976,1481.51001,1361.089966,1452.430054,19898300000,-0.022159,
3,2000-06-01,1454.599976,1488.930054,1420.599976,1420.599976,21738300000,0.023652,
4,2000-07-01,1430.829956,1517.319946,1413.890015,1454.599976,19089100000,-0.016476,


In [44]:
mmm = fetch_yahoo_data('MMM',start_date,end_date,technicals=True,snp500=snp500)
mmm.head()

[*********************100%***********************]  1 of 1 completed


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            MMM.pricedelay
date                      
2003-08-01        0.416530
2003-09-01        0.423831
2003-10-01        0.394224
2003-11-01        0.430739
2003-12-01        0.426360
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Price       MMM.Close   MMM.High    MMM.Low   MMM.Open  MMM.Volume   MMM.Ret  \
date                                                                           
2000-03-01  18.523571  19.791592  16.353554  18.235978   104198154       NaN   
2000-04-01  18.118330  20.562866  17.843809  18.693513    82476160 -0.022120   
2000-05-01  17.935314  19.098756  17.412419  18.131399    62180994 -0.010153   
2000-06-01  17.472658  18.498914  16.933215  18.117356    72224527 -0.026134   
2000-07-01  18.959410  19.288339  17.472656  17.472656    57964139  0.081663   

Price       MMM.Vol  MMM.M18  MMM.S18     MMM.E9  ...  MMM.mom1m  MMM.mom6m  \
date                                         

Unnamed: 0,date,MMM.Close,MMM.High,MMM.Low,MMM.Open,MMM.Volume,MMM.Ret,MMM.Vol,MMM.M18,MMM.S18,...,MMM.mom6m,MMM.mom12m,MMM.mom36m,MMM.chmom,MMM.dolvol,MMM.var,MMM.cov,MMM.beta,MMM.betasq,MMM.pricedelay
0,2000-03-01,18.523571,19.791592,16.353554,18.235978,104198154,,,,,...,,,,,1930122000.0,,,,,
1,2000-04-01,18.11833,20.562866,17.843809,18.693513,82476160,-0.02212,,,,...,,,,,1494330000.0,,,,,
2,2000-05-01,17.935314,19.098756,17.412419,18.131399,62180994,-0.010153,,,,...,,,,,1115236000.0,,,,,
3,2000-06-01,17.472658,18.498914,16.933215,18.117356,72224527,-0.026134,,,,...,,,,,1261954000.0,,,,,
4,2000-07-01,18.95941,19.288339,17.472656,17.472656,57964139,0.081663,,,,...,,,,,1098966000.0,,,,,


In [45]:
mmm.tail()

Unnamed: 0,date,MMM.Close,MMM.High,MMM.Low,MMM.Open,MMM.Volume,MMM.Ret,MMM.Vol,MMM.M18,MMM.S18,...,MMM.mom6m,MMM.mom12m,MMM.mom36m,MMM.chmom,MMM.dolvol,MMM.var,MMM.cov,MMM.beta,MMM.betasq,MMM.pricedelay
185,2015-08-01,85.485428,92.161158,80.589891,90.687688,70833578,-0.062716,0.145172,89.705864,6.68654,...,-0.146585,0.01086,0.649054,-0.082864,6055239000.0,0.001727,0.000892,0.516657,0.266935,0.238852
186,2015-09-01,85.856277,88.145457,81.992532,84.469445,78041033,0.004329,0.144677,90.076576,6.238833,...,-0.129073,0.025753,0.648931,0.014583,6700313000.0,0.001727,0.000908,0.525431,0.276078,0.152972
187,2015-10-01,95.206787,96.097022,83.918356,86.122746,65023890,0.103377,0.158535,90.855535,5.931164,...,0.01864,0.048035,0.929124,0.023576,6190716000.0,0.001831,0.001034,0.564675,0.318858,0.137631
188,2015-11-01,94.825272,96.950927,94.231771,95.388479,50716380,-0.004015,0.15356,91.501101,5.676401,...,-0.002598,0.002627,0.850622,-0.045108,4809195000.0,0.001835,0.001045,0.569494,0.324324,0.23083
189,2015-12-01,91.827385,96.734515,89.419539,95.344667,73095691,-0.032125,0.152616,91.929246,5.369322,...,-0.010466,-0.059115,0.743879,-0.063547,6712186000.0,0.0019,0.001085,0.571048,0.326096,0.211456


In [43]:
data = regress_on_mkt(mmm, snp500, 'MMM', window_size=60)
data.head()

KeyError: "['date'] not in index"

In [128]:
macro = get_economic_data(start_date, end_date)
macro.head()

Lets download TB3MS
Lets download DGS10


Unnamed: 0,date,TB3MS,DGS10
0,2000-03-01,5.69,6.26
1,2000-04-01,5.66,5.99
2,2000-05-01,5.79,6.44
3,2000-06-01,5.69,6.1
4,2000-07-01,5.96,6.05


In [126]:
macro.set_index("date", inplace=True)
snp500.set_index("date", inplace=True)
# Perform a join instead of concat to match dates
result = snp500.join(macro, how="left")
result.head()

Unnamed: 0_level_0,^GSPC.Close,^GSPC.High,^GSPC.Low,^GSPC.Open,^GSPC.Volume,^GSPC.Ret,^GSPC.Vol,TB3MS,DGS10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2000-03-01,1498.579956,1552.869995,1346.619995,1366.420044,26156200000,,,5.69,6.26
2000-04-01,1452.430054,1527.189941,1339.400024,1498.579956,20106460000,-0.03128,,5.66,5.99
2000-05-01,1420.599976,1481.51001,1361.089966,1452.430054,19898300000,-0.022159,,5.79,6.44
2000-06-01,1454.599976,1488.930054,1420.599976,1420.599976,21738300000,0.023652,,5.69,6.1
2000-07-01,1430.829956,1517.319946,1413.890015,1454.599976,19089100000,-0.016476,,5.96,6.05


In [114]:
data = fetch_yahoo_data(ticker='AAPL',technicals=True,snp500=snp500)
data.head()

[*********************100%***********************]  1 of 1 completed


Price,date,AAPL.Close,AAPL.High,AAPL.Low,AAPL.Open,AAPL.Volume,AAPL.Ret,AAPL.Vol,AAPL.M18,AAPL.S18,...,AAPL.mom1m,AAPL.mom6m,AAPL.mom12m,AAPL.mom36m,AAPL.chmom,AAPL.dolvol,AAPL.var,AAPL.cov,AAPL.beta,AAPL.betasq
0,2000-03-01,1.021772,1.131331,0.857667,0.891993,8698356800,,,,,...,,,,,,8887737000.0,,,,
1,2000-04-01,0.933372,1.049515,0.789017,1.019421,8662404800,-0.09049,,,,...,-0.086516,,,,,8085246000.0,,,,
2,2000-05-01,0.631966,0.949829,0.615038,0.939484,9807750400,-0.389969,,,,...,-0.322922,,,,,6198160000.0,,,,
3,2000-06-01,0.788076,0.867072,0.604693,0.615038,8105204800,0.22076,,,,...,0.247024,,,,,6387520000.0,,,,
4,2000-07-01,0.764566,0.912212,0.705319,0.784315,5746770400,-0.030287,,,,...,-0.029833,,,,,4393783000.0,,,,


In [105]:
ticker='AAPL'
# Remove the second level ('Ticker') from the MultiIndex columns
df = data.copy()
close = ticker+'.Close'

for m in [18,36]:
    df[f'{ticker}.M{m}'] = df[close].rolling(m).mean()
    df[f'{ticker}.S{m}'] = df[close].rolling(m).std()
    m = 9 if m == 18 else 18
    df[f'{ticker}.E{m}'] = df[close].ewm(span=m, adjust=False).mean()
df[f'{ticker}BBU'] = df[f'{ticker}.M18'] + 2 * df[f'{ticker}.S18']
df[f'{ticker}BBL'] = df[f'{ticker}.M18'] - 2 * df[f'{ticker}.S18']
df[f'{ticker}.direction'] = df[close]-df[close].shift(1)
for m in [1,6,12,36]:
    df[f'{ticker}.mom{m}m'] = returns(df,close,m)
df[f'{ticker}.chmom'] = df[f'{ticker}.mom1m']-df[f'{ticker}.mom1m'].shift(12)
df[f'{ticker}.dolvol'] = df[close]*df[f'{ticker}.Volume']
df[f'{ticker}.var'] = rolling_var(df[f'{ticker}.mom1m'],36) # over 3 year peiod
df[f'{ticker}.cov'] = rolling_cov(df[f'{ticker}.mom1m'], snp500['^GSPC.Ret'],36) # over 3 year peiod
df[f'{ticker}.beta'] = df[f'{ticker}.cov']/df[f'{ticker}.var'] # over 3 year peiod
df[f'{ticker}.betasq'] = df[f'{ticker}.beta']*df[f'{ticker}.beta']
df.head()

Price,date,AAPL.Close,AAPL.High,AAPL.Low,AAPL.Open,AAPL.Volume,AAPL.Ret,AAPL.Vol,AAPL.M18,AAPL.S18,...,AAPL.mom1m,AAPL.mom6m,AAPL.mom12m,AAPL.mom36m,AAPL.chmom,AAPL.dolvol,AAPL.var,AAPL.cov,AAPL.beta,AAPL.betasq
0,2000-03-01,1.021772,1.131331,0.857667,0.891993,8698356800,,,,,...,,,,,,8887737000.0,,,,
1,2000-04-01,0.933372,1.049515,0.789017,1.019421,8662404800,-0.09049,,,,...,-0.086516,,,,,8085246000.0,,,,
2,2000-05-01,0.631966,0.949829,0.615038,0.939484,9807750400,-0.389969,,,,...,-0.322922,,,,,6198160000.0,,,,
3,2000-06-01,0.788076,0.867072,0.604693,0.615038,8105204800,0.22076,,,,...,0.247024,,,,,6387520000.0,,,,
4,2000-07-01,0.764566,0.912212,0.705319,0.784315,5746770400,-0.030287,,,,...,-0.029833,,,,,4393783000.0,,,,


In [96]:
df1 = regress_on_mkt(df,snp500, ticker='AAPL', window_size=36)
df1.head()

Unnamed: 0,date,AAPL.Close,AAPL.High,AAPL.Low,AAPL.Open,AAPL.Volume,AAPL.Ret,AAPL.Vol,AAPL.M18,AAPL.M36,...,AAPL.mom6m,AAPL.mom12m,AAPL.mom36m,AAPL.chmom,AAPL.dolvol,AAPL.var,AAPL.cov,AAPL.beta,AAPL.betasq,pricedelay
0,2000-03-01,1.021772,1.131331,0.857667,0.891993,8698356800,,,,,...,,,,,8887737000.0,,,,,
1,2000-04-01,0.933372,1.049515,0.789017,1.019421,8662404800,-0.09049,,,,...,,,,,8085246000.0,,,,,
2,2000-05-01,0.631966,0.949829,0.615038,0.939484,9807750400,-0.389969,,,,...,,,,,6198160000.0,,,,,
3,2000-06-01,0.788076,0.867072,0.604693,0.615038,8105204800,0.22076,,,,...,,,,,6387520000.0,,,,,
4,2000-07-01,0.764566,0.912212,0.705319,0.784315,5746770400,-0.030287,,,,...,,,,,4393783000.0,,,,,


In [None]:
ticker = 'MMM'
close = 'Close'+'.'+ticker
ticker_df = data_df.copy()
ticker_df['direction'] = ticker_df[close]-ticker_df[close].shift(1)
ticker_df['mom1m'] = returns(ticker_df,close,1)
ticker_df['mom6m'] = returns(ticker_df,close,6)
ticker_df['mom12m'] = returns(ticker_df,close,12)
ticker_df['mom36m'] = returns(ticker_df,close,36)
ticker_df['chmom'] = ticker_df['mom1m']-ticker_df['mom1m'].shift(12)
ticker_df['dolvol'] = ticker_df[close]*ticker_df['Volume'+'.'+ticker]
ticker_df['var'] = rolling_var(ticker_df['mom1m'],36) # over 3 year peiod
ticker_df['cov'] = rolling_cov(ticker_df['mom1m'], snp500['returns'],36) # over 3 year peiod
ticker_df['beta'] = ticker_df['cov']/ticker_df['var'] # over 3 year peiod
ticker_df['betasq'] = ticker_df['beta']*ticker_df['beta'] # over 3 year peiod'''
ticker_df = regress_on_mkt(ticker_df)
ticker_df.head()

TypeError: 'method' object is not subscriptable

In [31]:
data.info["marketCap"]

TypeError: 'method' object is not subscriptable

In [35]:
ticker.quarterly_balance_sheet.index

Index(['Treasury Shares Number', 'Ordinary Shares Number', 'Share Issued',
       'Total Debt', 'Tangible Book Value', 'Invested Capital',
       'Working Capital', 'Net Tangible Assets', 'Capital Lease Obligations',
       'Common Stock Equity', 'Total Capitalization',
       'Total Equity Gross Minority Interest', 'Stockholders Equity',
       'Gains Losses Not Affecting Retained Earnings',
       'Other Equity Adjustments', 'Foreign Currency Translation Adjustments',
       'Minimum Pension Liabilities', 'Treasury Stock', 'Retained Earnings',
       'Additional Paid In Capital', 'Capital Stock', 'Common Stock',
       'Preferred Stock', 'Total Liabilities Net Minority Interest',
       'Total Non Current Liabilities Net Minority Interest',
       'Other Non Current Liabilities', 'Employee Benefits',
       'Non Current Pension And Other Postretirement Benefit Plans',
       'Long Term Debt And Capital Lease Obligation',
       'Long Term Capital Lease Obligation', 'Long Term Debt',


In [36]:
ticker.balance_sheet

Unnamed: 0,2024-12-31,2023-12-31,2022-12-31,2021-12-31,2020-12-31
Treasury Shares Number,43180265.0,43180265.0,39528515.0,33055027.0,
Ordinary Shares Number,147527329.0,147527327.0,151179079.0,157652567.0,
Share Issued,190577214.0,190577212.0,190577214.0,190577214.0,
Total Debt,216700000.0,155200000.0,366900000.0,219000000.0,
Tangible Book Value,800700000.0,874300000.0,780100000.0,839600000.0,
...,...,...,...,...,...
Allowance For Doubtful Accounts Receivable,-12900000.0,-10100000.0,-9500000.0,-9500000.0,
Gross Accounts Receivable,554300000.0,606100000.0,590700000.0,643900000.0,
Cash Cash Equivalents And Short Term Investments,276100000.0,363400000.0,481800000.0,631400000.0,
Other Short Term Investments,36500000.0,23500000.0,90600000.0,188100000.0,


In [None]:
API_KEY = "YDHYBF65R207CFK6"
# replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
url = 'https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=' + 'IBM' + '&apikey=' + API_KEY
r = requests.get(url)
data = r.json()

print(data)

KeyError: 'Mera Data'

In [57]:
for k in data['Monthly Time Series'].keys():
    print(data['Monthly Time Series'][k])

{'1. open': '254.7350', '2. high': '266.4500', '3. low': '243.0400', '4. close': '245.8000', '5. volume': '43721821'}
{'1. open': '252.4000', '2. high': '265.7200', '3. low': '246.5400', '4. close': '252.4400', '5. volume': '94123851'}
{'1. open': '221.8200', '2. high': '261.8000', '3. low': '214.6100', '4. close': '255.7000', '5. volume': '92424171'}
{'1. open': '227.5000', '2. high': '239.3500', '3. low': '217.6523', '4. close': '219.8300', '5. volume': '81535689'}
{'1. open': '207.7700', '2. high': '230.3600', '3. low': '204.0700', '4. close': '227.4100', '5. volume': '77280587'}
{'1. open': '220.6300', '2. high': '237.3700', '3. low': '203.5100', '4. close': '206.7200', '5. volume': '105624376'}
{'1. open': '201.9100', '2. high': '224.1500', '3. low': '199.3350', '4. close': '221.0800', '5. volume': '83415811'}
{'1. open': '192.8100', '2. high': '202.1700', '3. low': '181.8100', '4. close': '202.1300', '5. volume': '65453729'}
{'1. open': '173.4500', '2. high': '196.2600', '3. low'

In [64]:
date = []
open = []
high = []
low = []
close = []
volume = []

df = {}

for key in data['Monthly Time Series'].keys():
    date.append(key)
    open.append(data['Monthly Time Series'][key]['1. open'])
    high.append(data['Monthly Time Series'][key]['2. high'])
    low.append(data['Monthly Time Series'][key]['3. low'])
    close.append(data['Monthly Time Series'][key]['4. close'])
    volume.append(data['Monthly Time Series'][key]['5. volume'])

df = {
    'date': date,
    'open': open,
    'high': high,
    'low': low,
    'close': close,
    'volume': volume
}
df = pd.DataFrame(df)
print(df.head())

         date      open      high       low     close    volume
0  2025-03-13  254.7350  266.4500  243.0400  245.8000  43721821
1  2025-02-28  252.4000  265.7200  246.5400  252.4400  94123851
2  2025-01-31  221.8200  261.8000  214.6100  255.7000  92424171
3  2024-12-31  227.5000  239.3500  217.6523  219.8300  81535689
4  2024-11-29  207.7700  230.3600  204.0700  227.4100  77280587


In [76]:
symbol = 'AAPL'
#api-key

url = 'https://www.alphavantage.co/query?function=OVERVIEW&symbol='+ 'symbol'+'&apikey='+ API_KEY
r = requests.get(url)
if r.status_code == 200:
    print('Lets download it...')
    data = r.json()
    print(data)

Lets download it...
{}


In [None]:
print(data['Meta Data'])

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import alpha_vantage as av
#from alpha_vantage.sectorperformance import SectorPerformances

In [6]:
import requests
import pandas as pd

API_KEY = "YDHYBF65R207CFK6"  # Replace with your actual API key

# Define the API endpoint
url = "https://www.alphavantage.co/query"

# Parameters for sector performance data
params = {
    "function": "SECTOR",
    "apikey": API_KEY
}

# Make the request
response = requests.get(url, params=params)
data = response.json()

print(data)

{}


In [None]:
# Convert JSON data to a pandas DataFrame
df = pd.DataFrame.from_dict(data, orient="index").T

df.head()


In [11]:
import pandas as pd
import fredapi

def get_FredData0(series_id, api_key, start= None,end=None, data_freq = 'm'):
    fred = fredapi.Fred(api_key)
    df = fred.get_series(series_id)
    df["date"] = df.index.strftime("%Y%m").astype(int)
    return df

api_key='12e72fe8659b5ba78d8dd6f1bb8d97ee'
data = get_FredData0("NCBEILQ027S", api_key,)  # Replace with the correct series ID
data

1945-10-01 00:00:00                                             103694.0
1946-01-01 00:00:00                                                  NaN
1946-04-01 00:00:00                                                  NaN
1946-07-01 00:00:00                                                  NaN
1946-10-01 00:00:00                                              97302.0
                                             ...                        
2023-10-01 00:00:00                                           51259502.0
2024-01-01 00:00:00                                           55901810.0
2024-04-01 00:00:00                                           57297715.0
2024-07-01 00:00:00                                           60898170.0
date                   Index([194510, 194601, 194604, 194607, 194610,...
Length: 317, dtype: object

In [2]:
import pandas as pd

url = "https://www.hec.unil.ch/agoyal/docs/PredictorData2022.xlsx"  # Update the URL if needed
df = pd.read_excel(url, sheet_name="Monthly")  # or "Annual" for annual data
print(df.head())

URLError: <urlopen error [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond>

In [12]:
data = pd.read_csv('datashare.csv')
data.head()

Unnamed: 0,permno,DATE,mvel1,beta,betasq,chmom,dolvol,idiovol,indmom,mom1m,...,stdcf,ms,baspread,ill,maxret,retvol,std_dolvol,std_turn,zerotrade,sic2
0,10006,19570131,82249.0,1.122846,1.260784,0.04718,9.569953,0.025742,0.046433,0.044843,...,,,0.013234,9.411565e-08,0.015453,0.008058,0.355638,0.46042,1.120996e-07,37.0
1,10014,19570131,3903.375,0.426734,0.182102,-0.275641,6.237836,0.072103,0.046433,-0.086957,...,,,0.033305,6.610609e-06,0.047619,0.033495,1.152126,1.16961,9.229146e-08,
2,10022,19570131,9273.25,1.066449,1.137313,-0.02549,7.008844,0.027648,0.046433,-0.060377,...,,,0.016023,2.286832e-06,0.020833,0.015589,0.815777,0.679803,1.181757e-07,
3,10030,19570131,54465.875,0.926038,0.857547,0.018171,9.825337,0.0217,0.046433,0.044633,...,,,0.015295,1.464273e-07,0.039326,0.015849,0.739302,1.333656,6.126699e-08,
4,10057,19570131,40250.0,1.247748,1.556875,0.025785,7.901007,0.025506,0.046433,0.086667,...,,,0.005954,1.380375e-06,0.056856,0.019945,0.75551,0.410391,3.31579,


In [14]:
data.columns

Index(['permno', 'DATE', 'mvel1', 'beta', 'betasq', 'chmom', 'dolvol',
       'idiovol', 'indmom', 'mom1m', 'mom6m', 'mom12m', 'mom36m', 'pricedelay',
       'turn', 'absacc', 'acc', 'age', 'agr', 'bm', 'bm_ia', 'cashdebt',
       'cashpr', 'cfp', 'cfp_ia', 'chatoia', 'chcsho', 'chempia', 'chinv',
       'chpmia', 'convind', 'currat', 'depr', 'divi', 'divo', 'dy', 'egr',
       'ep', 'gma', 'grcapx', 'grltnoa', 'herf', 'hire', 'invest', 'lev',
       'lgr', 'mve_ia', 'operprof', 'orgcap', 'pchcapx_ia', 'pchcurrat',
       'pchdepr', 'pchgm_pchsale', 'pchquick', 'pchsale_pchinvt',
       'pchsale_pchrect', 'pchsale_pchxsga', 'pchsaleinv', 'pctacc', 'ps',
       'quick', 'rd', 'rd_mve', 'rd_sale', 'realestate', 'roic', 'salecash',
       'saleinv', 'salerec', 'secured', 'securedind', 'sgr', 'sin', 'sp',
       'tang', 'tb', 'aeavol', 'cash', 'chtx', 'cinvest', 'ear', 'nincr',
       'roaq', 'roavol', 'roeq', 'rsup', 'stdacc', 'stdcf', 'ms', 'baspread',
       'ill', 'maxret', 'retvol', '

In [87]:
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.fundamentaldata import FundamentalData

# Replace 'YOUR_API_KEY' with your actual API key
api_key = 'YOUR_API_KEY'
fd = FundamentalData(key=API_KEY, output_format='pandas')

# Replace 'AAPL' with the stock symbol you're interested in
symbol = 'AAPL'
balance_sheet, _ = fd.get_balance_sheet_annual(symbol)

# Display the balance sheet data
print(balance_sheet)

ts = TimeSeries(key=API_KEY, output_format='pandas')

# Get json object with the intraday data and another with  the call's metadata for January, 2014.
data1, meta_data1 = ts.get_intraday('GOOGL', interval='1mo')

                              fiscalDateEnding reportedCurrency   totalAssets  \
date                                                                            
1970-01-01 00:00:00.000000000       2024-09-30              USD  364980000000   
1970-01-01 00:00:00.000000001       2023-09-30              USD  352583000000   
1970-01-01 00:00:00.000000002       2022-09-30              USD  352755000000   
1970-01-01 00:00:00.000000003       2021-09-30              USD  351002000000   
1970-01-01 00:00:00.000000004       2020-09-30              USD  323888000000   
1970-01-01 00:00:00.000000005       2019-09-30              USD  338516000000   
1970-01-01 00:00:00.000000006       2018-09-30              USD  365725000000   
1970-01-01 00:00:00.000000007       2017-09-30              USD  375319000000   
1970-01-01 00:00:00.000000008       2016-09-30              USD  321686000000   
1970-01-01 00:00:00.000000009       2015-09-30              USD  290345000000   
1970-01-01 00:00:00.00000001

ValueError: Invalid API call. Please retry or visit the documentation (https://www.alphavantage.co/documentation/) for TIME_SERIES_INTRADAY.

In [89]:
overview, _ = fd.get_company_overview(symbol)
overview['MarketCapitalization']


None    3259495350000
Name: MarketCapitalization, dtype: object

In [90]:
# Replace 'YOUR_API_KEY' with your actual API key
api_key = API_KEY
ts = TimeSeries(key=api_key, output_format='pandas')
fd = FundamentalData(key=api_key, output_format='pandas')

# Replace 'AAPL' with the stock symbol you're interested in
symbol = 'AAPL'

# Fetch historical stock prices
stock_data, _ = ts.get_daily_adjusted(symbol=symbol, outputsize='full')

# Fetch the latest shares outstanding
overview, _ = fd.get_company_overview(symbol)
shares_outstanding = int(overview['SharesOutstanding'])

# Calculate historical market capitalization
stock_data['MarketCapitalization'] = stock_data['adjusted close'] * shares_outstanding

# Filter data for the desired time period
start_date = '1990-01-01'
end_date = '2000-01-01'
historical_market_cap = stock_data.loc[start_date:end_date, ['MarketCapitalization']]

print(historical_market_cap)


ValueError: Thank you for using Alpha Vantage! This is a premium endpoint. You may subscribe to any of the premium plans at https://www.alphavantage.co/premium/ to instantly unlock all premium endpoints