In [1]:
import yfinance as yf
from hurst import compute_Hc
import pandas as pd
import warnings
import pandas as pd

# Filter out the specific warning
warnings.filterwarnings("ignore", category=FutureWarning, module='yfinance')

# Read and print the stock tickers that make up S&P500
tickers = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
symbols = tickers.Symbol.to_list()
symbols

['MMM',
 'AOS',
 'ABT',
 'ABBV',
 'ACN',
 'ADBE',
 'AMD',
 'AES',
 'AFL',
 'A',
 'APD',
 'ABNB',
 'AKAM',
 'ALB',
 'ARE',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'AON',
 'APA',
 'AAPL',
 'AMAT',
 'APTV',
 'ACGL',
 'ADM',
 'ANET',
 'AJG',
 'AIZ',
 'T',
 'ATO',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'AXON',
 'BKR',
 'BALL',
 'BAC',
 'BK',
 'BBWI',
 'BAX',
 'BDX',
 'BRK.B',
 'BBY',
 'BIO',
 'TECH',
 'BIIB',
 'BLK',
 'BX',
 'BA',
 'BKNG',
 'BWA',
 'BXP',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BRO',
 'BF.B',
 'BLDR',
 'BG',
 'CDNS',
 'CZR',
 'CPT',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CARR',
 'CTLT',
 'CAT',
 'CBOE',
 'CBRE',
 'CDW',
 'CE',
 'COR',
 'CNC',
 'CNP',
 'CF',
 'CHRW',
 'CRL',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CLX',
 'CME',
 'CMS',
 'KO',
 'CTSH',
 'CL',
 'CMCSA',
 'CMA'

In [2]:
intervals = [
    '1m',    # max 7 days
    '2m', '5m', '15m', '30m',  # max 60 days
    '1h',    # max 730 days
    '1d', '5d', '1wk', '1mo', '3mo'  # max infinite
]

# List of intervals
# intervals = ['1d']

# Create an empty list to store the results
results = []

# Initialize the total number of iterations
total_iterations = len(symbols) * len(intervals)
current_iteration = 0

# Iterate over each symbol and interval
for symbol in symbols:
    for interval in intervals:
        current_iteration += 1
        
        # Determine the maximum period based on the interval
        if interval == '1m':
            period = '7d'
        elif interval in ['2m', '5m', '15m', '30m']:
            period = '60d'
        elif interval == '1h':
            period = '730d'
        else:
            period = 'max'
        
        # Download stock data from Yahoo Finance
        data = yf.download(symbol, period=period, interval=interval, progress=False)
        
        # Number of data points
        num_data_points = len(data)
        
        # Progress
        print(f"({current_iteration}/{total_iterations}) Downloaded {num_data_points} data points for ${symbol} ({interval})")
        
        # Ensure there is enough data to compute the Hurst exponent
        if num_data_points < 100:
            print(f"Less than 100 data points for {symbol} ({interval})")
            continue
        
        # Extract the closing prices and volume
        close_prices = data['Close'].values
        volume = data['Volume'].values
        
        # Compute the Hurst exponent
        H = 0
        deviation = 0
        try:
            H, c, input_data = compute_Hc(close_prices)
            deviation = abs(H - 0.5)
        except Exception as e:
            print(f"Error computing Hurst exponent for {symbol} ({interval}): {e}")
            H = None
        
        # Calculate the average volume
        avg_volume = volume.mean()
        
        # Append the result to the list
        results.append((symbol, interval, H, deviation, avg_volume, num_data_points))

# Create a DataFrame from the results
df = pd.DataFrame(results, columns=['Symbol', 'Interval', 'Hurst Exponent', 'Deviation', 'Average Volume', 'Data Points'])

# Sort the DataFrame by the number of data points and then by the 'Deviation' column in descending order
df_sorted = df.sort_values(by=['Data Points', 'Deviation'], ascending=[False, False])

(1/5533) Downloaded 2355 data points for $MMM (1m)
(2/5533) Downloaded 6627 data points for $MMM (2m)
(3/5533) Downloaded 4606 data points for $MMM (5m)
(4/5533) Downloaded 1536 data points for $MMM (15m)
(5/5533) Downloaded 768 data points for $MMM (30m)
(6/5533) Downloaded 5088 data points for $MMM (1h)
(7/5533) Downloaded 15721 data points for $MMM (1d)
(8/5533) Downloaded 3144 data points for $MMM (5d)
(9/5533) Downloaded 3259 data points for $MMM (1wk)
(10/5533) Downloaded 749 data points for $MMM (1mo)
(11/5533) Downloaded 250 data points for $MMM (3mo)
(12/5533) Downloaded 1842 data points for $AOS (1m)
(13/5533) Downloaded 6270 data points for $AOS (2m)
(14/5533) Downloaded 4605 data points for $AOS (5m)
(15/5533) Downloaded 1536 data points for $AOS (15m)
(16/5533) Downloaded 768 data points for $AOS (30m)
(17/5533) Downloaded 5088 data points for $AOS (1h)
(18/5533) Downloaded 10260 data points for $AOS (1d)
(19/5533) Downloaded 2052 data points for $AOS (5d)
(20/5533) Downlo


1 Failed download:
['BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')


(683/5533) Downloaded 0 data points for $BRK.B (1m)
Less than 100 data points for BRK.B (1m)



1 Failed download:
['BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')


(684/5533) Downloaded 0 data points for $BRK.B (2m)
Less than 100 data points for BRK.B (2m)



1 Failed download:
['BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')


(685/5533) Downloaded 0 data points for $BRK.B (5m)
Less than 100 data points for BRK.B (5m)



1 Failed download:
['BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')


(686/5533) Downloaded 0 data points for $BRK.B (15m)
Less than 100 data points for BRK.B (15m)



1 Failed download:
['BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')


(687/5533) Downloaded 0 data points for $BRK.B (30m)
Less than 100 data points for BRK.B (30m)



1 Failed download:
['BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')

1 Failed download:
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')

1 Failed download:
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')

1 Failed download:
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')

1 Failed download:
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')

1 Failed download:
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')


(688/5533) Downloaded 0 data points for $BRK.B (1h)
Less than 100 data points for BRK.B (1h)
(689/5533) Downloaded 0 data points for $BRK.B (1d)
Less than 100 data points for BRK.B (1d)
(690/5533) Downloaded 0 data points for $BRK.B (5d)
Less than 100 data points for BRK.B (5d)
(691/5533) Downloaded 0 data points for $BRK.B (1wk)
Less than 100 data points for BRK.B (1wk)
(692/5533) Downloaded 0 data points for $BRK.B (1mo)
Less than 100 data points for BRK.B (1mo)
(693/5533) Downloaded 0 data points for $BRK.B (3mo)
Less than 100 data points for BRK.B (3mo)
(694/5533) Downloaded 2359 data points for $BBY (1m)
(695/5533) Downloaded 6629 data points for $BBY (2m)
(696/5533) Downloaded 4606 data points for $BBY (5m)
(697/5533) Downloaded 1536 data points for $BBY (15m)
(698/5533) Downloaded 768 data points for $BBY (30m)
(699/5533) Downloaded 5088 data points for $BBY (1h)
(700/5533) Downloaded 9869 data points for $BBY (1d)
(701/5533) Downloaded 1976 data points for $BBY (5d)
(702/5533) 


1 Failed download:
['BF.B']: Exception("%ticker%: Period '7d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']")

1 Failed download:
['BF.B']: Exception("%ticker%: Period '60d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']")


(858/5533) Downloaded 159 data points for $BRO (3mo)
(859/5533) Downloaded 0 data points for $BF.B (1m)
Less than 100 data points for BF.B (1m)
(860/5533) Downloaded 0 data points for $BF.B (2m)
Less than 100 data points for BF.B (2m)



1 Failed download:
['BF.B']: Exception("%ticker%: Period '60d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']")

1 Failed download:
['BF.B']: Exception("%ticker%: Period '60d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']")

1 Failed download:
['BF.B']: Exception("%ticker%: Period '60d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']")

1 Failed download:
['BF.B']: Exception("%ticker%: Period '730d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']")


(861/5533) Downloaded 0 data points for $BF.B (5m)
Less than 100 data points for BF.B (5m)
(862/5533) Downloaded 0 data points for $BF.B (15m)
Less than 100 data points for BF.B (15m)
(863/5533) Downloaded 0 data points for $BF.B (30m)
Less than 100 data points for BF.B (30m)
(864/5533) Downloaded 0 data points for $BF.B (1h)
Less than 100 data points for BF.B (1h)



1 Failed download:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1d 1925-07-08 -> 2024-06-14)')

1 Failed download:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (5d 1925-07-08 -> 2024-06-14)')

1 Failed download:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1wk 1925-07-08 -> 2024-06-14)')


(865/5533) Downloaded 0 data points for $BF.B (1d)
Less than 100 data points for BF.B (1d)
(866/5533) Downloaded 0 data points for $BF.B (5d)
Less than 100 data points for BF.B (5d)
(867/5533) Downloaded 0 data points for $BF.B (1wk)
Less than 100 data points for BF.B (1wk)



1 Failed download:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1mo 1925-07-08 -> 2024-06-14)')

1 Failed download:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (3mo 1925-07-08 -> 2024-06-14)')


(868/5533) Downloaded 0 data points for $BF.B (1mo)
Less than 100 data points for BF.B (1mo)
(869/5533) Downloaded 0 data points for $BF.B (3mo)
Less than 100 data points for BF.B (3mo)
(870/5533) Downloaded 2299 data points for $BLDR (1m)
(871/5533) Downloaded 6596 data points for $BLDR (2m)
(872/5533) Downloaded 4606 data points for $BLDR (5m)
(873/5533) Downloaded 1536 data points for $BLDR (15m)
(874/5533) Downloaded 768 data points for $BLDR (30m)
(875/5533) Downloaded 5088 data points for $BLDR (1h)
(876/5533) Downloaded 4774 data points for $BLDR (1d)
(877/5533) Downloaded 957 data points for $BLDR (5d)
(878/5533) Downloaded 990 data points for $BLDR (1wk)
(879/5533) Downloaded 228 data points for $BLDR (1mo)
(880/5533) Downloaded 77 data points for $BLDR (3mo)
Less than 100 data points for BLDR (3mo)
(881/5533) Downloaded 2144 data points for $BG (1m)
(882/5533) Downloaded 6566 data points for $BG (2m)
(883/5533) Downloaded 4604 data points for $BG (5m)
(884/5533) Downloaded 15


1 Failed download:
['CEG']: Exception('%ticker%: 1h data not available for startTime=1642602600 and endTime=1718373398. The requested range must be within the last 730 days.')


(1392/5533) Downloaded 0 data points for $CEG (1h)
Less than 100 data points for CEG (1h)
(1393/5533) Downloaded 605 data points for $CEG (1d)
(1394/5533) Downloaded 123 data points for $CEG (5d)
(1395/5533) Downloaded 126 data points for $CEG (1wk)
(1396/5533) Downloaded 29 data points for $CEG (1mo)
Less than 100 data points for CEG (1mo)
(1397/5533) Downloaded 10 data points for $CEG (3mo)
Less than 100 data points for CEG (3mo)
(1398/5533) Downloaded 1943 data points for $COO (1m)
(1399/5533) Downloaded 6351 data points for $COO (2m)
(1400/5533) Downloaded 4599 data points for $COO (5m)
(1401/5533) Downloaded 1536 data points for $COO (15m)
(1402/5533) Downloaded 768 data points for $COO (30m)
(1403/5533) Downloaded 5088 data points for $COO (1h)
(1404/5533) Downloaded 10435 data points for $COO (1d)
(1405/5533) Downloaded 2089 data points for $COO (5d)
(1406/5533) Downloaded 2161 data points for $COO (1wk)
(1407/5533) Downloaded 497 data points for $COO (1mo)
(1408/5533) Downloade


1 Failed download:
['GEV']: Exception('%ticker%: 2m data not available for startTime=1711978200 and endTime=1718373636. The requested range must be within the last 60 days.')


(2345/5533) Downloaded 0 data points for $GEV (2m)
Less than 100 data points for GEV (2m)



1 Failed download:
['GEV']: Exception('%ticker%: 5m data not available for startTime=1711978200 and endTime=1718373636. The requested range must be within the last 60 days.')


(2346/5533) Downloaded 0 data points for $GEV (5m)
Less than 100 data points for GEV (5m)



1 Failed download:
['GEV']: Exception('%ticker%: 15m data not available for startTime=1711978200 and endTime=1718373638. The requested range must be within the last 60 days.')


(2347/5533) Downloaded 0 data points for $GEV (15m)
Less than 100 data points for GEV (15m)



1 Failed download:
['GEV']: Exception('%ticker%: 15m data not available for startTime=1711978200 and endTime=1718373639. The requested range must be within the last 60 days.')


(2348/5533) Downloaded 0 data points for $GEV (30m)
Less than 100 data points for GEV (30m)
(2349/5533) Downloaded 365 data points for $GEV (1h)
(2350/5533) Downloaded 54 data points for $GEV (1d)
Less than 100 data points for GEV (1d)
(2351/5533) Downloaded 11 data points for $GEV (5d)
Less than 100 data points for GEV (5d)
(2352/5533) Downloaded 11 data points for $GEV (1wk)
Less than 100 data points for GEV (1wk)
(2353/5533) Downloaded 3 data points for $GEV (1mo)
Less than 100 data points for GEV (1mo)
(2354/5533) Downloaded 1 data points for $GEV (3mo)
Less than 100 data points for GEV (3mo)
(2355/5533) Downloaded 2356 data points for $GEN (1m)
(2356/5533) Downloaded 6638 data points for $GEN (2m)
(2357/5533) Downloaded 4608 data points for $GEN (5m)
(2358/5533) Downloaded 1537 data points for $GEN (15m)
(2359/5533) Downloaded 769 data points for $GEN (30m)
(2360/5533) Downloaded 2796 data points for $GEN (1h)
(2361/5533) Downloaded 8812 data points for $GEN (1d)
(2362/5533) Downl


1 Failed download:
['O']: IndexError('Boolean index has wrong length: 3 instead of 2')


(4347/5533) Downloaded 0 data points for $O (2m)
Less than 100 data points for O (2m)
(4348/5533) Downloaded 4609 data points for $O (5m)
(4349/5533) Downloaded 1537 data points for $O (15m)
(4350/5533) Downloaded 769 data points for $O (30m)
(4351/5533) Downloaded 5088 data points for $O (1h)
(4352/5533) Downloaded 7467 data points for $O (1d)
(4353/5533) Downloaded 1491 data points for $O (5d)
(4354/5533) Downloaded 1548 data points for $O (1wk)
(4355/5533) Downloaded 356 data points for $O (1mo)
(4356/5533) Downloaded 119 data points for $O (3mo)
(4357/5533) Downloaded 1960 data points for $REG (1m)
(4358/5533) Downloaded 6392 data points for $REG (2m)
(4359/5533) Downloaded 4606 data points for $REG (5m)
(4360/5533) Downloaded 1537 data points for $REG (15m)
(4361/5533) Downloaded 769 data points for $REG (30m)
(4362/5533) Downloaded 5088 data points for $REG (1h)
(4363/5533) Downloaded 7711 data points for $REG (1d)
(4364/5533) Downloaded 1540 data points for $REG (5d)
(4365/5533)


1 Failed download:
['SOLV']: Exception('%ticker%: 2m data not available for startTime=1711978200 and endTime=1718374198. The requested range must be within the last 60 days.')


(4622/5533) Downloaded 0 data points for $SOLV (2m)
Less than 100 data points for SOLV (2m)



1 Failed download:
['SOLV']: Exception('%ticker%: 5m data not available for startTime=1711978200 and endTime=1718374199. The requested range must be within the last 60 days.')


(4623/5533) Downloaded 0 data points for $SOLV (5m)
Less than 100 data points for SOLV (5m)



1 Failed download:
['SOLV']: Exception('%ticker%: 15m data not available for startTime=1711978200 and endTime=1718374200. The requested range must be within the last 60 days.')


(4624/5533) Downloaded 0 data points for $SOLV (15m)
Less than 100 data points for SOLV (15m)



1 Failed download:
['SOLV']: Exception('%ticker%: 15m data not available for startTime=1711978200 and endTime=1718374201. The requested range must be within the last 60 days.')


(4625/5533) Downloaded 0 data points for $SOLV (30m)
Less than 100 data points for SOLV (30m)
(4626/5533) Downloaded 372 data points for $SOLV (1h)
(4627/5533) Downloaded 54 data points for $SOLV (1d)
Less than 100 data points for SOLV (1d)
(4628/5533) Downloaded 11 data points for $SOLV (5d)
Less than 100 data points for SOLV (5d)
(4629/5533) Downloaded 11 data points for $SOLV (1wk)
Less than 100 data points for SOLV (1wk)
(4630/5533) Downloaded 3 data points for $SOLV (1mo)
Less than 100 data points for SOLV (1mo)
(4631/5533) Downloaded 1 data points for $SOLV (3mo)
Less than 100 data points for SOLV (3mo)
(4632/5533) Downloaded 2375 data points for $SO (1m)
(4633/5533) Downloaded 6640 data points for $SO (2m)
(4634/5533) Downloaded 4609 data points for $SO (5m)
(4635/5533) Downloaded 1537 data points for $SO (15m)
(4636/5533) Downloaded 769 data points for $SO (30m)
(4637/5533) Downloaded 5088 data points for $SO (1h)
(4638/5533) Downloaded 10703 data points for $SO (1d)
(4639/5533

In [3]:
# df_sorted.to_csv(f'stock_hurst_exponents_{interval}.csv', index=False)
df_sorted.to_csv(f'stocks_H.csv', index=False)
df_sorted

Unnamed: 0,Symbol,Interval,Hurst Exponent,Deviation,Average Volume,Data Points
2272,GD,1d,0.564147,0.064147,1.249309e+06,15721
2827,JNJ,1d,0.562933,0.062933,5.664510e+06,15721
1309,ED,1d,0.555250,0.055250,8.345521e+05,15721
1232,KO,1d,0.552874,0.052874,9.250407e+06,15721
4014,PG,1d,0.550662,0.050662,5.083022e+06,15721
...,...,...,...,...,...,...
3689,NVDA,3mo,0.614787,0.114787,3.801484e+10,102
2014,FFIV,3mo,0.622959,0.122959,9.372488e+07,101
2351,GS,3mo,0.470735,0.029265,3.126998e+08,101
4344,SBAC,3mo,0.475156,0.024844,6.623527e+07,101


In [6]:
import yfinance as yf

ticker = "GD"

data = yf.download(f"{ticker}", period="max", interval="1d")
data = data.reset_index()
data.to_csv(f'../data/{ticker}.csv', index=False)
data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1962-01-02,0.000000,0.753125,0.728125,0.746875,0.174832,2648000
1,1962-01-03,0.000000,0.781250,0.753125,0.771875,0.180684,2092000
2,1962-01-04,0.000000,0.806250,0.771875,0.775000,0.181415,2884000
3,1962-01-05,0.000000,0.793750,0.775000,0.778125,0.182147,1532000
4,1962-01-08,0.000000,0.793750,0.765625,0.790625,0.185073,1676000
...,...,...,...,...,...,...,...
15719,2024-06-13,293.679993,294.570007,290.730011,292.519989,292.519989,686600
15720,2024-06-14,290.700012,292.040009,288.459991,291.779999,291.779999,639900
15721,2024-06-17,291.179993,294.190002,290.660004,293.829987,293.829987,511000
15722,2024-06-18,293.570007,298.049988,293.170013,298.000000,298.000000,748500


In [7]:
import yfinance as yf

ticker = "MRO"

data = yf.download(f"{ticker}", period="max", interval="1d")
data = data.reset_index()
data.to_csv(f'../data/{ticker}.csv', index=False)
data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1962-01-02,0.000000,3.274790,3.234975,3.254883,0.349901,84139
1,1962-01-03,0.000000,3.254883,3.234975,3.234975,0.347761,40186
2,1962-01-04,0.000000,3.264837,3.205114,3.225022,0.346691,52744
3,1962-01-05,0.000000,3.244929,3.205114,3.205114,0.344551,55255
4,1962-01-08,0.000000,3.234975,3.185207,3.225022,0.346691,62790
...,...,...,...,...,...,...,...
15719,2024-06-13,27.910000,27.959999,27.480000,27.680000,27.680000,6682100
15720,2024-06-14,27.629999,27.730000,27.230000,27.299999,27.299999,5786100
15721,2024-06-17,27.260000,27.510000,27.059999,27.320000,27.320000,7118000
15722,2024-06-18,27.410000,27.770000,27.250000,27.320000,27.320000,6085900
