In [10]:
%pip install yfinance pandas
from functools import reduce

import pandas as pd
import yfinance as yf

# First let's play around with yfinance to suss out how we'll source the data

In [54]:
# Ticker lookup through yfinance (project page: https://github.com/ranaroussi/yfinance).
goog = yf.Ticker('GOOG')
# Fetch the ticker's `info` lookup once, then display its 'previousClose' entry
# as the cell output.
goog_info = goog.info
goog_info['previousClose']

105.98

In [47]:
# Download GOOG price history for a fixed date window. Note that `goog` is
# rebound here: it no longer refers to the yf.Ticker object created earlier
# but to the table returned by yf.download.
goog = yf.download(
    tickers='GOOG',
    start='2022-01-01',
    end='2023-05-03',
)
# Bare last expression so the notebook renders the table.
goog

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-03,144.475494,145.550003,143.502502,145.074493,145.074493,25214000
2022-01-04,145.550507,146.610001,143.816147,144.416504,144.416504,22928000
2022-01-05,144.181000,144.298004,137.523499,137.653503,137.653503,49642000
2022-01-06,137.497498,139.686005,136.763504,137.550995,137.550995,29050000
2022-01-07,137.904999,138.254745,135.789001,137.004501,137.004501,19408000
...,...,...,...,...,...,...
2023-04-26,105.559998,107.019997,103.269997,104.449997,104.449997,37068200
2023-04-27,105.230003,109.150002,104.419998,108.370003,108.370003,38235200
2023-04-28,107.800003,108.290001,106.040001,108.220001,108.220001,23957900
2023-05-01,107.720001,108.680000,107.500000,107.709999,107.709999,20926300


In [48]:
# Build a boolean mask over the date index that is True only for one day
# (a range could be selected instead with .between('2023-02-01', '2023-02-03')).
target_day = '2023-02-03'
condition = goog.index.to_series() == target_day
# Show just the matching row(s).
goog.loc[condition]

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-02-03,103.510002,108.019997,103.300003,105.220001,105.220001,36823400


# Let's compose a simple tabular market-data dataset for a set of tickers that have an assumed price relationship

In [81]:
# Symbols to collect, and the per-symbol price/volume fields kept for each one.
# NOTE(review): the saved output of this notebook shows DJIA_* prices near 22,
# so the `DJIA` symbol presumably resolves to a fund rather than the Dow Jones
# index level -- confirm the intended symbol before relying on those columns.
tickers = ['DJIA', 'NDX', 'VIXY', 'SPY', 'XLK', 'QQQ', 'GOOG', 'AAPL', 'MSFT', 'META', 'NFLX']
columns = ['Open', 'High', 'Low', 'Close', 'Volume']

# One frame per ticker, each with columns renamed to '<TICKER>_<field>' so the
# frames can later be combined side by side without name collisions.
dfs = []
for ticker in tickers:
    print(f'Processing {ticker}')
    # period='1y' is relative to the day the cell runs, so the rows fetched
    # change from run to run.
    df = yf.Ticker(ticker).history(period='1y')
    # Fail loudly when a symbol yields no rows: the downstream merge on Date
    # keeps only dates shared by every frame, so a single empty frame would
    # silently empty the whole combined dataset.
    # (Assumes yfinance hands back an empty frame for an unknown/delisted
    # symbol instead of raising -- TODO confirm for the installed version.)
    if df.empty:
        raise ValueError(f'No price history returned for {ticker!r}')
    df = df[columns].add_prefix(f'{ticker}_')
    dfs.append(df)

Processing DJIA
Processing NDX
Processing VIXY
Processing SPY
Processing XLK
Processing QQQ
Processing GOOG
Processing AAPL
Processing MSFT
Processing META
Processing NFLX


In [87]:
# Combine the per-ticker frames side by side on their shared date index.
# join='inner' keeps only the dates present in *every* frame, which is what the
# earlier chain of pairwise default (inner) merges on 'Date' produced, but in a
# single call and without mutating the result in place.
market_data = pd.concat(dfs, axis=1, join='inner').reset_index()

# Sanity checks: an inner join can silently drop every row if the tickers share
# no dates, and the ticker prefixes are expected to keep column names distinct.
assert not market_data.empty, 'inner join on Date left no common rows'
assert market_data.columns.is_unique, 'duplicate column names after combining tickers'

# Bare last expression so the notebook renders the combined table.
market_data

Unnamed: 0,Date,DJIA_Open,DJIA_High,DJIA_Low,DJIA_Close,DJIA_Volume,NDX_Open,NDX_High,NDX_Low,NDX_Close,...,META_Open,META_High,META_Low,META_Close,META_Volume,NFLX_Open,NFLX_High,NFLX_Low,NFLX_Close,NFLX_Volume
0,2022-05-03 00:00:00-04:00,22.370368,22.370368,22.125535,22.198078,16000,13045.349609,13178.740234,12982.700195,13089.900391,...,210.449997,214.899994,208.089996,212.029999,41556300,198.289993,203.199997,194.770004,199.869995,11839400
1,2022-05-04 00:00:00-04:00,22.433847,22.787492,22.216219,22.705883,54900,13110.299805,13556.669922,12887.530273,13535.709961,...,210.649994,224.300003,206.960007,223.410004,41375900,197.649994,204.380005,191.009995,204.009995,12218800
2,2022-05-05 00:00:00-04:00,22.769355,22.769355,21.944181,22.207148,45800,13344.790039,13344.790039,12710.589844,12850.549805,...,218.740005,220.169998,206.160004,208.279999,41129200,200.449997,200.710007,186.000000,188.320007,13971300
3,2022-05-06 00:00:00-04:00,22.306895,22.306895,21.822672,22.062063,55500,12776.860352,12926.580078,12522.769531,12693.530273,...,207.339996,209.380005,201.020004,203.770004,34747200,186.750000,187.210007,175.809998,180.970001,17185500
4,2022-05-09 00:00:00-04:00,22.189012,22.189012,21.681213,21.681213,33100,12465.910156,12542.320312,12136.190430,12187.719727,...,199.839996,202.570007,195.580002,196.210007,36303200,177.660004,183.199997,172.300003,173.100006,14031300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246,2023-04-26 00:00:00-04:00,21.500000,21.684999,21.500000,21.580000,27100,12866.639648,12929.620117,12783.419922,12806.480469,...,212.500000,214.110001,208.880005,209.399994,41992700,321.359985,325.899994,320.470001,321.149994,4623200
247,2023-04-27 00:00:00-04:00,21.540001,21.796000,21.525000,21.709999,105600,12963.209961,13175.620117,12938.500000,13160.030273,...,239.889999,241.690002,236.770004,238.559998,71196500,324.299988,327.450012,317.440002,325.850006,5618800
248,2023-04-28 00:00:00-04:00,21.889999,21.889999,21.709999,21.820000,50100,13139.349609,13247.389648,13096.940430,13245.990234,...,239.009995,240.429993,235.750000,240.320007,39554000,325.239990,330.809998,324.000000,329.929993,4221900
249,2023-05-01 00:00:00-04:00,21.850000,21.889000,21.820000,21.870001,19900,13229.110352,13287.469727,13186.400391,13231.469727,...,238.619995,244.000000,236.460007,243.179993,29143900,329.440002,331.230011,318.089996,324.119995,5341500


In [89]:
# Persist the combined table next to the notebook. index=False leaves out the
# positional 0..N row index; the dates are already a regular 'Date' column.
market_data.to_csv(path_or_buf='market_data.csv', index=False)