In [1]:
import warnings
# Silence every warning for the rest of the session. Note that this also hides
# deprecation notices raised by pandas and the other libraries imported below.
warnings.filterwarnings('ignore')
from pathlib import Path
import requests
from io import BytesIO
from zipfile import ZipFile, BadZipFile

import numpy as np
import pandas as pd
import pandas_datareader.data as web
from sklearn.datasets import fetch_openml

# Keep wide DataFrame reprs on one block instead of wrapping columns across lines.
pd.set_option('display.expand_frame_repr', False)

[Learn how to download all the files here](https://github.com/PacktPublishing/Machine-Learning-for-Algorithmic-Trading-Second-Edition/blob/b662d5f933b48f2f02af62a23365e835e6334436/data/create_datasets.ipynb).

In [2]:
# Location of the HDF5 file that collects every dataset written by this notebook.
DATA_STORE = "../data/assets.h5"

## Quandl Wiki Prices

In [3]:
# Load the Quandl Wiki prices CSV, parse the "date" column as datetimes, and
# index the frame by (date, ticker), sorted on that index.
# `infer_datetime_format` is deliberately not passed: it has been deprecated
# since pandas 2.0, where it has no effect and only raises a warning.
quandl_wiki_prices = pd.read_csv(
    "../data/wiki_prices.csv",
    parse_dates=["date"],
    index_col=["date", "ticker"],
).sort_index()

## US Equities Metadata

In [4]:
# Read the US equities metadata table; no index column is set at load time.
us_equities = pd.read_csv(filepath_or_buffer='../data/us_equities_meta_data.csv')

## S&P 500 Prices

The following code downloads historical S&P 500 prices from FRED (only the last 10 years of daily data are freely available).

In [5]:
# Fetch the "SP500" series from FRED, collapse the result to a Series, and
# turn it back into a one-column frame whose column is named "close".
sp_500_prices_df = (
    web.DataReader(name="SP500", data_source="fred", start=2009)
    .squeeze()
    .to_frame(name="close")
)
# Print the index range, non-null counts and dtypes of the downloaded frame.
sp_500_prices_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2610 entries, 2014-02-03 to 2024-02-02
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   close   2518 non-null   float64
dtypes: float64(1)
memory usage: 40.8 KB


In [6]:
# Read the S&P 500 CSV export, using its first column as a parsed date index,
# keep the rows from 1950 through 2019, and lower-case the column names.
# NOTE(review): unlike the other inputs, this file is read from the notebook's
# working directory rather than ../data — confirm that is intended.
sp500_stooq = (
    pd.read_csv("^spx_d.csv", index_col=0, parse_dates=True)
    .loc["1950":"2019"]
    .rename(str.lower, axis="columns")
)
# Print the index range, non-null counts and dtypes of the loaded frame.
sp500_stooq.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 17700 entries, 1950-01-03 to 2019-12-31
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    17700 non-null  float64
 1   high    17700 non-null  float64
 2   low     17700 non-null  float64
 3   close   17700 non-null  float64
 4   volume  17700 non-null  float64
dtypes: float64(5)
memory usage: 829.7 KB


In [7]:
# Persist every dataset loaded above into the HDF5 store, one key per dataset.
# Item assignment on an HDFStore delegates to `store.put(key, value)`.
with pd.HDFStore(DATA_STORE) as store:
    store["quandl/wiki/prices"] = quandl_wiki_prices
    # The metadata frame is stored indexed by its "ticker" column.
    store["us_equities/stocks"] = us_equities.set_index("ticker")
    store["sp500/fred"] = sp_500_prices_df
    store["sp500/stooq"] = sp500_stooq