# Dataset construction

Download the relevant market data as CSV, parse it into DataFrames, and pickle the result for later reuse.

In [13]:
import urllib.request
import pandas as pd
import pickle
import io

In [14]:
def build_url(ticker: str) -> str:
    """Build the stooq.pl CSV download URL for the given stock ticker.

    The ticker is percent-encoded before it is placed in the query string, so
    symbols containing reserved or unsafe characters (for example the ``^`` in
    ``^spx``) produce a well-formed URL and cannot inject extra query
    parameters.

    Args:
        ticker: Ticker symbol to request, e.g. ``"wig20"`` or ``"^spx"``.

    Returns:
        The download URL for ``ticker`` (``i=d`` presumably selects the daily
        interval -- confirm against the stooq documentation).
    """
    # Imported locally so the function does not rely on ``urllib.parse`` being
    # reachable only as a side effect of ``import urllib.request``.
    from urllib.parse import quote

    # safe="" also encodes "/" so nothing in the ticker can alter the URL shape.
    return f"https://stooq.pl/q/d/l/?s={quote(ticker, safe='')}&i=d"

def get_csv(ticker: str, timeout: float = 30.0) -> io.BytesIO:
    """Download CSV data for the given stock ticker.

    Args:
        ticker: Ticker symbol; it is turned into a URL by ``build_url``.
        timeout: Seconds to wait on the network before giving up, so a stalled
            connection cannot hang the caller indefinitely.

    Returns:
        An in-memory binary buffer holding the raw response body, positioned
        at the start and ready to be handed to a CSV parser.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        TimeoutError: If reading the response exceeds ``timeout``.
    """
    url = build_url(ticker)
    with urllib.request.urlopen(url, timeout=timeout) as response:
        # Keep the payload as bytes: decoding to str only to re-encode it to
        # the same bytes is wasted work; the CSV parser decodes it once.
        return io.BytesIO(response.read())

def parse_data(data: io.BytesIO) -> pd.DataFrame:
    """Load downloaded CSV bytes into a DataFrame with English column names.

    The ``Data`` column becomes the index and is renamed to ``date``. Polish
    column headers (spelled with or without diacritics) are translated to
    ``open``/``close``/``high``/``low``/``volume``; any other header is simply
    lower-cased.
    """
    # Lower-cased Polish header -> English column name.
    translations = {
        "otwarcie": "open",
        "zamknięcie": "close",
        "zamkniecie": "close",
        "najwyższy": "high",
        "najwyzszy": "high",
        "najniższy": "low",
        "najnizszy": "low",
        "wolumen": "volume",
    }

    frame = pd.read_csv(data, index_col="Data")
    frame.index.name = "date"

    # Matching is case-insensitive; unknown headers fall back to lower case.
    lowered = [header.lower() for header in frame.columns]
    frame.columns = [translations.get(header, header) for header in lowered]
    return frame

In [15]:
# Maps the short name used as a key for the downloaded data to the ticker
# symbol that is requested from stooq for it.
tickers = {
    "wig20": "wig20",
    "usdpln": "usdpln",
    "10y": "10yply.b",  # presumably the Polish 10-year bond yield -- confirm
    "spx": "^spx",  # index symbols carry a "^" prefix; presumably S&P 500 -- confirm
    "dax": "^dax",
    "swig80": "swig80",
}

In [16]:
# Download and parse every configured ticker, keyed by its short name.
data = {}

for name, symbol in tickers.items():
    data[name] = parse_data(get_csv(symbol))

In [17]:
# Persist the whole {name: DataFrame} dict in a single pickle file so it can
# be reloaded later without downloading again.
# NOTE: pickle files should only ever be loaded from a trusted source.
with open("raw_data.bin", "wb") as f:
    pickle.dump(data, f)