In [None]:
import warnings

# Install a blanket "ignore" filter: from here on no Python warning of any
# category is shown, presumably to keep the cell outputs uncluttered.
# NOTE(review): this also hides deprecation and dtype warnings raised by the
# pandas calls below — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [None]:
from pathlib import Path

# Root of the data directory, given relative to the notebook's working directory.
data_folder: Path = Path("../data/")

# Stage sub-folders: the CSV files are read from the raw folder and the
# cleaned results are written to the prod folder.
raw_data_folder: Path = data_folder / "00_raw/"
prod_data_folder: Path = data_folder / "30_prod/"

# File names of the per-ticker CSV files; the same name is used for the raw
# input and the prod output.
aapl_stock_filename: str = "AAPL.csv"
jpm_stock_filename: str = "JPM.csv"

In [None]:
import pandas as pd

# Read the raw AAPL and JPM CSV files from the raw-data folder, one frame per
# ticker. Both files are comma-separated.
aapl_df: pd.DataFrame = pd.read_csv(
    filepath_or_buffer=raw_data_folder / aapl_stock_filename, sep=","
)
jpm_df: pd.DataFrame = pd.read_csv(
    filepath_or_buffer=raw_data_folder / jpm_stock_filename, sep=","
)

# Mapping from the raw column codes to the column names used from here on.
# Kept in one place so both tickers are guaranteed to be renamed identically.
_STOCK_COLUMN_NAMES = {
    "datadate": "Date",
    "cshtrd": "Volume",
    "prccd": "Close",
    "prchd": "High",
    "prcld": "Low",
    "prcod": "Open",
}


def _tidy_stock_frame(
    raw_df: pd.DataFrame,
    date_format: str,
    drop_missing_dates: bool = False,
    dedupe_dates: bool = False,
) -> pd.DataFrame:
    """Return a cleaned copy of a raw stock frame; ``raw_df`` is not modified.

    Steps, in order: optionally drop rows without a ``datadate``, drop the
    ``gvkey`` column, parse ``datadate`` with ``date_format``, rename the raw
    column codes via ``_STOCK_COLUMN_NAMES``, cast ``Volume`` to a 64-bit
    integer, optionally keep only the last row per ``Date``, and finally
    rebuild the index as 0..n-1.

    Parameters
    ----------
    raw_df
        Frame as read from the raw CSV; must contain ``gvkey`` and the raw
        column codes listed in ``_STOCK_COLUMN_NAMES``.
    date_format
        ``strftime``-style format of the strings in ``datadate``.
    drop_missing_dates
        When true, rows whose ``datadate`` is missing are removed first.
    dedupe_dates
        When true, rows sharing a ``Date`` are collapsed to the last one.

    Returns
    -------
    pd.DataFrame
        The cleaned frame with a fresh ``RangeIndex``.

    Raises
    ------
    KeyError
        If ``gvkey`` or ``datadate`` is absent from ``raw_df``.
    ValueError
        If a date string does not match ``date_format`` or ``Volume``
        contains missing values that cannot be cast to an integer.
    """
    tidy_df = raw_df
    if drop_missing_dates:
        tidy_df = tidy_df.dropna(subset=["datadate"])

    tidy_df = (
        tidy_df.drop(columns=["gvkey"])
        # Replacing an existing column through assign keeps its position.
        .assign(
            datadate=lambda df: pd.to_datetime(df["datadate"], format=date_format)
        )
        .rename(columns=_STOCK_COLUMN_NAMES)
        # Explicit int64 so the result does not depend on the platform's
        # default integer width (plain ``int`` can be 32-bit on Windows).
        .astype({"Volume": "int64"})
    )

    if dedupe_dates:
        tidy_df = tidy_df.drop_duplicates(subset=["Date"], keep="last")

    return tidy_df.reset_index(drop=True)


# AAPL: dates are ISO formatted; no row filtering is applied.
aapl_df = _tidy_stock_frame(aapl_df, date_format="%Y-%m-%d")

# JPM: rows without a date are discarded and, where a date occurs more than
# once, only the last row is kept.
# NOTE(review): "%m/%d/%y" carries a two-digit year, so the century is chosen
# by the parser's pivot — confirm the JPM file holds no dates that would land
# in the wrong century.
jpm_df = _tidy_stock_frame(
    jpm_df,
    date_format="%m/%d/%y",
    drop_missing_dates=True,
    dedupe_dates=True,
)

In [None]:
# Print a concise summary of the cleaned AAPL frame (columns, non-null counts
# and dtypes) to check the result of the cleaning step.
aapl_df.info()

In [None]:
# Print a concise summary of the cleaned JPM frame (columns, non-null counts
# and dtypes) to check the result of the cleaning step.
jpm_df.info()

In [None]:
# Persist both cleaned frames to the prod folder as comma-separated files,
# reusing the raw file names and leaving the index out of the output.
aapl_df.to_csv(
    path_or_buf=prod_data_folder / aapl_stock_filename, sep=",", index=False
)
jpm_df.to_csv(
    path_or_buf=prod_data_folder / jpm_stock_filename, sep=",", index=False
)