In [4]:
import os
import glob
import datetime as dt
import numpy as np
import pandas as pd
import pandas_ta as ta
import yfinance as yf
yf.pdr_override() # <== that's all it takes :-)

from concurrent import futures
from dateutil.relativedelta import relativedelta
from pandas_datareader import data as pdr
from scipy.stats import gaussian_kde
# from talib import BBANDS, RSI, ICHIMOKU

import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import plotly.graph_objects as go

# --- datetime util: reference dates derived from the current moment ---
now = dt.datetime.now()
# Same time of day, one calendar day before `now`.
lastday = now + relativedelta(days=-1)
# Midnight on the 1st of the current month; the other month boundaries
# below are all expressed as offsets from this anchor.
firstday_of_this_month = dt.datetime(year=now.year, month=now.month, day=1)
lastday_of_this_month = firstday_of_this_month + relativedelta(months=1, days=-1)
firstday_of_last_month = firstday_of_this_month + relativedelta(months=-1)
lastday_of_last_month = firstday_of_this_month + relativedelta(days=-1)

#import plotly.tools as tls
#import plotly.io as pio
#import plotly.graph_objs as go
#from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot

def last_working_day(given_date):
    """Return the closest weekday strictly before ``given_date``.

    The search always steps back at least one day, so ``given_date`` itself
    is never returned. Only Saturdays and Sundays are skipped; public
    holidays are not taken into account.
    """
    one_day = dt.timedelta(days=1)
    candidate = given_date - one_day
    # weekday(): Monday == 0 ... Saturday == 5, Sunday == 6
    while candidate.weekday() >= 5:
        candidate = candidate - one_day
    return candidate
        
# Download window: a fixed start date, ending on the most recent weekday
# strictly before the current moment.
start_date = "2017-01-01"
end_date = format(last_working_day(now), '%Y-%m-%d')

# Output locations. Only the transaction-data directory is created here.
data_dir = "./data/txn"
report_dir = "./report"
os.makedirs(data_dir, exist_ok=True)

In [5]:
# Load data
def loadTxnData(symbol, end_date, data_dir="./data/txn"):
    """Return price data for ``symbol``, backed by a per-day CSV cache.

    The cache file is ``{data_dir}/{end_date}_{symbol}.csv``. When it exists
    it is read back; otherwise the data is fetched with
    ``pdr.get_data_yahoo(symbol)``, normalised and written to that file.

    Parameters
    ----------
    symbol : str
        Ticker symbol; also stored in the ``name`` column of the result.
    end_date : str
        Date label used only to build the cache file name.
    data_dir : str, optional
        Directory holding the cache files. Defaults to ``"./data/txn"``,
        the location that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``datetime`` with the columns ``name``, ``open``,
        ``high``, ``low``, ``close``, ``adj_close`` and ``volume`` (as
        produced by the rename below / stored in the cache file).

    Notes
    -----
    * An empty download is returned but *not* cached, so a failed fetch does
      not leave behind an empty file that would be served for the rest of
      the day.
    * On a cache hit the index is rebuilt from text. If the rows carry
      different UTC offsets the index is normalised to UTC (see
      ``_parse_cached_index``); otherwise the offsets found in the file are
      kept as they are.
    """
    filename = f'{data_dir}/{end_date}_{symbol}.csv'
    print(filename)

    if os.path.exists(filename):
        df = pd.read_csv(filename)
        # VWAP requires the DataFrame index to be a DatetimeIndex, so the
        # textual "datetime" column is promoted to the index and removed
        # from the columns.
        df.index = _parse_cached_index(df.pop("datetime"))
        df.index.name = "datetime"
        return df

    df = pdr.get_data_yahoo(symbol)

    # Add new "name" column
    df.insert(0, 'name', symbol)

    # Normalise the provider's column names to snake_case.
    df = df.rename(columns={"Date": "datetime", "Open": "open", "High": "high", "Low": "low", "Close": "close" , "Adj Close": "adj_close", "Volume": "volume" })
    df.index.name = "datetime"

    # Do not poison the cache with an empty result.
    if not df.empty:
        os.makedirs(data_dir, exist_ok=True)
        df.to_csv(filename)
    return df


def _parse_cached_index(raw):
    """Turn the cached ``datetime`` text column into a ``DatetimeIndex``."""
    try:
        return pd.DatetimeIndex(raw)
    except (TypeError, ValueError):
        # Rows whose UTC offsets differ (for example -05:00 and -04:00 on
        # either side of a daylight-saving change) cannot share a single
        # fixed-offset index. Normalise them to UTC instead of failing; the
        # instants are unchanged, only their representation is.
        return pd.DatetimeIndex(pd.to_datetime(raw, utc=True))

In [7]:
# temp
# end_date="2023-01-27"

# Tickers to process come from the "stock" column of the report for end_date.
symbols = pd.read_csv(f"{report_dir}/most_attractives_{end_date}.csv")["stock"].tolist()

# Load each ticker through loadTxnData and preview its first rows.
for symbol in symbols:
    df = loadTxnData(symbol, end_date)
    print(df.head())
    
# report_dir = "./report"
# filename = f"{report_dir}/most_attractives_{end_date}.csv"

# """ Load from CSV """
# df = pd.read_csv(filename)
# symbols = df["stock"].tolist()
# print(symbols)

./data/txn/2023-01-27_BKNG.csv
[*********************100%***********************]  1 of 1 completed
                           name    open     high    low   close  adj_close  \
datetime                                                                     
1999-03-31 00:00:00-05:00  BKNG  451.50  517.500  438.0  497.25     497.25   
1999-04-01 00:00:00-05:00  BKNG  550.50  552.000  475.5  483.00     483.00   
1999-04-05 00:00:00-04:00  BKNG  501.00  504.000  435.0  450.75     450.75   
1999-04-06 00:00:00-04:00  BKNG  452.25  484.125  420.0  474.00     474.00   
1999-04-07 00:00:00-04:00  BKNG  480.75  487.500  442.5  447.00     447.00   

                            volume  
datetime                            
1999-03-31 00:00:00-05:00  1322933  
1999-04-01 00:00:00-05:00   483233  
1999-04-05 00:00:00-04:00   324917  
1999-04-06 00:00:00-04:00   353900  
1999-04-07 00:00:00-04:00   156617  
./data/txn/2023-01-27_CAT.csv
[*********************100%***********************]  1 of 1 compl

In [9]:
# Read each symbol's CSV for end_date straight from disk (no index
# conversion) and preview the first rows exactly as they are stored.
for symbol in symbols:
    df = pd.read_csv(
        f"./data/txn/{end_date}_{symbol}.csv"
    )
    print(df.head())

                    datetime  name    open     high    low   close  adj_close  \
0  1999-03-31 00:00:00-05:00  BKNG  451.50  517.500  438.0  497.25     497.25   
1  1999-04-01 00:00:00-05:00  BKNG  550.50  552.000  475.5  483.00     483.00   
2  1999-04-05 00:00:00-04:00  BKNG  501.00  504.000  435.0  450.75     450.75   
3  1999-04-06 00:00:00-04:00  BKNG  452.25  484.125  420.0  474.00     474.00   
4  1999-04-07 00:00:00-04:00  BKNG  480.75  487.500  442.5  447.00     447.00   

    volume  
0  1322933  
1   483233  
2   324917  
3   353900  
4   156617  
                    datetime name      open      high       low     close  \
0  1962-01-02 00:00:00-05:00  CAT  1.604167  1.619792  1.588542  1.604167   
1  1962-01-03 00:00:00-05:00  CAT  1.604167  1.619792  1.588542  1.619792   
2  1962-01-04 00:00:00-05:00  CAT  1.656250  1.708333  1.656250  1.661458   
3  1962-01-05 00:00:00-05:00  CAT  1.661458  1.697917  1.656250  1.677083   
4  1962-01-08 00:00:00-05:00  CAT  1.677083  1.703