In [108]:
import os
import os.path
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from pandas.errors import EmptyDataError
from scipy import stats
from sqlalchemy import create_engine, text

# NOTE: turns off pandas' chained-assignment (SettingWithCopy) warning for the whole session.
# The TODO list in this notebook asks for this suppression to be removed.
pd.options.mode.chained_assignment = None
import warnings

# NOTE: hides every FutureWarning for the rest of the session (also listed in the TODO).
warnings.simplefilter(action='ignore', category=FutureWarning)
# Display-only setting: precision used when pandas renders floats.
pd.set_option("display.precision", 2)

from IPython.core.interactiveshell import InteractiveShell

# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

### TODO:
- Work on the seed value in function generate_ema. The seed currently comes from the very first date, 1/1/2020, which was a holiday. Need to get the value for the first business day instead.
- Take care of the warnings. Currently they are suppressed. Take out the option "pd.options.mode.chained_assignment = None" and 
    "warnings.simplefilter(action='ignore', category=FutureWarning)"
- To generate records for the first time, 'dt_target' is manually set to a certain date. The 'dfrm' passed to generate_mean() is the full DataFrame of whatever SQL has for that symbol. It is possible that the manually set dt_target is older than the oldest record in SQL for a symbol. Need to handle that scenario better.

In [109]:
# Directory holding one technical-indicator CSV per symbol (joined with the file name in generate_file_path).
BASE_DIR = '../../../../workspace/HelloPython/HistoricalMarketData/TechnicalIndicators'
# SQL table the daily market data is read from.
TABLE_EQUITIES_DATA = 'equities_historic_data'
# Look-back windows, in calendar days, for which every indicator is generated.
DURATIONS = (14, 30, 90, 200)  # Roughly for bi-weekly, monthly, quarterly, and 200 days running averages

In [110]:
# Database name and credentials come from the environment so they never live in the notebook.
try:
    DB = os.environ["DB"]
    DB_USER = os.environ["DB_USER"]
    DB_PWD = os.environ["DB_PWD"]
except KeyError as err:
    # All three variables are required; name the one that is actually missing and keep the cause.
    raise Exception(
        "Required environment variable {} not set (DB, DB_USER and DB_PWD are all required)".format(err)
    ) from err
# NOTE(review): DB_PWD is inserted verbatim; a password containing '@', ':' or '/' would
# need URL-encoding before it can be embedded in the connection URL.
DB_URL = 'mysql+mysqlconnector://' + DB_USER + ':' + DB_PWD + '@localhost/' + DB
ENGINE = create_engine(DB_URL)

In [111]:
# def executeQuery(connection, query):
#     dfrm = pd.DataFrame()
#     try:
#         dfrm = pd.read_sql(query, connection)
#     except ProgrammingError as err:
#         print(err)
#     return dfrm

In [112]:
def retrieve_cataloged_market_data(symbol, tblName, startDate, endDate=None):
    """
    Read the daily market data of one symbol from SQL.
    :param symbol: ticker; matched with SQL ``like`` so wildcards are honoured
    :param tblName: table to read from. It is concatenated into the statement (identifiers
        cannot be bound), so it must come from trusted code, never from user input
    :param startDate: exclusive lower bound for ``date`` as a string, e.g. '2000-01-01'
    :param endDate: exclusive upper bound for ``date`` as a string; defaults to today
        ('%Y%m%d'), evaluated on every call rather than once at definition time
    :return: DataFrame indexed by 'date' in ascending order with columns symbol, open, high,
        low, close, netChange, pcntChange and volume; empty (same columns) when nothing matches
    """
    if endDate is None:
        endDate = datetime.strftime(datetime.today(), '%Y%m%d')

    columns = ['date', 'symbol', 'open', 'high', 'low', 'close', 'netChange', 'pcntChange', 'volume']
    # Values are bound parameters; "order by date" guarantees the sorted index that the
    # label-based slicing in the indicator functions relies on.
    query = text(
        "select " + ", ".join(columns) + " from " + tblName +
        " where symbol like :symbol and date > :start_date and date < :end_date order by date"
    )
    params = {'symbol': symbol, 'start_date': startDate, 'end_date': endDate}

    # Fetch while the connection is still open; the result must not be read after it closes.
    with ENGINE.connect() as conn:
        rows = conn.execute(query, params).mappings().all()

    if not rows:
        # Without rows pandas creates no columns at all and set_index('date') would raise.
        return pd.DataFrame(columns=columns).set_index('date')
    dfrm = pd.DataFrame(rows)
    dfrm.set_index('date', inplace=True)
    return dfrm

In [113]:
def generate_file_path(symbol, date=None):
    """
    Builds the CSV file name and path for a given symbol.
    The path is only assembled under BASE_DIR; it is NOT checked for existence,
    so callers must test for the file themselves.
    :param symbol: ticker; lower-cased in the file name
    :param date: optional date/datetime embedded in the file name as YYYYMMDD
    :return: tuple (file path, file name)
    """
    if date is not None:
        # date.strftime works for both datetime.date and datetime.datetime values
        file_name = symbol.lower() + '_' + date.strftime('%Y%m%d') + '.csv'
    else:
        file_name = symbol.lower() + '.csv'
    return os.path.join(BASE_DIR, file_name), file_name

In [114]:
def generate_mean(dfrm, dt_target, duration):
    """
    Add a trailing simple moving average of the daily 'close' values.
    For every row dated on or after dt_target the mean is taken over the calendar
    window [row date - duration days, row date], both ends inclusive.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order,
        which the label slicing relies on) with at least a 'close' column
    :param dt_target: datetime.datetime marking the first date the mean is calculated for
    :param duration: # of calendar days as int that denote bi-weekly, monthly, quarterly... cycles
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'mean_<duration>' added. Rows before dt_target hold NaN; they are returned
        only so that downstream calculations have their look-back history
    """
    column = 'mean_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)

    means_by_day = {day: dfrm.loc[day - window: day, 'close'].mean()
                    for day in dfrm.loc[target_day:].index}

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [means_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [115]:
def generate_std_dev(dfrm, dt_target, duration):
    """
    Add the trailing standard deviation of the daily 'close' values.
    For every row dated on or after dt_target, pandas' Series.std() is taken over the
    calendar window [row date - duration days, row date], both ends inclusive.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order)
        with at least a 'close' column
    :param dt_target: datetime.datetime marking the first date the value is calculated for
    :param duration: window length in calendar days as int
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'stddev_<duration>' added; rows before dt_target hold NaN
    """
    column = 'stddev_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)

    stddevs_by_day = {day: dfrm.loc[day - window: day, 'close'].std()
                      for day in dfrm.loc[target_day:].index}

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [stddevs_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [116]:
def generate_pcntle_closing(dfrm, dt_target, duration):
    """
    Add the percentile rank of each day's 'close' within its own trailing window.
    For every row dated on or after dt_target, scipy.stats.percentileofscore is applied to the
    'close' values of [row date - duration days, row date] with that row's 'close' as the score.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order)
        with at least a 'close' column
    :param dt_target: datetime.datetime marking the first date the value is calculated for
    :param duration: window length in calendar days as int
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'pcntleClosing_<duration>' added; rows before dt_target hold NaN
    """
    column = 'pcntleClosing_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)

    pcntles_by_day = {}
    for day in dfrm.loc[target_day:].index:
        # Give entire rolling range and calculate percentile of the last value in that range
        window_closes = dfrm.loc[day - window: day, 'close']
        pcntles_by_day[day] = stats.percentileofscore(window_closes, dfrm.loc[day, 'close'])

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [pcntles_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [117]:
def generate_pcntle_volume(dfrm, dt_target, duration):
    """
    Add the percentile rank of each day's 'volume' within its own trailing window.
    For every row dated on or after dt_target, scipy.stats.percentileofscore is applied to the
    'volume' values of [row date - duration days, row date] with that row's 'volume' as the score.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order)
        with at least a 'volume' column
    :param dt_target: datetime.datetime marking the first date the value is calculated for
    :param duration: window length in calendar days as int
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'pcntleVolume_<duration>' added; rows before dt_target hold NaN
    """
    column = 'pcntleVolume_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)

    pcntles_by_day = {}
    for day in dfrm.loc[target_day:].index:
        # Give entire rolling range and calculate percentile of the last value in that range
        window_volumes = dfrm.loc[day - window: day, 'volume']
        pcntles_by_day[day] = stats.percentileofscore(window_volumes, dfrm.loc[day, 'volume'])

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [pcntles_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [118]:
def generate_pcntle_std_devs(dfrm, dt_target, duration):
    """
    Add the percentile rank of each day's standard deviation within its own trailing window.
    For every row dated on or after dt_target, scipy.stats.percentileofscore is applied to the
    'stddev_<duration>' values of [row date - duration days, row date] with that row's value
    as the score.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order)
        that already has the 'stddev_<duration>' column (see generate_std_dev)
    :param dt_target: datetime.datetime marking the first date the value is calculated for
    :param duration: window length in calendar days as int
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'pcntleStdDevs_<duration>' added; rows before dt_target hold NaN
    """
    source_column = 'stddev_' + str(duration)
    column = 'pcntleStdDevs_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)

    # NOTE(review): generate_std_dev leaves NaN in the rows before dt_target, so the windows of
    # the first `duration` days contain NaN. How percentileofscore treats NaN depends on the
    # SciPy version (recent versions presumably propagate it) - confirm the values are intended.
    pcntles_by_day = {}
    for day in dfrm.loc[target_day:].index:
        # Give entire rolling range and calculate percentile of the last value in that range
        window_stddevs = dfrm.loc[day - window: day, source_column]
        pcntles_by_day[day] = stats.percentileofscore(window_stddevs, dfrm.loc[day, source_column])

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [pcntles_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [119]:
def generate_stcstc_oscillator(dfrm, dt_target, duration):
    """
    Add a stochastic oscillator calculated from the daily 'close' values.
    LOGIC: for every row dated on or after dt_target, with highest/lowest being the max/min
    'close' of the window [row date - duration days, row date], the oscillator is:
        np.nan if (highest == lowest)
        else ((dayClose - lowest) / (highest - lowest)) * (100)
    Only 'close' is used; the daily 'high'/'low' columns are not consulted.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order)
        with at least a 'close' column
    :param dt_target: datetime.datetime marking the first date the value is calculated for
    :param duration: window length in calendar days as int
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'oscillator_<duration>' added; rows before dt_target hold NaN
    """
    column = 'oscillator_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)

    oscillators_by_day = {}
    for day in dfrm.loc[target_day:].index:
        # Slice the window once instead of once per min()/max() call
        window_closes = dfrm.loc[day - window: day, 'close']
        lowest, highest = window_closes.min(), window_closes.max()
        if highest == lowest:
            oscillators_by_day[day] = np.nan  # flat window: the ratio is undefined
        else:
            oscillators_by_day[day] = 100 * (dfrm.loc[day, 'close'] - lowest) / (highest - lowest)

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [oscillators_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [120]:
def generate_williams_r(dfrm, dt_target, duration):
    """
    Add Williams %R calculated from the daily 'close' values.
    LOGIC: for every row dated on or after dt_target, with highest/lowest being the max/min
    'close' of the window [row date - duration days, row date], Williams %R is:
        np.nan if (highest == lowest)
        else ((highest - dayClose) / (highest - lowest)) * (-100)
    Only 'close' is used; the daily 'high'/'low' columns are not consulted.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order)
        with at least a 'close' column
    :param dt_target: datetime.datetime marking the first date the value is calculated for
    :param duration: window length in calendar days as int
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'williamsr_<duration>' added; rows before dt_target hold NaN
    """
    column = 'williamsr_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)

    williamsr_by_day = {}
    for day in dfrm.loc[target_day:].index:
        # Slice the window once instead of once per min()/max() call
        window_closes = dfrm.loc[day - window: day, 'close']
        lowest, highest = window_closes.min(), window_closes.max()
        if highest == lowest:
            williamsr_by_day[day] = np.nan  # flat window: the ratio is undefined
        else:
            williamsr_by_day[day] = (-100) * (highest - dfrm.loc[day, 'close']) / (highest - lowest)

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [williamsr_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [121]:
def generate_accumulation_dist(dfrm, dt_target, duration):
    """
    Add a windowed accumulation/distribution value.
    https://www.tradingview.com/support/solutions/43000501770-accumulation-distribution-adl/
    Reference formula: ((Close - Low) - (High - Close)) / (High - Low) * Period Volume
    As implemented here, for every row dated on or after dt_target and the window
    [row date - duration days, row date]: Low/High are the min/max 'close' of the window
    (the daily 'low'/'high' columns are not consulted) and Period Volume is the mean 'volume'
    of the window. The value is np.nan when High == Low.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order)
        with at least 'close' and 'volume' columns
    :param dt_target: datetime.datetime marking the first date the value is calculated for
    :param duration: window length in calendar days as int
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'accu_dist_<duration>' added; rows before dt_target hold NaN
    """
    column = 'accu_dist_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)

    accu_dist_by_day = {}
    for day in dfrm.loc[target_day:].index:
        # Slice the window once instead of once per min()/max() call
        window_closes = dfrm.loc[day - window: day, 'close']
        lowest, highest = window_closes.min(), window_closes.max()
        if highest == lowest:
            accu_dist_by_day[day] = np.nan  # flat window: the ratio is undefined
            continue
        day_close = dfrm.loc[day, 'close']
        mean_volume = dfrm.loc[day - window: day, 'volume'].mean()
        accu_dist_by_day[day] = (mean_volume * ((day_close - lowest) - (highest - day_close))
                                 / (highest - lowest))

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [accu_dist_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [122]:
def generate_bollinger_bands(dfrm, dt_target, duration):
    """
    Add upper and lower Bollinger bands.
    For every row dated on or after dt_target:
        upper = mean_<duration> + 2 * stddev_<duration>
        lower = mean_<duration> - 2 * stddev_<duration>
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order) that
        already has the 'mean_<duration>' and 'stddev_<duration>' columns
        (see generate_mean and generate_std_dev)
    :param dt_target: datetime.datetime marking the first date the bands are calculated for
    :param duration: window length in calendar days as int; selects the source columns
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        columns 'bollingerUpper_<duration>' and 'bollingerLower_<duration>' added;
        rows before dt_target hold NaN in both
    """
    suffix = str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)

    target_rows = dfrm.loc[target_day:]
    band_width = 2 * target_rows['stddev_' + suffix]
    upper_by_day = dict(zip(target_rows.index, target_rows['mean_' + suffix] + band_width))
    lower_by_day = dict(zip(target_rows.index, target_rows['mean_' + suffix] - band_width))

    # Work on a copy so the new columns are never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned['bollingerUpper_' + suffix] = [upper_by_day.get(day, np.nan)
                                                 for day in dfrm_returned.index]
    dfrm_returned['bollingerLower_' + suffix] = [lower_by_day.get(day, np.nan)
                                                 for day in dfrm_returned.index]
    return dfrm_returned

In [123]:
"""
def generateRelativeStrengthIndex(dfrm, dt_target, duration):
    # https://www.fidelity.com/learning-center/trading-investing/technical-analysis/technical-indicator-guide/RSI#:~:text=Description,and%20oversold%20when%20below%2030.
    # https://www.investopedia.com/terms/r/rsi.asp
    dt_start = dt_target - timedelta(days=duration)
    # RSI = 100 – [100 / ( 1 + (Average of Upward Price Change / Average of Downward Price Change ) ) ]
    gains = [ closing_value 
             for date in dfrm[datetime.date(dt_target):].index ]
        
    losses = [
             for date in dfrm[datetime.date(dt_target):].index ]
    
    dfrm_reduced = dfrm[datetime.date(dt_target):]
    dfrm_reduced['forceIndex_'+str(duration)] = force_index
    
    dfrm_returned = dfrm[datetime.date(dt_target) - timedelta(days=duration):]
    dfrm_returned['forceIndex_'+str(duration)] = [ np.nan if date not in dfrm_reduced.index
                                             else dfrm_reduced.loc[date, 'forceIndex_'+str(duration)] 
                                            for date in dfrm_returned.index ]
    return dfrm_returned
"""

"\ndef generateRelativeStrengthIndex(dfrm, dt_target, duration):\n    # https://www.fidelity.com/learning-center/trading-investing/technical-analysis/technical-indicator-guide/RSI#:~:text=Description,and%20oversold%20when%20below%2030.\n    # https://www.investopedia.com/terms/r/rsi.asp\n    dt_start = dt_target - timedelta(days=duration)\n    # RSI = 100 – [100 / ( 1 + (Average of Upward Price Change / Average of Downward Price Change ) ) ]\n    gains = [ closing_value \n             for date in dfrm[datetime.date(dt_target):].index ]\n        \n    losses = [\n             for date in dfrm[datetime.date(dt_target):].index ]\n    \n    dfrm_reduced = dfrm[datetime.date(dt_target):]\n    dfrm_reduced['forceIndex_'+str(duration)] = force_index\n    \n    dfrm_returned = dfrm[datetime.date(dt_target) - timedelta(days=duration):]\n    dfrm_returned['forceIndex_'+str(duration)] = [ np.nan if date not in dfrm_reduced.index\n                                             else dfrm_reduced.loc[

In [124]:
# Quick check: show the CSV path generated for one symbol (the path is only built, not opened).
filePath, _ = generate_file_path("LLY")
filePath

'../../../../workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/lly.csv'

In [125]:
def generate_ema(dfrm, dt_target, duration):
    """
    Add an exponential moving average (EMA) of the daily 'close' values.
    The EMA is seeded with 'mean_<duration>' of the first row dated on or after dt_target
    (so a dt_target that falls on a holiday/weekend seeds from the next available day) and
    then follows the recursion
        EMA(t) = close(t) * k + EMA(t-1) * (1 - k),   k = 2 / (duration + 1)
    where EMA(t-1) is the value of the previous row, not the seed.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order)
        with 'close' and 'mean_<duration>' columns (see generate_mean)
    :param dt_target: datetime.datetime marking the first date the EMA is calculated for
    :param duration: span of the EMA as int; also the look-back, in calendar days, of the rows returned
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'ema_<duration>' (rounded to 2 decimals) added; rows before the seed row hold
        NaN. Returns None, after printing a message, when there is no row on or after dt_target
    """
    column = 'ema_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)
    # Full-precision multiplier; rounding it to 2 decimals distorts long spans noticeably
    ema_multiplier = 2 / (duration + 1)

    target_rows = dfrm.loc[target_day:]
    if target_rows.empty:
        print('Cannot seed EMA: no data on or after start date: {}'.format(dt_target))
        return None

    # Use mean for the first available day as the seed for the first EMA value
    emas = [target_rows['mean_' + str(duration)].iloc[0]]
    # An explicit loop is required: each value depends on the one appended just before it
    for close in target_rows['close'].iloc[1:]:
        emas.append(close * ema_multiplier + emas[-1] * (1 - ema_multiplier))
    ema_by_day = dict(zip(target_rows.index, (round(ema, 2) for ema in emas)))

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [ema_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [126]:
def generate_force_index(dfrm, dt_target, duration):
    """
    Add a force index smoothed with a simple moving average.
    https://www.investopedia.com/terms/f/force-index.asp
    Used SMA rather than EMA for the smoothing.
    Steps:
      1. daily force = (todayClose - previousRowClose) * todayVolume. The first row of dfrm has
         no previous row, so its daily force is NaN (it does not wrap around to the last row).
      2. For rows dated on or after dt_target but before (dt_target + duration days) the force
         index is 0: the smoothing window is not filled yet.
      3. From (dt_target + duration days) on, the force index is the mean of the daily force
         over [row date - duration days, row date]. That date itself does not have to be a
         trading day.
    :param dfrm: DataFrame indexed by datetime.date values (expected in ascending order)
        with at least 'close' and 'volume' columns
    :param dt_target: datetime.datetime marking the first date the value is calculated for
    :param duration: window length in calendar days as int
    :return dfrm_returned: copy of the rows dated on or after (dt_target - duration days) with
        column 'forceIndex_<duration>' added; rows before dt_target hold NaN
    """
    column = 'forceIndex_' + str(duration)
    window = timedelta(days=duration)
    # dt_target is datetime.datetime type but dfrm has datetime.date objects as index
    target_day = datetime.date(dt_target)
    warmup_end = target_day + window

    # diff() is taken on the full frame so the first target row can still see the prior close
    daily_force = (dfrm['close'].diff() * dfrm['volume']).loc[target_day:]

    force_by_day = {day: 0 if day < warmup_end else daily_force.loc[day - window: day].mean()
                    for day in daily_force.index}

    # Work on a copy so the new column is never written into a slice of the caller's frame
    dfrm_returned = dfrm.loc[target_day - window:].copy()
    dfrm_returned[column] = [force_by_day.get(day, np.nan) for day in dfrm_returned.index]
    return dfrm_returned

In [127]:
def get_last_date_for_moving_avgs_by_symbol(symbol):
    """
    Find the most recent date already stored in the symbol's technical-indicator CSV.
    :param symbol: ticker whose CSV (see generate_file_path) is inspected
    :return: latest value of the CSV's 'date' column as a pandas Timestamp, or the baseline
        datetime 1999-12-31 when the CSV is missing, empty, or holds no dated rows
    """
    dt_baseline = datetime.strptime("1999-12-31", '%Y-%m-%d')

    filePath, _ = generate_file_path(symbol)
    print(filePath)
    if filePath is None:
        return dt_baseline

    try:
        dfrm = pd.read_csv(filePath)
    except FileNotFoundError:
        print('Exception reading input data for symbol {}. Generating metadata starting from baseline date.'.format(symbol.upper()))
        return dt_baseline
    except EmptyDataError:
        print(f'No technical indicators found for {symbol.upper()}. Generating metadata starting from baseline date.')
        print('Generating all records.')
        return dt_baseline

    # max() gives the latest date no matter how the rows are ordered in the file
    last_date = pd.to_datetime(dfrm['date']).max()
    if pd.isna(last_date):
        # Header-only file (or no parsable dates): treat it like an empty file
        print(f'No technical indicators found for {symbol.upper()}. Generating metadata starting from baseline date.')
        print('Generating all records.')
        return dt_baseline
    return last_date


In [128]:
if __name__ == "__main__":
    # QUERY=True takes the symbol list from SQL (S&P 500 symbols that have no
    # equities_historic_data row dated 2019-12-20); QUERY=False uses the manual list below.
    QUERY = True
    EXPORT = True  # write/append every symbol's indicators to its CSV
    EXPERIMENTAL = False  # not read in this cell

    macd_durations = [14, 30]  # not read in this cell
    if QUERY:
        query = ('select distinct symbol from industrybackground where SnP500 like 1 and symbol not '
                 'in (select distinct symbol from equities_historic_data where date like "2019-12-20")')

        # Fetch while the connection is still open
        with ENGINE.connect() as conn:
            symbol_rows = conn.execute(text(query)).mappings().all()
        symbols = [row['symbol'] for row in symbol_rows]
    else:
        symbols = ["PFE", "MRK", "JNJ", "REGN", "NVO", "AAPL", "LLY", "AVGO", "FCX", "FSLR", "NKE"]

    for symbol in symbols:
        print('Generating data for technical indicators for symbol: {}'.format(symbol))
        dt_returned = get_last_date_for_moving_avgs_by_symbol(symbol)
        dt_target = dt_returned + timedelta(days=1)  # Start with next day

        print('Generating data since {}'.format(dt_target))
        # The index is datetime.date and not string or datetime.datetime object.
        dfrm = retrieve_cataloged_market_data(symbol, TABLE_EQUITIES_DATA, '2000-01-01')
        if len(dfrm) == 0:
            print('No market data found for symbol: {}'.format(symbol))
            continue
        base_dfrm = dfrm.loc[:, ['symbol', 'close', 'volume']]

        dfrm_final = pd.DataFrame()
        for duration in DURATIONS:
            # Order matters: the percentile-of-stddev and Bollinger steps read the
            # mean_/stddev_ columns added by the first two steps.
            dfrm_running = generate_mean(base_dfrm, dt_target, duration)
            dfrm_running = generate_std_dev(dfrm_running, dt_target, duration)
            dfrm_running = generate_pcntle_std_devs(dfrm_running, dt_target, duration)
            dfrm_running = generate_pcntle_volume(dfrm_running, dt_target, duration)
            dfrm_running = generate_pcntle_closing(dfrm_running, dt_target, duration)
            dfrm_running = generate_stcstc_oscillator(dfrm_running, dt_target, duration)
            # generate_ema, generate_force_index and generate_williams_r are intentionally not run
            # (Williams %R is very similar to the stochastic oscillator; keep one).
            dfrm_running = generate_accumulation_dist(dfrm_running, dt_target, duration)
            dfrm_running = generate_bollinger_bands(dfrm_running, dt_target, duration)

            # Keep rows with non-NaN values (drops the look-back rows before dt_target)
            dfrm_running = dfrm_running[dfrm_running['mean_' + str(duration)].notna()]
            if len(dfrm_final) == 0:
                dfrm_final = dfrm_running.copy()
            else:
                columns = dfrm_running.columns.difference(dfrm_final.columns)
                dfrm_final = pd.merge(dfrm_final, dfrm_running[columns], left_index=True, right_index=True)

        # Export inside the symbol loop so that every symbol is saved, not only the last one
        if EXPORT and len(dfrm_final) > 0:
            filePath, _ = generate_file_path(symbol)
            print('Saving data to {}'.format(filePath))
            dfrm_existing = None
            if os.path.exists(filePath):
                try:
                    dfrm_existing = pd.read_csv(filePath)
                except EmptyDataError:
                    dfrm_existing = None  # zero-byte file: treat it as if it did not exist

            if dfrm_existing is not None and len(dfrm_existing) > 0:
                # Append data to existing CSV
                dfrm_existing.set_index('date', inplace=True)
                pd.concat([dfrm_existing, dfrm_final]).to_csv(filePath, sep=',')
                print(f'Appended data to {filePath}')
            else:
                # No usable existing data: write the new rows instead of an empty frame
                dfrm_final.to_csv(filePath, sep=',')
                print(f'Added new file at {filePath}')




Generating data for technical indicators for symbol: A
../../../../workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/a.csv
Exception reading input data for symbol A. Generating metadata starting from baseline date.
Generating data since 2000-01-01 00:00:00


KeyboardInterrupt: 