In [96]:
import os
import os.path
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from pandas.errors import EmptyDataError
from scipy import stats
from sqlalchemy import create_engine, text

# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None
import warnings

# Hide every FutureWarning emitted by any library used in this notebook.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Display floats with two places after the decimal point.
pd.set_option("display.precision", 2)

from IPython.core.interactiveshell import InteractiveShell

# Echo the value of every top-level expression in a cell, not just the last one.
InteractiveShell.ast_node_interactivity = "all"

### TODO:
- Work on the seed value in the function generate_ema. The seed is currently taken from the very first date, 1/1/2020, which was a holiday. Need to get the value for the first business day instead.
- Take care of the warnings. Currently they are suppressed. Take out the option "pd.options.mode.chained_assignment = None" and 
    "warnings.simplefilter(action='ignore', category=FutureWarning)"
- To generate records for the first time, 'dt_target' is manually set to a certain date. The 'dfrm' passed to generate_mean() is the full DataFrame of whatever SQL has for that symbol. It is possible that the manually set dt_target is older than the oldest record in SQL for a symbol. Need to handle that scenario better.

In [97]:
# Directory where the per-symbol technical-indicator CSV files are read from and written to.
# NOTE(review): this is an absolute, machine-specific path; consider making it configurable.
BASE_DIR = '/Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators'
# Name of the SQL table queried for daily market data.
TABLE_EQUITIES_DATA = 'equities_historic_data'
DURATIONS = (14, 30, 90, 200)  # Roughly for bi-weekly, monthly, quarterly, and 200 days running averages

In [98]:
# Read the database name and credentials from the environment so that no
# secrets are stored in the notebook itself.
try:
    DB = os.environ["DB"]
    DB_USER = os.environ["DB_USER"]
    DB_PWD = os.environ["DB_PWD"]
except KeyError as err:
    # All three variables are required; report the one that is actually missing
    # and keep the original KeyError chained for debugging.
    raise Exception(
        "Required environment variables DB, DB_USER and DB_PWD not set (missing: {})".format(err.args[0])
    ) from err
# NOTE(review): the password is interpolated verbatim; a password containing
# characters such as '@' or '/' must already be URL-encoded in DB_PWD.
DB_URL = 'mysql+mysqlconnector://' + DB_USER + ':' + DB_PWD + '@localhost/' + DB
ENGINE = create_engine(DB_URL)

In [101]:
# def executeQuery(connection, query):
#     dfrm = pd.DataFrame()
#     try:
#         dfrm = pd.read_sql(query, connection)
#     except ProgrammingError as err:
#         print(err)
#     return dfrm

In [102]:
def retrieve_cataloged_market_data(symbol, tblName, startDate, endDate=None):
    """
    Load daily market data for one symbol from the database.

    :param symbol: ticker; matched with SQL ``like`` so SQL wildcards are honoured
    :param tblName: name of the table to read from (letters, digits, '_', '.', '$' only)
    :param startDate: exclusive lower bound for ``date``, as a string
    :param endDate: exclusive upper bound for ``date``, as a string. Defaults to
        today's date (``%Y%m%d``), evaluated at call time.
    :return: DataFrame indexed by ``date`` with the columns symbol, open, high, low,
        close, netChange, pcntChange and volume. Empty (but with those columns) when
        no rows match.
    :raises ValueError: if ``tblName`` is not a plain table identifier
    """
    if endDate is None:
        # Computed per call: a default in the signature would be frozen at the
        # moment the cell defining this function was executed.
        endDate = datetime.strftime(datetime.today(), '%Y%m%d')

    # A table name cannot be sent as a bound parameter, so validate it instead.
    if not tblName or not all(ch.isalnum() or ch in '_.$' for ch in tblName):
        raise ValueError('Invalid table name: {!r}'.format(tblName))

    columns = ['date', 'symbol', 'open', 'high', 'low', 'close', 'netChange', 'pcntChange', 'volume']
    query = text(
        "select " + ", ".join(columns) + " from " + tblName +
        " where symbol like :symbol and date > :start_date and date < :end_date"
    )
    params = {'symbol': symbol, 'start_date': startDate, 'end_date': endDate}

    with ENGINE.connect() as conn:
        # Materialise the rows while the connection is still open.
        rows = [dict(row) for row in conn.execute(query, params).mappings().all()]

    # Without rows pandas cannot infer the columns, and set_index('date') would fail.
    dfrm = pd.DataFrame(rows) if rows else pd.DataFrame(columns=columns)
    dfrm.set_index('date', inplace=True)
    return dfrm

In [103]:
def generate_file_path(symbol, date=None):
    """
    Generates a file path for a given symbol
    to retrieve market data from
    :param symbol: ticker
    :param date: optional date (datetime.date or datetime.datetime) embedded in the
        file name as YYYYMMDD
    :return: tuple of (file path under BASE_DIR, file name). The path is only
        constructed; the file is not checked for existence.
    """
    if date is not None:
        # Calling strftime on the instance works for both date and datetime objects.
        file_name = symbol.lower() + '_' + date.strftime('%Y%m%d') + '.csv'
    else:
        file_name = symbol.lower() + '.csv'
    # os.path.join always returns a string, so there is no "not found" case to report here.
    file_path = os.path.join(BASE_DIR, file_name)
    return file_path, file_name

In [104]:
def generate_mean(dfrm, dt_target, duration):
    """
    generate means for daily 'close' values
    :param dfrm: A DataFrame that must contain date as index (sorted, datetime.date
        objects) and 'close' values minimally among others
    :param dt_target: first date for which the mean is calculated; either a
        datetime.datetime (including pandas Timestamp) or a datetime.date
    :param duration: # of days as int that denote bi-weekly, monthly, quarterly... cycles
    :return dfrm_returned: copy of the rows from (dt_target - duration) onwards with
        a 'mean_<duration>' column added; rows before dt_target hold NaN there
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    window = timedelta(days=duration)
    column = 'mean_' + str(duration)

    # For each date from dt_target on: mean of 'close' over [date - duration, date].
    target_index = dfrm.loc[target_date:].index
    means = [dfrm.loc[date - window: date, 'close'].mean() for date in target_index]

    # The leading 'duration' days are returned as well because downstream
    # calculations need them; they have no mean of their own (NaN via reindex).
    dfrm_returned = dfrm.loc[target_date - window:].copy()
    dfrm_returned[column] = pd.Series(means, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [105]:
def generate_std_dev(dfrm, dt_target, duration):
    """
    Generate rolling standard deviations of the daily 'close' values.

    :param dfrm: DataFrame indexed by sorted datetime.date objects with a 'close' column
    :param dt_target: first date to calculate for; datetime.datetime or datetime.date
    :param duration: look-back window in calendar days
    :return: copy of the rows from (dt_target - duration) onwards with a
        'stddev_<duration>' column; rows before dt_target hold NaN there
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    window = timedelta(days=duration)
    column = 'stddev_' + str(duration)

    # For each date from dt_target on: std of 'close' over [date - duration, date].
    target_index = dfrm.loc[target_date:].index
    stddevs = [dfrm.loc[date - window: date, 'close'].std() for date in target_index]

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    dfrm_returned = dfrm.loc[target_date - window:].copy()
    dfrm_returned[column] = pd.Series(stddevs, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [106]:
def generate_pcntle_closing(dfrm, dt_target, duration):
    """
    Generate the percentile rank of each day's 'close' within its look-back window.

    :param dfrm: DataFrame indexed by sorted datetime.date objects with a 'close' column
    :param dt_target: first date to calculate for; datetime.datetime or datetime.date
    :param duration: look-back window in calendar days
    :return: copy of the rows from (dt_target - duration) onwards with a
        'pcntleClosing_<duration>' column; rows before dt_target hold NaN there
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    window = timedelta(days=duration)
    column = 'pcntleClosing_' + str(duration)

    # scipy.stats.percentileofscore: give the entire rolling range and calculate
    # the percentile of the last value in that range.
    target_index = dfrm.loc[target_date:].index
    pcntles = [stats.percentileofscore(dfrm.loc[date - window: date, 'close'], dfrm.loc[date, 'close'])
               for date in target_index]

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    dfrm_returned = dfrm.loc[target_date - window:].copy()
    dfrm_returned[column] = pd.Series(pcntles, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [107]:
def generate_pcntle_volume(dfrm, dt_target, duration):
    """
    Generate the percentile rank of each day's 'volume' within its look-back window.

    :param dfrm: DataFrame indexed by sorted datetime.date objects with a 'volume' column
    :param dt_target: first date to calculate for; datetime.datetime or datetime.date
    :param duration: look-back window in calendar days
    :return: copy of the rows from (dt_target - duration) onwards with a
        'pcntleVolume_<duration>' column; rows before dt_target hold NaN there
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    window = timedelta(days=duration)
    column = 'pcntleVolume_' + str(duration)

    # scipy.stats.percentileofscore: give the entire rolling range and calculate
    # the percentile of the last value in that range.
    target_index = dfrm.loc[target_date:].index
    pcntles = [stats.percentileofscore(dfrm.loc[date - window: date, 'volume'], dfrm.loc[date, 'volume'])
               for date in target_index]

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    dfrm_returned = dfrm.loc[target_date - window:].copy()
    dfrm_returned[column] = pd.Series(pcntles, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [108]:
def generate_pcntle_std_devs(dfrm, dt_target, duration):
    """
    Generate the percentile rank of each day's standard deviation within its
    look-back window.

    NOTE(review): the 'stddev_<duration>' column produced by generate_std_dev is
    NaN before dt_target. While such rows fall inside the look-back window,
    percentileofscore receives NaN input and (with SciPy's default
    nan_policy='propagate') yields NaN - confirm whether that is intended.

    :param dfrm: DataFrame indexed by sorted datetime.date objects that already
        contains a 'stddev_<duration>' column
    :param dt_target: first date to calculate for; datetime.datetime or datetime.date
    :param duration: look-back window in calendar days
    :return: copy of the rows from (dt_target - duration) onwards with a
        'pcntleStdDevs_<duration>' column; rows before dt_target hold NaN there
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    window = timedelta(days=duration)
    source = 'stddev_' + str(duration)
    column = 'pcntleStdDevs_' + str(duration)

    # scipy.stats.percentileofscore: give the entire rolling range and calculate
    # the percentile of the last value in that range.
    target_index = dfrm.loc[target_date:].index
    pcntles = [stats.percentileofscore(dfrm.loc[date - window: date, source], dfrm.loc[date, source])
               for date in target_index]

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    dfrm_returned = dfrm.loc[target_date - window:].copy()
    dfrm_returned[column] = pd.Series(pcntles, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [109]:
def generate_stcstc_oscillator(dfrm, dt_target, duration):
    """
    Generate a stochastic oscillator from daily 'close' values.

    LOGIC: with lowest/highest taken over 'close' in [date - duration, date],
    the oscillator is:
        NaN if (highest == lowest)
        else ((dayClose - lowest) / (highest - lowest)) * (100)

    :param dfrm: DataFrame indexed by sorted datetime.date objects with a 'close' column
    :param dt_target: first date to calculate for; datetime.datetime or datetime.date
    :param duration: look-back window in calendar days
    :return: copy of the rows from (dt_target - duration) onwards with an
        'oscillator_<duration>' column; rows before dt_target hold NaN there
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    window = timedelta(days=duration)
    column = 'oscillator_' + str(duration)

    target_index = dfrm.loc[target_date:].index
    oscillators = []
    for date in target_index:
        # Slice the window once per date instead of once per term of the formula.
        closes = dfrm.loc[date - window: date, 'close']
        lowest, highest = closes.min(), closes.max()
        if highest == lowest:
            oscillators.append(np.nan)  # flat window: the ratio is undefined
        else:
            oscillators.append(100 * (dfrm.loc[date, 'close'] - lowest) / (highest - lowest))

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    dfrm_returned = dfrm.loc[target_date - window:].copy()
    dfrm_returned[column] = pd.Series(oscillators, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [110]:
def generate_williams_r(dfrm, dt_target, duration):
    """
    Generate Williams %R from daily 'close' values.

    LOGIC: with lowest/highest taken over 'close' in [date - duration, date],
    Williams %R is:
        NaN if (highest == lowest)
        else ((highest - dayClose) / (highest - lowest)) * (-100)

    :param dfrm: DataFrame indexed by sorted datetime.date objects with a 'close' column
    :param dt_target: first date to calculate for; datetime.datetime or datetime.date
    :param duration: look-back window in calendar days
    :return: copy of the rows from (dt_target - duration) onwards with a
        'williamsr_<duration>' column; rows before dt_target hold NaN there
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    window = timedelta(days=duration)
    column = 'williamsr_' + str(duration)

    target_index = dfrm.loc[target_date:].index
    williamsr = []
    for date in target_index:
        # Slice the window once per date instead of once per term of the formula.
        closes = dfrm.loc[date - window: date, 'close']
        lowest, highest = closes.min(), closes.max()
        if highest == lowest:
            williamsr.append(np.nan)  # flat window: the ratio is undefined
        else:
            williamsr.append((-100) * (highest - dfrm.loc[date, 'close']) / (highest - lowest))

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    dfrm_returned = dfrm.loc[target_date - window:].copy()
    dfrm_returned[column] = pd.Series(williamsr, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [111]:
def generate_accumulation_dist(dfrm, dt_target, duration):
    """
    Generate an accumulation/distribution style indicator.

    Reference: https://www.tradingview.com/support/solutions/43000501770-accumulation-distribution-adl/
        Accumulation/Distribution = ((Close - Low) - (High - Close)) / (High - Low) * Period Volume
    Here Low/High are the lowest/highest 'close' in [date - duration, date] and the
    period volume is the mean 'volume' over that same window. The value is NaN
    when the window is flat (highest == lowest).

    :param dfrm: DataFrame indexed by sorted datetime.date objects with 'close'
        and 'volume' columns
    :param dt_target: first date to calculate for; datetime.datetime or datetime.date
    :param duration: look-back window in calendar days
    :return: copy of the rows from (dt_target - duration) onwards with an
        'accu_dist_<duration>' column; rows before dt_target hold NaN there
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    window = timedelta(days=duration)
    column = 'accu_dist_' + str(duration)

    target_index = dfrm.loc[target_date:].index
    accu_dist = []
    for date in target_index:
        # Slice the window once per date instead of once per term of the formula.
        closes = dfrm.loc[date - window: date, 'close']
        lowest, highest = closes.min(), closes.max()
        if highest == lowest:
            accu_dist.append(np.nan)  # flat window: the ratio is undefined
            continue
        day_close = dfrm.loc[date, 'close']
        mean_volume = dfrm.loc[date - window: date, 'volume'].mean()
        accu_dist.append(mean_volume * ((day_close - lowest) - (highest - day_close)) / (highest - lowest))

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    dfrm_returned = dfrm.loc[target_date - window:].copy()
    dfrm_returned[column] = pd.Series(accu_dist, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [112]:
def generate_bollinger_bands(dfrm, dt_target, duration):
    """
    Generate Bollinger bands: mean +/- two standard deviations.

    :param dfrm: DataFrame indexed by sorted datetime.date objects that already
        contains the 'mean_<duration>' and 'stddev_<duration>' columns
    :param dt_target: first date to calculate for; datetime.datetime or datetime.date
    :param duration: look-back window in calendar days (selects the source columns)
    :return: copy of the rows from (dt_target - duration) onwards with
        'bollingerUpper_<duration>' and 'bollingerLower_<duration>' columns;
        rows before dt_target hold NaN there
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    suffix = str(duration)

    # Bands are only calculated from dt_target on.
    tail = dfrm.loc[target_date:]
    band_width = 2 * tail['stddev_' + suffix]
    bb_upper = tail['mean_' + suffix] + band_width
    bb_lower = tail['mean_' + suffix] - band_width

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    dfrm_returned = dfrm.loc[target_date - timedelta(days=duration):].copy()
    dfrm_returned['bollingerUpper_' + suffix] = bb_upper.reindex(dfrm_returned.index)
    dfrm_returned['bollingerLower_' + suffix] = bb_lower.reindex(dfrm_returned.index)
    return dfrm_returned

In [113]:
"""
def generateRelativeStrengthIndex(dfrm, dt_target, duration):
    # https://www.fidelity.com/learning-center/trading-investing/technical-analysis/technical-indicator-guide/RSI#:~:text=Description,and%20oversold%20when%20below%2030.
    # https://www.investopedia.com/terms/r/rsi.asp
    dt_start = dt_target - timedelta(days=duration)
    # RSI = 100 – [100 / ( 1 + (Average of Upward Price Change / Average of Downward Price Change ) ) ]
    gains = [ closing_value 
             for date in dfrm[datetime.date(dt_target):].index ]
        
    losses = [
             for date in dfrm[datetime.date(dt_target):].index ]
    
    dfrm_reduced = dfrm[datetime.date(dt_target):]
    dfrm_reduced['forceIndex_'+str(duration)] = force_index
    
    dfrm_returned = dfrm[datetime.date(dt_target) - timedelta(days=duration):]
    dfrm_returned['forceIndex_'+str(duration)] = [ np.NaN if date not in dfrm_reduced.index
                                             else dfrm_reduced.loc[date, 'forceIndex_'+str(duration)] 
                                            for date in dfrm_returned.index ]
    return dfrm_returned
"""

"\ndef generateRelativeStrengthIndex(dfrm, dtTarget, duration):\n    # https://www.fidelity.com/learning-center/trading-investing/technical-analysis/technical-indicator-guide/RSI#:~:text=Description,and%20oversold%20when%20below%2030.\n    # https://www.investopedia.com/terms/r/rsi.asp\n    dtStart = dtTarget - timedelta(days=duration)\n    # RSI = 100 – [100 / ( 1 + (Average of Upward Price Change / Average of Downward Price Change ) ) ]\n    gains = [ closing_value \n             for date in dfrm[datetime.date(dtTarget):].index ]\n        \n    losses = [\n             for date in dfrm[datetime.date(dtTarget):].index ]\n    \n    dfrm_reduced = dfrm[datetime.date(dtTarget):]\n    dfrm_reduced['forceIndex_'+str(duration)] = force_index\n    \n    dfrm_returned = dfrm[datetime.date(dtTarget) - timedelta(days=duration):]\n    dfrm_returned['forceIndex_'+str(duration)] = [ np.NaN if date not in dfrm_reduced.index\n                                             else dfrm_reduced.loc[date, '

In [114]:
# Ad-hoc check: build the CSV path for one symbol and echo it as the cell output.
filePath, _ = generate_file_path("LLY")
filePath

'/Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/lly.csv'

In [115]:
def generate_ema(dfrm, dt_target, duration):
    """
    Generate Exponential Moving Averages (EMA) of the daily 'close' values.

    The EMA is seeded with the 'mean_<duration>' value on dt_target and then
    follows the recurrence
        EMA(t) = close(t) * k + EMA(t-1) * (1 - k),  with k = 2 / (duration + 1)
    Results are rounded to two decimals.

    :param dfrm: DataFrame indexed by sorted datetime.date objects that contains
        'close' and 'mean_<duration>' columns
    :param dt_target: date of the seed value; datetime.datetime or datetime.date.
        A row for this date must exist in dfrm.
    :param duration: EMA span in days
    :return: copy of the rows from (dt_target - duration) onwards with an
        'ema_<duration>' column (NaN before dt_target), or None when dfrm has no
        row for dt_target
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    column = 'ema_' + str(duration)
    # Keep the smoothing factor unrounded; rounding it to two decimals distorts it
    # noticeably (e.g. 0.13 instead of 0.1333 for a 14 day span).
    ema_multiplier = 2 / (duration + 1)

    # Use the mean for the target day as the seed for the first EMA value.
    try:
        seed = dfrm.loc[target_date:target_date, 'mean_' + str(duration)].iloc[0]
    except IndexError:
        print('IndexError: Validate that data exists for start date: {}'.format(dt_target))
        return None

    # Each value must be built from the previous EMA, so this has to be a
    # running loop; a comprehension would only ever see the seed.
    emas = [seed]
    for date in dfrm.loc[target_date + timedelta(days=1):].index:
        emas.append(dfrm.loc[date, 'close'] * ema_multiplier + emas[-1] * (1 - ema_multiplier))
    emas = [round(ema, 2) for ema in emas]

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    target_index = dfrm.loc[target_date:].index
    dfrm_returned = dfrm.loc[target_date - timedelta(days=duration):].copy()
    dfrm_returned[column] = pd.Series(emas, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [116]:
def generate_force_index(dfrm, dt_target, duration):
    """
    Generate the force index, smoothed with a simple (not exponential) moving average.

    Reference: https://www.investopedia.com/terms/f/force-index.asp

    First calculate the daily force, (todayClose - lastClose) * volume, from
    dt_target through the end. The force index is the mean of that daily force
    over [date - duration, date]. Until 'duration' calendar days of daily force
    exist (dates before dt_target + duration) the force index is reported as 0.
    Rows before dt_target hold NaN.

    :param dfrm: DataFrame indexed by sorted datetime.date objects with 'close'
        and 'volume' columns
    :param dt_target: first date to calculate for; datetime.datetime or datetime.date
    :param duration: smoothing window in calendar days
    :return: copy of the rows from (dt_target - duration) onwards with a
        'forceIndex_<duration>' column
    """
    # The index holds datetime.date objects, so a datetime must be reduced to its date.
    target_date = dt_target.date() if isinstance(dt_target, datetime) else dt_target
    window = timedelta(days=duration)
    column = 'forceIndex_' + str(duration)

    # Position of the first row on/after the target date. Counting instead of
    # get_loc() also works when the target date itself is not a trading day.
    start_pos = sum(1 for date in dfrm.index if date < target_date)
    closes = dfrm['close'].tolist()
    volumes = dfrm['volume'].tolist()
    # The very first row has no previous close; use NaN rather than letting
    # index -1 wrap around to the last row.
    force_dly_index = [(closes[pos] - closes[pos - 1]) * volumes[pos] if pos > 0 else np.nan
                       for pos in range(start_pos, len(dfrm))]

    target_index = dfrm.loc[target_date:].index
    daily_force = pd.Series(force_dly_index, index=target_index)

    # Number of leading rows that do not yet have a full window of daily force.
    # dt_target + duration may fall on a weekend/holiday, so count the rows
    # before it instead of looking that exact date up in the index.
    cutoff = target_date + window
    end_offset = sum(1 for date in target_index if date < cutoff)
    print("End offset value is: {}".format(end_offset))

    force_means_index = [0] * end_offset
    force_means_index.extend(daily_force.loc[date - window: date].mean()
                             for date in target_index[end_offset:])

    # Keep the leading 'duration' days for downstream calculations; reindex
    # leaves NaN on those rows.
    dfrm_returned = dfrm.loc[target_date - window:].copy()
    dfrm_returned[column] = pd.Series(force_means_index, index=target_index).reindex(dfrm_returned.index)
    return dfrm_returned

In [117]:
def get_last_date_for_moving_avgs_by_symbol(symbol):
    """
    Find the most recent date for which technical indicators were already saved.

    Reads the symbol's CSV file (path from generate_file_path) and returns the
    latest value of its 'date' column. When the file is missing, empty, or has
    no rows, a baseline of 1999-12-31 is returned so that all records get
    generated.

    :param symbol: ticker
    :return: datetime-compatible value (pandas Timestamp from the file, or a
        datetime.datetime baseline) so callers can treat both cases alike
    """
    # A datetime (not a bare date) keeps the return type consistent with the
    # Timestamp returned when the file exists.
    dt_baseline = datetime.strptime("1999-12-31", '%Y-%m-%d')

    filePath, _ = generate_file_path(symbol)
    print(filePath)
    try:
        dfrm = pd.read_csv(filePath)
    except FileNotFoundError as e:
        print('Exception reading input data for symbol {}.'.format(symbol))
        print(e)
        return dt_baseline
    except EmptyDataError:
        print(f'No technical indicators found for : {symbol.upper()}')
        print('Generating all records.')
        return dt_baseline

    if dfrm.empty:
        # Header-only file: nothing has been generated yet.
        print(f'No technical indicators found for : {symbol.upper()}')
        print('Generating all records.')
        return dt_baseline

    # Use the maximum rather than the last row so the result does not depend on
    # the row order inside the file.
    return pd.to_datetime(dfrm['date']).max()


In [118]:
if __name__ == "__main__":
    # Driver: for every symbol, calculate the technical indicators for each
    # duration since the last saved date and append them to the symbol's CSV file.
    QUERY = False  # Keep it disabled and manage the list manually. Enabling this will retrieve data for all of SnP500
    EXPORT = True
    EXPERIMENTAL = False

    macd_durations = [14, 30]
    if QUERY:  # The index is datetime.date and not string or datetime.datetime object.
        # query = 'select distinct symbol from industrybackground where SnP500 like 1'
        query = ('select distinct symbol from industrybackground where SnP500 like 1 and symbol not '
                 'in (select distinct symbol from equities_historic_data where date like "2019-12-20")')

        with ENGINE.connect() as conn:
            # Read the rows while the connection is still open.
            rows = conn.execute(text(query)).mappings().all()
        symbols = [row['symbol'] for row in rows]
    else:
        # Alternative symbol lists kept for manual switching:
        # symbols = ['BAC', 'JPM', 'C', 'MS', 'GS', 'WFC', 'FB', 'MSFT', 'GOOGL', 'NFLX', 'AAPL', 'AMZN', 'TSLA', 'MRK',
        #            'PFE', 'NKE', 'INTC', 'NVDA', 'ADM', 'TSM', 'MU', 'QCOM']
        # s2 = ['RE', 'ACGL', 'AXS', 'CB', 'THG', 'PGR', 'RNR', 'SIGI', 'TRV', 'WRB']
        # symbols.extend(s2)
        # symbols = ["USB", "TFC", "PNC", "BK", "STT", "AMTD"]
        # symbols = ('TSLA', 'MSFT')
        # symbols = ['BAC', 'JPM', 'C', 'MS', 'GS', 'WFC']
        # symbols = ["FSLR", "VRT", "COIN", "MRVL", "CRWD", "AVGO", "DDOG", "SMCI", "GOOGL", "AMZN", "SHAK", "APO", "DJT",
        #            "FCX", "LLY", "META"]
        # symbols = ["LLY", "META"]
        # symbols = ["VRT", "NVDA", "MRVL", "SMCI", "SHAK"]
        # symbols = ["SNOW", "PFE", "MRK", "JNJ", "REGN", "NVO", "AAPL"]
        # Must be a flat list of tickers: the loop below treats every element as one symbol.
        symbols = ["PFE", "MRK", "JNJ", "REGN", "NVO", "AAPL", "LLY", "AVGO", "FCX", "SBUX", "FSLR", "NKE"]

    for symbol in symbols:
        print('Generating data for technical indicators for symbol: {}'.format(symbol))
        dt_returned = get_last_date_for_moving_avgs_by_symbol(symbol)

        dt_target = dt_returned + timedelta(days=1)  # Start with next day
        if not isinstance(dt_target, datetime):
            # The generate_* functions call datetime.date(dt_target), which
            # requires a datetime instance rather than a bare date.
            dt_target = datetime.combine(dt_target, datetime.min.time())

        print('Generating data since {}'.format(dt_target))
        dfrm = retrieve_cataloged_market_data(symbol, TABLE_EQUITIES_DATA, '2000-01-01')
        if dfrm.empty:
            print('No market data found for symbol: {}'.format(symbol))
            continue
        base_dfrm = dfrm.loc[:, ['symbol', 'close', 'volume']]
        dfrm_final = pd.DataFrame()
        for duration in DURATIONS:
            dfrm_running = generate_mean(base_dfrm, dt_target, duration)
            dfrm_running = generate_std_dev(dfrm_running, dt_target, duration)
            # dfrm_running = generate_ema(dfrm_running, dt_target, duration)
            dfrm_running = generate_pcntle_std_devs(dfrm_running, dt_target, duration)
            dfrm_running = generate_pcntle_volume(dfrm_running, dt_target, duration)
            dfrm_running = generate_pcntle_closing(dfrm_running, dt_target, duration)
            dfrm_running = generate_stcstc_oscillator(dfrm_running, dt_target, duration)
            # dfrm_running = generate_force_index(dfrm_running, dt_target, duration)
            # dfrm_running = generate_williams_r(dfrm_running, dt_target, duration) # Very similar to Stcstc Oscillator. Keep one
            dfrm_running = generate_accumulation_dist(dfrm_running, dt_target, duration)
            dfrm_running = generate_bollinger_bands(dfrm_running, dt_target, duration)

            # Keep rows with non-NaN values, i.e. drop the leading look-back rows.
            dfrm_running = dfrm_running[dfrm_running['mean_' + str(duration)].notna()]
            if len(dfrm_final) == 0:
                dfrm_final = dfrm_running.copy()
            else:
                # Only bring over the columns this duration added.
                columns = dfrm_running.columns.difference(dfrm_final.columns)
                dfrm_final = pd.merge(dfrm_final, dfrm_running[columns], left_index=True, right_index=True)

        # Export inside the symbol loop so that every symbol is saved, not only the last one.
        if EXPORT and dfrm_final is not None and len(dfrm_final) > 0:
            filePath, _ = generate_file_path(symbol)
            print('Saving data to {}'.format(filePath))
            dfrm_existing = None
            if os.path.exists(filePath):
                try:
                    dfrm_existing = pd.read_csv(filePath)
                except EmptyDataError:
                    dfrm_existing = None  # zero-byte file: treat it like a missing file
            if dfrm_existing is not None and len(dfrm_existing) > 0:  # Append data to existing CSV
                print(dfrm_existing.tail(10))
                print(len(dfrm_existing))
                dfrm_existing.set_index('date', inplace=True)
                dfrm = pd.concat([dfrm_existing, dfrm_final])
                print(len(dfrm_final))
                print(len(dfrm))
                dfrm.to_csv(filePath, sep=',')
                print(f'Appended data to {filePath}')
            else:  # Directly write final dfrm to CSV file (also when the existing file has no rows)
                dfrm_final.to_csv(filePath, sep=',')
                print(f'Added new file at {filePath}')




Generating data for technical indicators for symbol: SNOW
/Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/snow.csv
Generating data since 2025-04-29 00:00:00


Unnamed: 0_level_0,symbol,close,volume,mean_200,stddev_200,pcntleStdDevs_200,pcntleVolume_200,pcntleClosing_200,oscillator_200,accu_dist_200,bollingerUpper_200,bollingerLower_200
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


Generating data for technical indicators for symbol: PFE
/Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/pfe.csv
Generating data since 2025-04-29 00:00:00


Unnamed: 0_level_0,symbol,close,volume,mean_200,stddev_200,pcntleStdDevs_200,pcntleVolume_200,pcntleClosing_200,oscillator_200,accu_dist_200,...,stddev_30,accu_dist_90,bollingerLower_90,bollingerUpper_90,mean_90,oscillator_90,pcntleClosing_90,pcntleStdDevs_90,pcntleVolume_90,stddev_90
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-04-29,PFE,23.79,98540187,25.95,1.72,,99.26,12.5,27.23,-8470000.0,...,1.05,-4980000.0,21.68,28.27,24.98,41.35,26.98,,98.41,1.65


Saving data to /Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/pfe.csv
            date symbol  close    volume  mean_200  stddev_200  \
6095  2025-04-14    PFE  22.12  61900899     26.44        1.62   
6096  2025-04-15    PFE  22.44  51814669     26.39        1.64   
6097  2025-04-16    PFE  22.04  37641162     26.34        1.67   
6098  2025-04-17    PFE  22.14  54073683     26.31        1.70   
6099  2025-04-21    PFE  22.04  32021006     26.22        1.71   
6100  2025-04-22    PFE  22.53  36052602     26.18        1.73   
6101  2025-04-23    PFE  22.39  50781008     26.14        1.75   
6102  2025-04-24    PFE  22.78  35009622     26.11        1.76   
6103  2025-04-25    PFE  22.92  35847676     26.09        1.78   
6104  2025-04-28    PFE  23.05  51892600     25.99        1.73   

      pcntleStdDevs_200  pcntleVolume_200  pcntleClosing_200  oscillator_200  \
6095              91.97             95.62               2.92            6.16  

Unnamed: 0_level_0,symbol,close,volume,mean_200,stddev_200,pcntleStdDevs_200,pcntleVolume_200,pcntleClosing_200,oscillator_200,accu_dist_200,...,stddev_30,accu_dist_90,bollingerLower_90,bollingerUpper_90,mean_90,oscillator_90,pcntleClosing_90,pcntleStdDevs_90,pcntleVolume_90,stddev_90
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-04-29,MRK,84.71,15979097,95.11,8.31,,89.71,14.71,23.52,-3260000.0,...,3.7,-2780000.0,75.51,100.47,87.99,35.36,31.75,,79.37,6.24


Saving data to /Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/mrk.csv
            date symbol  close    volume  mean_200  stddev_200  \
6095  2025-04-14    MRK  79.17  15796686     97.52        8.16   
6096  2025-04-15    MRK  78.39  16042488     97.26        8.21   
6097  2025-04-16    MRK  76.46  16551293     96.99        8.28   
6098  2025-04-17    MRK  78.00  21416485     96.85        8.41   
6099  2025-04-21    MRK  77.85  11009959     96.35        8.25   
6100  2025-04-22    MRK  78.97  12918169     96.12        8.29   
6101  2025-04-23    MRK  78.74  17935744     95.89        8.34   
6102  2025-04-24    MRK  79.84  18471542     95.77        8.42   
6103  2025-04-25    MRK  82.74  28377956     95.68        8.46   
6104  2025-04-28    MRK  83.19  14209214     95.29        8.35   

      pcntleStdDevs_200  pcntleVolume_200  pcntleClosing_200  oscillator_200  \
6095              19.71             94.16               2.19            4.36  

Unnamed: 0_level_0,symbol,close,volume,mean_200,stddev_200,pcntleStdDevs_200,pcntleVolume_200,pcntleClosing_200,oscillator_200,accu_dist_200,...,stddev_30,accu_dist_90,bollingerLower_90,bollingerUpper_90,mean_90,oscillator_90,pcntleClosing_90,pcntleStdDevs_90,pcntleVolume_90,stddev_90
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-04-29,JNJ,155.91,5246725,155.18,7.01,,81.62,58.82,54.02,302186.24,...,3.73,-1270000.0,147.87,169.28,158.58,37.98,42.86,,63.49,5.35


Saving data to /Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/jnj.csv
            date symbol   close    volume  mean_200  stddev_200  \
6096  2025-04-14    JNJ  154.36  10685338    155.61        7.14   
6097  2025-04-15    JNJ  153.62  10847653    155.55        7.13   
6098  2025-04-16    JNJ  153.91   7987296    155.50        7.11   
6099  2025-04-17    JNJ  157.47  10981067    155.51        7.09   
6100  2025-04-21    JNJ  156.92   6285438    155.39        7.08   
6101  2025-04-22    JNJ  157.75   7136321    155.36        7.07   
6102  2025-04-23    JNJ  155.38   9099438    155.33        7.06   
6103  2025-04-24    JNJ  154.93   8310425    155.33        7.03   
6104  2025-04-25    JNJ  154.58   8643927    155.32        7.01   
6105  2025-04-28    JNJ  155.35   5815179    155.22        7.02   

      pcntleStdDevs_200  pcntleVolume_200  pcntleClosing_200  oscillator_200  \
6096              34.31             95.62              43.07       

Unnamed: 0_level_0,symbol,close,volume,mean_200,stddev_200,pcntleStdDevs_200,pcntleVolume_200,pcntleClosing_200,oscillator_200,accu_dist_200,...,stddev_30,accu_dist_90,bollingerLower_90,bollingerUpper_90,mean_90,oscillator_90,pcntleClosing_90,pcntleStdDevs_90,pcntleVolume_90,stddev_90
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-04-29,REGN,568.91,2660427,723.86,106.57,,100.0,5.88,4.79,-846134.08,...,26.34,-797923.11,540.87,759.31,650.09,11.35,12.7,,100.0,54.61


Saving data to /Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/regn.csv
            date symbol   close   volume  mean_200  stddev_200  \
5814  2025-04-14   REGN  571.06   946329    758.53      125.65   
5815  2025-04-15   REGN  557.91   838663    755.01      124.46   
5816  2025-04-16   REGN  549.28   875713    751.41      123.21   
5817  2025-04-17   REGN  563.16   827805    750.04      123.80   
5818  2025-04-21   REGN  561.49   745717    742.13      117.49   
5819  2025-04-22   REGN  585.49  1036812    738.97      115.87   
5820  2025-04-23   REGN  587.85   791945    735.85      114.15   
5821  2025-04-24   REGN  599.76  1317237    734.85      114.32   
5822  2025-04-25   REGN  602.64   964394    733.89      114.46   
5823  2025-04-28   REGN  610.86  1175409    727.02      108.31   

      pcntleStdDevs_200  pcntleVolume_200  pcntleClosing_200  oscillator_200  \
5814              35.77             64.96               2.92            4.89 

Unnamed: 0_level_0,symbol,close,volume,mean_200,stddev_200,pcntleStdDevs_200,pcntleVolume_200,pcntleClosing_200,oscillator_200,accu_dist_200,...,stddev_30,accu_dist_90,bollingerLower_90,bollingerUpper_90,mean_90,oscillator_90,pcntleClosing_90,pcntleStdDevs_90,pcntleVolume_90,stddev_90
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-04-29,NVO,65.16,10923695,89.76,17.2,,95.59,11.76,11.43,-1960000.0,...,3.11,-2450000.0,55.54,96.59,76.06,21.54,25.4,,90.48,10.26


Saving data to /Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/nvo.csv
            date symbol  close    volume  mean_200  stddev_200  \
6096  2025-04-14    NVO  66.06   8067341     94.08       16.79   
6097  2025-04-15    NVO  64.49   5945621     93.64       16.77   
6098  2025-04-16    NVO  62.88   6302636     93.22       16.81   
6099  2025-04-17    NVO  58.08  32220629     92.97       17.02   
6100  2025-04-21    NVO  58.33  13419321     92.16       16.98   
6101  2025-04-22    NVO  59.91  11697878     91.76       17.09   
6102  2025-04-23    NVO  61.30   9128730     91.36       17.17   
6103  2025-04-24    NVO  62.63   7891068     91.15       17.28   
6104  2025-04-25    NVO  62.08   7835183     90.94       17.39   
6105  2025-04-28    NVO  62.62   6267092     90.15       17.23   

      pcntleStdDevs_200  pcntleVolume_200  pcntleClosing_200  oscillator_200  \
6096              56.20             94.89               5.11            7.69  

Unnamed: 0_level_0,symbol,close,volume,mean_200,stddev_200,pcntleStdDevs_200,pcntleVolume_200,pcntleClosing_200,oscillator_200,accu_dist_200,...,stddev_30,accu_dist_90,bollingerLower_90,bollingerUpper_90,mean_90,oscillator_90,pcntleClosing_90,pcntleStdDevs_90,pcntleVolume_90,stddev_90
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-04-29,AAPL,211.21,36827633,229.3,16.27,,19.12,13.97,44.79,-5540000.0,...,13.03,2300000.0,184.78,257.57,221.17,51.94,30.16,,14.29,18.2


Saving data to /Users/peeyushsharma/Dropbox/workspace/HelloPython/HistoricalMarketData/TechnicalIndicators/aapl.csv
            date symbol   close     volume  mean_200  stddev_200  \
5815  2025-04-14   AAPL  202.52  101352911    231.05       14.38   
5816  2025-04-15   AAPL  202.14   51343872    230.87       14.59   
5817  2025-04-16   AAPL  194.27   59732423    230.62       14.92   
5818  2025-04-17   AAPL  196.98   52164675    230.38       15.13   
5819  2025-04-21   AAPL  193.16   46742537    230.14       15.57   
5820  2025-04-22   AAPL  199.74   52976371    229.95       15.78   
5821  2025-04-23   AAPL  204.60   52929165    229.79       15.93   
5822  2025-04-24   AAPL  208.37   47310989    229.63       15.97   
5823  2025-04-25   AAPL  209.28   38222258    229.49       16.01   
5824  2025-04-28   AAPL  210.14   37626816    229.43       16.20   

      pcntleStdDevs_200  pcntleVolume_200  pcntleClosing_200  oscillator_200  \
5815              69.34             94.89              