## Stock History Generator

This notebook runs similarly to the Stock Generator App, except that it generates a history of data as quickly as it can between the specified start and end dates. By default, it simulates a new reading every 1 second but records the result only once every 1 minute. When complete, and if `write_to_table` is enabled, the results are merged into the specified table (created if necessary); by default, existing symbol/timestamp rows are overwritten.

Each full day takes approx 6 seconds to generate.

In [None]:
import time
import os
import datetime
import json
import math
import random

from datetime import timedelta
from pyspark import SparkFiles
from pyspark.sql.functions import col
from enum import Enum

# Delta table output: when True, the generated readings are merged into target_table
write_to_table = False
target_table = "raw_stock_test"

# CSV output: when True, the generated readings are written under csv_path
write_to_csv = False
csv_path = "Files/stockhistory/manual/"

In [None]:
# For parameter preparation: show the min/max timestamp already in the target table.
# Uses `and` (not bitwise `&`) so the catalog is only queried when table output is enabled.
if write_to_table and spark.catalog.tableExists(target_table):
    df = spark.sql(f"SELECT min(timestamp) as mindate, max(timestamp) as maxdate FROM {target_table}")
    df.show()

In [None]:
# configure the simulated time window

# option 1 (active): fixed dates, written as 'YYYY-MM-DD HH:MM:SS'
startdatetime = datetime.datetime.strptime("2024-03-01 00:00:00", '%Y-%m-%d %H:%M:%S')
enddatetime = datetime.datetime.strptime("2024-03-05 23:59:00", '%Y-%m-%d %H:%M:%S')

# option 2: derive the window from the current UTC date
# enddatetime = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
# startdatetime = enddatetime + datetime.timedelta(days=-30)

# the simulation clock starts at the beginning of the window
timestamp = startdatetime

print(f'Start date: {startdatetime}')
print(f'End date: {enddatetime}')
print(f'Starting Timestamp: {timestamp}')


In [None]:
# simulation settings -- most of these should not be modified
# unless you are sure of the impact

# simulated seconds added to the clock on every tick (default 1)
intervalInSeconds = 1

# keep a reading every N ticks; 60 = once per simulated minute at the default interval
writeFrequency = 60

# print a status line every N ticks; 86400 = once per simulated day at the default interval
statusFrequency = 86400

# real seconds to sleep between ticks (default 0)
sleepTime = 0

In [None]:
# helpers that merge the generated readings into the Delta table in the lakehouse

from delta.tables import *

def create_raw_table_if_needed():
    """Create the Delta table named by ``target_table`` if it does not exist yet.

    The table has three columns: symbol, price and timestamp.
    """
    ddl = f"""
        CREATE TABLE IF NOT EXISTS {target_table} (
            symbol VARCHAR(5)
            ,price DOUBLE
            ,timestamp TIMESTAMP
            )
        USING DELTA
        """
    spark.sql(ddl)

def write_rawstock(df, overwrite = False):
    """Merge generated readings into the Delta table named by ``target_table``.

    Rows are matched on (timestamp, symbol). Rows that do not exist yet are
    always inserted. Rows that already exist are updated with the new price
    only when ``overwrite`` is truthy; otherwise they are left untouched.

    Args:
        df: DataFrame with ``symbol``, ``price`` and ``timestamp`` columns.
        overwrite: When truthy, replace the price of rows that already exist.
    """
    raw_stock_data = DeltaTable.forName(spark, target_table)

    merge_builder = raw_stock_data.alias('raw').merge(
        df.alias('StockData'),
        'raw.timestamp = StockData.timestamp and raw.symbol = StockData.symbol'
    )

    # Only register the update clause when it can change something; a
    # "set price = raw.price" update would rewrite matched rows for no effect.
    if overwrite:
        merge_builder = merge_builder.whenMatchedUpdate(set =
            {
                "price": "StockData.price"
            }
        )

    merge_builder.whenNotMatchedInsert(values =
        {
            "symbol": "StockData.symbol"
            ,"price": "StockData.price"
            ,"timestamp": "StockData.timestamp"
        }
    ).execute()


## Generator Parameters

The 3 main parameters are:

Stocks: each stock has a number of parameters that determine behavior/statistics

Events: Occasional triggers that cause all stocks to rise/fall

Timers: Follows time-based rules and influences the behavior of specified stocks

In [None]:
# # cell is frozen as these are baseline parameters as used in the container. 
# # recommend keeping these as-is for reference: copy/edit as desired in next cell

# # stocks json
# StocksJson = '{"stocks": [ \
#     {"name":"WHO","startprice":600,"minprice":100,"maxprice":1200,"mu":0.04,"sigma":0.9,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.510,"increasechance20-40":0.505,"increasechance40-60":0.500,"increasechance60-80":0.484,"increasechance80-100":0.442,"annualgrowthrate":0.08}, \
#     {"name":"WHAT","startprice":500,"minprice":50,"maxprice":1050,"mu":0.04,"sigma":0.8,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.510,"increasechance20-40":0.502,"increasechance40-60":0.500,"increasechance60-80":0.481,"increasechance80-100":0.442,"annualgrowthrate":0.07}, \
#     {"name":"IDK","startprice":500,"minprice":100,"maxprice":1100,"mu":0.04,"sigma":0.9,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.535,"increasechance20-40":0.540,"increasechance40-60":0.520,"increasechance60-80":0.500,"increasechance80-100":0.475,"annualgrowthrate":0.065}, \
#     {"name":"WHY","startprice":550,"minprice":25,"maxprice":1200,"mu":0.04,"sigma":0.9,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.515,"increasechance20-40":0.515,"increasechance40-60":0.503,"increasechance60-80":0.480,"increasechance80-100":0.442,"annualgrowthrate":-0.02}, \
#     {"name":"BCUZ","startprice":300,"minprice":5,"maxprice":950,"mu":0.03,"sigma":0.7,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.520,"increasechance20-40":0.510,"increasechance40-60":0.505,"increasechance60-80":0.500,"increasechance80-100":0.465,"annualgrowthrate":0.06}, \
#     {"name":"TMRW","startprice":500,"minprice":50,"maxprice":1100,"mu":0.07,"sigma":1.0,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.530,"increasechance20-40":0.520,"increasechance40-60":0.515,"increasechance60-80":0.502,"increasechance80-100":0.430,"annualgrowthrate":0.052}, \
#     {"name":"TDY","startprice":700,"minprice":225,"maxprice":1250,"mu":0.07,"sigma":1.0,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.530,"increasechance20-40":0.520,"increasechance40-60":0.515,"increasechance60-80":0.502,"increasechance80-100":0.430,"annualgrowthrate":0.02}, \
#     {"name":"IDGD","startprice":500,"minprice":50,"maxprice":1050,"mu":0.04,"sigma":0.8,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.503,"increasechance20-40":0.500,"increasechance40-60":0.496,"increasechance60-80":0.492,"increasechance80-100":0.451,"annualgrowthrate":0.055} \
#     ]}'

# # events are occasional triggers that move all stocks in a direction for a set of time
# EventsJson = '{"events": [ \
#     {"type": "periodic", "name": "900-up", "frequency":900, "increasechance":1.0, "duration": 60, "modifier": 0.5}, \
#     {"type": "periodic", "name": "5220-down", "frequency":5220, "increasechance":0.0, "duration": 30, "modifier": 0.5}, \
#     {"type": "random", "name": "Rando1", "frequency": 0.003, "increasechance": 0.504, "duration": 30, "modifier": 0.4} \
#     ]}'

# # timers are stacked time/date based and can be applied to specific stocks
# TimersJson = '{"timers":[ \
#     {"name": "Workdays", "start":"08:00:00", "end":"18:00:00", "days":"0|1|2|3|4", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.0004, "appliedTo": "WHO|WHAT|IDK|WHY|BCUZ|TMRW|TDY|IDGD"}, \
#     {"name": "Evening Decline", "start":"22:00:00", "end":"23:59:59", "days":"0|1|2|3|4|5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.03, "appliedTo": "WHO|WHAT|IDK|WHY|IDGD"}, \
#     {"name": "Morning Rise", "start":"04:00:00", "end":"06:00:00", "days":"0|1|2|3|4|5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.0003, "appliedTo": "WHO|WHAT|IDK|WHY|BCUZ|TMRW|TDY|IDGD"}, \
#     {"name": "ET Business Hours MWF", "start":"14:00:00", "end":"22:00:00", "days":"0|2|4", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.04, "appliedTo": "WHO|WHAT|WHY"}, \
#     {"name": "WHO Fridays", "start":"14:00:00", "end":"22:00:00", "days":"4", "dayofmonth":"all", "months": "1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.06, "appliedTo": "WHO"}, \
#     {"name": "GMT Business Hours M-F", "start":"08:00:00", "end":"17:00:00", "days":"0|1|2|3|4", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.015, "appliedTo": "TMRW|TDY"}, \
#     {"name": "Weekend Slide", "start":"00:00:00", "end":"23:59:59", "days":"5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.012, "appliedTo": "TMRW|TDY"}, \
#     {"name": "GMT Business Hours M", "start":"07:00:00", "end":"18:00:00", "days":"0", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.025, "appliedTo": "TMRW|TDY"}, \
#     {"name": "Lunch Slump", "start":"12:00:00", "end":"13:00:00", "days":"0|1|2|3|4", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.01, "appliedTo": "WHO|WHAT|IDK|WHY|IDGD"}, \
#     {"name": "Hour of Darkness", "start":"01:00:00", "end":"02:00:00", "days":"0|1|2|3|4|5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.055, "appliedTo": "IDGD"}, \
#     {"name": "Happy Wednesdays", "start":"01:00:00", "end":"23:00:00", "days":"2", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.035, "appliedTo": "IDGD"}, \
#     {"name": "BCUZ Weekends", "start":"00:00:00", "end":"23:59:59", "days":"5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.026, "appliedTo": "BCUZ"} \
#     ]}'

# printOnlyErrors = True
# maxErrorCount = 15
# useGrowthRate = True
# growthInceptionDate = datetime.datetime.strptime("2023-01-01 00:00:00", '%Y-%m-%d %H:%M:%S')

In [None]:
# stocks json: one object per simulated symbol. Keys, as consumed by StockVariables
# and the generator loop:
#   name                 symbol written to the output
#   startprice           initial price
#   minprice / maxprice  price floor / ceiling (the ceiling may be scaled by annualgrowthrate)
#   mu / sigma           arguments of random.normalvariate used for the size of each move
#   correctionchance     per-tick probability of starting a correction
#   correctionlength     length of a correction, in ticks
#   correctionmodifier   multiplier applied to the move size during a correction
#   increasechanceA-B    probability of an upward move while the price ratio is in band A-B percent
#   annualgrowthrate     yearly growth rate; divided by 365 and compounded daily onto maxprice
StocksJson = '{"stocks": [ \
    {"name":"WHO","startprice":600,"minprice":100,"maxprice":1200,"mu":0.04,"sigma":0.9,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.510,"increasechance20-40":0.505,"increasechance40-60":0.500,"increasechance60-80":0.484,"increasechance80-100":0.442,"annualgrowthrate":0.08}, \
    {"name":"WHAT","startprice":500,"minprice":50,"maxprice":1050,"mu":0.04,"sigma":0.8,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.510,"increasechance20-40":0.502,"increasechance40-60":0.500,"increasechance60-80":0.481,"increasechance80-100":0.442,"annualgrowthrate":0.07}, \
    {"name":"IDK","startprice":500,"minprice":100,"maxprice":1100,"mu":0.04,"sigma":0.9,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.535,"increasechance20-40":0.540,"increasechance40-60":0.520,"increasechance60-80":0.500,"increasechance80-100":0.475,"annualgrowthrate":0.065}, \
    {"name":"WHY","startprice":550,"minprice":25,"maxprice":1200,"mu":0.04,"sigma":0.9,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.515,"increasechance20-40":0.515,"increasechance40-60":0.503,"increasechance60-80":0.480,"increasechance80-100":0.442,"annualgrowthrate":-0.02}, \
    {"name":"BCUZ","startprice":300,"minprice":5,"maxprice":950,"mu":0.03,"sigma":0.7,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.520,"increasechance20-40":0.510,"increasechance40-60":0.505,"increasechance60-80":0.500,"increasechance80-100":0.465,"annualgrowthrate":0.06}, \
    {"name":"TMRW","startprice":500,"minprice":50,"maxprice":1100,"mu":0.07,"sigma":1.0,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.530,"increasechance20-40":0.520,"increasechance40-60":0.515,"increasechance60-80":0.502,"increasechance80-100":0.430,"annualgrowthrate":0.052}, \
    {"name":"TDY","startprice":700,"minprice":225,"maxprice":1250,"mu":0.07,"sigma":1.0,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.530,"increasechance20-40":0.520,"increasechance40-60":0.515,"increasechance60-80":0.502,"increasechance80-100":0.430,"annualgrowthrate":0.02}, \
    {"name":"IDGD","startprice":500,"minprice":50,"maxprice":1050,"mu":0.04,"sigma":0.8,"correctionchance":0.01,"correctionlength":60,"correctionmodifier":0.4,"increasechance0-20":0.503,"increasechance20-40":0.500,"increasechance40-60":0.496,"increasechance60-80":0.492,"increasechance80-100":0.451,"annualgrowthrate":0.055} \
    ]}'

# events are occasional triggers that move all stocks in a direction for a set of time
#   type            "periodic" (fires every `frequency` ticks) or
#                   "random" (fires on a tick with probability `frequency`)
#   increasechance  probability that a triggered event pushes prices up rather than down
#   duration        how long the event lasts, in ticks
#   modifier        multiplier applied to the size of each move while the event is active
EventsJson = '{"events": [ \
    {"type": "periodic", "name": "900-up", "frequency":900, "increasechance":1.0, "duration": 60, "modifier": 0.5}, \
    {"type": "periodic", "name": "5220-down", "frequency":5220, "increasechance":0.0, "duration": 30, "modifier": 0.5}, \
    {"type": "random", "name": "Rando1", "frequency": 0.003, "increasechance": 0.504, "duration": 30, "modifier": 0.4} \
    ]}'

# timers are stacked time/date based and can be applied to specific stocks
#   start / end   time-of-day window (HH:MM:SS, both ends inclusive)
#   days          "|"-separated weekday numbers as returned by datetime.weekday() (0 = Monday)
#   dayofmonth    "all" or "|"-separated day-of-month numbers
#   months        "|"-separated month numbers (1-12)
#   modifier      amount added to a stock's increase chance while the timer matches;
#                 modifiers of all matching timers are summed
#   appliedTo     "all" or "|"-separated stock names
TimersJson = '{"timers":[ \
    {"name": "Workdays", "start":"08:00:00", "end":"18:00:00", "days":"0|1|2|3|4", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.0004, "appliedTo": "WHO|WHAT|IDK|WHY|BCUZ|TMRW|TDY|IDGD"}, \
    {"name": "Evening Decline", "start":"22:00:00", "end":"23:59:59", "days":"0|1|2|3|4|5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.03, "appliedTo": "WHO|WHAT|IDK|WHY|IDGD"}, \
    {"name": "Morning Rise", "start":"04:00:00", "end":"06:00:00", "days":"0|1|2|3|4|5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.0003, "appliedTo": "WHO|WHAT|IDK|WHY|BCUZ|TMRW|TDY|IDGD"}, \
    {"name": "ET Business Hours MWF", "start":"14:00:00", "end":"22:00:00", "days":"0|2|4", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.04, "appliedTo": "WHO|WHAT|WHY"}, \
    {"name": "WHO Fridays", "start":"14:00:00", "end":"22:00:00", "days":"4", "dayofmonth":"all", "months": "1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.06, "appliedTo": "WHO"}, \
    {"name": "GMT Business Hours M-F", "start":"08:00:00", "end":"17:00:00", "days":"0|1|2|3|4", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.015, "appliedTo": "TMRW|TDY"}, \
    {"name": "Weekend Slide", "start":"00:00:00", "end":"23:59:59", "days":"5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.012, "appliedTo": "TMRW|TDY"}, \
    {"name": "GMT Business Hours M", "start":"07:00:00", "end":"18:00:00", "days":"0", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.025, "appliedTo": "TMRW|TDY"}, \
    {"name": "Lunch Slump", "start":"12:00:00", "end":"13:00:00", "days":"0|1|2|3|4", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.01, "appliedTo": "WHO|WHAT|IDK|WHY|IDGD"}, \
    {"name": "Hour of Darkness", "start":"01:00:00", "end":"02:00:00", "days":"0|1|2|3|4|5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":-0.055, "appliedTo": "IDGD"}, \
    {"name": "Happy Wednesdays", "start":"01:00:00", "end":"23:00:00", "days":"2", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.035, "appliedTo": "IDGD"}, \
    {"name": "BCUZ Weekends", "start":"00:00:00", "end":"23:59:59", "days":"5|6", "dayofmonth":"all", "months":"1|2|3|4|5|6|7|8|9|10|11|12", "modifier":0.026, "appliedTo": "BCUZ"} \
    ]}'

# when True, printMsg only prints messages of type ERROR
printOnlyErrors = True
maxErrorCount = 15
# when True, each stock's price ceiling is scaled by its growth rate,
# compounded daily from growthInceptionDate
useGrowthRate = True
growthInceptionDate = datetime.datetime(2023, 1, 1, 0, 0, 0)


In [None]:
class StockVariables:
    """Per-symbol simulation state, initialised from one entry of the stocks JSON.

    Holds the configured limits and probabilities of a stock together with the
    values that change while the generator runs (current price, correction
    state and movement counters).
    """

    def __init__(self, stockVariables) -> None:
        """Build the state from a single stock definition.

        Args:
            stockVariables: Mapping with the keys "name", "startprice",
                "minprice", "maxprice", "mu", "sigma", "correctionchance",
                "correctionlength", "correctionmodifier",
                "increasechance0-20", "increasechance20-40",
                "increasechance40-60", "increasechance60-80",
                "increasechance80-100" and "annualgrowthrate".

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If a value cannot be converted to a number.
        """
        # Read from the argument rather than from a module-level ``stock``
        # variable, so the class works whatever the caller names its variable.
        stock = stockVariables

        self.name = stock["name"]
        self.startPrice = float(stock["startprice"])
        self.minPrice = float(stock["minprice"])
        self.maxPrice = float(stock["maxprice"])
        self.currentPrice = float(stock["startprice"])
        self.mu = float(stock["mu"])
        self.sigma = float(stock["sigma"])

        # correction configuration and running state
        self.correctionChance = float(stock["correctionchance"])
        self.correctionLength = int(stock["correctionlength"])
        self.correctionCounter = 0
        self.correctionModifier = float(stock["correctionmodifier"])
        self.isInCorrection = False
        self.isCorrectionUpwards = True

        # movement statistics; all counters start at 1
        self.aboveStartingCount = 1
        self.belowStartingCount = 1
        self.moveUpCount = 1
        self.moveDownCount = 1

        # probability of an upward move, per band of the price ratio
        self.increaseChance_0_20 = float(stock["increasechance0-20"])
        self.increaseChance_20_40 = float(stock["increasechance20-40"])
        self.increaseChance_40_60 = float(stock["increasechance40-60"])
        self.increaseChance_60_80 = float(stock["increasechance60-80"])
        self.increaseChance_80_100 = float(stock["increasechance80-100"])

        self.growthRateAnnual = float(stock["annualgrowthrate"])
        self.growthRateDaily = self.growthRateAnnual / 365

    def getMaxPrice(self):
        """Return the price ceiling that applies at the current simulation time.

        When the stock has a non-zero annual growth rate and the module-level
        ``useGrowthRate`` flag is set, the configured ceiling is compounded
        daily over the whole days between the module-level
        ``growthInceptionDate`` and ``timestamp``. Otherwise the configured
        ceiling is returned unchanged.

        Side effect: if the grown ceiling would fall below ``minPrice + 1``,
        the ceiling is pinned to ``minPrice + 1`` and ``growthRateDaily`` is
        made positive.
        """
        if self.growthRateAnnual == 0 or not useGrowthRate:
            return self.maxPrice

        daysSinceInception = (timestamp - growthInceptionDate).days
        growthRateModifier = math.pow(1 + self.growthRateDaily, daysSinceInception)
        newMaxPrice = self.maxPrice * growthRateModifier

        # keep the ceiling above the floor
        if newMaxPrice < (self.minPrice + 1):
            self.growthRateDaily = abs(self.growthRateDaily)
            newMaxPrice = self.minPrice + 1

        return newMaxPrice

    def getPriceRange(self):
        """Return the current price divided by the width of the price range.

        NOTE(review): the numerator is not offset by ``minPrice``, so the
        result can exceed 1.0 near the ceiling -- confirm this is intended
        before changing it, since the increase-chance bands are applied to
        this value.
        """
        return self.currentPrice / (self.getMaxPrice() - self.minPrice)

    def getIncreaseChance(self, timerModifier = 0.0):
        """Return the probability, clamped to [0.0, 1.0], of an upward move.

        The base probability is chosen from the band that the value of
        ``getPriceRange()`` falls in, then ``timerModifier`` is added.

        Args:
            timerModifier: Summed modifier of the timers currently applying
                to this stock.
        """
        r = self.getPriceRange()

        if r >= 0.80:
            increaseChance = self.increaseChance_80_100
        elif r >= 0.60:
            increaseChance = self.increaseChance_60_80
        elif r >= 0.40:
            increaseChance = self.increaseChance_40_60
        elif r >= 0.20:
            increaseChance = self.increaseChance_20_40
        else:
            increaseChance = self.increaseChance_0_20

        increaseChance = increaseChance + timerModifier
        if increaseChance < 0.0:
            return 0.0
        if increaseChance > 1.0:
            return 1.0
        return increaseChance

In [None]:
# one StockVariables entry per configured stock
dataTable = []

class MessageType(Enum):
    """Message severities understood by printMsg."""
    INFO = 1
    ERROR = 2

def printMsg(message, messageType = MessageType.INFO):
    """Print ``message`` unless it is filtered out by ``printOnlyErrors``.

    Everything is printed while ``printOnlyErrors`` is False; otherwise only
    messages of type ``MessageType.ERROR`` are printed.
    """
    errors_only = printOnlyErrors
    if errors_only == False:
        print(message)
        return
    if errors_only and messageType == MessageType.ERROR:
        print(message)

In [None]:
# Parse the three JSON configuration strings (stocks, events, timers) and
# prepare the structures the generator loop works on. Any parsing problem is
# reported and re-raised, because the generator cannot run on partial config.
numEvents = 0
numTimers = 0
numStocks = 0

# --- stocks: one StockVariables entry per configured symbol ---
try:
    AllStocks = json.loads(StocksJson)
    numStocks = len(AllStocks['stocks'])

    # start from an empty table so re-running this cell does not duplicate stocks
    dataTable.clear()

    for stock in AllStocks['stocks']:
        dataTable.append(StockVariables(stock))
        print(f'Stock: {stock["name"]} {stock["startprice"]} {stock["minprice"]} {stock["maxprice"]} ' \
            f'{stock["mu"]} {stock["sigma"]} {stock["correctionchance"]} {stock["correctionlength"]} ' \
            f'{stock["correctionmodifier"]} {stock["increasechance0-20"]} {stock["increasechance20-40"]} ' \
            f'{stock["increasechance40-60"]} {stock["increasechance60-80"]} {stock["increasechance80-100"]} ' \
            f'{stock["annualgrowthrate"]}')

except Exception as e:
    numStocks = 0
    print("Error parsing JSON, cannot continue without stocks.")
    print(e)
    raise

# --- events: add the running counters used by the generator loop ---
try:
    AllEvents = json.loads(EventsJson)
    numEvents = len(AllEvents['events'])

    for event in AllEvents['events']:
        event['durationCount'] = event['duration']
        if event['type'] == 'periodic':
            # ticks remaining until the periodic event fires next
            event['frequencyCount'] = event['frequency']
        print(f'Event: {event["name"]} {event["type"]} {event["frequency"]} ' \
            f'{event["duration"]} {event["increasechance"]}')

except Exception as e:
    numEvents = 0
    print(f"{datetime.datetime.utcnow()} Error parsing Events JSON: {e}")
    raise

# --- timers: convert the start/end strings to datetime.time for comparisons ---
try:
    AllTimers = json.loads(TimersJson)
    numTimers = len(AllTimers['timers'])

    for timer in AllTimers['timers']:
        timer['start'] = datetime.datetime.strptime(timer['start'], "%H:%M:%S").time()
        timer['end'] = datetime.datetime.strptime(timer['end'], "%H:%M:%S").time()
        print(f'Timer: {timer["name"]} {timer["start"]} {timer["end"]} ' \
            f'{timer["days"]} {timer["dayofmonth"]} {timer["months"]} {timer["appliedTo"]} {timer["modifier"]}')

except Exception as e:
    numTimers = 0
    print(f"{datetime.datetime.utcnow()} Error parsing Timers JSON: {e}")
    raise


In [None]:
# --- simulation state ---
# at most one market-wide event is active at a time; currentEvent holds its dict
isEvent = False
currentEvent = ""
# number of ticks simulated so far
count = 0
errorCount = 0

oldtimestamp = timestamp - timedelta(days=1)
# readings kept for output: one dict per stock for every recorded tick
readings = []

print("Starting generator...")
rountine_start_time = datetime.datetime.utcnow()
# wall-clock start of the current status interval (used for the elapsed time in the status line)
interval_start_time = datetime.datetime.utcnow()

# each pass through this loop simulates one tick of intervalInSeconds seconds
while True:

    # try to start an event, but only while no event is active;
    # the first event that qualifies wins (break)
    if numEvents > 0 and isEvent == False:
        for event in AllEvents['events']:
            if (event['type'] == 'periodic' and event['frequencyCount'] <= 0):
                # periodic event triggered
                event['frequencyCount'] = event['frequency']
                event['durationCount'] = event['duration']
                event['isIncreasing'] = random.random() < event['increasechance']
                currentEvent = event
                isEvent = True
                printMsg(f'{event["name"]} Event Triggered ({"UP" if event["isIncreasing"] else "DOWN"})')
                break
            elif (event['type'] == 'random' and random.random() < event['frequency']):
                # random event triggered
                event['durationCount'] = event['duration']
                event['isIncreasing'] = random.random() < event['increasechance']
                currentEvent = event
                isEvent = True
                printMsg(f'{event["name"]} Event Triggered ({"UP" if event["isIncreasing"] else "DOWN"})')
                break

    # move every stock once per tick
    for record in dataTable:

        # apply timers
        # modifier is cumulative across all timers
        # a timer applies when time of day, stock name, weekday, month and day of month all match
        currentTimerModifier = 0.0
        appliedTimers = 0
        if numTimers > 0:
            for timer in AllTimers['timers']:
                if (timer['start'] <= timestamp.time() <= timer['end'] 
                        and (timer['appliedTo'] == 'all' or record.name in timer['appliedTo'].split('|'))
                        and str(timestamp.weekday()) in timer['days'].split('|')
                        and str(timestamp.month) in timer['months'].split('|')
                        and (timer['dayofmonth'] == 'all' or str(timestamp.day) in timer['dayofmonth'].split('|'))
                        ):
                    currentTimerModifier += timer['modifier']
                    appliedTimers += 1

        # size of the move: absolute value of a normal draw, rounded to 2 decimals
        # priceIncDec = abs(price - (random.normalvariate(record.mu, record.sigma) * price))
        priceIncDec = abs(round(random.normalvariate(record.mu, record.sigma),2))

        # direction of the move, before any event/correction override
        priceIncrease = random.random() < record.getIncreaseChance(currentTimerModifier)

        if isEvent:
            if currentEvent['durationCount'] <= 0:
                # event has run its course; stocks processed later in this tick
                # take the non-event branch
                isEvent = False
            else:
                priceIncrease = currentEvent['isIncreasing'] # force direction while the event is active
                priceIncDec = priceIncDec * currentEvent['modifier'] # scale the move by the event's modifier
            record.isInCorrection = False # force individual corrections off if event

        else:
            # no event: the stock may start its own correction
            if record.isInCorrection == False and random.random() < record.correctionChance:
                record.isInCorrection = True
                record.correctionCounter = record.correctionLength
                record.isCorrectionUpwards = random.random() < record.getIncreaseChance(currentTimerModifier)

        if record.isInCorrection:
            if record.correctionCounter <= 0:
                record.isInCorrection = False
            else:
                priceIncrease = record.isCorrectionUpwards # force direction if in correction
                priceIncDec = priceIncDec * record.correctionModifier # make corrections more gradual
                record.correctionCounter -= 1

        # apply the move, keeping the price between minPrice and getMaxPrice()
        if priceIncrease:
            newPrice = round(record.currentPrice + priceIncDec,2)
            newPrice = round(newPrice if newPrice < record.getMaxPrice() else record.getMaxPrice(),2)
            record.moveUpCount += 1
            #increase
        else:
            newPrice = round(record.currentPrice - priceIncDec,2)
            newPrice = (newPrice if newPrice > record.minPrice else record.minPrice)
            record.moveDownCount += 1
            #decrease

        record.currentPrice = newPrice
        if (record.currentPrice > record.startPrice):
            record.aboveStartingCount += 1
        else:
            record.belowStartingCount += 1
        
        # the timestamp is stored as a string
        reading = {'symbol': record.name, 'price': newPrice, 'timestamp': str(timestamp)}

        # only every writeFrequency-th tick is kept
        if count % writeFrequency == 0:
            readings.append(reading)

    # periodic status line; `reading` is the last stock's reading of this tick
    if count % statusFrequency == 0:
            interval_finish_time = datetime.datetime.utcnow()
            interval_elap = interval_finish_time - interval_start_time
            s = json.dumps(reading)
            print(f"{datetime.datetime.utcnow()} ({interval_elap.total_seconds()}s) Last row: {s}")
            interval_start_time = datetime.datetime.utcnow()

    count += 1

    # one tick of the active event has been used up
    if isEvent:
        currentEvent["durationCount"] -= 1
    
    # periodic events count down on every tick
    if numEvents > 0:
        for event in AllEvents['events']:
            if (event['type'] == 'periodic'):
                event['frequencyCount'] -= 1
        
    # advance the simulated clock; stop once it has moved past enddatetime
    oldtimestamp = timestamp
    timestamp = timestamp + datetime.timedelta(seconds=intervalInSeconds)
    if (timestamp > enddatetime):
        print(f'Ending. Current timestamp: {timestamp}')
        break
    
    if sleepTime>0:
        time.sleep(sleepTime)


## Process all readings

Load readings into dataframe

In [None]:
# load the readings list into a dataframe

# Each reading is serialised with json.dumps so the JSON reader receives real
# JSON text instead of depending on how a Python dict happens to print.
readings_df = spark.read.json(sc.parallelize(readings).map(json.dumps))
display(readings_df)

## Write to Delta table

Use the cells below to customize the writing of the data to a Delta table

In [None]:
# helpers that merge the generated readings into the Delta table in the lakehouse

from delta.tables import *

def create_raw_table_if_needed():
    """Create the Delta table named by ``target_table`` if it does not exist yet.

    The table has three columns: symbol, price and timestamp.
    """
    ddl = f"""
        CREATE TABLE IF NOT EXISTS {target_table} (
            symbol VARCHAR(5)
            ,price DOUBLE
            ,timestamp TIMESTAMP
            )
        USING DELTA
        """
    spark.sql(ddl)

def write_rawstock(df, overwrite = False):
    """Merge generated readings into the Delta table named by ``target_table``.

    Rows are matched on (timestamp, symbol). Rows that do not exist yet are
    always inserted. Rows that already exist are updated with the new price
    only when ``overwrite`` is truthy; otherwise they are left untouched.

    Args:
        df: DataFrame with ``symbol``, ``price`` and ``timestamp`` columns.
        overwrite: When truthy, replace the price of rows that already exist.
    """
    raw_stock_data = DeltaTable.forName(spark, target_table)

    merge_builder = raw_stock_data.alias('raw').merge(
        df.alias('StockData'),
        'raw.timestamp = StockData.timestamp and raw.symbol = StockData.symbol'
    )

    # Only register the update clause when it can change something; a
    # "set price = raw.price" update would rewrite matched rows for no effect.
    if overwrite:
        merge_builder = merge_builder.whenMatchedUpdate(set =
            {
                "price": "StockData.price"
            }
        )

    merge_builder.whenNotMatchedInsert(values =
        {
            "symbol": "StockData.symbol"
            ,"price": "StockData.price"
            ,"timestamp": "StockData.timestamp"
        }
    ).execute()

In [None]:
if write_to_table:

    # make sure the destination table exists before merging into it
    create_raw_table_if_needed()

    # merge the generated readings, asking for existing rows to be overwritten
    write_rawstock(readings_df, overwrite = True)

    # show the time span now covered by the table
    spark.sql(
        f"SELECT min(timestamp) as mindate, max(timestamp) as maxdate FROM {target_table}"
    ).show()


In [None]:
# utility statements - use with caution!

# spark.sql(f"DELETE FROM {target_table}")
# spark.sql(f"DROP TABLE {target_table}")

## Write to CSV

Use the cells below to customize the writing of the data to a CSV file

In [None]:
import pyspark.sql
import pyspark.sql.functions

def writeCsvFiles(df):
    """Write ``df`` as CSV files with a header row under ``csv_path``.

    Adds ``year`` and ``month`` columns derived from ``timestamp``, partitions
    the output by them and overwrites whatever is already at ``csv_path``.
    """
    fn = pyspark.sql.functions
    with_partitions = (
        df.withColumn("year", fn.year('timestamp'))
          .withColumn("month", fn.month('timestamp'))
    )
    writer = (
        with_partitions.write.option("header","true")
        .partitionBy("year","month")
        .mode("overwrite")
    )
    writer.csv(csv_path)

def loadCsvFiles():
    """Read back the CSV files found two folder levels below ``csv_path``.

    Returns:
        A DataFrame built from the files, with the first row of each file
        treated as the header.
    """
    reader = spark.read.format("csv").option("header", "true")
    return reader.load(f"{csv_path}*/*/*.csv")

# write the readings as CSV, then read the files back and display them as a check
if write_to_csv:
    writeCsvFiles(readings_df)
    df_loaded = loadCsvFiles()
    display(df_loaded)


In [None]:
# utility cell to delete the folder created above

def deleteFolder(folder):
    """Remove ``folder`` via mssparkutils if it exists; do nothing otherwise."""
    if not mssparkutils.fs.exists(folder):
        return
    # second argument True is passed through to mssparkutils.fs.rm unchanged
    mssparkutils.fs.rm(folder, True)

# deleteFolder(csv_path)

## Visualization

View all data. Resample as needed depending on data size.

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

# Plot one line per symbol from the generated readings.
readings_df_pd = readings_df.toPandas()
symbols_pd = sorted(readings_df_pd['symbol'].unique())

fig = go.Figure()

for symbol in symbols_pd:
    # print(symbol)
    dftemp = readings_df_pd.loc[readings_df_pd['symbol'] == symbol][["timestamp","price"]]
    # index by time so the series can be resampled
    dftemp = dftemp.set_index(pd.DatetimeIndex(dftemp["timestamp"])).drop("timestamp", axis=1)

    # use resample when graphing to limit data points on graph
    # dftemp = dftemp.resample("D").mean()
    # dftemp = dftemp.resample("5min").mean()
    # lowercase "h": the uppercase "H" alias is deprecated in pandas 2.2+
    dftemp = dftemp.resample("1h").mean()

    dftemp.reset_index(inplace = True)

    fig.add_trace(go.Scatter(x=dftemp['timestamp'], y=dftemp['price'], name=symbol, line=dict(width=1)))

fig.update_layout(title="Generated Data", showlegend=True)
fig.show()