In [1]:
import os
import glob
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf

### Get stock data to use as transient data sets

In [2]:
# Symbols whose price history is fetched and written to stocks/<ticker>.csv
tickers = [
    'GOOG',
    'NFLX',
    'MSFT',
    'LYFT',
    'TSLA',
    'AMZN',
    'AAPL',
    'NVDA',
    'META',
    'AMD',
]

def get_stock_data(ticker):
    """Fetch the maximum available price history for one ticker symbol.

    Calls ``yf.Ticker(ticker).history(period='max')``, moves the index into
    a regular column, adds a 'Ticker' column holding ``ticker`` and rewrites
    the 'Date' column as 'YYYY-MM-DD' strings.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.Ticker``.

    Returns
    -------
    pandas.DataFrame
        The history frame with the extra 'Ticker' column and string dates.
    """
    history = yf.Ticker(ticker).history(period='max')
    # assumes the history index is named 'Date', so that reset_index()
    # produces a 'Date' column -- TODO confirm against yfinance
    frame = history.reset_index()
    frame['Ticker'] = ticker
    parsed_dates = pd.to_datetime(frame['Date'])
    frame['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')
    return frame


# to_csv does not create missing directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs('stocks', exist_ok=True)

for ticker in tickers:
    # Fetch the full history for this symbol and store it as stocks/<ticker>.csv
    df = get_stock_data(ticker)
    df.to_csv(f'stocks/{ticker}.csv', index=False)

### Build sample job streams

In [16]:
def apply_factors(df, factor):
    """Return a copy of ``df`` with its price columns multiplied by ``factor``.

    The 'Open', 'High', 'Low' and 'Close' columns are scaled; every other
    column is carried over unchanged. The input frame is left untouched, so
    the same source data can be scaled again with a different factor.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'Open', 'High', 'Low' and 'Close' columns.
    factor : scalar
        Multiplier applied to each of the four price columns.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns in the same order.

    Raises
    ------
    KeyError
        If any of the four price columns is missing from ``df``.
    """
    columns = ['Open', 'High', 'Low', 'Close']
    # ``assign`` operates on a copy, so the caller's frame is not mutated.
    return df.assign(**{column: df[column] * factor for column in columns})

def build_scalar_file(files):
    """Collect, for each ticker, the row with the highest 'Close' value.

    Every path in ``files`` is read with ``pd.read_csv``; the frames are
    stacked under a fresh 0..n-1 index and, for each distinct 'Ticker', the
    row at which 'Close' is largest is selected.

    Parameters
    ----------
    files : iterable of str
        Paths of CSV files that contain 'Ticker' and 'Close' columns.

    Returns
    -------
    pandas.DataFrame
        One row per ticker, indexed by the row's position in the stacked
        frame.
    """
    frames = []
    for csv_path in files:
        frames.append(pd.read_csv(csv_path))
    combined = pd.concat(frames, ignore_index=True)
    # Index label of the maximum 'Close' within each ticker group
    peak_labels = combined.groupby('Ticker')['Close'].idxmax()
    return combined.loc[peak_labels]

# (job stream folder name, scale factor) pairs. The factor multiplies the
# price columns so that every job stream ends up with its own, distinct
# transient data.
names_and_factor = [
    ('JobStream1-2022-01-01__211242', 1),
    ('JobStream2-2022-02-14__224212', 1.2),
    ('JobStream3-2022-01-13__029321', 1.35),
    ('JobStream4-2022-05-27__734343', 1.5),
]

# Source CSV files, in sorted order so the Plots_P<i> numbering is stable
tr_files = glob.glob('stocks/*.csv')
tr_files.sort()

for name, factor in names_and_factor:

    # Build Plots folder. exist_ok=True makes re-running the cell safe
    # without a separate isdir check.
    plots_path = os.path.join('sample_jobstreams', name, 'Plots')
    os.makedirs(plots_path, exist_ok=True)
    for i, f in enumerate(tr_files, start=1):
        # One numbered subfolder (Plots_P1, Plots_P2, ...) per source CSV
        plots_subfolder_path = os.path.join(plots_path, f'Plots_P{i}')
        os.makedirs(plots_subfolder_path, exist_ok=True)

        # Scale the price columns by this job stream's factor and save them
        df = pd.read_csv(f)
        df = apply_factors(df, factor)
        df.to_csv(os.path.join(plots_subfolder_path, f'Plots_P{i}_data.csv'), index=False)

        # One PNG per price column, plotted against the row index
        for column in ['Open', 'High', 'Low', 'Close']:
            fig, ax = plt.subplots(nrows=1, ncols=1)
            ax.plot(df.index, df[column])
            ax.set_title(column)
            ax.set_xlabel('Index')
            ax.set_ylabel('Price ($)')
            fig.savefig(os.path.join(plots_subfolder_path, f'Plots_P{i}_{column}.png'))
            # Close this specific figure so open figures do not accumulate
            plt.close(fig)

    # Build Scalar folder
    scalar_path = os.path.join('sample_jobstreams', name, 'Scalar', 'Scalar_Base')
    os.makedirs(scalar_path, exist_ok=True)

    # NOTE(review): the scalar data is built from the unscaled source files,
    # so Scalar_Base.xlsx is identical for every job stream -- confirm that
    # `factor` is intentionally not applied here.
    df_scalar = build_scalar_file(tr_files)
    # The context manager closes the workbook even if to_excel raises
    with pd.ExcelWriter(os.path.join(scalar_path, 'Scalar_Base.xlsx'), engine='openpyxl') as writer:
        df_scalar.to_excel(writer, index=False)


