# Generate Stocks Demo Data

We use the Yahoo Finance API to fetch some historical stock data and, along with some dummy data, use it to initially fill the key-value, time-series database (TSDB), and stream tables.

In [1]:
# import required libraries
import io
import json
import os
from datetime import datetime

import numpy as np
import pandas as pd
import v3io_frames as v3f
import yfinance as yf

In [2]:
# Create the v3io-frames client used by every cell below to talk to the
# Iguazio multi-model data layer (KV, TSDB and stream backends).
FRAMESD_ADDRESS = 'framesd:8081'
client = v3f.Client(FRAMESD_ADDRESS)

## Delete KV, TSDB, and Stream tables
If you want to start from scratch (i.e., delete the current tables), uncomment the following line(s) and run them.

In [3]:
# Uncomment to delete the 'stocks' key-value table before regenerating the demo data.
#client.delete('kv','stocks')

In [4]:
# Uncomment to delete the 'stock_metrics' time-series table before regenerating the demo data.
#client.delete('tsdb','stock_metrics')

In [5]:
# Uncomment to delete the 'stock_stream' stream before regenerating the demo data.
#client.delete('stream','stock_stream')

## Create TSDB and Stream tables

In [6]:
# Create the time-series table that will hold the stock price/volume metrics.
# NOTE(review): rate='1/m' presumably declares an ingestion rate of one sample
# per minute -- confirm against the v3io-frames TSDB documentation.
client.create(
    backend='tsdb',
    table='stock_metrics',
    rate='1/m',
)

In [7]:
# Create the stream for the news-article records: a single shard,
# with retention_hours set to 48.
client.create(
    backend='stream',
    table='stock_stream',
    shards=1,
    retention_hours=48,
)

In [8]:
# Seed the key/value table with some initial data (KV tables are created
# automatically on write and have a dynamic schema).
# The JSON below is column-oriented: each top-level key is a column and each
# nested key (the stock symbol) is a row label.
kvtbl = '{"price":{"GOOG":1039.55,"AMZN":1641.03,"AAPL":169.6,"MSFT":107.59,"INTC":47.21},"volume":{"GOOG":1807725.1,"AMZN":7494808.1,"AAPL":62025994.1,"MSFT":40801525.1,"INTC":23289000.1},"symbol":{"GOOG":"GOOG","AMZN":"AMZN","AAPL":"AAPL","MSFT":"MSFT","INTC":"INTC"},"last_updated":{"GOOG":"2020-07-26 06:41:56","AMZN":"2020-07-26 06:41:56","AAPL":"2020-07-26 06:41:56","MSFT":"2020-07-26 06:41:56","INTC":"2020-07-26 06:41:56"}}'
# Wrap the literal in StringIO: passing a raw JSON string straight to
# pd.read_json is deprecated in recent pandas releases, while file-like
# input is accepted by old and new versions alike.
client.write(backend='kv', table='stocks', dfs=pd.read_json(io.StringIO(kvtbl)))

## Fill the time-series table with a week's worth of data from the Yahoo Finance API

In [9]:
# Read the stocks KV table; its index provides the stock symbols to fetch.
sdf = client.read(backend='kv', table='stocks')
stock_list = sdf.index.tolist()

# Build one (time, symbol)-indexed frame per stock with vectorized pandas
# operations, then concatenate once, rather than appending row by row.
frames = []
for stock_sym in stock_list:
    # Last 7 days of 60-minute bars for this symbol.
    stock_data = yf.Ticker(stock_sym).history(period='7d', interval='60m')
    if stock_data.empty:
        # Nothing returned for this symbol; it contributes no samples.
        continue
    frames.append(
        stock_data[['Close', 'Volume']]
        .astype(float)  # store both metrics as floats
        .rename(columns={'Close': 'price', 'Volume': 'volume'})
        .assign(symbol=stock_sym)
        .set_index('symbol', append=True)
    )

if frames:
    df = pd.concat(frames)
else:
    # No data at all: fall back to an empty frame with the same
    # two-level index and columns.
    df = pd.DataFrame({'price': [], 'volume': []}, index=[[], []])
df.index.names = ['time', 'symbol']
print(df.head())
client.write(backend='tsdb', table='stock_metrics', dfs=df)

                                    price     volume
time                      symbol                    
2020-07-27 09:30:00-04:00 AMZN    3051.72  1568959.0
2020-07-27 10:30:00-04:00 AMZN    3027.56   642495.0
2020-07-27 11:30:00-04:00 AMZN    3037.39   414108.0
2020-07-27 12:30:00-04:00 AMZN    3042.32   274094.0
2020-07-27 13:30:00-04:00 AMZN    3062.72   333548.0


## Fill dummy news article data in the stream

In [10]:
# Placeholder news-article record for the stream. It is serialized to a JSON
# string and sent to 'stock_stream' with the stream backend's 'put' command.
record = {
    'content': 'bla bla bla',
    'time': '2020-07-23 00:00:00',
    'symbol': 'AMZN',
    'link': 'https://example-url.com/',
    'sentiment': 0.5,
}
client.execute('stream', 'stock_stream', 'put', args={'data': json.dumps(record)})