# Testing sources for stock data
A number of Python pakcages or Python-compatible APIs exist to pull stock data. There goal here is to get data for free, or as free as possible.

Current options are:
 1. `yfinance` - free, maybe not current?
 1. `iexfinance` (https://iexcloud.io/) - very limited trial, paid plan is 9 USD/month
 1. Alphavantage (https://www.alphavantage.co/) - 29.99 USD/month
 1. World Trading data (https://www.worldtradingdata.com/) - 8 to 32 USD/month



In [33]:
# Imports.
import numpy as np
import pandas as pd
import altair as alt
import os

# Finance package imports.
import yfinance as yf

# ---
from IPython.display import display

alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---
## Testing the `yfinance` package

In [43]:
# Ticket object.
nvda = yf.Ticker('NVDA')
NVDA = {}

In [44]:
NVDA['info'] = nvda.info
NVDA['actions'] = nvda.actions
NVDA['financials'] = nvda.financials
NVDA['cashflow'] = nvda.cashflow
NVDA['earnings'] = nvda.earnings
NVDA['sustainability'] = nvda.sustainability
NVDA['recommendations'] = nvda.recommendations
NVDA['history'] = nvda.history(period='10y').reset_index()

In [70]:
nvda.info['shortName']

'NVIDIA Corporation'

In [46]:
NVDA['history']

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-03-19,16.31,16.36,15.67,15.88,14615300,0.0,0
1,2010-03-22,15.67,16.20,15.67,16.13,10883400,0.0,0
2,2010-03-23,16.25,16.55,16.14,16.47,12340300,0.0,0
3,2010-03-24,16.33,16.37,15.89,15.91,10906300,0.0,0
4,2010-03-25,16.14,16.24,15.92,15.96,12379600,0.0,0
...,...,...,...,...,...,...,...,...
2512,2020-03-12,225.46,236.12,215.80,216.31,20413500,0.0,0
2513,2020-03-13,230.50,241.02,219.61,240.84,15870900,0.0,0
2514,2020-03-16,212.41,226.10,194.00,196.40,18174300,0.0,0
2515,2020-03-17,200.91,220.86,191.00,217.27,20640000,0.0,0


In [48]:
source = NVDA['history']

base = alt.Chart(source).properties(width=550)

line = base.mark_line().encode(
    x='Date:T',
    y=alt.Y('Close', scale=alt.Scale(type='linear', base=10))
).interactive(bind_y=False)

line

In [49]:
open_close_color = alt.condition("datum.Open < datum.Close",
                                 alt.value("#06982d"),
                                 alt.value("#ae1325"))

rule = alt.Chart(source).mark_rule().encode(
    alt.X('Date:T'),
    alt.Y(
        'Low',
        title='Price',
        scale=alt.Scale(zero=False),
    ),
    alt.Y2('High'),
    color=alt.value('#000000')
).properties(width=1000, height=400).interactive(bind_y=False)

bar = alt.Chart(source).mark_bar().encode(
    x='Date:T',
    y='Open',
    y2='Close',
    color=open_close_color
)

rule + bar

In [54]:
f"{source['Date'].max().year}-{source['Date'].max().month}-{source['Date'].max().day}"

'2020-3-18'

In [None]:
NVDA['history'].Date

In [17]:
NVDA['history']

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,0,1.61,1.80,1.43,1.51,67867200.0,0.0,0.0
1,1,1.63,1.69,1.51,1.67,12762000.0,0.0,0.0
2,2,1.69,1.72,1.51,1.54,8580000.0,0.0,0.0
3,3,1.54,1.58,1.46,1.53,6109200.0,0.0,0.0
4,4,1.53,1.54,1.52,1.53,5688000.0,0.0,0.0
...,...,...,...,...,...,...,...,...
5318,5318,255.46,256.60,242.57,246.47,12499100.0,0.0,0.0
5319,5319,225.46,236.12,215.80,216.31,20413500.0,0.0,0.0
5320,5320,230.50,241.02,219.61,240.84,15870900.0,0.0,0.0
5321,5321,212.41,226.10,194.00,196.40,18112100.0,0.0,0.0


In [7]:
for k in NVDA.keys():
    print(f"\n\n{k}\n-------------------------------")
    display(NVDA[k])



info
-------------------------------


{'zip': '95051',
 'sector': 'Technology',
 'fullTimeEmployees': 13775,
 'longBusinessSummary': "NVIDIA Corporation operates as a visual computing company worldwide. It operates in two segments, GPU and Tegra Processor. The GPU segment offers processors, which include GeForce for PC gaming and mainstream PCs; GeForce NOW for cloud-based gaming; Quadro for design professionals working in computer-aided design, video editing, special effects, and other creative applications; Tesla for artificial intelligence (AI) utilizing deep learning, accelerated computing, and general purpose computing; GRID, which provides power of NVIDIA graphics through the cloud and datacenters; DGX for AI scientists, researchers, and developers; and EGX for accelerated AI computing at the edge. The Tegra Processor segment provides processors comprising SHIELD devices and services designed to harness the power of mobile-cloud to revolutionize home entertainment, AI, and gaming; AGX, a power-efficient AI computing 



actions
-------------------------------


Unnamed: 0_level_0,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-06-27,0.0,2.0
2001-09-12,0.0,2.0
2001-09-17,0.0,2.0
2006-04-07,0.0,2.0
2007-09-11,0.0,1.5
2012-11-20,0.075,0.0
2013-02-26,0.075,0.0
2013-05-21,0.075,0.0
2013-08-20,0.075,0.0
2013-11-19,0.085,0.0




financials
-------------------------------


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1




cashflow
-------------------------------


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1




earnings
-------------------------------


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1




sustainability
-------------------------------


Unnamed: 0_level_0,Value
2020-1,Unnamed: 1_level_1
palmOil,False
controversialWeapons,False
gambling,False
socialScore,4.54
nuclear,False
furLeather,False
alcoholic,False
gmo,False
catholic,False
socialPercentile,0




recommendations
-------------------------------


Unnamed: 0_level_0,Firm,To Grade,From Grade,Action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-04-09,Morgan Stanley,Overweight,Equal-Weight,up
2018-05-17,Cowen & Co.,Outperform,,init
2018-09-25,KeyBanc,Sector Weight,,init
2018-10-30,JP Morgan,Overweight,Neutral,up
2018-11-08,Morgan Stanley,Overweight,Overweight,main
...,...,...,...,...
2019-11-25,Morgan Stanley,Overweight,Equal-Weight,up
2019-12-17,PiperJaffray,Overweight,,main
2019-12-20,Wells Fargo,Overweight,,main
2019-12-31,Benchmark,Buy,,main




history
-------------------------------


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1999-01-22,1.61,1.80,1.43,1.51,67867200.0,0.0,0.0
1,1999-01-25,1.63,1.69,1.51,1.67,12762000.0,0.0,0.0
2,1999-01-26,1.69,1.72,1.51,1.54,8580000.0,0.0,0.0
3,1999-01-27,1.54,1.58,1.46,1.53,6109200.0,0.0,0.0
4,1999-01-28,1.53,1.54,1.52,1.53,5688000.0,0.0,0.0
...,...,...,...,...,...,...,...,...
5319,2020-03-12,225.46,236.12,215.80,216.31,20413500.0,0.0,0.0
5320,2020-03-13,230.50,241.02,219.61,240.84,15870900.0,0.0,0.0
5321,2020-03-16,212.41,226.10,194.00,196.40,18174300.0,0.0,0.0
5322,2020-03-17,200.91,220.86,191.00,217.27,20640000.0,0.0,0.0


<div class="alert alert-block alert-success">
<b>Conclusion:</b> Looks like `yfinance` will be fine for daily data. We should include the `info.shortName`, `info.longName`, `actions`, `sustainability`, `recommendations`, and `history`. However, only `history` should be saved and tracked. The rest are small enough to be re-downloaded regularly. Maybe...
</div>

In [17]:
#%load_ext autoreload
%autoreload 2
from stocks.classes.stock_data import StockData

In [18]:
sd = StockData("/home/lee/Data/Stocks")

Folder 'data_folder' the following stocks:
	|
	|-- MSFT
	|-- NVDA
	|-- AAPL


In [19]:
sd.add(['nvda', 'aapl', 'msft'])



In [20]:
sd.load()

In [21]:
d = sd.get_object_data()

In [25]:
d['msft'].loc[d['msft']['Dividends']>0]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
4274,2003-02-19,15.98,16.01,15.56,15.79,46902700,0.08,0.0
4440,2003-10-15,18.90,18.94,18.58,18.82,57242100,0.16,0.0
4654,2004-08-23,17.70,17.74,17.61,17.68,39572200,0.08,0.0
4713,2004-11-15,19.78,19.90,19.68,19.82,104468000,3.08,0.0
4776,2005-02-15,18.87,18.93,18.77,18.82,76551600,0.08,0.0
...,...,...,...,...,...,...,...,...
8302,2019-02-20,106.45,106.53,104.90,105.75,21607700,0.46,0.0
8361,2019-05-15,123.09,125.52,122.54,124.83,24722700,0.46,0.0
8424,2019-08-14,135.53,136.08,132.85,133.16,32527300,0.46,0.0
8493,2019-11-20,149.90,150.43,148.06,149.21,25696800,0.51,0.0


In [84]:
appl.history(start='1980-01-01')

- APPL: No data found for this date range, symbol may be delisted


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [27]:
a = '10d'
a.upper()


'10D'

In [2]:
bool(d)

False

In [3]:
if a:
    print(True)

NameError: name 'a' is not defined

In [1]:
print(str)

<class 'str'>
