In [1]:
import csv
import pandas as pd
import seaborn as sb
import time
import datetime
import urllib.request, json
from datetime import date, datetime, timedelta

## data downloader

In [2]:
ticker_file = 'data/stock_ticker_usa.txt'

# start_date = '01/01/2020'
# end_date = '05/01/2020'

today = date.today()
end_date = today.strftime("%m/%d/%Y")

yearago = datetime.now() + timedelta(days=-365)
start_date = yearago.strftime("%m/%d/%Y")

print ('Start date', start_date)
print ('End date', end_date)

Start date 05/02/2019
End date 05/01/2020


In [3]:
def calc_time(date_string):
    uu = time.mktime(datetime.strptime(date_string, "%m/%d/%Y").timetuple())
    return str(int(uu-18000))

In [4]:
base_url = 'https://query2.finance.yahoo.com/v8/finance/chart/<ticker>?formatted=true&crumb=1HexGHzkPFp&lang=en-US&region=US&interval=1d&period1=<start_time>&period2=<end_time>&events=div%7Csplit&corsDomain=finance.yahoo.com'

start_time = calc_time(start_date)
end_time = calc_time(end_date)
base_url = base_url.replace('<start_time>', start_time).replace('<end_time>', end_time)
print ('base_url', base_url)

base_url https://query2.finance.yahoo.com/v8/finance/chart/<ticker>?formatted=true&crumb=1HexGHzkPFp&lang=en-US&region=US&interval=1d&period1=1556755200&period2=1588291200&events=div%7Csplit&corsDomain=finance.yahoo.com


In [5]:
## read ticker file
tickers = list()

with open(ticker_file, 'r') as lines:
    for line in lines:
        parts = line.split(',')
        tup = (parts[0].strip(), parts[1].strip().upper(), parts[2].strip())
        tickers.append(tup)
print ('Len of tickers: ', len(tickers))
print ('Sample ticker:', tickers[0])

Len of tickers:  59
Sample ticker: ('Visa', 'V', 'Finance')


In [6]:
## Download data
dataset = {}

for line in tickers:
    try:
        tick_url = base_url.replace('<ticker>', line[1])
        with urllib.request.urlopen(tick_url) as url:
            data = json.loads(url.read().decode())
            arr = data['chart']['result'][0]['indicators']['quote'][0]['close']
            dataset[line[0]] = (line[0], line[1], line[2], arr)
    except:
        print ('problem with', line)

print ('Size of dataset', len(dataset))
#print ('Sample Record', dataset[tickers[0][0]])

Size of dataset 58


In [7]:
## Base Analysis

# high
# low
# % diff from high
# % diff from low
# last 7 high
# last 7 low
# % diff from 7 days high
# % diff from 7 days low

df = pd.DataFrame(columns=['Name', 'Current',
                           '7 days high', '7 days low' , '% below 7d_high', '% above 52w_low',
                           '52 weeks high', '52 weeks low', '% below 52w_high', '% above 52w_low', '52w percentile'
                           ])

for k,v in dataset.items():
    company_name = v[0]
    ticker = v[1]
    industry = v[2]
    values = v[3]
    values = [round(v,2) for v in values]
    length = len(values)
    
    current = values[-1]
    
    # period    
    period_high = max(values)
    period_low = min(values)
    diff_high = (current-period_high)/period_high
    diff_low = (current-period_low)/period_low
    period_percentile = (current - period_low) / (period_high-period_low)
    
    # last 7 days
    vals_7 = values[-7:]
    high_7 = max(vals_7)
    low_7 = min(vals_7)
    diff_high_7 = (current-high_7)/high_7
    diff_low_7 = (current-low_7)/low_7
    
    df = df.append({'Name':company_name, 'Current':current,
                    '52 weeks high':period_high, '52 weeks low':period_low, 
                    '% below 52w_high':diff_high, '% above 52w_low':diff_low,
                    '7 days high':high_7, '7 days low':low_7 ,
                    '% below 7d_high':diff_high_7, '% above 52w_low':diff_low_7, '52w percentile':period_percentile},
                   ignore_index=True)

In [8]:
print ('''52w percentile -->
       \t 1.0 means 52 weeks high running
       \t 0.0 means 52 weeks low running
       \t Lower the better
       '''
      )

52w percentile -->
       	 1.0 means 52 weeks high running
       	 0.0 means 52 weeks low running
       	 Lower the better
       


In [9]:
cols = ['Name','52w percentile']
df[cols].head(len(tickers))
df.head(len(tickers))

Unnamed: 0,Name,Current,7 days high,7 days low,% below 7d_high,% above 52w_low,52 weeks high,52 weeks low,% below 52w_high,% above 52w_low.1,52w percentile
0,Visa,178.72,181.79,166.38,-0.016888,0.074168,213.31,135.74,-0.162158,0.074168,0.55408
1,Bank of America,24.05,24.78,21.8,-0.029459,0.103211,35.64,18.08,-0.325196,0.103211,0.339977
2,American Express,91.25,96.12,82.46,-0.050666,0.106597,136.93,68.96,-0.333601,0.106597,0.327939
3,Wells Fargo,29.05,30.0,26.53,-0.031667,0.094987,54.46,25.25,-0.466581,0.094987,0.130092
4,US BankCorp,36.5,38.46,33.25,-0.050962,0.097744,60.68,28.93,-0.398484,0.097744,0.238425
5,JP Morgan Chase,95.76,97.86,89.34,-0.021459,0.07186,141.09,79.03,-0.321284,0.07186,0.269578
6,Citigroup,48.56,50.26,42.24,-0.033824,0.149621,81.91,35.39,-0.407154,0.149621,0.283104
7,Paypal,123.0,123.58,115.19,-0.004693,0.067801,123.91,85.26,-0.007344,0.067801,0.976455
8,General Electric,6.8,6.8,6.26,0.0,0.086262,13.16,6.11,-0.483283,0.086262,0.097872
9,Boeing,141.02,141.02,128.68,0.0,0.095897,386.89,95.01,-0.635504,0.095897,0.157633
