# Stock Portfolio
---

**Purpose:**

Dealing with Volume (too much) & Velocity (too fast)

**Data Source:**

https://pandas-datareader.readthedocs.io/en/latest/



In [None]:
# standard-library modules used to inspect the runtime environment
import os
import sys

# report the interpreter version and the working directory
python_version = sys.version
working_dir = os.getcwd()
print('Python Information', python_version)
print('This is your current directory', working_dir)

In [None]:
# date/time support from the standard library
import datetime

# capture today's date and the current timestamp
currentDate = datetime.date.today()
currentTime = datetime.datetime.now()

# show the date in its default and month/day/year forms, then the clock time
print('Today is {}'.format(currentDate))
print('Today is', currentDate.strftime('%m/%d/%Y'))
print('The time is', currentTime.strftime('%H:%M:%S'))

In [None]:
import datetime
import re
from collections import Counter
import glob

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Display settings for DataFrames: thousands separators with two decimals,
# and up to 250 rows/columns before pandas truncates the output.
# The options are addressed by their full "display.*" names because the bare
# patterns 'max_rows' / 'max_columns' also match the "styler.render.*"
# options in newer pandas releases and are then rejected as ambiguous.
pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.precision', 2)
pd.set_option('display.max_rows', 250)
pd.set_option('display.max_columns', 250)

# Notebook-wide plotting defaults.
from pylab import rcParams
# IPython magic: draw matplotlib figures inline in the notebook output
%matplotlib inline
# default figure size of 10 x 6
rcParams['figure.figsize'] = 10, 6
# apply the 'bmh' style sheet
plt.style.use('bmh')

In [None]:
!pip install pandas_datareader

In [None]:
import pandas_datareader as pdr

In [None]:
# download window: from 1 January 2015 up to the moment this cell runs
start_date = datetime.datetime(year=2015, month=1, day=1)
end_date = datetime.datetime.now()

In [None]:
# ticker symbol -> price paid per share
portfolio = dict(
    AMD=7.72,
    NVDA=62.20,
    MYL=43.99,
    BAC=15.94,
    NFLX=96.90,
)

portfolio

In [None]:
# One chart per holding: closing price over the download window, with the
# purchase price drawn as a dashed horizontal reference line.
for symbol, purchase_price in portfolio.items():

    prices = pdr.get_data_yahoo(symbol, start=start_date, end=end_date)
    close = prices['Close']
    first_day, last_day = prices.index.min(), prices.index.max()

    plt.figure(figsize=(16, 5))
    close.plot(color='navy', ls='solid', lw=0.5, label='Price')

    plt.hlines(y=purchase_price, xmin=first_day, xmax=last_day,
               ls='dashed', color='gray', label='Purchase')

    chart_title = "Symbol {}, Highest: ${:,.2f}, Lowest: ${:,.2f}, Purchased: ${:,.2f}".format(
        symbol, close.max(), close.min(), purchase_price)
    plt.title(chart_title)

    # light shading under the price curve
    plt.fill_between(prices.index, close, color='b', alpha=0.1)

    plt.grid(alpha=0.5)

    plt.legend()
    plt.show()

In [None]:
# exchange codes to build download links for
exch = ['amex', 'nyse', 'nasdaq']

# link template; the {} placeholder receives the exchange code
URL = 'https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange={}&render=download'

# print one link per exchange
# NOTE(review): `e` stays bound to the last item ('nasdaq') after this loop,
# and the CSV-loading cell further down reads it.
for e in exch:
    print(URL.format(e))

In [None]:
# Locate the downloaded company-list CSVs.
# glob returns matches in arbitrary, filesystem-dependent order; sorting
# makes the load order (and therefore which row survives the later
# keep='first' de-duplication) reproducible between runs.
files = sorted(glob.glob("/Users/ppfor/Downloads/companylist*.csv"))
files

In [None]:
# Read every CSV in `files` and stack them into one table with a fresh index.
# NOTE(review): `e` is not defined in this cell; it is whatever the earlier
# URL-printing loop left behind (its last item, 'nasdaq'), so every file gets
# the same Exchange label regardless of which exchange it came from. The file
# names (companylist*.csv) do not show how to map a file to its exchange --
# TODO confirm the mapping and assign the label per file.
nq = pd.concat([pd.read_csv(file, parse_dates=['IPOyear']).assign(Exchange=e) for file in files],
               ignore_index=True)
# summarise column dtypes and non-null counts
nq.info()

In [None]:
# normalise every column label to lower case, then list the labels
nq.columns = nq.columns.map(str.lower)
nq.columns

In [None]:
# preview the first rows of the combined table
nq.head()

In [None]:
# rows whose symbol already appeared in an earlier row
nq[nq.duplicated(subset=['symbol'])]

In [None]:
# look at every row listed under the symbol AMOV
nq[nq['symbol'] == 'AMOV']

In [None]:
# keep only the first row for each symbol; modifies nq in place
nq.drop_duplicates(subset=['symbol'], keep='first', inplace=True)

In [None]:
# remove columns that contain no values at all; modifies nq in place
nq.dropna(axis=1, how='all', inplace=True)
# re-check dtypes and non-null counts after the cleanup
nq.info()

In [None]:
# number of missing values per column
nq.isna().sum()

In [None]:
# Keep only rows with a last-sale value of at least 1 and a known market cap.
sales = nq['lastsale'] >= 1
market = nq['marketcap'].notna()

# Take an explicit copy: the boolean-indexed result is still tracked by
# pandas as derived from the old frame, so adding or overwriting columns on
# it later would emit SettingWithCopyWarning.
nq = nq[sales & market].copy()

nq.info()

In [None]:
# one row per holding; the single buy date is broadcast to every row
shares = pd.DataFrame(dict(
    buy_date=pd.Timestamp('20160829'),
    symbol=['AMD', 'NVDA', 'MYL', 'NFLX', 'BAC'],
    buy_price=[7.72, 62.20, 43.99, 96.90, 15.94],
    shares=[150, 30, 25, 10, 65],
))

shares

In [None]:
# set the buy date of the BAC row(s) to 2016-09-09
bac_rows = shares['symbol'].eq('BAC')
shares.loc[bac_rows, ['buy_date']] = pd.Timestamp('20160909')
shares

In [None]:
# attach the listing details to each holding, matching rows on symbol
shares = shares.merge(nq, how='inner', on='symbol')
shares

In [None]:
# per position: (last sale - buy price) multiplied by the number of shares
price_change = shares['lastsale'] - shares['buy_price']
shares['total_value'] = price_change * shares['shares']
# whole days elapsed between each buy date and now
shares['days_since'] = (datetime.datetime.today() - shares['buy_date']).dt.days
report_columns = ['symbol', 'name', 'industry', 'shares', 'buy_date',
                  'buy_price', 'lastsale', 'total_value', 'days_since']
shares[report_columns]

In [None]:
# sum of the total_value column across all holdings
print("Total value ${:,.2f}".format(shares['total_value'].sum()))

In [None]:
# rows per sector, with missing sectors counted as their own bucket
nq['sector'].value_counts(dropna=False)

In [None]:
# concatenate every company name into one space-separated string
names = " ".join(nq['name'].tolist())
type(names)

In [None]:
# Split the combined company names into words and show the 20 most frequent.
# The pattern is a raw string so that `\w` reaches the regex engine as
# written instead of being parsed as an (invalid) string escape, which newer
# Python versions warn about.
words = re.findall(pattern=r'\w+', string=names)
Counter(words).most_common(20)

In [None]:
# Label rows without a sector as 'Funds & Other'.
# The result is assigned back to the column: calling fillna(inplace=True) on
# a column selected from the frame is chained assignment, which pandas 2.x
# warns about and which does not update `nq` under Copy-on-Write.
nq['sector'] = nq['sector'].fillna('Funds & Other')
nq['sector'].value_counts(dropna=False)

In [None]:
# collapse every sector whose name contains 'Consumer' into a single group
nq['sector_groups'] = nq['sector'].map(
    lambda sector: sector if 'Consumer' not in sector else 'Consumer Groups')
nq['sector_groups'].value_counts(dropna=False)

In [None]:
# seaborn for the statistical charts below, using its 'darkgrid' theme
import seaborn as sns
sns.set_style('darkgrid')

In [None]:
# Count plot of the number of rows in each sector group.
plt.figure(figsize=(14,10))
# tilt the x tick labels by 17 degrees
plt.xticks(rotation=17)
sns.countplot(x='sector_groups',  color='navy', data=nq);

In [None]:
# Boxen plot of the lastsale values within each sector group.
plt.figure(figsize=(14,10))
# tilt the x tick labels by 17 degrees
plt.xticks(rotation=17)
sns.boxenplot(x='sector_groups', y='lastsale', data=nq);

In [None]:
# the ten rows with the highest lastsale
nq.nlargest(n=10, columns='lastsale')

In [None]:
# the ten rows with the lowest lastsale
nq.nsmallest(n=10, columns='lastsale')

In [None]:
# rows whose lastsale lies between 200 and 210 (both ends included)
nq[nq['lastsale'].between(200,210)]

In [None]:
# Technology-sector rows whose lastsale is above 250
tech = nq['sector'].eq("Technology")
price = nq['lastsale'].gt(250)

nq[tech & price]

In [None]:
# rows whose name contains 'Blackrock' (case-sensitive match)
nq[nq['name'].str.contains('Blackrock')]

In [None]:
# group the rows by sector group and list the group labels
xf = nq.groupby(by='sector_groups')
list(xf.groups)

In [None]:
# all rows belonging to the 'Technology' group
xf.get_group('Technology')

In [None]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import (HoverTool, ColumnarDataSource, ColumnDataSource,
                          NumeralTickFormatter, DatetimeTickFormatter, BoxAnnotation)

from bokeh.layouts import gridplot, row

In [None]:
def bokeh_graph(Symbol):
    """Show a close-price chart and a close-vs-volume scatter for one ticker.

    Looks up the security name for ``Symbol`` in the Nasdaq Trader symbol
    table, downloads the price/volume history through pandas-datareader's
    Yahoo reader, renders two Bokeh figures side by side in the notebook,
    and finally prints the security name.

    Parameters
    ----------
    Symbol : str
        Ticker symbol, e.g. ``'AMD'``.

    Raises
    ------
    KeyError
        If ``Symbol`` is not a label in the symbol table.
    """

    # get Meta Data for Symbol
    # .iloc[0] picks the first match by position; indexing the label-indexed
    # Series with a bare [0] relied on a positional fallback that pandas has
    # deprecated.
    symbols = pdr.nasdaq_trader.get_nasdaq_symbols()
    Security = symbols.loc[[Symbol], 'Security Name'].iloc[0]

    # get Price/Volume Data for Symbol
    df = pdr.get_data_yahoo(Symbol).reset_index()

    df['Dates'] = df['Date'].astype(str)     # string copy used by the hover tooltip
    df['Volume'] = df['Volume'] / 1e6        # scale volume to millions
    df["Average"] = df['Close'].mean()       # constant column -> flat reference line

    source = ColumnDataSource(df)

    # first plot: close price over time plus its mean as a dashed line

    p1 = figure(title=f"Close ($) for {Symbol}", x_axis_type='datetime', tools='', width=450, height=300)

    p1.line(x='Date', y='Close', source=source, line_width=.75, color='green')
    p1.line(x='Date', y='Average', source=source, line_width=2, line_dash='dashed', color='gray')

    p1.yaxis[0].formatter = NumeralTickFormatter(format="$,0")

    p1.add_tools(HoverTool(tooltips=[("Date", "@Dates"), ("Close", "$@Close{,.2f}"), ("Volume", "@Volume{,}M")]))

    # second plot: close price against volume

    p2 = figure(title='Volume (M) for ' + Symbol, x_axis_type='linear', tools='', width=450, height=300)

    # scatter() draws circle markers by default; figure.circle() without a
    # radius is deprecated in recent Bokeh releases.
    p2.scatter(x='Volume', y='Close', source=source, alpha=.4, color='purple')

    p2.xaxis[0].formatter = NumeralTickFormatter(format=",")
    p2.yaxis[0].formatter = NumeralTickFormatter(format="$,0")

    # arrange plots

    gp = gridplot([[p1, p2]])

    output_notebook()

    show(gp)

    print('\t'+Security)

In [None]:
# render the two charts for AMD
bokeh_graph('AMD')

In [None]:
# render the two charts for the symbol F
bokeh_graph('F')

In [None]:
# today's date as YYYY-MM-DD
today = datetime.date.today().isoformat()

# historical-data link running from the fixed start date 2020-07-23 to today
URL = "".join(("https://www.nasdaq.com/api/v1/historical/INDU/index/2020-07-23/", today))

print(URL)