In [None]:
# web scraper for tickers

import pandas as pd
import pandas_datareader.data as web
import bs4 as bs
import requests
import os
import numpy as np
import datetime as dt
import pickle
import matplotlib.pyplot as plt
from matplotlib import style


In [None]:
# Scrape the Wikipedia constituents table for FTSE 100 stocks (note: the pickle file name below still says "Nasdaq-100")

def scrape_func():
  """Scrape FTSE 100 tickers from Wikipedia and cache them in a pickle file.

  Downloads the FTSE 100 Wikipedia page, reads the second column of the
  'constituents' table, appends the '.L' suffix to each ticker and writes the
  resulting list to 'Nasdaq-100tickers.pickle' in the working directory.

  Tickers that contain a '.' on Wikipedia are skipped, as in the original
  implementation: they need extra reformatting before a '.L' suffix can be added.

  Returns:
    list[str]: the suffixed tickers, in table order.

  Raises:
    requests.HTTPError: if Wikipedia answers with an error status.
    ValueError: if the constituents table cannot be found in the page.
  """
  # A timeout stops the notebook from hanging forever on a stalled connection.
  response = requests.get('https://en.wikipedia.org/wiki/FTSE_100_Index', timeout=30)
  # Fail loudly on 4xx/5xx instead of trying to parse an error page.
  response.raise_for_status()

  # Name the parser explicitly so the result does not depend on which optional
  # parser libraries are installed (and to avoid bs4's "no parser specified" warning).
  soup = bs.BeautifulSoup(response.text, 'html.parser')
  table = soup.find('table', {'class': 'wikitable sortable'}, id='constituents')
  if table is None:
    raise ValueError("Could not find the 'constituents' table on the FTSE 100 Wikipedia page")

  tickers = []
  for row in table.findAll('tr')[1:]:                 # [1:] skips the header row
    cells = row.findAll('td')
    if len(cells) < 2:
      continue                                        # not a data row (no ticker cell)
    # Cell text can carry surrounding whitespace/newlines; without strip() the
    # suffix would be appended after the newline.
    ticker = cells[1].text.strip()
    if not ticker or '.' in ticker:
      continue                                        # dotted tickers are deliberately left out
    tickers.append(ticker + '.L')

  with open('Nasdaq-100tickers.pickle', 'wb') as f:
    pickle.dump(tickers, f)

  return tickers

# Run the scraper once so that 'Nasdaq-100tickers.pickle' exists for the later cells that load it.
scrape_func()

In [None]:
# With that list, download each ticker's data from Yahoo and save it as one CSV per ticker, all in the same directory.
# If you get throttled for too many requests, `import time` and add time.sleep(x) between downloads to let the server recover.

def yh_get_data(reload_tickers=False, data_dir='/content/stock_dfs'):
  """Download price history for every ticker and save one CSV per ticker.

  Args:
    reload_tickers: if True, re-scrape the ticker list with scrape_func();
      otherwise load it from 'Nasdaq-100tickers.pickle'.
    data_dir: directory the CSV files are written to. It is created if missing.

  Tickers whose CSV already exists in data_dir are not downloaded again.
  """
  if reload_tickers:
    # Use the freshly scraped list (previously it was immediately overwritten
    # by a hard-coded list of crypto symbols).
    tickers = scrape_func()
  else:
    # NOTE: pickle.load can execute arbitrary code; only load a file that was
    # written locally by scrape_func().
    with open('Nasdaq-100tickers.pickle', 'rb') as f:
      tickers = pickle.load(f)

  # Create the same directory the CSVs are written to, so the download does not
  # depend on the current working directory.
  os.makedirs(data_dir, exist_ok=True)

  start = dt.datetime(2020, 6, 1)
  end = dt.datetime(2020, 6, 30)

  for ticker in tickers:
    print(ticker)
    csv_path = os.path.join(data_dir, '{}.csv'.format(ticker))
    if os.path.exists(csv_path):
      print('Already have {}'.format(ticker))
      continue
    df = web.DataReader(ticker, 'yahoo', start=start, end=end)    # source of financial info
    df.to_csv(csv_path)

# Download with the default reload_tickers=False, i.e. tickers are read from the pickle file rather than re-scraped.
yh_get_data()

In [None]:
# If you already have a ticker list, just compile all the CSVs here

In [None]:
# combining all csv data for ['Adj Close'] into one dataframe

def compile_data(data_dir='/content/stock_dfs', tickers_file='Nasdaq-100tickers.pickle'):
  """Join the 'Adj Close' column of every ticker CSV into one table and save it.

  Args:
    data_dir: directory holding one '<ticker>.csv' per ticker; the combined
      'nasdaq_joined_closes.csv' is written to the same directory.
    tickers_file: pickle file containing the list of tickers to combine.

  Each CSV must have 'Date', 'Open', 'High', 'Low', 'Close', 'Volume' and
  'Adj Close' columns. The result is indexed by 'Date' with one column per
  ticker (outer join, so dates missing for a ticker become NaN).
  """
  # NOTE: pickle.load can execute arbitrary code; only load a locally produced file.
  with open(tickers_file, 'rb') as f:
    tickers = pickle.load(f)

  # None (rather than an empty DataFrame) marks "nothing joined yet", so a
  # first CSV with zero rows still keeps its column in the result.
  compiled_df = None

  for count, ticker in enumerate(tickers):
    df = pd.read_csv(os.path.join(data_dir, '{}.csv'.format(ticker)), index_col='Date')

    # Keep only the adjusted close, renamed to the ticker. `columns=` is used
    # because recent pandas no longer accepts `axis` as a positional argument.
    df = (
      df.rename(columns={'Adj Close': ticker})
        .drop(columns=['Open', 'High', 'Low', 'Close', 'Volume'])
    )

    if compiled_df is None:
      compiled_df = df
    else:
      compiled_df = compiled_df.join(df, how='outer')

    # basically a loading bar
    if count % 10 == 0:
      print(count)

  if compiled_df is None:
    compiled_df = pd.DataFrame()                      # empty ticker list

  print(compiled_df.head(10))
  compiled_df.to_csv(os.path.join(data_dir, 'nasdaq_joined_closes.csv'))

# Join the per-ticker CSVs into a single table and save it as a CSV.
compile_data()

In [None]:
# Visualise correlation data as a heatmap

# Apply matplotlib's 'Solarize_Light2' style sheet to figures created from here on.
plt.style.use('Solarize_Light2')

def visualise_data(csv_path='/content/stock_dfs/nasdaq_joined_closes.csv'):
  """Plot a heatmap of the pairwise correlations between the joined price columns.

  Args:
    csv_path: CSV with the dates in the first column and one price column per
      ticker. Defaults to the file written by compile_data().
  """
  # Load the dates as the index so that only numeric price columns take part
  # in the correlation (a string 'Date' column makes corr() fail on recent pandas).
  df = pd.read_csv(csv_path, index_col=0)

  # Correlation matrix for any scraped data
  df_corr = df.corr()
  data = df_corr.values

  fig = plt.figure(figsize=(24, 12))
  ax = fig.add_subplot(1, 1, 1)

  # Correlations lie in [-1, 1]; fix the colour scale to that range.
  heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn, vmin=-1, vmax=1)
  fig.colorbar(heatmap)

  # pcolor draws cell i over [i, i + 1], so +0.5 puts each tick at the cell centre.
  # x runs over the columns (shape[1]) and y over the rows (shape[0]).
  ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
  ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)
  ax.invert_yaxis()                                   # first row at the top, like a table
  ax.xaxis.tick_top()

  column_labels = df_corr.columns
  row_labels = df_corr.index

  ax.set_xticklabels(column_labels)
  ax.set_yticklabels(row_labels)
  plt.xticks(rotation=90)

  plt.tight_layout()
  plt.show()

# Draw the correlation heatmap.
visualise_data()