In [89]:
# Provides ways to work with large multidimensional arrays
import numpy as np
# Allows for further data manipulation and analysis
import pandas as pd
from pandas_datareader import data as web # Reads stock data
import yfinance as yf
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

import time

# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

# Save to Sqlite
import sqlite3

# Default Values

In [100]:
# Define path to files
# NOTE(review): absolute, machine-specific path -- change it when running elsewhere
path = "/Users/jwiegand/Dev/jrwiegand/data/"

# Start date defaults
S_YEAR = 2018
S_MONTH = 1
S_DAY = 1
# String form is not zero-padded (renders as "2018-1-1")
S_DATE_STR = f"{S_YEAR}-{S_MONTH}-{S_DAY}"
S_DATE_DATETIME = dt.datetime(S_YEAR, S_MONTH, S_DAY)

# End date defaults
E_YEAR = 2024
E_MONTH = 4
E_DAY = 1
# String form is not zero-padded (renders as "2024-4-1")
E_DATE_STR = f"{E_YEAR}-{E_MONTH}-{E_DAY}"
E_DATE_DATETIME = dt.datetime(E_YEAR, E_MONTH, E_DAY)

# Create a connection to the database
# os.path.join yields the same location whether or not `path` ends with a separator
sql_connection = sqlite3.connect(os.path.join(path, "files", "stocks.db"))

# Initial List of Stock Tickers

In [86]:
# Load the "Ticker" column of the master stock list.
# Reuses the configured data root (`path`) instead of repeating the absolute path.
tickers = get_column_from_csv(path + "files/stocks.csv", "Ticker")
tickers

0          A
1         AA
2       AACG
3       AACI
4       AACT
        ... 
5615    ZVRA
5616    ZVSA
5617     ZWS
5618    ZYME
5619    ZYXI
Name: Ticker, Length: 5620, dtype: object

# Saves Stock Data to SQLite

In [97]:
def save_from_yahoo_to_sql(connection, ticker, syear, smonth, sday, eyear, emonth, eday):
    """Download the "Adj Close" data for one ticker and append it to the "stocks" table.

    Parameters
    ----------
    connection : database connection handed to ``to_sql``
    ticker : symbol to download
    syear, smonth, sday : start date components
    eyear, emonth, eday : end date components

    Returns
    -------
    None. Failures are reported with ``print`` instead of being raised, so a
    loop over many tickers keeps going after one of them fails.

    NOTE(review): only the "Adj Close" selection (plus its index) is written and
    the ticker itself is not stored, so rows appended for different tickers end
    up in the same table without a way to tell them apart -- confirm the
    intended schema.
    """
    try:
        start = dt.datetime(syear, smonth, sday)
        end = dt.datetime(eyear, emonth, eday)

        print("Get Data for :", ticker)

        # Route pandas_datareader's reader through yfinance
        yf.pdr_override()
        adj_close = web.DataReader(ticker, start, end)["Adj Close"]

        # If the reader comes back with no rows, say so instead of silently
        # creating/appending an empty result
        if adj_close.empty:
            print("Couldn't Get Data for :", ticker, "(no rows returned)")
            return

        adj_close.to_sql(name="stocks", con=connection, if_exists="append")

    except Exception as ex:
        # Keep the best-effort behaviour, but surface why the ticker failed
        print("Couldn't Get Data for :", ticker, f"({type(ex).__name__}: {ex})")

# Download Stocks in Blocks

In [101]:
# Download one ticker (AAPL) from the configured start date up to today.
# The loop over every entry in `tickers` is currently disabled.
end_date = dt.datetime.today()
save_from_yahoo_to_sql(
    sql_connection, "AAPL",
    S_YEAR, S_MONTH, S_DAY,  # start date defaults instead of repeated literals
    end_date.year, end_date.month, end_date.day,
)

Get Data for : AAPL


[*********************100%%**********************]  1 of 1 completed


In [84]:
def get_stock_df_from_csv(folder, ticker):
    """Read ``<folder>/<ticker>.csv`` into a DataFrame.

    Parameters
    ----------
    folder : directory holding the per-ticker CSV files (a trailing
        separator is optional)
    ticker : file name without the ".csv" extension

    Returns
    -------
    The DataFrame produced by ``pd.read_csv``, or ``None`` (after printing a
    message) when the file does not exist.
    """
    # os.path.join copes with a folder given with or without a trailing separator,
    # whereas plain concatenation would look for "<folder><ticker>.csv"
    csv_path = os.path.join(folder, ticker + ".csv")
    try:
        return pd.read_csv(csv_path)
    except FileNotFoundError:
        print("File Doesn't Exist")
        return None

In [93]:
# listdir returns every entry in the directory; keep only regular files with a
# .csv extension so stray files (e.g. hidden OS metadata) do not end up as tickers
# NOTE(review): `path` is the data root, while the per-ticker CSVs are written
# under "files/stocks/" elsewhere in this notebook -- confirm which directory
# is meant to be listed here
files = [
    x for x in listdir(path)
    if isfile(join(path, x)) and os.path.splitext(x)[1].lower() == ".csv"
]

# splitext splits a file name into (name, extension); keep just the name
# NOTE(review): this rebinds `tickers`, replacing the column loaded from stocks.csv
tickers = [os.path.splitext(x)[0] for x in files]
tickers

['ANTE',
 'BRZE',
 'CSCO',
 'PRI',
 'TRAK',
 'GEF-B',
 'TYRA',
 'AIRC',
 'WTO',
 'HLNE',
 'HUBS',
 'NVCT',
 'SBGI',
 'UFCS',
 'AIRT',
 'SITC',
 'TLIS',
 'AISP',
 'RHE',
 'SBEV',
 'SBFM',
 'LILM',
 'ISSC',
 'KNDI',
 'ALCY',
 'TRNS',
 'NTZ',
 'LBPH',
 'FBLG',
 'AEIS',
 'UAL',
 'TROW',
 'ISRG',
 'AULT',
 'EGP',
 'GDRX',
 'EFC',
 'ISPO',
 'EEX',
 'NVR',
 'MVST',
 'CTKB',
 'PLYM',
 'TELA',
 'FEIM',
 'WOR',
 'TLRY',
 'PRGO',
 'LECO',
 'ALTG',
 'VABK',
 'VCYT',
 'USAC',
 'IMNM',
 'COHN',
 'PLOW',
 'UTZ',
 'ERF',
 'PHG',
 'ALVO',
 'UVE',
 'BUD',
 'ORRF',
 'NAUT',
 'GGR',
 'PDS',
 'FERG',
 'OESX',
 'BLDP',
 'CCSI',
 'SIBN',
 'NOG',
 'GTLB',
 'BXC',
 'JCSE',
 'XCUR',
 'BROS',
 'SWIM',
 'PDD',
 'ELDN',
 'GGE',
 'CHCO',
 'BBIO',
 'LBTYK',
 'IMAB',
 'SPOK',
 'KRNL',
 'SPLP',
 'TPR',
 'NIVF',
 'PBR-A',
 'DVN',
 'MGEE',
 'AMSC',
 'SVT',
 'AMPX',
 'SCYX',
 'AIH',
 'NU',
 'LOVE',
 'ASYS',
 'VIR',
 'LCAA',
 'INSE',
 'SVC',
 'AMST',
 'HAFC',
 'BTAI',
 'AGM-A',
 'NB',
 'CR',
 'ADTN',
 'JVA',
 'SAND',
 'DX

# Get Stock Names in a List

# Returns a Stock Dataframe from a CSV

# Returns a Named Column's Data from a CSV

In [85]:
def get_column_from_csv(file, col_name):
    """Return the column ``col_name`` of the CSV at ``file``.

    The file is loaded with ``pd.read_csv``. When it cannot be found, a
    message is printed and ``None`` is returned.
    """
    try:
        frame = pd.read_csv(file)
    except FileNotFoundError:
        print("File Doesn't Exist")
        return None
    return frame[col_name]

# Get 6 Years of Data for the 1st 20 Stocks

In [87]:
# Destination folder handed to save_to_csv_from_yahoo, built from the configured data root
folder = path + "files/stocks/"

# Today's date does not change between iterations, so compute it once
end_date = dt.datetime.today()

# NOTE(review): save_to_csv_from_yahoo is not defined in the part of the
# notebook visible here -- make sure its defining cell runs before this one
# Slicing (rather than indexing range(20)) avoids an error when fewer than
# 20 tickers are available
for ticker in tickers[:20]:
    save_to_csv_from_yahoo(
        folder, ticker,
        S_YEAR, S_MONTH, S_DAY,  # start date defaults instead of repeated literals
        end_date.year, end_date.month, end_date.day,
    )
print("Finished")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Get Data for : A
Get Data for : AA
Get Data for : AACG
Get Data for : AACI
Get Data for : AACT
Get Data for : AADI
Get Data for : AAGR
Get Data for : AAL



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Get Data for : AAMC
Get Data for : AAME
Get Data for : AAN
Get Data for : AAOI
Get Data for : AAON
Get Data for : AAP
Get Data for : AAPL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Get Data for : AAT
Get Data for : AB
Get Data for : ABAT
Get Data for : ABBV
Get Data for : ABCB
Finished
