In [4]:
import yfinance as yf, pandas as pd, shutil, os, time, glob
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from get_all_tickers import get_tickers as gt
from ta import add_all_ta_features
from ta.utils import dropna

In [5]:
tickers = gt.get_tickers_filtered(mktcap_min=150000, mktcap_max=10000000)

In [6]:
len(tickers)

51

In [8]:
tickers[:3]

['ABT', 'ABBV', 'BABA']

In [13]:
# Holds the amount of API calls we executed
Amount_of_API_Calls = 0

# Used to make sure we don't waste too many API calls on one Stock ticker that could be having issues
Stock_Failure = 0
Stocks_Not_Imported = 0

# Used to iterate through our list of tickers
i=0
while (i < len(tickers)) and (Amount_of_API_Calls < 1800):
    try:
        stock = tickers[i]  # Gets the current stock ticker
        temp = yf.Ticker(str(stock))
        Hist_data = temp.history(period="max")  # Tells yfinance what kind of data we want about this stock (In this example, all of the historical data)
        Hist_data.to_csv("stock_data/"+stock+".csv")  # Saves the historical data in csv format for further processing later
        time.sleep(2)  # Pauses the loop for two seconds so we don't cause issues with Yahoo Finance's backend operations
        Amount_of_API_Calls += 1 
        Stock_Failure = 0
        i += 1  # Iteration to the next ticker
        print("Importing stock data:" + str(i))
    except ValueError:
        print("Yahoo Finance Backend Error, Attempting to Fix")  # An error occured on Yahoo Finance's backend. We will attempt to retreive the data again
        if Stock_Failure > 5:  # Move on to the next ticker if the current ticker fails more than 5 times
            i+=1
            Stocks_Not_Imported += 1
        Amount_of_API_Calls += 1
        Stock_Failure += 1
print("The amount of stocks we successfully imported: " + str(i - Stocks_Not_Imported))

Importing stock data:1
Importing stock data:2
Importing stock data:3
Importing stock data:4
Importing stock data:5
Importing stock data:6
Importing stock data:7
Importing stock data:8
Importing stock data:9
Importing stock data:10
Importing stock data:11
Importing stock data:12
Importing stock data:13
Importing stock data:14
Importing stock data:15
Importing stock data:16
Importing stock data:17
Importing stock data:18
Importing stock data:19
Importing stock data:20
Importing stock data:21
Importing stock data:22
Importing stock data:23
Importing stock data:24
Importing stock data:25
Importing stock data:26
Importing stock data:27
Importing stock data:28
Importing stock data:29
Importing stock data:30
Importing stock data:31
Importing stock data:32
Importing stock data:33
Importing stock data:34
Importing stock data:35
Importing stock data:36
Importing stock data:37
Importing stock data:38
Importing stock data:39
Importing stock data:40
Importing stock data:41
Importing stock data:42
I

In [16]:
# Creates a list of all stock files in stock_data folder
file_list = glob.glob("stock_data/*.csv")

In [17]:
file_list[1]

'stock_data/TM.csv'

In [34]:
df = pd.read_csv(file_list[1])

In [35]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11101 entries, 0 to 11100
Data columns (total 8 columns):
Date            11101 non-null object
Open            11096 non-null float64
High            11096 non-null float64
Low             11096 non-null float64
Close           11096 non-null float64
Volume          11096 non-null float64
Dividends       11101 non-null float64
Stock Splits    11101 non-null float64
dtypes: float64(7), object(1)
memory usage: 693.9+ KB


In [36]:
df[df.Open.isnull()]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
1353,1981-12-25,,,,,,0.0,1.1
1480,1982-06-26,,,,,,0.0,1.1
3125,1988-12-24,,,,,,0.0,1.05
8358,2009-09-27,,,,,,0.576089,0.0
8484,2010-03-28,,,,,,0.700521,0.0


In [37]:
df.dropna(inplace=True)

In [38]:
df = add_all_ta_features(df, open="Open", high="High", low="Low", close="Close", volume="Volume")
df.head(80)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,volume_adi,volume_obv,...,momentum_uo,momentum_stoch,momentum_stoch_signal,momentum_wr,momentum_ao,momentum_kama,momentum_roc,others_dr,others_dlr,others_cr
0,1976-08-18,1.46,1.46,1.46,1.46,19500.0,0.0,0.0,0.0,19500.0,...,,,,,,,,-96.621482,,0.000000
1,1976-08-19,1.46,1.46,1.46,1.46,500.0,0.0,0.0,0.0,20000.0,...,,,,,,,,0.000000,0.000000,0.000000
2,1976-08-20,1.45,1.45,1.45,1.45,8000.0,0.0,0.0,0.0,12000.0,...,,,,,,,,-0.684932,-0.687288,-0.684932
3,1976-08-23,1.41,1.41,1.41,1.41,1000.0,0.0,0.0,0.0,11000.0,...,,,,,,,,-2.758621,-2.797385,-3.424658
4,1976-08-24,1.39,1.39,1.39,1.39,7500.0,0.0,0.0,0.0,3500.0,...,,,,,,,,-1.418440,-1.428596,-4.794521
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,1976-12-06,1.54,1.54,1.54,1.54,22500.0,0.0,0.0,0.0,42500.0,...,80.160189,100.000000,73.333333,-0.000000,0.033765,1.409340,15.789474,9.219858,8.819271,5.479452
76,1976-12-07,1.59,1.59,1.59,1.59,33500.0,0.0,0.0,0.0,76000.0,...,86.394558,100.000000,100.000000,-0.000000,0.078118,1.462427,20.454545,3.246753,3.195160,8.904110
77,1976-12-08,1.62,1.62,1.62,1.62,2500.0,0.0,0.0,0.0,78500.0,...,88.462656,100.000000,100.000000,-0.000000,0.124706,1.510748,21.804511,1.886792,1.869213,10.958904
78,1976-12-09,1.61,1.61,1.61,1.61,500.0,0.0,0.0,0.0,78000.0,...,89.889164,96.551724,98.850575,-3.448276,0.169294,1.537657,18.382353,-0.617284,-0.619197,10.273973


In [42]:
np.average(df.iloc[0:30].Open)

1.3496666666666668

In [46]:
# Better indexing function for calculating average windows
for i in range (30,60,1):
    print(np.average(df.iloc[i-30:i].Open))

nan
1.3496666666666668
1.344
1.338
1.3323333333333334
1.3283333333333331
1.325333333333333
1.3233333333333335
1.3216666666666665
1.3179999999999998
1.3143333333333331
1.3109999999999997
1.3076666666666665
1.3043333333333331
1.3033333333333332
1.3023333333333331
1.3009999999999997
1.3006666666666666
1.2999999999999998
1.301333333333333
1.303
1.3056666666666668
1.3099999999999998
1.3139999999999998
1.318
1.3203333333333334
1.3226666666666664
1.3236666666666665
1.3246666666666664
1.326
1.3273333333333333
