In [1]:
# Import dependencies
import datetime
import getpass
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

### Stocks

In [2]:
# Scrape Yahoo Finance's "most active" page; read_html returns one DataFrame
# per HTML table it finds, and the first one is used as the stock listing.
stocks_url = 'https://finance.yahoo.com/most-active?offset=0&count=50'
tables = pd.read_html(io=stocks_url)
stock_df = tables[0]
# Preview the first rows of the scraped listing.
stock_df.head(n=5)

Unnamed: 0,Symbol,Name,Price (Intraday),Change,% Change,Volume,Avg Vol (3 month),Market Cap,PE Ratio (TTM),52 Week Range
0,F,Ford Motor Company,8.25,0.01,+0.12%,65.393M,47.476M,32.711B,825.0,
1,CSCO,"Cisco Systems, Inc.",47.32,-2.61,-5.23%,47.045M,20.114M,205.098B,18.83,
2,S,Sprint Corporation,8.67,0.07,+0.81%,45.387M,18.694M,35.981B,,
3,NIO,NIO Limited,4.03,-0.04,-0.98%,36.859M,71.047M,4.023B,,
4,KHC,The Kraft Heinz Company,27.77,-2.27,-7.56%,37.334M,6.338M,33.945B,,


In [5]:
# Convert Yahoo's abbreviated figures ('205.098B', '1.4T', '65.393M') and signed
# percentages ('+0.12%') into floats expressed in one fixed unit per column.
#
# The magnitude suffix is interpreted rather than merely stripped: stripping
# alone would store a '1.4T' market cap as 1.4 in a column labelled "$Bil.".
_SUFFIX_VALUES = {'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12}


def _abbreviated_to_unit(values, unit):
    """Return `values` as floats expressed in multiples of `unit`.

    Parameters
    ----------
    values : pandas.Series
        Entries such as '32.711B', '1.4T', '65.393M', or plain counts such as
        '100180' / 100180. Thousands separators are ignored.
    unit : str
        One of 'K', 'M', 'B', 'T': the magnitude the result is expressed in.

    Returns
    -------
    pandas.Series
        Float values; missing or unparseable entries become NaN.
    """
    text = values.astype(str).str.strip().str.replace(',', '', regex=False).str.upper()
    suffix = text.str[-1:]
    has_suffix = suffix.isin(list(_SUFFIX_VALUES))
    digits = text.where(~has_suffix, text.str[:-1])
    # Divide the multipliers up front so a value already in `unit` is scaled by exactly 1.0.
    scale = suffix.map({key: value / _SUFFIX_VALUES[unit] for key, value in _SUFFIX_VALUES.items()})
    # Entries without a suffix are raw counts.
    scale = scale.fillna(1.0 / _SUFFIX_VALUES[unit])
    return pd.to_numeric(digits, errors='coerce') * scale


market_cap_converted = _abbreviated_to_unit(stock_df['Market Cap'], 'B')
volume_converted = _abbreviated_to_unit(stock_df['Volume'], 'M')
percent_change_converted = (
    stock_df['% Change']
    .astype(str)
    .str.rstrip('%')
    .str.replace(',', '', regex=False)
    .astype(float)
)

# Replace the raw text columns with the numeric ones; the new columns are
# appended at the end of the frame in this order.
stock_df.drop(columns=['Market Cap', 'Volume', '% Change'], inplace=True)

stock_df['Market Cap ($Bil.)'] = market_cap_converted
stock_df['Volume (Mil.)'] = volume_converted
stock_df['Percent_Change'] = percent_change_converted

In [6]:
# Preview the cleaned stock listing (first five rows).
stock_df.head(n=5)

Unnamed: 0,Symbol,Name,Price (Intraday),Change,Avg Vol (3 month),PE Ratio (TTM),52 Week Range,Market Cap ($Bil.),Volume (Mil.),Percent_Change
0,F,Ford Motor Company,8.25,0.01,47.476M,825.0,,32.711,65.393,0.12
1,CSCO,"Cisco Systems, Inc.",47.32,-2.61,20.114M,18.83,,205.098,47.045,-5.23
2,S,Sprint Corporation,8.67,0.07,18.694M,,,35.981,45.387,0.81
3,NIO,NIO Limited,4.03,-0.04,71.047M,,,4.023,36.859,-0.98
4,KHC,The Kraft Heinz Company,27.77,-2.27,6.338M,,,33.945,37.334,-7.56


In [7]:
# Stamp every row with the local time of this run, then keep only the stocks
# whose market cap exceeds 100 (billions, per the column label) and renumber
# the surviving rows from zero.
stock_df['Time'] = datetime.datetime.now()
is_large_cap = stock_df['Market Cap ($Bil.)'] > 100
stock_df_filtered = stock_df[is_large_cap].reset_index(drop=True)

In [9]:
# Preview the large-cap subset (first five rows).
stock_df_filtered.head(n=5)

Unnamed: 0,Symbol,Name,Price (Intraday),Change,Avg Vol (3 month),PE Ratio (TTM),52 Week Range,Market Cap ($Bil.),Volume (Mil.),Percent_Change,Time
0,CSCO,"Cisco Systems, Inc.",47.32,-2.61,20.114M,18.83,,205.098,47.045,-5.23,2020-02-13 19:56:26.492840
1,BAC,Bank of America Corporation,34.91,-0.01,43.743M,12.69,,308.47,28.23,-0.03,2020-02-13 19:56:26.492840
2,BABA,Alibaba Group Holding Limited,220.36,-3.95,15.364M,63.01,,602.806,27.47,-1.76,2020-02-13 19:56:26.492840
3,TSLA,"Tesla, Inc.",804.0,36.71,15.582M,,,144.917,26.289,4.78,2020-02-13 19:56:26.492840
4,PFE,Pfizer Inc.,36.93,-0.81,21.362M,12.86,,204.375,20.374,-2.15,2020-02-13 19:56:26.492840


In [10]:
# pandas has no module-level savefig(); images are written through the
# matplotlib Figure that pandas' plotting API draws on (matplotlib must be
# installed for DataFrame.plot to work).
# NOTE(review): the intended picture is not stated in the notebook; a bar chart
# of market cap per symbol is assumed here -- confirm.
market_cap_ax = stock_df_filtered.plot.bar(x='Symbol', y='Market Cap ($Bil.)', legend=False)
market_cap_ax.set_ylabel('Market Cap ($Bil.)')
market_cap_ax.get_figure().savefig('stock_df.png', bbox_inches='tight')

In [11]:
# Show the current local timestamp.
current_time = datetime.datetime.now()
print(current_time)

2020-02-13 20:00:25.444308


In [12]:
# Build the MySQL connection string without storing credentials in the notebook.
# MYSQL_USER and MYSQL_HOST fall back to the values this notebook used before;
# the password comes from MYSQL_PASSWORD or, failing that, an interactive prompt.
mysql_user = os.environ.get('MYSQL_USER', 'root')
mysql_host = os.environ.get('MYSQL_HOST', 'localhost')
mysql_password = os.environ.get('MYSQL_PASSWORD') or getpass.getpass(
    f'MySQL password for {mysql_user}@{mysql_host}: '
)
# Percent-encode the password so characters such as '@', '/' or ':' cannot be
# misread as URL delimiters.
rds_connection_string = f"{mysql_user}:{quote(mysql_password, safe='')}@{mysql_host}"
engine = create_engine(f'mysql://{rds_connection_string}')

In [13]:
# Make sure the target database exists; the statement is a no-op when it does.
create_db_result = engine.execute('CREATE DATABASE IF NOT EXISTS wolves_db')
create_db_result

<sqlalchemy.engine.result.ResultProxy at 0x24218d63cf8>

In [14]:
# Select wolves_db as the default database for the writes that follow.
# NOTE(review): USE presumably affects only the connection that ran it; if the
# engine hands out a different pooled connection later, the selection may not
# carry over -- confirm, or put the database name in the engine URL.
use_db_result = engine.execute('USE wolves_db')
use_db_result

<sqlalchemy.engine.result.ResultProxy at 0x24218b11c88>

In [15]:
# Append the large-cap stock rows to the market_data table, creating the table
# on first use; the DataFrame index is not written.
stock_df_filtered.to_sql('market_data', engine, if_exists='append', index=False)

### ETFs

In [16]:
# Scrape Yahoo Finance's ETF page; the first HTML table on the page is used as
# the ETF listing.
etf_url = 'https://finance.yahoo.com/etfs'
tables = pd.read_html(io=etf_url)
etf_df = tables[0]
# Preview the first rows of the scraped listing.
etf_df.head(n=5)

Unnamed: 0,Symbol,Name,Price (Intraday),Change,% Change,Volume,50 Day Average,200 Day Average,52 Week Range
0,XLU,Utilities Select Sector SPDR Fund,69.92,0.77,+1.11%,13.394M,66.68,63.83,
1,XMMO,Invesco S&P MidCap Momentum ETF,64.9,0.67,+1.04%,100180,62.68,59.77,
2,IDU,iShares U.S. Utilities ETF,174.44,1.77,+1.03%,31873,166.79,160.26,
3,FUTY,Fidelity MSCI Utilities Index ETF,45.55,0.52,+1.15%,178459,43.5,41.81,
4,PUI,Invesco DWA Utilities Momentum ETF,37.14,0.36,+0.97%,20583,35.4,34.28,


In [17]:
# Swap the textual '% Change' column for a numeric 'Percent_Change' column:
# pop() removes the original column in place and hands back its values.
percent_change = etf_df.pop('% Change').str.rstrip('%')
percent_change_converted = percent_change.astype(float)
etf_df['Percent_Change'] = percent_change_converted

# Drop the '52 Week Range' column and stamp every row with the local time of
# this run.
del etf_df['52 Week Range']
etf_df['Time'] = datetime.datetime.now()

In [18]:
# Preview the cleaned ETF listing (first five rows).
etf_df.head(n=5)

Unnamed: 0,Symbol,Name,Price (Intraday),Change,Volume,50 Day Average,200 Day Average,Percent_Change,Time
0,XLU,Utilities Select Sector SPDR Fund,69.92,0.77,13.394M,66.68,63.83,1.11,2020-02-13 20:01:16.474530
1,XMMO,Invesco S&P MidCap Momentum ETF,64.9,0.67,100180,62.68,59.77,1.04,2020-02-13 20:01:16.474530
2,IDU,iShares U.S. Utilities ETF,174.44,1.77,31873,166.79,160.26,1.03,2020-02-13 20:01:16.474530
3,FUTY,Fidelity MSCI Utilities Index ETF,45.55,0.52,178459,43.5,41.81,1.15,2020-02-13 20:01:16.474530
4,PUI,Invesco DWA Utilities Momentum ETF,37.14,0.36,20583,35.4,34.28,0.97,2020-02-13 20:01:16.474530


In [19]:
# Re-select wolves_db before writing the ETF table.
use_db_result = engine.execute('USE wolves_db')
use_db_result

<sqlalchemy.engine.result.ResultProxy at 0x24218fddb70>

In [20]:
# Append the cleaned ETF rows to the etf_data table, creating the table on
# first use; the DataFrame index is not written.
# The ETF frame is the one written here: writing the filtered stock frame
# would fill etf_data with stock rows and never persist the ETF data.
etf_df.to_sql(name='etf_data', con=engine, if_exists='append', index=False)