# ETF Top Performer Analyzer - POC

In [55]:
# Import Modules
import pandas as pd
import os
import json
import requests
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import alpaca_trade_api as tradeapi
import yfinance as yf
from pathlib import Path
import sqlalchemy as sql
%matplotlib inline

## 1. IMPORT CSV FILES AND INSERT INTO SQL Tables

In [40]:
#CSV Data Path
etf_list_path = Path('../Resources/etf_list.csv')
etf_constituents_path = Path('../Resources/etf_holdings.csv')

In [34]:
#1.Load ETF List from CSV
etf_list_df = pd.read_csv(etf_list_path,index_col='etf', parse_dates=True, infer_datetime_format=True)
etf_list_df.head()

Unnamed: 0_level_0,category,type
etf,Unnamed: 1_level_1,Unnamed: 2_level_1
RYT,Sector,Technology
XSW,Sector,Technology
XLK,Sector,Technology
USRT,Sector,Real Estate
XLRE,Sector,Real Estate


In [43]:
#2.Load ETF Constituents from CSV
etf_constituents_df = pd.read_csv(etf_constituents_path, parse_dates=True, infer_datetime_format=True)
etf_constituents_df.head()

Unnamed: 0,etf,symbol,name,pct_holding
0,RYT,ENPH,Enphase Energy Inc,0.0192
1,RYT,ANET,Arista Networks Inc,0.0153
2,RYT,CRM,Salesforce.com Inc,0.0153
3,RYT,XLNX,Xilinx Inc,0.0152
4,RYT,PAYC,Paycom Software Inc,0.0151


In [47]:
# Create DB
# Database connection string
eft_data_connection_string = 'sqlite:///../Resources/etf.db'
# Database engine
etf_data_engine = sql.create_engine(eft_data_connection_string, echo=True)
# Create two tables from the dataframe
etf_list_df.to_sql('ETF_LIST', etf_data_engine, index_label='etf', if_exists='replace')
etf_constituents_df.to_sql('ETF_CONSTITUENTS', etf_data_engine, index=False, if_exists='replace')


2021-11-03 19:26:50,355 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("ETF_LIST")
2021-11-03 19:26:50,361 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-11-03 19:26:50,370 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("ETF_LIST")
2021-11-03 19:26:50,373 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-11-03 19:26:50,386 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-11-03 19:26:50,391 INFO sqlalchemy.engine.Engine 
CREATE TABLE "ETF_LIST" (
	etf TEXT, 
	category TEXT, 
	type TEXT
)


2021-11-03 19:26:50,393 INFO sqlalchemy.engine.Engine [no key 0.00259s] ()
2021-11-03 19:26:50,414 INFO sqlalchemy.engine.Engine CREATE INDEX "ix_ETF_LIST_etf" ON "ETF_LIST" (etf)
2021-11-03 19:26:50,417 INFO sqlalchemy.engine.Engine [no key 0.00276s] ()
2021-11-03 19:26:50,438 INFO sqlalchemy.engine.Engine COMMIT
2021-11-03 19:26:50,465 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-11-03 19:26:50,470 INFO sqlalchemy.engine.Engine INSERT INTO "ETF_LIST" (etf, category, type) VALUES

In [74]:
# Select all symbols and names from the Finance sector
sql_query = """
SELECT distinct symbol
FROM ETF_CONSTITUENTS
UNION
SELECT distinct etf
FROM ETF_LIST
"""

In [105]:
etf_symbols = pd.read_sql_query(sql_query, eft_data_connection_string, index_col='symbol')
symbol_list = etf_symbols.index.values.tolist()

['A', 'AAON', 'AAPL', 'AAT', 'ABBV', 'ABCB', 'ABG', 'ABM', 'ABT', 'ACC', 'ACM', 'ACN', 'ADBE', 'ADC', 'ADI', 'ADP', 'ADSK', 'AEIS', 'AFG', 'AFRM', 'AGO', 'AGPXX', 'AIRC', 'AIT', 'AJG', 'AJRD', 'AKAM', 'AKR', 'ALEX', 'ALRM', 'ALTR', 'AMAT', 'AMD', 'AMED', 'AMGN', 'AMH', 'AMN', 'AMSWA', 'AMT', 'AMZN', 'ANET', 'ANSS', 'ANTM', 'AON', 'APD', 'APH', 'APLE', 'APP', 'APPF', 'APPS', 'ARE', 'ARNC', 'ARW', 'ARWR', 'ASAN', 'ASGN', 'ATEN', 'ATR', 'AVAV', 'AVB', 'AVGO', 'AWK', 'AWR', 'AX', 'AXON', 'AXP', 'AYX', 'AZPN', 'BA', 'BAC', 'BBUS', 'BCPC', 'BDN', 'BGS', 'BILL', 'BJ', 'BKNG', 'BKU', 'BL', 'BLD', 'BLDR', 'BLK', 'BLKB', 'BLL', 'BMI', 'BMY', 'BNL', 'BOOT', 'BOX', 'BR', 'BRK.B', 'BRKS', 'BRX', 'BSIG', 'BXP', 'C', 'CABO', 'CAR', 'CAT', 'CBRE', 'CBU', 'CCI', 'CCOI', 'CCS', 'CDAY', 'CDK', 'CDNS', 'CDW', 'CELH', 'CGNX', 'CHDN', 'CHE', 'CHTR', 'CI', 'CIEN', 'CLF', 'CLI', 'CLSK', 'CMCSA', 'CMG', 'CNDT', 'CNMD', 'CNXC', 'COLD', 'CONE', 'COOP', 'COR', 'CORT', 'COST', 'CPRI', 'CPRT', 'CPT', 'CRM', 'CRNC',

# 2. Download historical Data and store into DB
Including parents(ETF), SPY and QQQ

In [78]:
def historical_data_fetcher(symbol_list):
    # Set Alpaca API key and secret
    alpaca_api_key = os.getenv("ALPACA_API_KEY")
    alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Create the Alpaca API object
    alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")
    

    #run symbols in bulk or loop based on limitation
    #Create STOCK_HISTORY table and store the historical data
    today = pd.Timestamp("2021-11-04", tz="America/New_York").isoformat()
    ticker  = ["FB", "TWTR"]
    print(etf_symbols['symbol'].values.tolist())
    
a = historical_data_fetcher(symbol_list)

KeyError: 'symbol'

# 3. Analyze Stock Performance
Analyze the historical data and store the data with index: symbol

moving average
Return
Sharpe Ratio
P/E ratio
Correlations...

In [60]:
def stock_analzer(etf_symbol):
    alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")
    return 

# 4. Cherry Picking to create our portfolio
pick best based on performance scoring matrix per investment criteria and benchmark EFTs

In [61]:
def Stock_picker(eft_symbol, criteria):
    #stock_picker.py
    #code here
    #return dataframe with pick
    return

# 5. Performance Forecast Visualization - Validation

Show our portfolio performance with Chart and Matrix
Run Montecarlo simulation
Compare performance Forecast

In [62]:
def performance_forecast():
    from MCForecastTools import MCSimulation
    # performance_forecast including benchmarks
    # code here
    # See 05-Inst_Simulation_of_Stock_Price_Trajectory/
    return
def report_visualization():
    # Import the hvPlot library
    import hvplot.pandas
    #code here
    # See 06-Data-Visualization-with-PyViz/
    # Report with charts
    return

# 6. REPORT
1. Summarize the analysis and results
2. Future improvement and limitation --no Dividend and CF analysis