In [63]:
import datetime as dt
from os.path import exists
from pathlib import Path
import numpy as np
import pandas as pd
import yfinance as yf
from finviz import get_news
from yahooquery import Ticker
from newspaper import Article, Config
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dateutil.relativedelta import relativedelta

import src.tools.functions as f0
import src.tools.lists as l0

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# NLTK resources used later: the VADER lexicon (sentiment scoring) and punkt.
nltk.download("vader_lexicon")
nltk.download('punkt')
# newspaper configuration shared by the article-download step: browser-like
# user agent and a request timeout of 10 (presumably seconds — confirm).
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
config = Config()
config.browser_user_agent = user_agent
config.request_timeout = 10

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/gdp/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /home/gdp/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [64]:
# Run date; every input and output folder below is keyed by
# <year>/<year-month>/<full date>.
day1 = '2023-02-01'
month1 = str(day1)[:7]
year1 = str(day1)[:4]

# Single place to change where the project data lives.
data_root = Path("/home/gdp/hot_box/etfs/data")
date_parts = Path(year1) / month1 / day1

# Input files for the day (kept as plain strings, as before).
bulk_dir = data_root / "bulk" / date_parts
bulk_data_file_1 = str(bulk_dir / "bulk_tickers.csv")
bulk_data_file_2 = str(bulk_dir / "core_selection_tickers.xlsx")
bulk_data_file_3 = str(bulk_dir / "dodds_etf_df.csv")
bulk_data_file_4 = str(bulk_dir / "etf_tickers.csv")
bulk_data_file_5 = str(bulk_dir / "Results.csv")

# Output folders.
save_raw = data_root / "raw" / date_parts
save_hist = data_root / "hist" / date_parts
sentiment = data_root / "sentiment" / "sentiment" / date_parts
single_news = data_root / "sentiment" / "single_news" / date_parts
saveRec = data_root / "recommenders" / date_parts

# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of `if not p.exists(): p.mkdir(...)`.
for out_dir in (save_raw, save_hist, sentiment, single_news, saveRec):
    out_dir.mkdir(parents=True, exist_ok=True)

In [65]:
# Load the day's ticker source files. read_csv / read_excel already return
# DataFrames, so no extra pd.DataFrame(...) wrapper is needed.
bulk_tickers_df = pd.read_csv(bulk_data_file_1)
core_selection_tickers_df = pd.read_excel(bulk_data_file_2)
dodds_etf_df_df = pd.read_csv(bulk_data_file_3)
etf_tickers_df = pd.read_csv(bulk_data_file_4)
chuck_tickers_df = pd.read_csv(bulk_data_file_5)

# Only two sources currently feed the universe. Sources that were tried and
# are switched off: bulk_tickers_df['symbol'], core_selection_tickers_df['AAA'],
# etf_tickers_df['symbol'].
# sorted(set(...)) de-duplicates AND gives a reproducible order; a bare
# list(set(...)) of strings changes order between kernel sessions.
etf_ticker_list = sorted(
    set(chuck_tickers_df['Symbol'].dropna().astype(str))
    | set(dodds_etf_df_df['ticker'].dropna().astype(str))
)

len(etf_ticker_list)

159

In [66]:
def get_history(tickers=None):
    """Download two years of price history and cache it per ticker.

    The combined download is pickled to
    ``save_hist / "all_etf_history_data.pkl"``. Each ticker's sub-frame is then
    written to ``save_raw / "<ticker>.pkl"`` with lower-cased column names and
    a datetime index.

    Parameters
    ----------
    tickers : iterable of str, optional
        Symbols to download. Defaults to the module-level ``etf_ticker_list``.

    Returns
    -------
    list of str
        Tickers whose per-ticker pickle was written, in input order.

    Notes
    -----
    The input list is not modified. Removing failed tickers from the list
    while iterating over it would skip the element after each removal, so
    successes are collected into a new list instead.
    """
    if tickers is None:
        tickers = etf_ticker_list
    requested = list(tickers)

    hist = pd.DataFrame(
        yf.download(
            tickers=requested,
            period='2y',
            rounding=True,
            group_by='tickers',
            auto_adjust=True,
            actions=False,
            show_errors=True
        )
    )
    hist.to_pickle(save_hist / "all_etf_history_data.pkl")

    saved = []
    for ticker in requested:
        try:
            data = pd.DataFrame(hist[ticker])
            data.columns = [x.lower() for x in data.columns]
            data.index = pd.to_datetime(data.index)
            data.to_pickle(save_raw / f"{ticker}.pkl")
        except Exception as err:
            # Report the drop instead of discarding the ticker silently.
            print(f"Bad Ticker: {ticker} ({err})")
            continue
        saved.append(ticker)

    return saved

In [67]:
# Download the histories and rebind the universe to the list get_history() returns.
etf_ticker_list = get_history()

[*********************100%***********************]  159 of 159 completed


In [68]:
def technicals_minervini(rec_02_tickers, rs_quantile=0.3, min_rs_rating=30, apply_trend_template=False):
    """Rank tickers by return relative to the S&P 500 and collect SMA statistics.

    For every ticker with a cached history in ``save_raw`` the function
    computes a returns multiple (ticker growth / index growth), converts it to
    a percentile ``rs_rating``, keeps the tickers at or above the
    ``rs_quantile`` quantile of that rating, and records the latest close, the
    50/150/200-day simple moving averages, the 200-day SMA 20 rows back and
    the low/high over the last 260 rows.

    Parameters
    ----------
    rec_02_tickers : iterable of str
        Tickers to evaluate; each needs ``save_raw / "<ticker>.pkl"``.
    rs_quantile : float, default 0.3
        Quantile of ``rs_rating`` used as the first cut (0.3 keeps roughly the
        top 70%).
    min_rs_rating : float, default 30
        Minimum ``rs_rating`` for the saved and returned frame.
    apply_trend_template : bool, default False
        When True, keep only tickers that satisfy conditions 1, 3, 4, 6 and 7
        below. The default keeps every ticker regardless of the conditions.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ``rs_rating`` descending and rounded to 2 decimals. The
        unrounded frame is pickled to
        ``saveRec / "recommender_02_return_dataFrame.pkl"``.
    """
    columns = [
        "ticker",
        "rs_rating",
        "returns_multiple",
        "current_price",
        "sma_50",
        "sma_150",
        "sma_200",
        "sma_200_20",
        "low_52_week",
        "high_52_week"
    ]

    def _growth_multiple(close):
        """Compounded (1 + pct_change) of a price series, taken at its last row."""
        # .iloc is explicit positional access; `series[-1]` on a date-indexed
        # Series relies on a deprecated positional fallback.
        return (close.pct_change() + 1).cumprod().iloc[-1]

    # Index Returns (downloaded once, then reused from the cached pickle)
    index_name = '^GSPC'
    index_path = save_raw / "sp500_index.pkl"
    if exists(index_path):
        index_df = pd.DataFrame(pd.read_pickle(index_path))
    else:
        index_df = pd.DataFrame(yf.download(index_name, period='2y'))
        index_df.to_pickle(index_path)
    index_return = _growth_multiple(index_df["Close"])

    # Returns relative to the market. Ticker and multiple are stored together
    # so that a failed ticker cannot shift later multiples onto the wrong symbol.
    scored = []
    for ticker in rec_02_tickers:
        try:
            df = pd.DataFrame(pd.read_pickle(save_raw / f"{ticker}.pkl"))
            returns_multiple = round((_growth_multiple(df["close"]) / index_return), 2)
        except Exception:
            print(f"Bad Ticker: {ticker}")
            continue
        scored.append((ticker, returns_multiple))

    # Percentile rating, then keep everything at or above the chosen quantile
    rs_df = pd.DataFrame(scored, columns=["ticker", "returns_multiple"])
    rs_df["rs_rating"] = rs_df["returns_multiple"].rank(pct=True) * 100
    rs_df = rs_df[rs_df["rs_rating"] >= rs_df["rs_rating"].quantile(rs_quantile)]

    # Collect one record per surviving ticker; the frame is built once at the
    # end instead of appending (and re-sorting) row by row.
    records = []
    for row in rs_df.itertuples(index=False):
        stock = row.ticker
        try:
            df = pd.DataFrame(pd.read_pickle(save_raw / f"{stock}.pkl")).dropna()
            close = df["close"]
            sma = {window: close.rolling(window=window).mean().round(2) for window in (50, 150, 200)}

            # Storing required values
            currentClose = close.iloc[-1]
            MA_50 = sma[50].iloc[-1]
            MA_150 = sma[150].iloc[-1]
            MA_200 = sma[200].iloc[-1]
            low_52_week = round(df["low"].iloc[-260:].min(), 2)
            high_52_week = round(df["high"].iloc[-260:].max(), 2)
            RS_Rating = round(row.rs_rating, 2)
            Returns_multiple = round(row.returns_multiple, 2)

            try:
                MA_200_20 = sma[200].iloc[-20]
            except IndexError:
                # Fewer than 20 rows of history.
                MA_200_20 = 0

            if apply_trend_template:
                condition_1 = currentClose >= MA_150 >= MA_200       # price above the 150 and 200 SMA
                condition_3 = MA_200 >= MA_200_20                    # 200 SMA not below its value 20 rows ago
                condition_4 = MA_50 >= MA_150 >= MA_200              # 50 SMA above the 150 and 200 SMA
                condition_6 = currentClose >= (1.3 * low_52_week)    # at least 30% above the 260-row low
                condition_7 = currentClose >= (0.7 * high_52_week)   # within 30% of the 260-row high
                if not (condition_1 and condition_3 and condition_4 and condition_6 and condition_7):
                    continue

            records.append(
                {
                    "ticker": stock,
                    "rs_rating": RS_Rating,
                    "returns_multiple": Returns_multiple,
                    "current_price": currentClose,
                    "sma_50": MA_50,
                    "sma_150": MA_150,
                    "sma_200": MA_200,
                    "sma_200_20": MA_200_20,
                    "low_52_week": low_52_week,
                    "high_52_week": high_52_week
                }
            )

        except Exception:
            print(f"Bad Ticker: {stock}")

    exportList = (
        pd.DataFrame(records, columns=columns)
        .sort_values(by="rs_rating", ascending=False, kind="stable")
        .reset_index(drop=True)
    )

    print("\n[2] MINERVINI ")
    print("   > PART-A:")
    exportList_A = exportList.drop_duplicates(subset="ticker")
    part_a_len = len(exportList_A['ticker'])
    print(f"     * Successful Stocks: [{part_a_len}]")

    print("   > PART-B:")
    exportList_B = exportList_A[exportList_A['rs_rating'] >= min_rs_rating]
    exportList_B.to_pickle(saveRec / "recommender_02_return_dataFrame.pkl")
    part_b_len = len(exportList_B['ticker'])
    # Report the threshold that is actually applied.
    print(f"     * Successful Stocks (rs_rating >= {min_rs_rating}): [{part_b_len}]")

    display(exportList_B.round(2))
    return exportList_B.round(2)

In [69]:
# Relative-strength / moving-average table for the downloaded universe.
data_2 = technicals_minervini(etf_ticker_list)


[2] MINERVINI 
   > PART-A:
     * Successful Stocks: [111]
   > PART-B:
     * Successful Stocks (rs_rating >= 69.0): [111]


Unnamed: 0,ticker,rs_rating,returns_multiple,current_price,sma_50,sma_150,sma_200,sma_200_20,low_52_week,high_52_week
0,NRGU,100.00,5.88,442.90,508.01,465.73,478.63,470.57,198.58,797.00
1,DIG,99.37,3.85,40.59,43.05,38.93,38.75,38.03,21.66,50.45
2,ERX,98.74,3.71,63.61,67.10,60.17,60.38,59.26,34.20,80.27
3,GUSH,98.11,2.72,141.91,153.02,156.32,162.11,164.31,84.50,257.76
4,PXE,97.48,2.34,28.97,30.30,29.99,30.05,29.87,19.97,36.69
...,...,...,...,...,...,...,...,...,...,...
105,PIO,32.70,0.94,36.61,34.02,32.42,32.59,32.71,27.82,39.14
106,EIPX,32.70,0.94,20.43,20.22,,,,19.46,20.84
107,JHMI,32.70,0.94,45.29,45.37,46.65,47.70,48.66,41.94,56.51
108,JXI,31.13,0.93,60.69,60.64,60.03,60.74,61.22,51.01,67.69


In [70]:
from os.path import exists
from pathlib import Path
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from newspaper import Article, Config
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

pd.set_option('display.max_columns', None)
nltk.download("vader_lexicon")
nltk.download("punkt")

# newspaper configuration used when downloading article bodies.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
config = Config()
config.browser_user_agent = user_agent
config.request_timeout = 10


# `day1` must already be defined by the date-configuration cell; the former
# `day1 = day1` self-assignment did nothing and has been dropped.
month1 = str(day1)[:7]
year1 = str(day1)[:4]
saveRec = Path(f"/home/gdp/hot_box/etfs/data/recommenders/{year1}/{month1}/{day1}/")
sentiment = Path(f"/home/gdp/hot_box/etfs/data/sentiment/sentiment/{year1}/{month1}/{day1}/")
single_news = Path(f"/home/gdp/hot_box/etfs/data/sentiment/single_news/{year1}/{month1}/{day1}/")
# exist_ok=True keeps the cell re-runnable without a separate exists() check.
sentiment.mkdir(parents=True, exist_ok=True)
single_news.mkdir(parents=True, exist_ok=True)

# Reload the saved relative-strength table and take its tickers as the work list.
data_2 = pd.read_pickle(saveRec / "recommender_02_return_dataFrame.pkl")
stocks_list = list(data_2['ticker'])

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/gdp/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /home/gdp/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [71]:
def sentiment_1(stocks, parsed_news=None, new_stock_list=None, pull_list=None, bad_stocks=None, news_tables=None, n=10):
    """Scrape finviz headlines for each ticker and cache them under ``single_news``.

    If ``single_news / "sentiment_all_stock_news.pkl"`` already exists it is
    returned as-is and no network request is made. Otherwise every ticker's
    finviz quote page is fetched, the first ``n`` rows of its ``news-table``
    are parsed into ticker/date/title/source/link records, the combined frame
    is pickled, and a ``df_single_news_<ticker>.pkl`` file is written for each
    fetched ticker that does not have one yet.

    Parameters
    ----------
    stocks : sequence of str
        Tickers to pull.
    parsed_news, new_stock_list, bad_stocks : list, optional
        Accumulators. A list passed by the caller is appended to in place;
        when omitted, a fresh list is created on every call.
    pull_list : optional
        Accepted for compatibility; it is always replaced by ``stocks``.
    news_tables : dict, optional
        Maps ticker to its scraped news table; a fresh dict is used when omitted.
    n : int, default 10
        Maximum rows read per table and kept per ticker.

    Returns
    -------
    pandas.DataFrame
        Columns ``ticker``, ``date``, ``title``, ``source``, ``link``.
    """
    # None sentinels instead of []/{} defaults: mutable defaults are shared
    # between calls, so a second call would re-use the first call's records.
    parsed_news = [] if parsed_news is None else parsed_news
    new_stock_list = [] if new_stock_list is None else new_stock_list
    bad_stocks = [] if bad_stocks is None else bad_stocks
    news_tables = {} if news_tables is None else news_tables
    all_news_path = single_news / "sentiment_all_stock_news.pkl"

    def finviz_pull(tickers, c=0.0):
        # Check the cache before any network work; the cached frame is
        # returned unchanged, so scraping first would only waste requests.
        if exists(all_news_path):
            return pd.read_pickle(all_news_path)

        for ticker in tickers:
            c += 1
            try:
                url = 'https://finviz.com/quote.ashx?t=' + ticker
                req = Request(url=url,headers={'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'})
                # Context manager closes the HTTP response; the timeout keeps
                # one stalled request from hanging the whole run.
                with urlopen(req, timeout=10) as resp:
                    html = BeautifulSoup(resp, features="lxml")
                news_table = html.find(id='news-table')
                news_tables[ticker] = news_table
                new_stock_list.append(ticker)
                print(f"({c}) [{ticker}] - DATA SOURCED")
            except Exception:
                bad_stocks.append(ticker)

        for file_name, news_table in news_tables.items():
            try:
                rows = news_table.find_all("tr")[:n]
            except Exception as e:
                # e.g. the page had no news table, so news_table is None
                print(e)
                continue

            last_date = None
            for row in rows:
                try:
                    cols = row.find_all("td")
                    ticker = file_name.split('_')[0]
                    stamp = cols[0].text.split()
                    # NOTE(review): finviz appears to print the date only on
                    # the first row of each day and a bare time on later rows
                    # — confirm. A time-only cell re-uses the previous row's
                    # date rather than being parsed as a date itself.
                    if len(stamp) >= 2 or last_date is None:
                        last_date = stamp[0]
                    date = last_date
                    title = cols[1].get_text()
                    link = cols[1].a['href']
                    source = link.split("/")[2]
                    if source == "feedproxy.google.com":
                        source = link.split("/")[4]
                    info_dict = {
                        "Ticker": ticker,
                        "Date": date,
                        "Title": title,
                        "Source": source,
                        "Link": link
                        }
                except Exception:
                    # Best effort: rows without the expected cells are skipped.
                    continue
                parsed_news.append(info_dict)

        if parsed_news:
            parsed_news_df = pd.DataFrame(parsed_news)
        else:
            # Keep the schema even when nothing was parsed, so the column
            # accesses below do not raise KeyError.
            parsed_news_df = pd.DataFrame(columns=["Ticker", "Date", "Title", "Source", "Link"])
        parsed_news_df.columns = [x.lower() for x in parsed_news_df.columns]
        parsed_news_df['date'] = pd.to_datetime(parsed_news_df['date'])
        parsed_news_df.to_pickle(all_news_path)

        for ticker in new_stock_list:
            ticker_path = single_news / f"df_single_news_{ticker}.pkl"
            if exists(ticker_path):
                continue
            stock_news_df = pd.DataFrame(parsed_news_df[parsed_news_df['ticker'] == ticker]).sort_values('date', ascending=False).iloc[:n]
            stock_news_df.to_pickle(ticker_path)
            print(f"{ticker} - COMPLETE - DATA SAVED")
        return parsed_news_df

    print(f"\nTotal Input Stocks: {len(stocks)} \n")
    pull_list = stocks
    parsed_news_df = finviz_pull(pull_list)
    return parsed_news_df

In [72]:
# Pull (or load from cache) the headline table for every ticker in stocks_list.
parsed_news_df = sentiment_1(stocks_list)
parsed_news_df


Total Input Stocks: 111 

(1.0) [NRGU] - DATA SOURCED
(2.0) [DIG] - DATA SOURCED
(3.0) [ERX] - DATA SOURCED
(4.0) [GUSH] - DATA SOURCED
(5.0) [PXE] - DATA SOURCED
(6.0) [UCO] - DATA SOURCED
(7.0) [FCG] - DATA SOURCED
(8.0) [IEO] - DATA SOURCED
(9.0) [FENY] - DATA SOURCED
(10.0) [RYE] - DATA SOURCED
(11.0) [XLE] - DATA SOURCED
(12.0) [VDE] - DATA SOURCED
(13.0) [IYE] - DATA SOURCED
(14.0) [PXI] - DATA SOURCED
(15.0) [FTXN] - DATA SOURCED
(16.0) [UGA] - DATA SOURCED
(17.0) [XOP] - DATA SOURCED
(18.0) [FXN] - DATA SOURCED
(19.0) [PSCE] - DATA SOURCED
(20.0) [OIH] - DATA SOURCED
(22.0) [BNO] - DATA SOURCED
(23.0) [IEZ] - DATA SOURCED
(24.0) [IXC] - DATA SOURCED
(25.0) [USO] - DATA SOURCED
(26.0) [FILL] - DATA SOURCED
(27.0) [USL] - DATA SOURCED
(28.0) [DBE] - DATA SOURCED
(29.0) [XME] - DATA SOURCED
(30.0) [AMZA] - DATA SOURCED
(31.0) [IGE] - DATA SOURCED
(32.0) [XES] - DATA SOURCED
(33.0) [PXJ] - DATA SOURCED
(34.0) [NANR] - DATA SOURCED
(35.0) [URNM] - DATA SOURCED
(36.0) [MLPA] - DATA 

Unnamed: 0,ticker,date,title,source,link
0,NRGU,2020-10-02,Bank of Montreal Announces the Reverse Split o...,finance.yahoo.com,https://finance.yahoo.com/news/bank-montreal-a...
1,NRGU,2020-05-20,Leveraged ETFs That Have Gained More Than 80% ...,finance.yahoo.com,https://finance.yahoo.com/news/leveraged-etfs-...
2,NRGU,2020-05-11,6 Leveraged ETFs Up At Least 25% Last Week Zac...,finance.yahoo.com,https://finance.yahoo.com/news/6-leveraged-etf...
3,NRGU,2020-01-08,Bet on Oil Surge With These Leveraged ETFs Zacks,finance.yahoo.com,https://finance.yahoo.com/news/bet-oil-surge-l...
4,NRGU,2019-10-14,5 Top-Performing Leveraged ETFs of Last Week Z...,finance.yahoo.com,https://finance.yahoo.com/news/5-top-performin...
...,...,...,...,...,...
1064,FXZ,2017-12-12,Huntsman Repays Its Senior Term Loan Market Re...,marketrealist.com,http://marketrealist.com/2017/12/huntsman-repa...
1065,FXZ,2017-11-13,Huntsman to Pay 4Q17 Dividend in December Mark...,marketrealist.com,http://marketrealist.com/2017/11/huntsman-pay-...
1066,FXZ,2017-09-25,Huntsmans 3Q17 Expectations amid Hurricane Har...,marketrealist.com,http://marketrealist.com/2017/09/huntsmans-3q1...
1067,FXZ,2017-09-18,How Attractive Is Eastman Chemicals Dividend Y...,marketrealist.com,http://marketrealist.com/2017/09/how-attractiv...


In [73]:
def sentiment_2(stocks, n=10):
    """Download and summarise the newest ``n`` linked articles for each ticker.

    For every ticker without ``single_news / "df_single_news_full_<ticker>.pkl"``
    the function reads ``df_single_news_<ticker>.pkl``, takes the ``n`` most
    recent rows, runs newspaper's download / parse / nlp on each link, and
    pickles one row per article (date, source, title, article, summary,
    key_words, link). Tickers whose full file already exists are skipped.

    Parameters
    ----------
    stocks : sequence of str
        Tickers to process.
    n : int, default 10
        Number of most recent headlines to follow per ticker.

    Returns
    -------
    None
        Results are written to disk only; progress and errors are printed.
    """
    total = int(len(stocks))

    for position, stock in enumerate(stocks, start=1):
        full_path = single_news / f"df_single_news_full_{stock}.pkl"
        if exists(full_path):
            print(f"\n[ {position} / {total} ] - {stock} \n [X] - DONE - {stock}")
            continue

        print(f"\n[ {position} / {total} ] - {stock}")

        try:
            headlines = pd.DataFrame(pd.read_pickle(single_news / f"df_single_news_{stock}.pkl"))
            headlines.columns = [x.lower() for x in headlines.columns]
            headlines = headlines.sort_values('date', ascending=False).iloc[:n]

            # `records` / `record` replace the former locals named `list` and
            # `dict`, which shadowed the builtins inside this function.
            records = []
            for date, source, link in zip(headlines['date'], headlines['source'], headlines['link']):
                article = Article(link, config=config)

                try:
                    article.download()
                    article.parse()
                    article.nlp()
                except Exception as e:
                    # Best effort: a failed download still yields a row below,
                    # built from whatever fields the Article object holds.
                    print("Exception 2B:" + str(e))

                record = {
                    'date': date,
                    'source': source,
                    'title': article.title,
                    'article': article.text,
                    'summary': article.summary,
                    'key_words': article.keywords,
                    'link': link,
                }
                records.append(record)

            if records:
                try:
                    news_df = pd.DataFrame(records)
                    news_df.to_pickle(full_path)
                    print(f"[X] - DONE - {stock}")
                except Exception as e:
                    print("Exception 2C:" + str(e))

        except Exception as e:
            print("Exception 2A:" + str(e))

    return

In [74]:
# Fetch full article text for each ticker; tickers already processed are skipped.
sentiment_2(stocks_list)


[ 1 / 111 ] - NRGU 
 [X] - DONE - NRGU

[ 2 / 111 ] - DIG 
 [X] - DONE - DIG

[ 3 / 111 ] - ERX 
 [X] - DONE - ERX

[ 4 / 111 ] - GUSH 
 [X] - DONE - GUSH

[ 5 / 111 ] - PXE 
 [X] - DONE - PXE

[ 6 / 111 ] - UCO 
 [X] - DONE - UCO

[ 7 / 111 ] - FCG 
 [X] - DONE - FCG

[ 8 / 111 ] - IEO 
 [X] - DONE - IEO

[ 9 / 111 ] - FENY 
 [X] - DONE - FENY

[ 10 / 111 ] - RYE 
 [X] - DONE - RYE

[ 11 / 111 ] - XLE 
 [X] - DONE - XLE

[ 12 / 111 ] - VDE 
 [X] - DONE - VDE

[ 13 / 111 ] - IYE 
 [X] - DONE - IYE

[ 14 / 111 ] - PXI 
 [X] - DONE - PXI

[ 15 / 111 ] - FTXN 
 [X] - DONE - FTXN

[ 16 / 111 ] - UGA 
 [X] - DONE - UGA

[ 17 / 111 ] - XOP 
 [X] - DONE - XOP

[ 18 / 111 ] - FXN 
 [X] - DONE - FXN

[ 19 / 111 ] - PSCE 
 [X] - DONE - PSCE

[ 20 / 111 ] - OIH 
 [X] - DONE - OIH

[ 21 / 111 ] - FRAK
Exception 2A:[Errno 2] No such file or directory: '/home/gdp/hot_box/etfs/data/sentiment/single_news/2023/2023-02/2023-02-01/df_single_news_FRAK.pkl'

[ 22 / 111 ] - BNO 
 [X] - DONE - BNO

[ 23 / 1

In [75]:
def sentiment_3(newS, stocks, a_or_b):
    """Score a news frame with VADER and return the mean compound score per ticker.

    Every ticker in ``stocks`` is scored against the same ``newS`` frame.

    Parameters
    ----------
    newS : pandas.DataFrame
        News rows. Mode ``'a'`` needs ``date``, ``source``, ``title``,
        ``link``; mode ``'b'`` additionally needs ``article``, ``summary``
        and ``key_words``.
    stocks : sequence of str
        Tickers to label the rows with.
    a_or_b : {'a', 'b'}
        ``'a'`` scores the ``title`` column, ``'b'`` scores ``summary``.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``ticker`` and ``sentiment_score`` (mean compound * 100,
        rounded to a whole number). ``None`` after printing a
        ``BAD TICKER - 3x`` message when a stage fails, or when ``a_or_b`` is
        not a known mode.
    """
    columns_by_mode = {
        'a': ["ticker", "date", 'source', "title", "link"],
        'b': ['ticker', 'date', 'source', 'title', 'link', 'article', 'summary', 'key_words'],
    }
    text_by_mode = {'a': "title", 'b': "summary"}

    if a_or_b not in columns_by_mode:
        print(f"BAD MODE - 3 - {a_or_b!r}")
        return

    stocks = list(stocks)
    if not stocks:
        # Nothing to score: return the empty result schema instead of failing
        # on accumulators that were never created.
        empty = pd.DataFrame(stocks, columns=["ticker"])
        empty["sentiment_score"] = []
        return empty

    cols_lst = columns_by_mode[a_or_b]
    text_column = text_by_mode[a_or_b]
    analyzer = SentimentIntensityAnalyzer()

    # Accumulated across ALL stocks, so a multi-ticker call returns one score
    # per ticker rather than only knowing about the last one.
    values = []
    for stock in stocks:
        try:
            news = newS[cols_lst[1:]].copy()
            news.insert(0, "ticker", stock)
        except Exception:
            print(f"BAD TICKER - 3A - {stock}")
            return

        # Sentiment Analysis
        try:
            # reset_index after dropna: the score frame is indexed 0..m-1, so
            # joining onto an index with gaps attaches scores to the wrong rows.
            news = news.dropna().reset_index(drop=True)
            scores = news[text_column].apply(analyzer.polarity_scores).tolist()
            news = news.join(pd.DataFrame(scores), rsuffix="_right")
        except Exception:
            print(f"BAD TICKER - 3B - {stock}")
            return

        try:
            news["date"] = pd.to_datetime(news['date'])
        except Exception:
            print(f"BAD TICKER - 3C - {stock}")
            return

        try:
            # "compound" is missing when no rows were scored -> KeyError -> 3D
            mean = round(news["compound"].mean() * 100, 0)
        except Exception:
            print(f"BAD TICKER - 3D - {stock}")
            return
        values.append(mean)

    df = pd.DataFrame(stocks, columns=["ticker"])
    df["sentiment_score"] = values
    return df



def sentiment_4(stocks, a_or_b):
    """Score each ticker's saved news and gather the results in one frame.

    Mode ``'a'`` reads ``df_single_news_<ticker>.pkl`` and mode ``'b'`` reads
    ``df_single_news_full_<ticker>.pkl`` from ``single_news``. Each ticker is
    scored on its own via ``sentiment_3`` and the one-row result is pickled to
    ``sentiment / "<ticker>_sentiment.pkl"``. Any failure for a ticker prints
    ``BAD TICKER - 4`` and records the ticker as bad.

    Returns
    -------
    tuple
        ``(frame, bad_stocks)`` where ``frame`` has columns ``ticker`` and
        ``sentiment_score`` and ``bad_stocks`` lists the failed tickers.
    """
    scored_symbols, scored_values, bad_stocks = [], [], []

    for stock in stocks:
        try:
            if a_or_b == 'a':
                news_path = single_news / f"df_single_news_{stock}.pkl"
            elif a_or_b == 'b':
                news_path = single_news / f"df_single_news_full_{stock}.pkl"
            newS = pd.read_pickle(news_path)

            scored = pd.DataFrame(sentiment_3(newS, [stock], a_or_b))
            scored_symbols.append(scored["ticker"].loc[0])
            scored_values.append(scored["sentiment_score"].loc[0])
            scored.to_pickle(sentiment / f"{stock}_sentiment.pkl")
        except Exception:
            print(f"BAD TICKER - 4 - {stock}")
            bad_stocks.append(stock)

    result = pd.DataFrame()
    result["ticker"] = scored_symbols
    result["sentiment_score"] = scored_values
    return result, bad_stocks



def sentiment_5(main_ticker_list, a_or_b):
    """Score all tickers, sort by sentiment and save the result.

    Runs ``sentiment_4`` on ``main_ticker_list``, sorts the scores from
    highest to lowest and pickles them to
    ``saveRec / "recommender_03_return_dataFrame.pkl"``. The frame's shape is
    printed before and after sorting.

    Returns
    -------
    tuple
        ``(df_final, bad_stocks)``: the sorted score frame and the tickers
        that ``sentiment_4`` could not score.
    """
    df_final, bad_stocks = sentiment_4(main_ticker_list, a_or_b)
    print(df_final.shape)

    # One sort is enough; the same sort_values call used to be chained twice.
    # sort_values returns a new frame, so no explicit copy is needed either.
    df_final = df_final.sort_values('sentiment_score', ascending=False)
    df_final.to_pickle(saveRec / "recommender_03_return_dataFrame.pkl")

    print(f"[3] Sentiment Analysis - Successful Securities = [{len(df_final['ticker'])}]]")
    print(df_final.shape)
    return df_final, bad_stocks

In [76]:
# Mode 'b' scores the article summaries produced by sentiment_2.
data_3, bad_stocks_list = sentiment_5(stocks_list, a_or_b='b')

data_3

BAD TICKER - 4 - FRAK
BAD TICKER - 4 - IMLP
BAD TICKER - 4 - UMI
BAD TICKER - 4 - GRES
BAD TICKER - 4 - GUNR
BAD TICKER - 4 - GNR
BAD TICKER - 4 - DUSL
BAD TICKER - 4 - FUE
BAD TICKER - 4 - EMLP
BAD TICKER - 4 - COPX
BAD TICKER - 4 - HAP
BAD TICKER - 4 - PYZ
BAD TICKER - 4 - RGI
BAD TICKER - 4 - RTM
BAD TICKER - 4 - PSCM
BAD TICKER - 4 - PICK
BAD TICKER - 4 - CRAK
BAD TICKER - 4 - IYM
BAD TICKER - 4 - REMX
BAD TICKER - 4 - FXU
BAD TICKER - 4 - FMAT
BAD TICKER - 4 - IWTR
BAD TICKER - 4 - VEGI
BAD TICKER - 4 - VAW
BAD TICKER - 4 - FTRI
BAD TICKER - 4 - PSCI
BAD TICKER - 4 - UPW
BAD TICKER - 4 - RYU
BAD TICKER - 4 - UTSL
BAD TICKER - 4 - XLB
BAD TICKER - 4 - FIW
BAD TICKER - 4 - VIS
BAD TICKER - 4 - FIDU
BAD TICKER - 4 - PHO
BAD TICKER - 4 - UTES
BAD TICKER - 4 - ECLN
BAD TICKER - 4 - FXR
BAD TICKER - 4 - RLY
BAD TICKER - 4 - DRLL
BAD TICKER - 4 - FUTY
BAD TICKER - 4 - VPU
BAD TICKER - 4 - MOO
BAD TICKER - 4 - FTAG
BAD TICKER - 4 - XLU
BAD TICKER - 4 - IDU
BAD TICKER - 4 - EDOW
BAD TICKER

Unnamed: 0,ticker,sentiment_score
13,PXI,92.0
40,UNL,87.0
12,IYE,79.0
4,PXE,78.0
8,FENY,77.0
41,USAI,74.0
14,FTXN,74.0
28,AMZA,74.0
7,IEO,73.0
21,IEZ,66.0


In [77]:
# Join the technical table with the sentiment scores, ordered by ticker.
data_3 = data_3.sort_values('ticker')
data_4 = (
    pd.DataFrame(data_2.loc[data_2['ticker'].isin(list(data_3['ticker']))])
    .sort_values('ticker')
    .merge(data_3, on='ticker')
)
data_4

Unnamed: 0,ticker,rs_rating,returns_multiple,current_price,sma_50,sma_150,sma_200,sma_200_20,low_52_week,high_52_week,sentiment_score
0,AMLP,76.73,1.42,40.78,39.12,38.82,38.7,38.55,32.25,42.47,38.0
1,AMZA,81.45,1.59,33.83,32.3,31.18,30.94,30.68,24.4,34.44,74.0
2,BNO,87.11,1.72,26.99,27.39,29.23,30.06,30.34,22.78,36.84,-59.0
3,DBC,76.73,1.42,24.17,24.52,25.13,25.94,26.15,21.63,30.64,54.0
4,DBE,82.7,1.6,21.03,22.33,23.94,24.72,24.91,18.27,30.51,44.0
5,DBO,75.47,1.41,14.63,14.96,16.19,16.87,17.13,14.0,21.12,10.0
6,DIG,99.37,3.85,40.59,43.05,38.93,38.75,38.03,21.66,50.45,-13.0
7,EINC,72.01,1.35,63.33,62.9,61.99,62.36,62.48,53.82,69.56,47.0
8,ENFR,72.01,1.35,22.24,21.77,21.59,21.76,21.83,18.96,24.52,39.0
9,ERX,98.74,3.71,63.61,67.1,60.17,60.38,59.26,34.2,80.27,58.0


In [78]:
def create_new_cols(df):
    """Add ``my_score``, the average of ``rs_rating`` and ``sentiment_score``.

    The column is written onto ``df`` itself, and that same object is returned.
    """
    blended = df["rs_rating"].add(df["sentiment_score"]).div(2)
    df["my_score"] = blended
    return df



def company_longName(ticker):
    """Return the ``longName`` reported by yahooquery for ``ticker``.

    Any failure during the lookup falls back to returning the ticker symbol
    unchanged.
    """
    try:
        quote_types = Ticker(ticker).quote_type
        first_entry = next(iter(quote_types.values()))
        return first_entry["longName"]
    except Exception:
        return ticker

In [79]:
# Note: create_new_cols writes `my_score` onto data_4 itself; data_5 is the
# rounded copy sorted from highest to lowest my_score.
data_5 = create_new_cols(data_4).round(2).sort_values('my_score', ascending=False)

In [80]:
# Look up a display name per ticker, index the table by it and fix the column order.
data_5_tickers = data_5['ticker'].tolist()
data_5_names = [company_longName(symbol) for symbol in data_5_tickers]

data_5 = (
    data_5
    .assign(name=data_5_names)
    .set_index('name')
    .loc[
        :,
        [
            'ticker', 'my_score', 'sentiment_score', 'rs_rating', 'returns_multiple', 'current_price',
            'sma_50', 'sma_150', 'sma_200', 'sma_200_20', 'low_52_week', 'high_52_week',
        ],
    ]
)

In [81]:
# data_5 = data_5[data_5['sentiment_score'] > 0.0]

In [82]:
# Drop the two excluded tickers in a single filter.
data_5 = data_5[~data_5['ticker'].isin(['URA', 'SLX'])]

In [83]:
# Persist the final ranking as both pickle and CSV in the day's recommender folder.
data_5.to_pickle(saveRec / "recommender_05_return_dataFrame.pkl")
data_5.to_csv(saveRec / "recommender_05_return_dataFrame.csv")
data_5

Unnamed: 0_level_0,ticker,my_score,sentiment_score,rs_rating,returns_multiple,current_price,sma_50,sma_150,sma_200,sma_200_20,low_52_week,high_52_week
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Invesco DWA Energy Momentum ETF,PXI,91.91,92.0,91.82,1.96,43.24,44.45,43.57,43.81,43.73,30.59,53.05
Invesco Dynamic Energy Exploration & Production ETF,PXE,87.74,78.0,97.48,2.34,28.97,30.3,29.99,30.05,29.87,19.97,36.69
Fidelity MSCI Energy Index ETF,FENY,85.98,77.0,94.97,2.02,23.49,24.0,22.54,22.41,22.12,16.16,25.98
iShares U.S. Energy ETF,IYE,85.72,79.0,92.45,1.97,45.59,46.88,44.3,43.9,43.35,31.69,50.75
iShares U.S. Oil & Gas Exploration & Production ETF,IEO,84.46,73.0,95.91,2.17,90.25,94.5,91.34,90.86,89.97,63.44,106.36
First Trust Nasdaq Oil & Gas ETF,FTXN,82.6,74.0,91.19,1.95,27.69,28.3,27.26,27.28,27.12,20.04,31.87
"United States 12 Month Natural Gas Fund, LP",UNL,80.29,87.0,73.58,1.38,12.7,18.03,21.36,22.01,22.49,12.21,29.37
First Trust Natural Gas ETF,FCG,78.46,61.0,95.91,2.17,24.01,25.05,25.03,25.22,25.24,17.12,31.07
Direxion Daily Energy Bull 2X Shares,ERX,78.37,58.0,98.74,3.71,63.61,67.1,60.17,60.38,59.26,34.2,80.27
InfraCap MLP ETF,AMZA,77.72,74.0,81.45,1.59,33.83,32.3,31.18,30.94,30.68,24.4,34.44


In [84]:
# Inspect the final column order.
data_5.columns

Index(['ticker', 'my_score', 'sentiment_score', 'rs_rating',
       'returns_multiple', 'current_price', 'sma_50', 'sma_150', 'sma_200',
       'sma_200_20', 'low_52_week', 'high_52_week'],
      dtype='object')

In [85]:
# Print the final tickers one per line, in ranking order.
for i in list(data_5['ticker']):
    print(i)

PXI
PXE
FENY
IYE
IEO
FTXN
UNL
FCG
ERX
AMZA
IEZ
GUSH
UGA
USAI
XOP
OIH
NRGU
URNM
VDE
PXJ
PSCE
XME
MLPA
FXZ
IXC
XES
DBC
DBE
IGE
FXN
EINC
FILL
AMLP
XLE
ENFR
USO
NANR
RYE
TPYP
USL
DIG
DBO
MLPX
BNO
UCO
