In [76]:
import datetime as dt
from os.path import exists
from pathlib import Path
import numpy as np
import pandas as pd
import yfinance as yf
from finviz import get_news
from yahooquery import Ticker
from newspaper import Article, Config
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dateutil.relativedelta import relativedelta

import src.tools.functions as f0
import src.tools.lists as l0

pd.set_option('display.max_columns', None)
nltk.download("vader_lexicon")
nltk.download('punkt')
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
config = Config()
config.browser_user_agent = user_agent
config.request_timeout = 10

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/gdp/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /home/gdp/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [77]:
day1 = '2023-01-31'
month1 = str(day1)[:7]
year1 = str(day1)[:4]

bulk_data_file_1 = (f"/home/gdp/hot_box/etfs/data/bulk/{year1}/{month1}/{day1}/bulk_tickers.csv")
bulk_data_file_2 = (f"/home/gdp/hot_box/etfs/data/bulk/{year1}/{month1}/{day1}/core_selection_tickers.xlsx")
bulk_data_file_3 = (f"/home/gdp/hot_box/etfs/data/bulk/{year1}/{month1}/{day1}/dodds_etf_df.csv")
bulk_data_file_4 = (f"/home/gdp/hot_box/etfs/data/bulk/{year1}/{month1}/{day1}/etf_tickers.csv")

save_raw = Path(f"/home/gdp/hot_box/etfs/data/raw/{year1}/{month1}/{day1}/")
if not save_raw.exists():
    save_raw.mkdir(parents=True)

save_hist = Path(f"/home/gdp/hot_box/etfs/data/hist/{year1}/{month1}/{day1}/")
if not save_hist.exists():
    save_hist.mkdir(parents=True)    
    
sentiment = Path(f"/home/gdp/hot_box/etfs/data/sentiment/sentiment/{year1}/{month1}/{day1}/")
if not sentiment.exists():
    sentiment.mkdir(parents=True)   
    
single_news = Path(f"/home/gdp/hot_box/etfs/data/sentiment/single_news/{year1}/{month1}/{day1}/")
if not single_news.exists():
    single_news.mkdir(parents=True)
    
saveRec = Path(f"/home/gdp/hot_box/etfs/data/recommenders/{year1}/{month1}/{day1}/")
if not saveRec.exists():
    saveRec.mkdir(parents=True)    

In [78]:
bulk_tickers_df = pd.DataFrame(pd.read_csv(bulk_data_file_1))
core_selection_tickers_df = pd.DataFrame(pd.read_excel(bulk_data_file_2))
dodds_etf_df_df = pd.DataFrame(pd.read_csv(bulk_data_file_3))
etf_tickers_df = pd.DataFrame(pd.read_csv(bulk_data_file_4))
chuck_tickers_df = pd.read_csv('/home/gdp/hot_box/etfs/Results.csv')

etf_ticker_list = (
    # list(bulk_tickers_df['symbol']) + \
        # list(core_selection_tickers_df['AAA']) + \
            list(chuck_tickers_df['Symbol']) + \
                list(dodds_etf_df_df['ticker'])
                    # list(etf_tickers_df['symbol'])
    )

etf_ticker_list = list(set(etf_ticker_list))

len(etf_ticker_list)

159

In [79]:
from finvizfinance.quote import finvizfinance
import pandas as pd
from pathlib import Path
from datetime import datetime
from os.path import exists


class Recommendations1(object):


    def __init__(self, today):
        self.today = today
        self.saveMonth = str(today)[:7]
        self.saveDay = str(today)[8:10]
        self.reportDate = str(today)[:10]

        self.saveRec = Path(f"data/recommenders/{self.saveMonth}/{self.today}/")
        if not self.saveRec.exists():
            self.saveRec.mkdir(parents=True)

        self.saveRaw = Path(f"data/raw/{self.saveMonth}/{self.today}/")
        if not self.saveRaw.exists():
            self.saveRaw.mkdir(parents=True)

        self.saveScreeners = Path(f"data/screeners/{self.saveMonth}/{self.today}/")
        if not self.saveScreeners.exists():
            self.saveScreeners.mkdir(parents=True)

        self.saveTickers = Path(f"data/tickers/{self.saveMonth}/{self.today}/")
        if not self.saveTickers.exists():
            self.saveTickers.mkdir(parents=True)
            

    def run_rec1(self, tickers):
        self.ticker_list = tickers
        self.recommendations = []

        for s in self.ticker_list:
            try:
                recommendation = finvizfinance(s).ticker_fundament()["Recom"]
            except Exception:
                recommendation = 6.0

            if recommendation == "-":
                recommendation = 6.0

            self.recommendations.append(round(float(recommendation), 2))

        dataframe = pd.DataFrame(
            list(zip(self.ticker_list, self.recommendations)),
            columns=["Company", "Recommendations"],
        ).sort_values("Recommendations")
        dataframe_02 = dataframe[dataframe["Recommendations"] < 2.6]
        dataframe_02.columns = ["Symbol", "Score"]
        dataframe_02["rank"] = range(1, len(dataframe_02["Symbol"]) + 1)
        dataframe_02 = dataframe_02.set_index("rank")
        return dataframe, dataframe_02

In [80]:
# dataframe, dataframe_02 = Recommendations1('2023-02-01').run_rec1(etf_ticker_list)

In [81]:
def get_history():
    hist = pd.DataFrame(
        yf.download(
            tickers=etf_ticker_list, 
            period='2y', 
            rounding=True, 
            group_by='tickers', 
            auto_adjust=True,
            actions=False,
            show_errors=True
        )
    )
    hist.to_pickle(save_hist / f"all_etf_history_data.pkl")

    for ticker in etf_ticker_list:
        try:    
            data = pd.DataFrame(hist[ticker])
            data.columns = [x.lower() for x in data.columns]
            data.index = pd.to_datetime(data.index)
            data.to_pickle(save_raw / f"{ticker}.pkl")    
        except Exception as e:
            etf_ticker_list.remove(ticker) 

    return etf_ticker_list

In [82]:
etf_ticker_list = get_history()

[*********************100%***********************]  159 of 159 completed


In [83]:
def technicals_minervini(rec_02_tickers):            
    exportList = pd.DataFrame(
        columns=[
            "ticker", 
            "rs_rating", 
            "returns_multiple", 
            "current_price", 
            "sma_50", 
            "sma_150", 
            "sma_200", 
            "sma_200_20", 
            "low_52_week", 
            "high_52_week"
        ]
    )    

    
    # Index Returns
    index_name = '^GSPC'
    if exists(save_raw / "sp500_index.pkl"):
        index_df = pd.DataFrame(pd.read_pickle(save_raw / "sp500_index.pkl"))
        index_df["pct_change"] = index_df["Close"].pct_change()
        index_return = (index_df["pct_change"] + 1).cumprod()[-1]

    elif not exists(save_raw / "sp500_index.pkl"):
        index_df = pd.DataFrame(yf.download(index_name, period='2y'))
        index_df.to_pickle(save_raw / "sp500_index.pkl")
        index_df["pct_change"] = index_df["Close"].pct_change()
        index_return = (index_df["pct_change"] + 1).cumprod()[-1]
        
        
    # Find top 50% performing stocks (relative to the S&P 500)
    returns_multiples = []
    for ticker in rec_02_tickers:

        # Calculating returns relative to the market (returns multiple)      
        try:
            df = pd.DataFrame(pd.read_pickle(save_raw / f"{ticker}.pkl"))
            df["pct_change"] = df["close"].pct_change()
            stock_return = (df["pct_change"] + 1).cumprod()[-1]
            returns_multiple = round((stock_return / index_return), 2)
            returns_multiples.extend([returns_multiple])

        except Exception:
            print(f"Bad Ticker: {ticker}")
         
         
    # Creating dataframe of only top 70%
    rs_df = pd.DataFrame(list(zip(rec_02_tickers, returns_multiples)),columns=["ticker", "returns_multiple"],)
    rs_df["rs_rating"] = rs_df["returns_multiple"].rank(pct=True) * 100
    rs_df = rs_df[rs_df["rs_rating"] >= rs_df["rs_rating"].quantile(0.3)]    
    
    
    # Checking Minervini conditions of top 60% of stocks in given list
    rs_stocks = list(rs_df["ticker"])
    
    for stock in rs_stocks:
        try:     
            df = pd.DataFrame(pd.read_pickle(save_raw / f"{stock}.pkl")).dropna()
            sma = [50, 150, 200]

            for x in sma:
                df["SMA_" + str(x)] = round(df["close"].rolling(window=x).mean(), 2)

            # Storing required values
            currentClose = df["close"].iloc[-1]
            MA_50 = df["SMA_50"].iloc[-1]
            MA_150 = df["SMA_150"].iloc[-1]
            MA_200 = df["SMA_200"].iloc[-1]
            low_52_week = round(min(df["low"][-260:]), 2)
            high_52_week = round(max(df["high"][-260:]), 2)    
            RS_Rating = round(rs_df[rs_df["ticker"] == stock].rs_rating.tolist()[0], 2)
            Returns_multiple = round(rs_df[rs_df["ticker"] == stock].returns_multiple.tolist()[0], 2)
            
            try:
                MA_200_20 = df["SMA_200"][-20]
            except Exception:
                MA_200_20 = 0
                        
            condition_1 = currentClose >= MA_150 >= MA_200                                                            # Condition 1: Current Price > 150 SMA and > 200 SMA
            condition_2 = MA_150 >= MA_200                                                                            # Condition 2: 150 SMA and > 200 SMA
            condition_3 = MA_200 >= MA_200_20                                                                         # Condition 3: 200 SMA trending up for at least 1 month
            condition_4 = MA_50 >= MA_150 >= MA_200                                                                   # Condition 4: 50 SMA > 150 SMA and 50 SMA> 200 SMA
            condition_5 = currentClose >= MA_50                                                                       # Condition 5: Current Price > 50 SMA
            condition_6 = currentClose >= (1.3 * low_52_week)                                                         # Condition 6: Current Price is at least 30% above 52 week low
            condition_7 = currentClose >= (0.7 * high_52_week)                                                        # Condition 7: Current Price is within 30% of 52 week high

            # if (
            #     condition_1 & 
            #     # condition_2 & 
            #     condition_3 & 
            #     condition_4 &
            #     # condition_5 & 
            #     condition_6 & 
            #     condition_7
            #     ):                                                                                                      # If all conditions above are true, add Ticker to exportList 
            exportList = exportList.append(
                {
                    "ticker": stock,
                    "rs_rating": RS_Rating,
                    "returns_multiple": Returns_multiple,
                    "current_price": currentClose,
                    "sma_50": MA_50,
                    "sma_150": MA_150,
                    "sma_200": MA_200,
                    "sma_200_20": MA_200_20,
                    "low_52_week": low_52_week,
                    "high_52_week": high_52_week
                }, 
                ignore_index=True
            ).sort_values(by="rs_rating", ascending=False)
                    
        except Exception:
            print(f"Bad Ticker: {stock}")
            
    
    print("\n[2] MINERVINI ")
    print(f"   > PART-A:")
    exportList_A = exportList.drop_duplicates(subset="ticker")    
    part_a_len = len(exportList_A['ticker'])        
    print(f"     * Successful Stocks: [{part_a_len}]")
    
    
    print(f"   > PART-B:")
    exportList_B = exportList_A[exportList_A['rs_rating'] >= 69]
    exportList_B.to_pickle(saveRec / "recommender_02_return_dataFrame.pkl")
    part_b_len = len(exportList_B['ticker'])    
    print(f"     * Successful Stocks (rs_rating >= 69.0): [{part_b_len}]")    
    
    display(exportList_B.round(2))
    return exportList_B.round(2)    

In [84]:
data_2 = technicals_minervini(etf_ticker_list)


[2] MINERVINI 
   > PART-A:
     * Successful Stocks: [113]
   > PART-B:
     * Successful Stocks (rs_rating >= 69.0): [50]


Unnamed: 0,ticker,rs_rating,returns_multiple,current_price,sma_50,sma_150,sma_200,sma_200_20,low_52_week,high_52_week
0,NRGU,100.0,6.15,455.88,508.27,465.81,478.7,470.57,198.58,797.0
1,ERX,99.37,3.95,63.95,66.97,59.77,59.78,58.55,33.3,78.67
2,DIG,98.74,3.9,41.05,42.95,38.74,38.53,37.77,21.33,50.14
3,GUSH,98.11,2.82,144.5,153.07,156.13,161.77,163.89,84.13,256.62
4,PXE,97.48,2.5,29.27,30.18,29.63,29.59,29.33,19.35,35.71
5,IEO,96.86,2.35,90.84,94.15,89.98,89.14,87.98,60.95,105.04
6,FCG,95.91,2.33,24.24,24.93,24.73,24.83,24.78,16.59,30.26
7,UCO,95.91,2.33,27.82,28.57,31.79,35.1,36.18,24.26,55.69
10,VDE,94.34,2.18,119.86,121.74,113.64,112.48,110.72,79.31,131.12
8,FENY,94.34,2.18,23.55,23.93,22.33,22.1,21.75,15.58,25.77


In [85]:
from os.path import exists
from pathlib import Path
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from newspaper import Article, Config
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

pd.set_option('display.max_columns', None)
nltk.download("vader_lexicon")
nltk.download("punkt")

user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
config = Config()
config.browser_user_agent = user_agent
config.request_timeout = 10



day1 = day1
month1 = str(day1)[:7]
year1 = str(day1)[:4]
saveRec = Path(f"/home/gdp/hot_box/etfs/data/recommenders/{year1}/{month1}/{day1}/")
sentiment = Path(f"/home/gdp/hot_box/etfs/data/sentiment/sentiment/{year1}/{month1}/{day1}/")
single_news = Path(f"/home/gdp/hot_box/etfs/data/sentiment/single_news/{year1}/{month1}/{day1}/")   
if not sentiment.exists():
    sentiment.mkdir(parents=True)          
if not single_news.exists():
    single_news.mkdir(parents=True)       

data_2 = pd.read_pickle(saveRec / "recommender_02_return_dataFrame.pkl")
stocks_list = list(data_2['ticker'])    

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/gdp/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /home/gdp/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [86]:
def sentiment_1(stocks, parsed_news=[], new_stock_list=[], pull_list=[], bad_stocks=[], news_tables={}, n=25):
        

    def finviz_pull(tickers, c=0.0):
        for ticker in tickers:          
            c += 1
            try:
                url = 'https://finviz.com/quote.ashx?t=' + ticker
                req = Request(url=url,headers={'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'}) 
                resp = urlopen(req)    
                html = BeautifulSoup(resp, features="lxml")
                news_table = html.find(id='news-table')
                news_tables[ticker] = news_table
                new_stock_list.append(ticker)
                print(f"({c}) [{ticker}] - DATA SOURCED")
            except Exception:
                bad_stocks.append(ticker)

        if exists(single_news / f"sentiment_all_stock_news.pkl"):
            parsed_news_df = pd.read_pickle(single_news / f"sentiment_all_stock_news.pkl")
            return parsed_news_df
            
        else:
            for file_name, news_table in news_tables.items():
                try:
                    rows = news_table.findAll("tr")
                    rows = rows[:n]
                    for row in rows:
                        cols = row.findAll("td")
                        try:
                            ticker = file_name.split('_')[0]
                            date = cols[0].text.split()[0]
                            title = cols[1].get_text()
                            link = cols[1].a['href']
                            source = link.split("/")[2]          
                            if source == "feedproxy.google.com":
                                source = link.split("/")[4]
                            info_dict = {
                                "Ticker": ticker,
                                "Date": date, 
                                "Title": title, 
                                "Source": source, 
                                "Link": link
                                }
                            parsed_news.append(info_dict)
                        except Exception:
                            pass
                except Exception as e:
                    print(e)
                    pass
                    
            parsed_news_df = pd.DataFrame(parsed_news)
            parsed_news_df.columns = [x.lower() for x in parsed_news_df.columns]
            parsed_news_df['date'] = pd.to_datetime(parsed_news_df['date'])
            # parsed_news_df = parsed_news_df[parsed_news_df['date'] >= pd.Timestamp('2022-01-01')]    
            parsed_news_df.to_pickle(single_news / f"sentiment_all_stock_news.pkl")
            for ticker in new_stock_list:
                if exists(single_news / f"df_single_news_{ticker}.pkl"):
                    pass
                else:            
                    stock_news_df = pd.DataFrame(parsed_news_df[parsed_news_df['ticker'] == ticker]).sort_values('date', ascending=False).iloc[:n]
                    stock_news_df.to_pickle(single_news / f"df_single_news_{ticker}.pkl")
                    print(f"{ticker} - COMPLETE - DATA SAVED")
            return parsed_news_df


    print(f"\nTotal Input Stocks: {len(stocks)} \n")
    pull_list = stocks
    parsed_news_df = finviz_pull(pull_list)
    return parsed_news_df

In [87]:
parsed_news_df = sentiment_1(stocks_list)
parsed_news_df


Total Input Stocks: 50 

(1.0) [NRGU] - DATA SOURCED
(2.0) [ERX] - DATA SOURCED
(3.0) [DIG] - DATA SOURCED
(4.0) [GUSH] - DATA SOURCED
(5.0) [PXE] - DATA SOURCED
(6.0) [IEO] - DATA SOURCED
(7.0) [FCG] - DATA SOURCED
(8.0) [UCO] - DATA SOURCED
(9.0) [VDE] - DATA SOURCED
(10.0) [FENY] - DATA SOURCED
(11.0) [XLE] - DATA SOURCED
(12.0) [RYE] - DATA SOURCED
(13.0) [IYE] - DATA SOURCED
(14.0) [PXI] - DATA SOURCED
(15.0) [FTXN] - DATA SOURCED
(16.0) [XOP] - DATA SOURCED
(17.0) [UGA] - DATA SOURCED
(18.0) [FXN] - DATA SOURCED
(19.0) [PSCE] - DATA SOURCED
(20.0) [AMZA] - DATA SOURCED
(21.0) [IXC] - DATA SOURCED
(22.0) [OIH] - DATA SOURCED
(23.0) [FILL] - DATA SOURCED
(24.0) [BNO] - DATA SOURCED
(25.0) [IEZ] - DATA SOURCED
(27.0) [IGE] - DATA SOURCED
(28.0) [USO] - DATA SOURCED
(29.0) [XME] - DATA SOURCED
(30.0) [USL] - DATA SOURCED
(31.0) [DBE] - DATA SOURCED
(32.0) [URNM] - DATA SOURCED
(33.0) [AMLP] - DATA SOURCED
(34.0) [NANR] - DATA SOURCED
(35.0) [XES] - DATA SOURCED
(36.0) [PXJ] - DATA S

Unnamed: 0,ticker,date,title,source,link
6,DIG,2022-05-13 00:00:00,ETF Odds & Ends: State Street Fund Targets Liq...,finance.yahoo.com,https://finance.yahoo.com/news/etf-odds-ends-s...
7,DIG,2022-05-11 00:00:00,ProShares Announces ETF Share Splits Business ...,finance.yahoo.com,https://finance.yahoo.com/news/proshares-annou...
23,DIG,2023-02-01 10:32:00,Will the APIs Crude Oil Inventories Pressure C...,marketrealist.com,http://marketrealist.com/2017/09/will-apis-cru...
24,DIG,2023-02-01 08:09:00,Will Major Oil Producers Extend the Output Cut...,marketrealist.com,http://marketrealist.com/2017/09/will-major-oi...
29,ERX,2022-07-11 00:00:00,Gas ETFs To Fill Your Tank ETF.com,finance.yahoo.com,https://finance.yahoo.com/news/gas-etfs-fill-t...
...,...,...,...,...,...
976,ENFR,2023-02-01 16:35:00,Invesco announces estimated cash and annual re...,finance.yahoo.com,https://finance.yahoo.com/news/invesco-announc...
993,EINC,2022-04-04 00:00:00,Possible turnaround for E Automotive Inc. (TSE...,finance.yahoo.com,https://finance.yahoo.com/news/possible-turnar...
994,EINC,2022-04-01 00:00:00,Early Warning Report Issued Pursuant to Nation...,finance.yahoo.com,https://finance.yahoo.com/news/early-warning-r...
995,EINC,2022-03-22 00:00:00,E INC announces 2021 Fourth Quarter and Year E...,finance.yahoo.com,https://finance.yahoo.com/news/e-inc-announces...


In [88]:
def sentiment_2(stocks, n=25):
    c = 0.0

    for stock in stocks:
        c += 1
        if exists(single_news / f"df_single_news_full_{stock}.pkl"):
            print(f"\n[ {int(c)} / {int(len(stocks))} ] - {stock} \n [X] - DONE - {stock}")

        else:
            print(f"\n[ {int(c)} / {int(len(stocks))} ] - {stock}")

            try:
                df = pd.DataFrame(pd.read_pickle(single_news / f"df_single_news_{stock}.pkl"))
                df.columns = [x.lower() for x in df.columns]
                df = df.sort_values('date', ascending=False).iloc[:n]
                list =[]                                                                         # creating an empty list

                for i in df.index:
                    dict = {}                                                                    # create empty dictionary to add articles
                    article = Article(df['link'][i], config=config)                              # providing the link

                    try:
                        article.download()                                                       # downloading the article 
                        article.parse()                                                          # parsing the article
                        article.nlp()                                                            # performing natural language processing
                    except Exception as e:                                                               # exception handling
                        print("Exception 2B:" + str(e))

                    dict['date']=df['date'][i]                                                   # storing results in dictionary from above
                    dict['source']=df['source'][i] 
                    dict['title']=article.title
                    dict['article']=article.text
                    dict['summary']=article.summary
                    dict['key_words']=article.keywords
                    dict['link']=df['link'][i]
                    list.append(dict)

                check_empty = not any(list)
                if check_empty == False:
                    try:
                        news_df=pd.DataFrame(list)                                               # creating dataframe
                        p1 = (single_news / f"df_single_news_full_{stock}.pkl")
                        news_df.to_pickle(p1)
                        print(f"[X] - DONE - {stock}")                                           # exception handling
                    except Exception as e:                                                               # exception handling
                        print("Exception 2C:" + str(e))
                        
            except Exception as e:                                                               # exception handling
                print("Exception 2A:" + str(e))

    return     

In [89]:
sentiment_2(stocks_list)


[ 1 / 50 ] - NRGU

[ 2 / 50 ] - ERX 
 [X] - DONE - ERX

[ 3 / 50 ] - DIG 
 [X] - DONE - DIG

[ 4 / 50 ] - GUSH 
 [X] - DONE - GUSH

[ 5 / 50 ] - PXE 
 [X] - DONE - PXE

[ 6 / 50 ] - IEO 
 [X] - DONE - IEO

[ 7 / 50 ] - FCG 
 [X] - DONE - FCG

[ 8 / 50 ] - UCO 
 [X] - DONE - UCO

[ 9 / 50 ] - VDE 
 [X] - DONE - VDE

[ 10 / 50 ] - FENY 
 [X] - DONE - FENY

[ 11 / 50 ] - XLE 
 [X] - DONE - XLE

[ 12 / 50 ] - RYE 
 [X] - DONE - RYE

[ 13 / 50 ] - IYE 
 [X] - DONE - IYE

[ 14 / 50 ] - PXI 
 [X] - DONE - PXI

[ 15 / 50 ] - FTXN 
 [X] - DONE - FTXN

[ 16 / 50 ] - XOP 
 [X] - DONE - XOP

[ 17 / 50 ] - UGA 
 [X] - DONE - UGA

[ 18 / 50 ] - FXN 
 [X] - DONE - FXN

[ 19 / 50 ] - PSCE 
 [X] - DONE - PSCE

[ 20 / 50 ] - AMZA 
 [X] - DONE - AMZA

[ 21 / 50 ] - IXC 
 [X] - DONE - IXC

[ 22 / 50 ] - OIH 
 [X] - DONE - OIH

[ 23 / 50 ] - FILL 
 [X] - DONE - FILL

[ 24 / 50 ] - BNO 
 [X] - DONE - BNO

[ 25 / 50 ] - IEZ 
 [X] - DONE - IEZ

[ 26 / 50 ] - FRAK
Exception 2A:[Errno 2] No such file or direct

In [90]:
def sentiment_3(newS, stocks, a_or_b):
    for stock in stocks:
        try:
            parsed_news=[]
            if a_or_b == 'a':
                (
                    dates, sources, titles, links
                ) = (
                    newS['date'], newS['source'], newS['title'], newS['link']
                )
                
                for r in range(len(newS)):
                    parsed_news.append([stock, dates.iloc[r], sources.iloc[r], titles.iloc[r], links.iloc[r]])                

            elif a_or_b == 'b':
                (
                    dates, sources, titles, links, articles, summarys, key_words
                ) = (
                    newS['date'], newS['source'], newS['title'], newS['link'], newS['article'], newS['summary'], newS['key_words']
                )

                for r in range(len(newS)):
                    parsed_news.append([stock, dates.iloc[r], sources.iloc[r], titles.iloc[r], links.iloc[r], articles.iloc[r], summarys.iloc[r], key_words.iloc[r]])

        except Exception:
            print(f"BAD TICKER - 3A - {stock}")            
            return


        # Sentiment Analysis
        analyzer = SentimentIntensityAnalyzer()
        try:          
            if a_or_b == 'a':
                cols_lst = ["ticker", "date", 'source', "title", "link"]
                news = pd.DataFrame(parsed_news, columns=cols_lst).dropna()  
                scores = news["title"].apply(analyzer.polarity_scores).tolist()
                df_scores = pd.DataFrame(scores)
                news = news.join(df_scores, rsuffix="_right")            

            elif a_or_b == 'b':
                cols_lst = ['ticker', 'date', 'source', 'title', 'link', 'article', 'summary', 'key_words']
                news = pd.DataFrame(parsed_news, columns=cols_lst).dropna()  
                scores = news["summary"].apply(analyzer.polarity_scores).tolist()
                df_scores = pd.DataFrame(scores)
                news = news.join(df_scores, rsuffix="_right")  

        except Exception:
            print(f"BAD TICKER - 3B - {stock}")           
            return


        # View Data
        try:
            news["date"] = pd.to_datetime(news['date'])
            unique_ticker = news["ticker"].unique().tolist()
            news_dict = {name: news.loc[news["ticker"] == name] for name in unique_ticker}
            values = []

        except Exception:
            print(f"BAD TICKER - 3C - {stock}")
            return

    for stock in stocks:
        try:
            dataframe = news_dict[stock]
            dataframe = dataframe.set_index("ticker")
            mean = round(dataframe["compound"].mean() * 100, 0)
            values.append(mean)

        except Exception:
            print(f"BAD TICKER - 3D - {stock}")
            return

    try:
        df = pd.DataFrame(stocks, columns=["ticker"])
        df["sentiment_score"] = values
        return df  
    except Exception:
        print(f"BAD TICKER - 3E - {stock}")
        return



def sentiment_4(stocks, a_or_b):
    df = pd.DataFrame()
    symbols = []
    sentiments = []
    bad_stocks = []
    for stock in stocks:
        try:           
            if a_or_b == 'a':
                newS = pd.read_pickle(single_news / f"df_single_news_{stock}.pkl")

            elif a_or_b == 'b':
                newS = pd.read_pickle(single_news / f"df_single_news_full_{stock}.pkl")
            
            fd = pd.DataFrame(sentiment_3(newS, [stock], a_or_b))
            symbols.append(fd["ticker"].loc[0])
            sentiments.append(fd["sentiment_score"].loc[0])
            fd.to_pickle(sentiment / f"{stock}_sentiment.pkl")

        except Exception:
            print(f"BAD TICKER - 4 - {stock}")
            bad_stocks.append(stock)
            
    df["ticker"] = symbols
    df["sentiment_score"] = sentiments
    return df, bad_stocks



def sentiment_5(main_ticker_list, a_or_b):     
    df_final, bad_stocks = sentiment_4(main_ticker_list, a_or_b)
    print(df_final.shape)

    df_final = pd.DataFrame(df_final.copy()).sort_values('sentiment_score', ascending=False).sort_values('sentiment_score', ascending=False)
    df_final.to_pickle(saveRec / "recommender_03_return_dataFrame.pkl")

    print(f"[3] Sentiment Analysis - Successful Securities = [{len(df_final['ticker'])}]]")
    print(df_final.shape)
    return df_final, bad_stocks

In [91]:
data_3, bad_stocks_list = sentiment_5(stocks_list, a_or_b='b')

data_3

BAD TICKER - 4 - NRGU
BAD TICKER - 4 - FRAK
BAD TICKER - 4 - NANR
BAD TICKER - 4 - USAI
BAD TICKER - 4 - PDBC
BAD TICKER - 4 - OILK
(44, 2)
[3] Sentiment Analysis - Successful Securities = [44]]
(44, 2)


Unnamed: 0,ticker,sentiment_score
43,FXZ,97.0
42,UNL,97.0
3,PXE,96.0
17,PSCE,96.0
2,GUSH,94.0
7,VDE,93.0
8,FENY,92.0
28,DBE,91.0
26,XME,90.0
11,IYE,90.0


In [92]:
data_3 = data_3.sort_values('ticker')
data_4 = pd.DataFrame(data_2[data_2['ticker'].isin(list(data_3['ticker']))]).sort_values('ticker')
data_4 = data_4.merge(data_3, on='ticker')
data_4

Unnamed: 0,ticker,rs_rating,returns_multiple,current_price,sma_50,sma_150,sma_200,sma_200_20,low_52_week,high_52_week,sentiment_score
0,AMLP,79.56,1.63,40.74,39.12,38.26,37.86,37.51,30.87,41.71,13.0
1,AMZA,88.05,1.86,33.87,31.85,30.36,29.88,29.43,23.22,34.44,74.0
2,BNO,85.53,1.77,27.32,27.4,29.23,30.06,30.34,22.78,36.84,-59.0
3,DBC,73.27,1.46,24.32,24.47,25.01,25.81,26.0,21.5,30.46,54.0
4,DBE,81.13,1.66,21.24,22.27,23.79,24.56,24.72,18.13,30.27,91.0
5,DBO,71.7,1.45,14.76,14.93,16.1,16.77,17.02,13.9,20.97,46.0
6,DIG,98.74,3.9,41.05,42.95,38.74,38.53,37.77,21.33,50.14,-5.0
7,EINC,74.21,1.47,63.33,62.9,61.76,61.97,61.95,52.29,68.75,61.0
8,ENFR,74.84,1.5,22.23,21.77,21.37,21.42,21.41,18.11,23.9,66.0
9,ERX,99.37,3.95,63.95,66.97,59.77,59.78,58.55,33.3,78.67,48.0


In [93]:
def create_new_cols(df):
    df["my_score"] = (
        (
            (df["returns_multiple"] * 0.3) + (df["rs_rating"] * 0.3) + (df["sentiment_score"] * 0.4)
        )
    )
    return df   

In [94]:
data_5 = create_new_cols(data_4).round(2).sort_values('my_score', ascending=False)

In [95]:
# data_5 = data_5[data_5['sentiment_score'] > 0.0]
data_5.to_pickle(saveRec / "recommender_05_return_dataFrame.pkl")
data_5

Unnamed: 0,ticker,rs_rating,returns_multiple,current_price,sma_50,sma_150,sma_200,sma_200_20,low_52_week,high_52_week,sentiment_score,my_score
26,PXE,97.48,2.5,29.27,30.18,29.63,29.59,29.33,19.35,35.71,96.0,68.39
16,GUSH,98.11,2.82,144.5,153.07,156.13,161.77,163.89,84.13,256.62,94.0,67.88
39,VDE,94.34,2.18,119.86,121.74,113.64,112.48,110.72,79.31,131.12,93.0,66.16
11,FENY,94.34,2.18,23.55,23.93,22.33,22.1,21.75,15.58,25.77,92.0,65.76
25,PSCE,88.68,1.87,10.57,10.23,9.77,9.86,9.82,6.74,12.34,96.0,65.57
17,IEO,96.86,2.35,90.84,94.15,89.98,89.14,87.98,60.95,105.04,88.0,64.96
21,IYE,92.45,2.13,45.77,46.76,43.79,43.24,42.57,30.55,50.29,90.0,64.37
27,PXI,91.82,2.08,43.61,44.21,42.94,43.02,42.82,29.63,51.5,89.0,63.77
13,FTXN,91.19,2.05,27.77,28.21,27.05,26.99,26.78,19.55,31.55,85.0,61.97
4,DBE,81.13,1.66,21.24,22.27,23.79,24.56,24.72,18.13,30.27,91.0,61.24


In [96]:
for i in list(data_5['ticker']):
    print(i)

PXE
GUSH
VDE
FENY
PSCE
IEO
IYE
PXI
FTXN
DBE
XME
FCG
UNL
FXZ
XES
OIH
AMZA
UGA
URA
URNM
IGE
XOP
ERX
ENFR
EINC
USL
DBC
IXC
DBO
IEZ
XLE
USO
SLX
MLPA
AMLP
DIG
PXJ
FILL
RYE
MLPX
TPYP
UCO
BNO
FXN


In [97]:
for i in ['GUSH', 'ERX', 'PSCE', 'XES', 'PXI', 'OIH', 'IEZ', 'FCG', 'XOP', 'PXE', 'PXJ', 'FTXN', 'IEO', 'URNM', 'FENY']:
    print(i)


GUSH
ERX
PSCE
XES
PXI
OIH
IEZ
FCG
XOP
PXE
PXJ
FTXN
IEO
URNM
FENY
