In [193]:
import datetime as dt
from os.path import exists
from pathlib import Path
import numpy as np
import pandas as pd
import yfinance as yf
from finviz import get_news
from yahooquery import Ticker
from newspaper import Article, Config
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dateutil.relativedelta import relativedelta
import datetime
import math


import src.tools.functions as f0


pd.set_option('display.max_columns', None)
nltk.download("vader_lexicon")
nltk.download('punkt')
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

config = Config()
config.browser_user_agent = user_agent
config.request_timeout = 10


day1 = '2022-07-28'
month1 = str(day1)[:7]
year1 = str(day1)[:4]
now = dt.date.today()
now = now.strftime('%m-%d-%Y')
yesterday = dt.date.today() - dt.timedelta(days = 3)
yesterday = yesterday.strftime('%m-%d-%Y')

saveRaw = Path(f"/home/gdp/code/larrys_law/data/raw/{month1}/{day1}/")    
sentiment = Path(f"/home/gdp/code/larrys_law/data/sentiment/sentiment/{year1}/{month1}/{day1}/")    
single_news = Path(f"/home/gdp/code/larrys_law/data/sentiment/single_news/{year1}/{month1}/{day1}/")    
saveRec = Path(f"/home/gdp/code/larrys_law/data/recommenders/{year1}/{month1}/{day1}/")
bulk_data_file = Path(f"/home/gdp/code/larrys_law/data/finviz/{month1}/{day1}/finviz.csv")
save_larry = Path(f"/home/gdp/code/larrys_law/data/larry/{year1}/{month1}/{day1}/")

if not saveRaw.exists():
    saveRaw.mkdir(parents=True)
if not sentiment.exists():
    sentiment.mkdir(parents=True)   
if not single_news.exists():
    single_news.mkdir(parents=True)           
if not saveRec.exists():
    saveRec.mkdir(parents=True)  
if not save_larry.exists():
    save_larry.mkdir(parents=True)     

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/gdp/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /home/gdp/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [160]:
def source_data_0():
    data_0 = pd.read_csv(bulk_data_file).round(4).fillna(0.001)
    data_0 = pd.DataFrame(f0.clean_sort(data_0))
    data_0.to_pickle(saveRec / "larry_finviz.pkl")
    data_0.to_pickle(save_larry / "larry_finviz.pkl")
    print("[0] Bulk Data:")
    print(f"---> Total Stock Count: {data_0.shape} \n")
    return data_0    


def source_data_1():
    mkt_cap_min = 500000000
    mkt_cap_max = 2500000000
    data_0 = source_data_0()
    data = pd.DataFrame(data_0)  
    data['market_cap'] = data['market_cap'] * 1000000
    data = data[data['market_cap'] >= mkt_cap_min]
    data = data[data['market_cap'] <= mkt_cap_max]
    data['market_cap'] = [f"${x:,.2f}" for x in list(data['market_cap'])]
    print(f"[1] Market Cap")
    print(f"---> Stocks With [[${mkt_cap_min:,.2f} < Market-Cap < ${mkt_cap_max:,.2f}]]: {data.shape}")
    data.to_pickle(saveRec / "larry_recommender_01_return_dataFrame.pkl")
    data.to_pickle(save_larry / "larry_recommender_01_return_dataFrame.pkl")
    return data_0, data


def source_saved_data():
    data_0 = pd.DataFrame(pd.read_pickle(saveRec / "larry_finviz.pkl"))
    data_1 = pd.DataFrame(pd.read_pickle(save_larry / "larry_recommender_01_return_dataFrame.pkl"))
    mkt_cap_min = 500000000
    mkt_cap_max = 2500000000
    print("\n[0] Bulk Data:")
    print(f"---> Total Stock Count: {data_0.shape} \n")
    print(f"[1] Market Cap")
    print(f"---> Stocks With [[${mkt_cap_min:,.2f} < Market-Cap < ${mkt_cap_max:,.2f}]]: {data_1.shape}\n")
    return data_0, data_1


# def get_diff(a: list,b: list) -> list:
#     return list(set(a) ^ set(b))

# a = l
# b = df.columns
# get_diff(a, b)        

In [161]:
# data_0, data_1 = source_data_1()

data_0, data_1 = source_saved_data()


[0] Bulk Data:
---> Total Stock Count: (8558, 67) 

[1] Market Cap
---> Stocks With [[$500,000,000.00 < Market-Cap < $2,500,000,000.00]]: (1565, 67)



In [9]:
from yahooquery import Ticker


def fte(stocks):
    fte_list = []
    for stock in stocks:
        try:
            yqT = Ticker(stock)
            x = pd.DataFrame(yqT.asset_profile)
            y = float(x[x.index == 'fullTimeEmployees'][f"{stock}"][0])
            fte_list.append(y)
        except Exception:
            fte_list.append(0)
    return fte_list


def revenue(stocks):
    list1 = []
    for stock in stocks:
        try:
            yqT = Ticker(stock)
            x = yqT.income_statement(frequency='a').iloc[-1]['TotalRevenue']
            list1.append(round(float(x),2))
        except Exception:
            list1.append(0)
    return list1    


def run_fte_revenue(stock_tickers):
    fte_list = fte(stock_tickers)
    revenue_list = revenue(stock_tickers)
    return fte_list, revenue_list

In [10]:
stock_ticker_list = sorted(list(data_1['ticker']))

fte_lst, rev_lst = run_fte_revenue(stock_ticker_list)



In [181]:
from re import T


pd.set_option('display.float_format', lambda x: '%.3f' % x)


cols_lst = [
    # 'no', 'country',
    'ticker', 'company', 'sector', 'industry', 
    'market_cap', 'fte', 'revenue', 'rev_per_fte',
    'price', 'target_price', 'average_true_range',
    'sma_20', 'sma_50', 'sma_200', 'high_50_day', 'low_50_day', 'high_52_week', 'low_52_week',
    'volume', 'shares_outstanding', 'average_volume','relative_volume', 'volatility_week', 'volatility_month',    
    'relative_strength_index_14', 'analyst_recom', 
    'gross_margin', 'operating_margin', 'profit_margin', 'dividend_yield', 'payout_ratio',
    'pe', 'forward_pe', 'peg', 'ps', 'pb', 'pcash', 'pfree_cash_flow', 'eps_ttm', 'beta', 'gap', 
    'current_ratio', 'quick_ratio', 'lt_debtequity', 'total_debtequity', 'shares_float', 'short_ratio', 'float_short',
    'return_on_assets', 'return_on_equity', 'return_on_investment',
    'sales_growth_past_5_years', 'sales_growth_quarter_over_quarter',
    'insider_ownership', 'insider_transactions', 'institutional_ownership', 'institutional_transactions',
    'eps_growth_this_year', 'eps_growth_next_year', 'eps_growth_past_5_years', 'eps_growth_next_5_years', 'eps_growth_quarter_over_quarter',
    'performance_half_year', 'performance_year', 'performance_ytd', 'performance_week', 'performance_month', 'performance_quarter',
]


def build_compiled_fte_rev():
    df_0 = pd.DataFrame(data_1.copy())
    df_0['fte'] = fte_lst
    df_0['revenue'] = rev_lst
    df_0['rev_per_fte'] = round(df_0['revenue'] / df_0['fte'], 2)
    df = pd.DataFrame(df_0[cols_lst].copy())
    df.to_pickle(save_larry / "compiled_fte_rev.pkl")
    print(f"[2] Original DataFrame Dimensions: {df.shape}")
    return df


def source_compiled_fte_rev():
    df = pd.DataFrame(pd.read_pickle(save_larry / "compiled_fte_rev.pkl"))
    print(f"[2] Original DataFrame Dimensions: {df.shape}")
    df.to_pickle(save_larry / "larry_recommender_02_return_dataFrame.pkl")
    return df


def collect_missing_fte(df):
    no_fte_lst = []
    l1 = list(df['fte'])
    for k, v in enumerate(l1):
        if v == 0:
            no_fte_lst.append(df['ticker'].iloc[k])
        elif math.isnan(v) == True:
            no_fte_lst.append(df['ticker'].iloc[k])
    print(f"---> Stocks Missing FTE Data: {len(no_fte_lst)}")
    return no_fte_lst


def collect_missing_revenue(df):
    no_rev_lst = []
    l1 = list(df['revenue'])
    for k, v in enumerate(l1):
        if v == 0:
            no_rev_lst.append(df['ticker'].iloc[k])
        elif math.isnan(v) == True:
            no_rev_lst.append(df['ticker'].iloc[k])            
    print(f"---> Stocks Missing REVENUE Data: {len(no_rev_lst)}")
    return no_rev_lst    


def refine_per_missing_fte():
    df_0 = build_compiled_fte_rev()
    missing_fte_list = collect_missing_fte(df_0)
    missing_rev_list = collect_missing_revenue(df_0)
    df = pd.DataFrame(df_0[df_0['fte'] != 0.0])
    df = pd.DataFrame(df[df['revenue'] != 0.0])
    df = pd.DataFrame(df.copy()).dropna()
    df['rev_per_fte'] = [f"${x:,.2f}" for x in list(df['rev_per_fte'])]
    df['revenue'] = [f"${x:,.2f}" for x in list(df['revenue'])]    
    print(f"[3] New Refined DataFrame Dimensions: {df.shape}")
    df.to_pickle(save_larry / "larry_recommender_03_return_dataFrame.pkl")
    return df.round(2), missing_fte_list, missing_rev_list

In [185]:
df, missing_fte_list, missing_rev_list = refine_per_missing_fte()

# df_0 = source_compiled_fte_rev()
# df, missing_fte_list = refine_per_missing_fte()

df = df.sort_values('rev_per_fte', ascending=False)
df.head()

[2] Original DataFrame Dimensions: (1565, 68)
---> Stocks Missing FTE Data: 269
---> Stocks Missing REVENUE Data: 140
[3] New Refined DataFrame Dimensions: (1218, 68)


Unnamed: 0,ticker,company,sector,industry,market_cap,fte,revenue,rev_per_fte,price,target_price,average_true_range,sma_20,sma_50,sma_200,high_50_day,low_50_day,high_52_week,low_52_week,volume,shares_outstanding,average_volume,relative_volume,volatility_week,volatility_month,relative_strength_index_14,analyst_recom,gross_margin,operating_margin,profit_margin,dividend_yield,payout_ratio,pe,forward_pe,peg,ps,pb,pcash,pfree_cash_flow,eps_ttm,beta,gap,current_ratio,quick_ratio,lt_debtequity,total_debtequity,shares_float,short_ratio,float_short,return_on_assets,return_on_equity,return_on_investment,sales_growth_past_5_years,sales_growth_quarter_over_quarter,insider_ownership,insider_transactions,institutional_ownership,institutional_transactions,eps_growth_this_year,eps_growth_next_year,eps_growth_past_5_years,eps_growth_next_5_years,eps_growth_quarter_over_quarter,performance_half_year,performance_year,performance_ytd,performance_week,performance_month,performance_quarter
507,ARRY,"Array Technologies, Inc.",Technology,Solar,"$1,978,040,000.00",1135.0,"$1,134,077,000.00","$999,186.78",16.1,17.42,1.02,11.56,11.4,13.33,14.63,6.62,27.67,5.45,12398671.0,148.29,4517.54,2.74,0.08,0.09,76.34,2.4,0.07,-0.08,-0.12,0.0,0.0,0.0,14.41,0.0,2.18,17.86,39.96,0.0,-0.76,0.0,0.14,1.9,1.2,7.18,7.62,147.98,4.72,0.14,-0.1,0.0,-0.02,0.0,0.21,0.01,0.02,0.95,0.02,-2.18,1.81,0.0,-0.06,-7.31,0.28,-0.01,-0.17,0.08,0.05,0.91
3039,FTAI,Fortress Transportation and Infrastructure Inv...,Industrials,Rental & Leasing Services,"$2,164,410,000.00",600.0,"$597,509,000.00","$995,848.33",23.03,36.4,1.02,19.91,19.44,23.74,21.71,16.79,30.18,16.74,691426.0,99.37,645.15,1.07,0.04,0.05,72.12,1.4,0.97,-0.66,-0.63,0.06,0.0,0.0,13.14,0.0,4.19,2.79,14.9,0.0,-3.36,1.85,-0.01,0.0,0.0,3.98,4.43,98.29,11.35,0.07,-0.07,-0.31,-0.04,0.25,0.79,0.0,0.0,0.76,0.0,-0.16,2.39,-0.4,0.0,-4.74,-0.2,-0.26,-0.26,0.03,0.14,-0.07
2528,ESRT,"Empire State Realty Trust, Inc.",Real Estate,REIT - Diversified,"$1,290,310,000.00",693.0,"$687,438,000.00","$991,974.03",8.2,9.21,0.26,7.34,7.47,8.95,8.53,6.53,12.16,6.53,2080504.0,169.73,1533.83,1.36,0.03,0.03,66.59,2.9,0.57,0.1,-0.03,0.02,0.0,0.0,48.06,0.0,2.01,1.34,3.0,17.58,-0.12,1.03,0.02,0.0,0.0,2.36,2.36,166.49,8.19,0.08,-0.0,-0.02,0.02,-0.02,0.13,0.0,0.0,0.99,0.0,0.35,3.0,-0.17,0.02,-3.36,-0.14,-0.34,-0.14,-0.01,0.06,-0.12
6495,RICK,"RCI Hospitality Holdings, Inc.",Consumer Cyclical,Restaurants,"$515,480,000.00",2529.0,"$251,183,000.00","$99,321.08",56.22,110.0,2.38,52.17,52.91,64.2,60.7,46.49,94.33,46.49,44972.0,9.49,109.04,0.41,0.03,0.05,58.94,0.0,0.6,0.23,0.15,0.0,0.04,14.04,9.71,0.35,2.16,2.28,13.53,17.07,3.9,1.91,0.01,1.7,1.6,0.73,0.78,7.68,3.64,0.05,0.09,0.18,0.13,0.08,0.44,0.08,0.0,0.46,-0.01,6.09,0.15,0.25,0.4,0.7,-0.23,-0.18,-0.3,-0.03,0.09,-0.13
2251,EBIX,"Ebix, Inc.",Technology,Software - Application,"$674,610,000.00",10030.0,"$995,597,000.00","$99,261.91",22.95,99.5,1.25,19.19,23.1,29.48,33.42,12.5,44.42,12.5,181370.0,30.71,545.88,0.33,0.06,0.06,61.02,1.5,0.3,0.12,0.07,0.01,0.14,10.34,6.74,1.03,0.68,1.03,7.32,16.58,2.14,2.65,0.01,0.4,0.4,0.0,0.98,23.25,10.35,0.24,0.04,0.1,0.08,0.27,-0.01,0.25,0.0,0.66,-0.01,-0.26,0.25,-0.05,0.1,-0.11,-0.26,-0.25,-0.27,0.06,0.21,-0.25


In [194]:
def technicals_minervini(data):            
    rec_02_tickers = list(data["ticker"])      
    start_date_101 = dt.date(int(str(day1)[:4]), int(str(day1)[5:7]), int(str(day1)[8:]))
    years_ago = str(start_date_101 - relativedelta(years=1, days=69))[:10]
                  
    exportList = pd.DataFrame(
        columns=[
            "ticker", 
            "rs_rating", 
            "returns_multiple", 
            "current_price", 
            "sma_50", 
            "sma_150", 
            "sma_200", 
            "sma_200_20", 
            "low_52_week", 
            "high_52_week"
        ]
    )    
    

    # Index Returns
    index_name = '^GSPC'
    if exists(saveRaw / "sp500_index.pkl"):
        index_df = pd.DataFrame(pd.read_pickle(saveRaw / "sp500_index.pkl"))
        index_df["pct_change"] = index_df["Adj Close"].pct_change()
        index_return = (index_df["pct_change"] + 1).cumprod()[-1]
    elif not exists(saveRaw / "sp500_index.pkl"):
        index_df = pd.DataFrame(yf.download(index_name, start=years_ago, end=day1))
        index_df.to_pickle(saveRaw / "larry_sp500_index.pkl")
        index_df["pct_change"] = index_df["Adj Close"].pct_change()
        index_return = (index_df["pct_change"] + 1).cumprod()[-1]


    def source_hist(ticker_list):
        bad_list = []
        for ticker in ticker_list:
            if exists(saveRaw / f"{ticker}.pkl"):
                pass
            else:
                bad_list.append(ticker)
        return bad_list    
    
    def import_history(port_tics1):                               
        tickers = Ticker(port_tics1, asynchronous=True)
        df3 = pd.DataFrame(tickers.history(start=years_ago, end=day1))
        for s in port_tics1:
            try:
                df = pd.DataFrame(df3.T[s].T[['adjclose', 'high', 'low']][1:])
                df.index = pd.to_datetime(df.index)
                df.to_pickle(saveRaw / f"larry_{s}.pkl")
            except:
                print(f"failed ticker {s}")
        return


    # Find top 50% performing stocks (relative to the S&P 500)
    bad_list = source_hist(rec_02_tickers)  
    if bad_list:
        import_history(bad_list)        

    returns_multiples = []
    for ticker in rec_02_tickers:
        # Calculating returns relative to the market (returns multiple)      
        try:
            df = pd.DataFrame(pd.read_pickle(saveRaw / f"{ticker}.pkl"))
            df["pct_change"] = df["adjclose"].pct_change()
            stock_return = (df["pct_change"] + 1).cumprod()[-1]
            returns_multiple = round((stock_return / index_return), 2)
            returns_multiples.extend([returns_multiple])
        except Exception:
            print(f"Bad Ticker: {ticker}")
         
    # Creating dataframe of only top 70%
    rs_df = pd.DataFrame(list(zip(rec_02_tickers, returns_multiples)),columns=["ticker", "returns_multiple"],)
    rs_df["rs_rating"] = rs_df["returns_multiple"].rank(pct=True) * 100
    rs_df = rs_df[rs_df["rs_rating"] >= rs_df["rs_rating"].quantile(0.3)]
    
    
    # Checking Minervini conditions of top 60% of stocks in given list
    rs_stocks = list(rs_df["ticker"])
    for stock in rs_stocks:
        try:     
            df = pd.DataFrame(pd.read_pickle(saveRaw / f"{stock}.pkl"))
            sma = [50, 150, 200]
            for x in sma:
                df["SMA_" + str(x)] = round(df["adjclose"].rolling(window=x).mean(), 2)
            # Storing required values
            currentClose = df["adjclose"].iloc[-1]
            MA_50 = df["SMA_50"].iloc[-1]
            MA_150 = df["SMA_150"].iloc[-1]
            MA_200 = df["SMA_200"].iloc[-1]
            low_52_week = round(min(df["low"][-260:]), 2)
            high_52_week = round(max(df["high"][-260:]), 2)
            RS_Rating = round(rs_df[rs_df["ticker"] == stock].rs_rating.tolist()[0], 2)
            Returns_multiple = round(
                rs_df[rs_df["ticker"] == stock].returns_multiple.tolist()[0], 2)
            try:
                MA_200_20 = df["SMA_200"][-20]
            except Exception:
                MA_200_20 = 0
                        
        # Condition 1: Current Price > 150 SMA and > 200 SMA
            condition_1 = currentClose > MA_150 > MA_200
        # Condition 2: 150 SMA and > 200 SMA
            condition_2 = MA_150 > MA_200
        # Condition 3: 200 SMA trending up for at least 1 month
            condition_3 = MA_200 > MA_200_20
        # Condition 4: 50 SMA> 150 SMA and 50 SMA> 200 SMA
            condition_4 = MA_50 > MA_150 > MA_200
        # Condition 5: Current Price > 50 SMA
            condition_5 = currentClose > MA_50
        # Condition 6: Current Price is at least 30% above 52 week low
            condition_6 = currentClose >= (1.30 * low_52_week)
        # Condition 7: Current Price is within 25% of 52 week high
            condition_7 = currentClose >= (0.75 * high_52_week)            
            
        # If all conditions above are true, add Ticker to exportList
            if (
                condition_1 &
                condition_2 &
                condition_3 &
                condition_4 &
                condition_5 &
                condition_6 &
                condition_7
            ):
                exportList = exportList.append(
                    {
                        "ticker": stock,
                        "rs_rating": RS_Rating,
                        "returns_multiple": Returns_multiple,
                        "current_price": currentClose,
                        "sma_50": MA_50,
                        "sma_150": MA_150,
                        "sma_200": MA_200,
                        "sma_200_20": MA_200_20,
                        "low_52_week": low_52_week,
                        "high_52_week": high_52_week,
                    },
                    ignore_index=True,
                ).sort_values(by="rs_rating", ascending=False)             
        except Exception:
            print(f"Bad Ticker: {stock}")

    exportList_A = exportList.drop_duplicates(subset="ticker")
    exportList_B = exportList_A[exportList_A.rs_rating >= 60.0]
    part_a_len = len(exportList_A['ticker'])
    part_b_len = len(exportList_B['ticker'])        
    
    print("\n[2] MINERVINI ")
    print(f"   > PART-A:")
    print(f"     * Successful Stocks: [{part_a_len}]")
    print(f"   > PART-B:")
    print(f"     * Successful Stock WHERE (rs_rating > 60.0): [{part_b_len}] \n")
    
    exportList_B.to_pickle(saveRec / "larry_recommender_02_return_dataFrame.pkl")
    return exportList_B.round(2)    

In [195]:
data_1 = pd.read_pickle(saveRec / "larry_recommender_01_return_dataFrame.pkl")

data_2 = technicals_minervini(data_1)
# data_2 = pd.read_pickle((saveRec / "larry_recommender_02_return_dataFrame.pkl"))

data_2

[*********************100%***********************]  1 of 1 completed
Bad Ticker: AAC
Bad Ticker: AAT
Bad Ticker: AAWW
Bad Ticker: ABST
Bad Ticker: ABTX
Bad Ticker: AC
Bad Ticker: ACA
Bad Ticker: ACAD
Bad Ticker: ACCD
Bad Ticker: ACCO
Bad Ticker: ACEL
Bad Ticker: ACET
Bad Ticker: ACHR
Bad Ticker: ACLS
Bad Ticker: ACLX
Bad Ticker: ACMR
Bad Ticker: ACQR
Bad Ticker: ACRE
Bad Ticker: ACRS
Bad Ticker: ACVA
Bad Ticker: ADCT
Bad Ticker: ADPT
Bad Ticker: ADTN
Bad Ticker: ADUS
Bad Ticker: ADV
Bad Ticker: ADX
Bad Ticker: AEO
Bad Ticker: AEVA
Bad Ticker: AFTR
Bad Ticker: AFYA
Bad Ticker: AG
Bad Ticker: AGCB
Bad Ticker: AGEN
Bad Ticker: AGIO
Bad Ticker: AGM
Bad Ticker: AGRO
Bad Ticker: AGX
Bad Ticker: AGYS
Bad Ticker: AHH
Bad Ticker: AI
Bad Ticker: AINV
Bad Ticker: AIO
Bad Ticker: AIR
Bad Ticker: AIV
Bad Ticker: AKO-A
Bad Ticker: AKO-B
Bad Ticker: AKR
Bad Ticker: AKYA
Bad Ticker: ALCC
Bad Ticker: ALEC
Bad Ticker: ALEX
Bad Ticker: ALG
Bad Ticker: ALGT
Bad Ticker: ALKT
Bad Ticker: ALLG
Bad Ticker: AL

Unnamed: 0,ticker,rs_rating,returns_multiple,current_price,sma_50,sma_150,sma_200,sma_200_20,low_52_week,high_52_week


In [None]:
def mini_news(stocks):
    print(f"\nTotal Input Stocks: {len(stocks)} \n")
    c = 0.0
    for stock in stocks:
        pkl0 = (single_news / f"larry_df_single_news_{stock}.pkl")
        col0 = ["date", "title", "link", "source"]
        msg0 = '[[ COMPLETE ]] - DATA SOURCED AND SAVED -[SUCCESSFUL]'
        c += 1
        if exists(pkl0):
            print(f"[{c}] - {stock} - COMPLETE - {stock} DATA ON FILE")
        else:
            try: 
                data_news = get_news(stock)
                df_news = pd.DataFrame((data_news), columns=col0)
                df_news = df_news.loc[:30]
                df_news = df_news[df_news['date'] > '2022-05']
                df_news = df_news[df_news['date'] <= day1]
                df_news.to_pickle(pkl0)
                print(f"[{c}] - {stock} - {msg0}")
            except:
                print(f"BAD TICKER {stock} 5")
                stocks.remove(stock)
    return stocks


def process_news(stocks):
    final_stocks = []
    c = 0.0
    for stock in stocks:
        c += 1
        print(f"\n[ {int(c)} / {int(len(stocks))} ] - {stock}")
        pkl1 = (single_news / f"larry_df_single_news_{stock}.pkl")
        pkl2 = (single_news / f"larry_df_single_news_full{stock}.pkl")
        if exists(pkl2):
            final_stocks.append(stock)
            print(f"[X] - DONE - {stock}")
        else:
            try:
                df = pd.DataFrame(pd.read_pickle(pkl1))
                list =[]                                                          # creating an empty list
                for i in df.index:
                    dict = {}                                                     # create empty dictionary to add articles
                    article = Article(df['link'][i], config=config)               # providing the link
                    try:
                        article.download()                                        # downloading the article 
                        article.parse()                                           # parsing the article
                        article.nlp()                                             # performing natural language processing
                    except:                                                       # exception handling
                        print('error stock download')
                    dict['date']=df['date'][i]                                    # storing results in dictionary from above
                    dict['source']=df['source'][i] 
                    dict['title']=article.title
                    dict['article']=article.text
                    dict['summary']=article.summary
                    dict['key_words']=article.keywords
                    dict['link']=df['link'][i]
                    list.append(dict)
                check_empty = not any(list)
                if check_empty == False:
                    try:
                        news_df=pd.DataFrame(list)                                # creating dataframe
                        p1 = (pkl2)
                        news_df.to_pickle(p1)
                        final_stocks.append(stock)
                        print(f"[X] - DONE - {stock}")                            # exception handling
                    except Exception:
                        print('error save')
            except Exception as e:                                                # exception handling
                print("Exception:" + str(e))
    return final_stocks


def sentiment_analysis(newS, stocks):
    for stock in stocks:               
        (
            dates, 
            sources, 
            titles, 
            articles, 
            summarys, 
            key_words, 
            links
        ) = (
            newS['date'], 
            newS['source'], 
            newS['title'], 
            newS['article'], 
            newS['summary'], 
            newS['key_words'], 
            newS['link']
        )
        parsed_news=[]
        for r in range(len(newS)):
            parsed_news.append(
                [
                    
                    stock, 
                    dates[r], 
                    sources[r], 
                    titles[r], 
                    articles[r], 
                    summarys[r], 
                    key_words[r], 
                    links[r]
                ]
            )
        columns = [
            "ticker", 
            "date", 
            'source', 
            "title", 
            'article', 
            'summary', 
            'key_words', 
            "link"
        ]
    # Sentiment Analysis
        analyzer = SentimentIntensityAnalyzer()
        news = pd.DataFrame(parsed_news, columns=columns).dropna()       
        scores = news["summary"].apply(analyzer.polarity_scores).tolist()        
        df_scores = pd.DataFrame(scores)
        news = news.join(df_scores, rsuffix="_right")     
    # View Data
        news["date"] = pd.to_datetime(news['date']).dt.date
        unique_ticker = news["ticker"].unique().tolist()
        news_dict = {
            name: news.loc[news["ticker"] == name] for name in unique_ticker
            }
        values = []
    for stock in stocks:
        dataframe = news_dict[stock]
        dataframe = dataframe.set_index("ticker")
        mean = round(dataframe["compound"].mean() * 100, 0)
        values.append(mean)
    df = pd.DataFrame(stocks, columns=["ticker"])
    df["sentiment_score"] = values
    return df


def run_sentiment(stocks):
    df = pd.DataFrame()
    symbols = []
    sentiments = []
    bad_stocks = []
    for stock in stocks:
        pkl3 = (single_news / f"larry_df_single_news_full{stock}.pkl")
        pkl4 = (sentiment / f"larry_{stock}_sentiment.pkl")        
        try:           
            newS = pd.read_pickle(pkl3)
            fd = sentiment_analysis(newS, [stock])
            symbols.append(fd["ticker"].loc[0])
            sentiments.append(fd["sentiment_score"].loc[0])
            fd.to_pickle(pkl4)
        except Exception:
            print(f"BAD TICKER {stock} 4")
            stocks.remove(stock)
            bad_stocks.append(stock)
    df["ticker"] = symbols
    df["sentiment_score"] = sentiments
    return df, bad_stocks


def run_rec_3(rec_03_tickers):             
    df_final, bad_stocks = run_sentiment(rec_03_tickers)
    df_final = df_final[df_final['sentiment_score'] >= 0.0]
    fin_len = len(df_final['ticker'])
    pkl4 = saveRec / "larry_recommender_03_return_dataFrame.pkl"
    df_final.to_pickle(pkl4)
    print(f"[3] Sentiment Analysis - Successful Securities = [{fin_len}]]")
    return df_final.sort_values('sentiment_score', ascending=False), bad_stocks

In [None]:
data_2 = pd.read_pickle(saveRec / "larry_recommender_02_return_dataFrame.pkl")
sentiment_ticker_list = list(data_2['ticker'])

sentiment_news_tickers = mini_news(sentiment_ticker_list)

In [None]:
sentiment_news_tickers = process_news(sentiment_ticker_list)

In [None]:
data_3, bad_stocks = run_rec_3(sentiment_news_tickers)
print(f"Bad Stocks: {len(bad_stocks)}")
data_3 = pd.DataFrame(data_3).sort_values('sentiment_score', ascending=False)
data_3

In [None]:
def run_rec_4(rec_final_01, rec_final_02, rec_final_03):            


    def fix_rec_01(rec_final_01):
        rec_final_01 = pd.DataFrame(rec_final_01)
        rec_final_01.columns = [x.lower() for x in rec_final_01.columns]
        rec_final_01 = rec_final_01.rename(columns={"analyst_recom": "ar"})
        ar = [
            3.0, 2.9, 2.8, 2.7, 2.6, 2.5, 2.4, 2.3, 2.2, 2.1, 
            2.0, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.3, 1.2, 1.1, 1.0
            ]
        analyst_recom = list(np.arange(50.0, 101.0, 2.5).round())
        d1 = dict(zip(ar, analyst_recom))
        adj__analyst_lst = []
        for i in rec_final_01["ar"]:
            for key, val in d1.items():
                if i == key:
                    adj__analyst_lst.append(val)
        del rec_final_01["ar"]
        rec_final_01["analyst_recom"] = adj__analyst_lst            
        return rec_final_01


    def fix_rec_02(rec_final_02):
        rec_final_02 = pd.DataFrame(rec_final_02)
        rec_final_02.columns = [x.lower() for x in rec_final_02.columns]
        del rec_final_02['sma_50']
        del rec_final_02['sma_200']
        del rec_final_02['low_52_week']
        del rec_final_02['high_52_week']
        rec_final_02 = rec_final_02.round(2)
        return rec_final_02    
    

    def fix_rec_03(rec_final_03):
        rec_final_03 = pd.DataFrame(rec_final_03)
        rec_final_03.columns = [x.lower() for x in rec_final_03.columns]
        return rec_final_03   
    

    def merge_dataframes(rec_01, rec_02, rec_03):
        rec_01 = pd.DataFrame(rec_01[rec_01["ticker"].isin(list(rec_03["ticker"]))])
        rec_02 = pd.DataFrame(rec_02[rec_02["ticker"].isin(list(rec_03["ticker"]))])
        a = pd.DataFrame(rec_01.merge(rec_02, how="inner", on="ticker"))
        b = a.merge(rec_03, how="inner", on="ticker")   
        final_df = pd.DataFrame(b.copy())
        return final_df
    

    def create_new_cols(df):
        df["my_score"] = (
            ((df["analyst_recom"]) + (df["rs_rating"]) + (df["sentiment_score"])) / 3
            )
        return df   


    def finish_stage_4(final_len):
        final_len = len(final_df['ticker'])
        final_df.to_pickle(saveRec / "larry_recommender_04_return_dataFrame.pkl")
        print(f'[4] Recommender Stage #04 - [Total Passed == {final_len}]')        
        
    
    rec_final_01 = fix_rec_01(rec_final_01)
    rec_final_02 = fix_rec_02(rec_final_02)
    rec_final_03 = fix_rec_03(rec_final_03)
    final_df0 = merge_dataframes(rec_final_01, rec_final_02, rec_final_03)
    final_df = create_new_cols(final_df0)
    finish_stage_4(final_df)    
    return final_df

In [None]:
df_4 = pd.DataFrame(run_rec_4(data_1, data_2, data_3))
print(df_4.shape)
df_4.head(3)

In [None]:
def run_rec_5(data):


    def re_order_cols(fd):          
        fd = pd.DataFrame(fd)
        col_1 = fd.pop('company')
        col_2 = fd.pop('ticker')        
        col_3 = fd.pop('my_score')
        col_4 = fd.pop('sentiment_score')
        col_5 = fd.pop('rs_rating')
        col_7 = fd.pop('analyst_recom')                    
        col_8 = fd.pop('returns_multiple')
        col_9 = fd.pop('price')
        col_10 = fd.pop('target_price')
        fd.insert(0, 'target_price', col_10)   
        fd.insert(0, 'price', col_9)   
        fd.insert(0, 'returns_multiple', col_8)           
        fd.insert(0, 'analyst_recom', col_7)                
        fd.insert(0, 'rs_rating', col_5)           
        fd.insert(0, 'sentiment_score', col_4)     
        fd.insert(0, 'my_score', col_3)  
        fd.insert(0, 'ticker', col_2)
        fd.insert(0, 'company', col_1)
        return fd
    

    def trim_1(fd):        
        fd = pd.DataFrame(fd)
        q_1 = fd['returns_multiple'].quantile(0.1)
        q_2 = fd['returns_multiple'].quantile(0.2)
        q_3 = fd['returns_multiple'].quantile(0.3)
        q_4 = fd['returns_multiple'].quantile(0.4)
        q_5 = fd['returns_multiple'].quantile(0.5)
        q_6 = fd['returns_multiple'].quantile(0.6)
        q_7 = fd['returns_multiple'].quantile(0.7)
        q_8 = fd['returns_multiple'].quantile(0.8)
        q_9 = fd['returns_multiple'].quantile(0.9)        
        for r in fd['returns_multiple']:
            if r <= q_1:
                fd['my_score'] * 1.01
            if r > q_1 and r <= q_2:
                fd['my_score'] * 1.02
            if r > q_2 and r <= q_3:
                fd['my_score'] * 1.03
            if r > q_3 and r <= q_4:
                fd['my_score'] * 1.04
            if r > q_4 and r <= q_5:
                fd['my_score'] * 1.05
            if r > q_5 and r <= q_6:
                fd['my_score'] * 1.06
            if r > q_6 and r <= q_7:
                fd['my_score'] * 1.07
            if r > q_7 and r <= q_8:
                fd['my_score'] * 1.08
            if r > q_8 and r <= q_9:
                fd['my_score'] * 1.09
            if r > q_9:
                fd['my_score'] * 1.10
        fd_len = len(fd['ticker'])
        print(f'[5] Recommender Stage #05 - [Total Passed == {fd_len}]')                
        return fd
    
    
    def trim_2(fd):
        std1 = fd['relative_strength_index_14'].std()
        fd = pd.DataFrame(fd[fd['relative_strength_index_14'] <= (70.0 - (std1 * 1.0))])
        # fd = pd.DataFrame(fd[fd['relative_strength_index_14'] <= 60.0])
        # fd = pd.DataFrame(fd[fd['relative_strength_index_14'] >= 30.0])
        fd_len = len(fd['ticker'])
        print(f'[5] Stage #05 - Total Passed [[ FINAL ]] = [{fd_len}]]')        
        return fd.sort_values('my_score', ascending=False).round(2)
    

    fd = pd.DataFrame(data).reset_index()
    fd = re_order_cols(fd)                         
    fd = trim_1(fd)           
    # fd = trim_2(fd)
    return fd

In [None]:
df_5 = pd.DataFrame(run_rec_5(df_4)).fillna(0.0)
print(df_5.shape)
df_5.head()

In [None]:
df_5.to_pickle(saveRec / "recommender_05_return_dataFrame.pkl")

In [None]:
df_5.head(30).round(2).sort_values('my_score', ascending=False)