In [None]:
# Install dependencies.
# The commands below are disabled by wrapping them in a ''' ... ''' string literal; delete both
# ''' lines to run them if you haven't installed the dependencies yet.
# NOTE(review): the import cell also uses bs4 (BeautifulSoup), which has no explicit %pip line
# here - presumably it is pulled in as a dependency of another package; confirm.
'''
%pip install finvizfinance
%pip install pandas
%pip install transformers
%pip install yfinance
%pip install goose3
%pip install requests
%pip install ipywidgets

%pip install torch
%pip install tensorflow
%pip install nltk
import nltk
nltk.download('punkt')
'''


In [None]:
# Import libraries
from finvizfinance.screener.overview import Overview # type: ignore
from finvizfinance.quote import finvizfinance        # type: ignore
from IPython.display import display                  # type: ignore

import pandas as pd                 # type: ignore
from transformers import pipeline   # type: ignore
import yfinance as yf               # type: ignore
from goose3 import Goose            # type: ignore
from requests import get            # type: ignore

from nltk.tokenize import sent_tokenize # type: ignore
from transformers import AutoTokenizer  # type: ignore
from bs4 import BeautifulSoup

import csv
import os
import warnings
# Ignore every FutureWarning raised from here on (the filter applies process-wide)
warnings.simplefilter(action='ignore', category=FutureWarning)

# Other general settings
pd.set_option('display.max_colwidth', None) # None = never truncate long cell values: display full text in pandas dataframe / no line wrapping


In [None]:
# Create filters dictionary, i.e. filter the stocks based on the following criteria:
FILTERS_DICT = {
    'Performance': 'Today +10%',       # Day increase 10%
    'Relative Volume': 'Over 5',       # High Relative Volume
    'Price': 'Under $20',              # Price under 20 USD
    'Float': 'Under 10M'               # Float under 10 million
}

# Alternative filtering to consider (kept as comments so the cell does not echo it as output):
# FILTERS_DICT = {'Debt/Equity': 'Under 1',                 # Debt-to-Equity ratio under 1
#                 'PEG': 'Low (<1)',                        # Low PEG ratio (under 1)
#                 'Operating Margin': 'Positive (>0%)',     # Positive Operating Margin
#                 'P/B': 'Low (<1)',                        # Low P/B (under 1)
#                 'P/E': 'Low (<15)',                       # Low P/E ratio (under 15)
#                 'InsiderTransactions': 'Positive (>0%)'}  # Positive Insider Transactions


# The filters and general manual link for the finvizfinance library: https://finvizfinance.readthedocs.io/_/downloads/en/latest/pdf/ 
# Possible filters can be found by running the following code:

#from finvizfinance.screener.overview import Overview # type: ignore
#foverview = Overview()    # Create Overview object
#foverview.get_filters()   # Get list of all possible filters

# And after to see the possible options for a filter, run the following code:
#foverview.get_filter_options('Relative Volume') # Get list of all possible options for a filter, example on 'Relative Volume'


In [None]:
# Function to get the filtered stocks:
def get_filtered_stocks():
    """
    Run the finviz screener with FILTERS_DICT and return the matching tickers.

    Side effects:
        - creates the 'out' folder if it does not exist yet
        - writes the screener table to 'out/Overview.csv'
        - displays the screener table in the notebook

    Returns:
        list[str]: ticker symbols of the matching stocks; an empty list when
        the screener finds nothing.
    """

    foverview = Overview()
    foverview.set_filter(filters_dict=FILTERS_DICT)
    df_overview = foverview.screener_view()

    # Always make sure the output folder exists: later cells write into it as well.
    os.makedirs('out', exist_ok=True)

    # screener_view() may hand back None (or an empty frame) when no stock matches
    # the filters; return an empty ticker list instead of failing on .to_csv().
    if df_overview is None or df_overview.empty:
        print('No stocks match the current FILTERS_DICT.')
        return []

    df_overview.to_csv('out/Overview.csv', index=False)

    tickers = df_overview['Ticker'].to_list()
    display(df_overview)
    return tickers



# Run the screener once; the resulting ticker list drives the sentiment loop further down.
# NOTE(review): the name suggests value stocks, but the active FILTERS_DICT screens on daily
# gain / relative volume / price / float - consider renaming (later cells reference this name).
undervalued = get_filtered_stocks()


In [None]:
# Function to get the sentiment of the news articles for a given ticker.
# This may run for a good few minutes, depending on the number of filtered tickers / articles. (seen approx 2-7 minutes total)


ALLOW_TOKENIZATION = False # True:  pass the whole article to the model and let the tokenizer cut it off at the 512-token limit
#                            False: keep only the leading whole sentences of the article that together fit within the 512-token limit

FINBERT_MODEL = "ProsusAI/finbert"
MAX_MODEL_TOKENS = 512    # Input limit used for FinBERT, including the [CLS] and [SEP] special tokens
REQUEST_TIMEOUT = 30      # Seconds to wait for an article download before giving up on it
SENTIMENT_COLUMNS = ['Ticker', 'Date', 'Article title', 'Article sentiment']

_FINBERT_CACHE = {}       # Filled on first use so the model is loaded once, not once per ticker


def _load_finbert():
    """Return (tokenizer, pipeline) for FinBERT, loading them only on the first call."""
    if 'pipe' not in _FINBERT_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
        pipe = pipeline("text-classification", model=FINBERT_MODEL)
        _FINBERT_CACHE['tokenizer'] = tokenizer
        _FINBERT_CACHE['pipe'] = pipe
    return _FINBERT_CACHE['tokenizer'], _FINBERT_CACHE['pipe']


def _leading_sentences(text, tokenizer, max_tokens):
    """Return the longest run of whole leading sentences of `text` that fits in `max_tokens` tokens."""
    kept = []
    used_tokens = 0
    for sentence in sent_tokenize(text):
        sentence_tokens = len(tokenizer.tokenize(sentence))
        if used_tokens + sentence_tokens > max_tokens:
            break  # the next sentence would overflow the budget: stop here
        kept.append(sentence)
        used_tokens += sentence_tokens
    return ' '.join(kept)


def _find_publish_date(html, link):
    """Look up the publication date in the article html; print the link and return None if absent."""
    soup = BeautifulSoup(html, 'html.parser')

    # Yahoo Finance usually stores the publication date in a 'time' tag with the class 'caas-attr-meta-time'
    date_tag = soup.find('time', {'class': 'caas-attr-meta-time'})
    date = date_tag.get('datetime') if date_tag else None
    if date is None:
        print('Publication date not found, article link for debugging', link)
    return date


def get_ticker_news_sentiment(ticker):
    """
    Returns a Pandas dataframe of the given ticker's most recent news article headlines,
    with the overall sentiment of each article as classified by FinBERT.

    Each article is downloaded and its cleaned text is classified. How long articles are
    shortened to the model's input limit is controlled by ALLOW_TOKENIZATION. When no body
    text can be extracted, the headline is classified instead. Articles that cannot be
    downloaded are skipped with a printed message.

    Args:
        ticker (string)

    Returns:
        pd.DataFrame: columns 'Ticker', 'Date', 'Article title', 'Article sentiment'
        (one row per article; no rows but the same columns when there is no news)
    """
    from requests.exceptions import RequestException  # local import: the file only imports `get` from requests

    tokenizer, pipe = _load_finbert()

    # NOTE(review): assumes every news item is a dict with top-level 'title' and 'link' keys -
    # confirm against the installed yfinance version.
    news_list = yf.Ticker(ticker).get_news() or []
    extractor = Goose()
    data = []

    headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # Leave room for the [CLS] and [SEP] tokens the tokenizer adds around the text
    sentence_budget = MAX_MODEL_TOKENS - 2

    for dic in news_list:
        title = dic['title']
        link = dic['link']

        try:
            response = get(link, headers=headers, timeout=REQUEST_TIMEOUT)
        except RequestException as error:
            # One unreachable article should not abort a run that takes minutes
            print(f'Article download failed, skipping {link} ({error})')
            continue

        article = extractor.extract(raw_html=response.content)
        text = article.cleaned_text or ''
        date = article.publish_date

        if date is None: # If the date is not found in the article, try to find it in the article's html
            date = _find_publish_date(response.text, link)

        if not text.strip():
            # Nothing could be extracted (e.g. a consent or paywall page): classify the headline instead
            model_input = title
        elif ALLOW_TOKENIZATION:
            model_input = text
        else:
            # Fall back to the full text if even the first sentence is over budget;
            # the truncation below then cuts it at the token limit.
            model_input = _leading_sentences(text, tokenizer, sentence_budget) or text

        # truncation/max_length guarantee the model never receives more tokens than it supports
        results = pipe(model_input, truncation=True, max_length=MAX_MODEL_TOKENS)

        data.append({'Ticker':f'{ticker}',
                     'Date':f'{date}',
                     'Article title':f'{title}',
                     'Article sentiment':results[0]['label']})

    df = pd.DataFrame(data, columns=SENTIMENT_COLUMNS)
    return df

def generate_csv(ticker):
    """
    Compute the news-sentiment table for `ticker` and write it to 'out/<ticker>.csv'.

    Args:
        ticker (string)
    """
    # The folder is otherwise only created inside get_filtered_stocks(); make this
    # function usable on its own.
    os.makedirs('out', exist_ok=True)
    get_ticker_news_sentiment(ticker).to_csv(f'out/{ticker}.csv', index=False)



#undervalued = get_filtered_stocks()

# Compute the sentiment table once per ticker, save it as CSV and keep it for display.
# (Previously every ticker was downloaded and classified twice: once for the CSV, once for the list.)
os.makedirs('out', exist_ok=True)

sentiments = []
for position, ticker in enumerate(undervalued, start=1):
    ticker_sentiment = get_ticker_news_sentiment(ticker)
    ticker_sentiment.to_csv(f'out/{ticker}.csv', index=False)
    sentiments.append(ticker_sentiment)
    print(f'{ticker} done, {len(undervalued) - position} stock tickers left')



In [None]:
# Show the collected news sentiment, one table per processed ticker:
for sentiment_table in sentiments:
    display(sentiment_table)


In [None]:
# Scratch cell: ad-hoc look at the finviz data of a single, hard-coded ticker.
# Nothing in the cells above depends on it.
from datetime import datetime

quote = finvizfinance('SGE')

# Insider trades. Shown explicitly: only the last expression of a cell is rendered
# automatically, so intermediate results need display().
insider_trades = quote.ticker_inside_trader()
display(insider_trades)

# Get today's date
today = datetime.today().date()

# Get the news and filter it to get only today's news
# NOTE(review): assumes the 'Date' column is datetime-typed (it is accessed through .dt) - confirm.
news = quote.ticker_news()
todays_news = news[news['Date'].dt.date == today]
display(todays_news)

# Fundamentals; left as the last expression so it becomes the cell's output
fundamentals = quote.ticker_fundament()
fundamentals

In [None]:
# NOTE(review): `timeframe` is not assigned anywhere in the visible notebook, so this cell
# raises NameError on a fresh Restart & Run All - define it in an earlier cell or delete this cell.
timeframe