In [1]:
# import sys
# !{sys.executable} -m pip install word2number
# !{sys.executable} -m pip install yfinance

In [2]:
# JSON file listing the articles that have already been processed
ARTICLES_FILENAME = 'articles.json'

# In-memory cache of looked-up stock prices, keyed by ticker symbol, so that
# re-running cells during testing does not trigger repeated yfinance calls
price_cache = dict()

# Ordinal value for each uncertainty rating label
# (used later to drive a background gradient)
uncertainity_map = {
    'Very Low': 1,
    'Low': 2,
    'Medium': 3,
    'High': 4,
    'Very High': 5,
}

In [3]:
import re
from word2number import w2n

# Matches one or more consecutive star glyphs anywhere in the text
pattern = r"★+"

def convert_to_stars(text:str) -> str:
    """Normalise a star rating into a string of '★' characters.

    Args:
        text: The rating as a string. It either already contains '★'
            characters, or is a number written as digits or as words
            (whatever `w2n.word_to_num` accepts).

    Returns:
        `text` unchanged when it already contains at least one '★';
        otherwise a string of that many '★' characters; or the sentinel
        string 'error' when the text cannot be read as a whole number.
    """
    # Already expressed in stars: nothing to convert
    if re.search(pattern, text):
        return text

    try:
        count = w2n.word_to_num(text)
    except ValueError:
        # Raised by word_to_num when no number can be recognised
        return 'error'

    # word_to_num yields a float for inputs such as 'two point five';
    # multiplying a str by a float raises TypeError, so a fractional
    # rating is reported through the same sentinel as any other bad input.
    if not isinstance(count, int):
        return 'error'

    return count * '★'

In [4]:
# For DataFrame
import pandas as pd
# Listing file patterns
import glob
# To access OS-level functions
import os
# Reading json files
import json
# To get the current stock price
import yfinance as yf

def get_current_price(symbol:str) -> float:
    """Return the current price for `symbol`, consulting `price_cache` first.

    On a cache miss the 'currentPrice' entry of `yf.Ticker(symbol).info` is
    looked up and stored in `price_cache` before being returned. Because the
    lookup uses `dict.get`, the stored and returned value is None when the
    info mapping has no 'currentPrice' key.

    Args:
        symbol: Ticker symbol exactly as it should be passed to yfinance.

    Returns:
        The cached or freshly fetched price (possibly None, see above).
    """
    try:
        # Fast path: this symbol has been looked up before
        return price_cache[symbol]
    except KeyError:
        pass

    ticker_info = yf.Ticker(symbol).info
    fetched = ticker_info.get('currentPrice')
    # Remember the result (even None) so the symbol is only fetched once
    price_cache[symbol] = fetched
    return fetched

def _read_json(path):
    """Parse the JSON document stored at `path`, reading it as UTF-8."""
    # Explicit encoding: the data contains non-ASCII text (e.g. '★') and the
    # platform default encoding is not UTF-8 everywhere.
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)


def _first_sentence(text):
    """Return the part of `text` before the first sentence break.

    Values that are not strings (e.g. NaN for a missing summary) are
    returned unchanged instead of raising inside `re.split`.
    """
    if not isinstance(text, str):
        return text
    return re.split(r'[.!?]\s+', text)[0]


def merge_results() -> pd.DataFrame:
    """Merge the per-article stock outputs into one row per symbol.

    Reads the article list from `ARTICLES_FILENAME`, then for every article
    loads each JSON file under 'llm/out' whose name starts with the article's
    'key'. Every entry of a file's 'stocks' list becomes a row, extended with:

    * 'price'       - `get_current_price` for '<symbol>.AX' (NaN if None)
    * 'discount'    - (fair_value - price) / fair_value * 100
                      (NaN when price is unavailable or fair_value is
                      zero/None)
    * 'star_rating' - the rating passed through `convert_to_stars`
    * 'url'         - tuple of (article url, first 25 chars of the title)
    * 'pub_date'    - article publish date as a pandas timestamp

    Only the row with the latest 'pub_date' is kept for each symbol, and
    'summary' is cut down to its first sentence.

    Returns:
        The merged DataFrame, or an empty DataFrame when the articles file
        does not exist or no stock rows were found.

    Raises:
        KeyError: if an article or stock entry lacks one of the keys read
            above (e.g. 'key', 'stocks', 'symbol', 'fair_value').
    """
    if not os.path.isfile(ARTICLES_FILENAME):
        # Nothing processed yet
        return pd.DataFrame()

    article_list = _read_json(ARTICLES_FILENAME)['articles']

    # Collected as a list of dicts and turned into a DataFrame once
    rows = []
    for article in article_list:
        # Escape the key so glob metacharacters in it are matched literally
        out_pattern = os.path.join(
            'llm', 'out', f"{glob.escape(str(article['key']))}*.json")
        # Sorted so the processing order does not depend on the filesystem
        for out_file in sorted(glob.glob(out_pattern)):
            for stock in _read_json(out_file)['stocks']:
                row = stock.copy()

                price = get_current_price(symbol=f"{row['symbol']}.AX")
                # A missing price becomes NaN so one unknown ticker does not
                # abort the whole merge with a TypeError
                row['price'] = float('nan') if price is None else price

                fair_value = row['fair_value']
                if fair_value:
                    row['discount'] = ((fair_value - row['price']) / fair_value) * 100
                else:
                    # Zero or missing fair value: the discount is undefined
                    row['discount'] = float('nan')

                # Need to convert to str as some outputs only contain int for the rating
                row['star_rating'] = convert_to_stars(text=str(stock['star_rating']))
                row['url'] = (article['url'], article['title'][:25])
                row['pub_date'] = pd.to_datetime(article['pub_date'])
                rows.append(row)

    if not rows:
        # No output files / stocks: sorting on 'symbol' would raise KeyError
        return pd.DataFrame()

    merged = (
        pd.DataFrame(rows)
        # Sort on symbol, then publish date, so the newest entry is last
        .sort_values(by=['symbol', 'pub_date'])
        # We only want the latest entry per symbol
        .drop_duplicates(subset=['symbol'], keep='last')
        .reset_index(drop=True)
    )
    # Summary is too long to display in full; only keep the first sentence
    merged['summary'] = merged['summary'].apply(_first_sentence)
    return merged

In [5]:
# df = merge_results()
# df

In [6]:
# Table title to include the current date
from datetime import date

def get_tooltips(data:pd.DataFrame) -> pd.DataFrame:
    """Build the tooltip frame that attaches each summary to the 'Name' cell.

    Args:
        data: Frame containing a 'summary' column. It is not modified.

    Returns:
        A frame with the same index as `data`, holding an empty string for
        every column of `data` except 'summary' (which is left out), plus a
        'Name' column carrying the summary text.
    """
    # Start with empty tooltips everywhere; sharing data's index keeps the
    # summaries aligned with their rows even if the index is not 0..n-1
    tt_df = pd.DataFrame('', index=data.index, columns=data.columns)
    # Read from the `data` argument, not from a notebook-level global
    tt_df['Name'] = data['summary']
    # The summary itself is not a displayed column, so it needs no tooltip
    return tt_df.drop(columns='summary')
    
def tooltips(styler, data:pd.DataFrame):
    """Attach `data` as tooltips to `styler` and return the same styler.

    Intended for use with `Styler.pipe`. The tooltips use the CSS class
    'tt-add' and the CSS properties listed below.
    """
    # CSS applied to each tooltip element
    tooltip_css = {
        'visibility': 'hidden',
        'position': 'absolute',
        'background-color': 'lightblue',
        'color': 'black',
        'z-index': 1,
        'padding': '3px 3px',
        'margin': '2px',
    }
    styler.set_tooltips(data, props=list(tooltip_css.items()), css_class='tt-add')
    return styler

def style_stars(stars):
    """Return the CSS colour declaration for a star-rating cell.

    The colour is chosen by the number of '★' characters in `stars`;
    any count outside 1-5 (including zero) maps to firebrick.
    """
    css_by_star_count = {
        1: 'color: darkgreen;',
        2: 'color: green;',
        3: 'color: forestgreen;',
        4: 'color: limegreen;',
        5: 'color: lime;',
    }
    return css_by_star_count.get(stars.count('★'), 'color: firebrick;')

# Build the merged table: one row per symbol, taken from the latest article
df = merge_results()

# The tooltip frame has to be built while 'summary' is still a column
ttips = get_tooltips(data=df)
# From here on the summary only lives in the tooltips
df = df.drop(columns='summary')

# Column order for display
new_order = [
    'name', 'symbol', 'fair_value', 'price', 'discount', 'industry',
    'uncertainity_rating', 'star_rating', 'pub_date', 'url',
]
df = df[new_order]

# Human-readable headers, in the same order as new_order
df.columns = [
    'Name', 'Symbol', 'Fair Value', 'Price', 'Discount', 'Industry',
    'Uncertainity Rating', 'Star Rating', 'Date', 'Reference',
]

# Caption shown above the table, stamped with today's date
table_caption = f'Morningstar ASX Watchlist - {date.today()}'

# Numeric form of the uncertainty rating; used as the gradient map because
# the column itself holds text labels
uncertainity_rating_ser = df['Uncertainity Rating'].apply(
    lambda rating: uncertainity_map[rating]
)

# Styled table; being the last expression, it is what the cell displays
(
    df.style
    .pipe(tooltips, data=ttips)
    .hide(axis='index')
    .set_caption(table_caption)
    .set_properties(**{'border': '0.1px solid black'})
    # Text columns are left-aligned
    .set_properties(
        subset=['Name', 'Symbol', 'Industry', 'Uncertainity Rating', 'Star Rating',
                'Date', 'Reference'],
        **{'text-align': 'left'},
    )
    .set_table_styles([
        {'selector': 'th.col_heading', 'props': 'text-align: center'},
        {'selector': 'caption', 'props': [('text-align', 'center'), ('font-size', '12pt')]},
    ])
    .background_gradient(subset=['Discount'], cmap='Oranges')
    .background_gradient(subset=['Uncertainity Rating'], gmap=uncertainity_rating_ser,
                         cmap='Greens')
    .map(style_stars, subset=['Star Rating'])
    .format({
        'Date': '{:%Y-%m-%d}',
        'Fair Value': '${:.2f}',
        'Price': '${:.2f}',
        'Discount': '{:.2f}%',
        # Reference holds a (url, truncated title) tuple; render it as a link
        'Reference': lambda ref: f'<a target="_blank" href="{ref[0]}">{ref[1]}...</a>',
    })
)

Name,Symbol,Fair Value,Price,Discount,Industry,Uncertainity Rating,Star Rating,Date,Reference
Audinate Group Ltd,AD8,$19.00,$6.38,66.42%,Technology,Very High,★★★★★,2025-03-27,Two controversial five-st...
AGL Energy (AGL),AGL,$12.00,$9.78,18.50%,Utilities,High,★★★★,2025-07-15,Our top ASX picks in ever...
Auckland International Airport Ltd (AIA),AIA,$8.60,$7.12,17.21%,Transportation/Airlines,Low,★★★★,2025-01-28,The ASX share to own fore...
ASX Limited,ASX,$77.00,$70.21,8.82%,Financial Services,Low,★★★★,2025-07-15,Our top ASX picks in ever...
Aurizon Holdings (AZJ),AZJ,$4.40,$3.26,25.91%,Industrials,Medium,★★★★,2025-07-15,Our top ASX picks in ever...
Beach Energy Ltd,BPT,$2.55,$1.34,47.25%,Oil and Gas,High,★★★★★,2025-06-26,3 cheap fully franked sto...
DigiCo Infrastructure REIT Stapled (1 Ord & 1 Unt),DGT,$3.40,$3.28,3.53%,Real Estate,High,★★★,2025-06-09,3 ASX players with exposu...
Domino’s Pizza Enterprises,DMP,$58.00,$18.74,67.69%,Consumer Cyclical,High,★★★★★,2025-07-15,Our top ASX picks in ever...
Dexus (DXS),DXS,$9.60,$6.95,27.60%,Real Estate,Medium,★★★★,2025-07-15,Our top ASX picks in ever...
Goodman Group,GMG,$28.00,$35.02,-25.07%,Real Estate,Medium,★★,2025-06-09,3 ASX players with exposu...
