In [1]:
# Download all libraries
!pip install simfin
import simfin as sf
from simfin.names import *
import pandas as pd
from IPython.display import display
import numpy as np



In [2]:
# Set API Key and Data Path
sf.set_api_key('bcafba62-9901-4dfd-847f-302ad1b0d4af') 
sf.set_data_dir('simfin_data')              

# Loading KO's Annual Financial Statements for the U.S. Market
income_raw  = sf.load_income(variant='annual', market='us')
balance_raw = sf.load_balance(variant='annual', market='us')
cashflow_raw= sf.load_cashflow(variant='annual', market='us')

income_ko  = income_raw.xs('KO', level=TICKER)
balance_ko = balance_raw.xs('KO', level=TICKER)
cashflow_ko= cashflow_raw.xs('KO', level=TICKER)

Dataset "us-income-annual" on disk (1 days old).
- Loading from disk ... Done!
Dataset "us-balance-annual" on disk (1 days old).
- Loading from disk ... Done!
Dataset "us-cashflow-annual" on disk (1 days old).
- Loading from disk ... Done!


  df = pd.read_csv(path, sep=';', header=0,
  df = pd.read_csv(path, sep=';', header=0,
  df = pd.read_csv(path, sep=';', header=0,


In [3]:
def tidy(df, years=5):  # Defines a function tidy that takes a DataFrame df and an optional years parameter
    df = df.copy()  # Creates a copy of the input DataFrame so the original is not modified
    df['Year'] = df.index.year  # Adds a new column 'Year' containing the year extracted from the datetime index
    df_y = df.groupby('Year').last()  # Groups rows by year and keeps the last row for each year
    df_y = df_y.tail(years)  # Keeps only the most recent 'years'years from the grouped DataFrame
    df_t = df_y.T  # Transposes the DataFrame so rows become line items and columns become years
    return df_t

income_t  = tidy(income_ko,  years=5)
balance_t = tidy(balance_ko, years=5)
cashflow_t= tidy(cashflow_ko,years=5)
# Applies 'tidy' to the statements to get the last 5 years format

In [4]:
def to_hundred_million_usd(df):  # Defines a function that converts all numeric values in DataFrame df into units of hundred million USD
    df_num = df.apply(pd.to_numeric, errors='coerce')  # Converts all entries in df to numeric values, turning non‑convertible entries into NaN
    return (df_num / 1e8).round(2)  # Divides all numeric values by 100,000,000 to get “hundred million USD” and rounds them to 2 decimal places

income_hm  = to_hundred_million_usd(income_t)
balance_hm = to_hundred_million_usd(balance_t)
cashflow_hm= to_hundred_million_usd(cashflow_t)
# Applies this conversion to the statements

print('KO last 5 years income statement (unit: hundred million USD)')
display(income_hm)

print('KO last 5 years balance sheet (unit: hundred million USD)')
display(balance_hm)

print('KO last 5 years cash flow statement (unit: hundred million USD)')
display(cashflow_hm)
# Print the tables

KO last 5 years income statement (unit: hundred million USD)


Year,2020,2021,2022,2023,2024
SimFinId,0.0,0.0,0.0,0.0,0.0
Currency,,,,,
Fiscal Year,0.0,0.0,0.0,0.0,0.0
Fiscal Period,,,,,
Publish Date,,,,,
Restated Date,,,,,
Shares (Basic),42.95,43.18,43.28,43.23,43.09
Shares (Diluted),43.23,43.44,43.5,43.39,43.2
Revenue,330.14,386.55,430.04,457.54,470.61
Cost of Revenue,-134.33,-153.57,-180.0,-185.2,-183.24


KO last 5 years balance sheet (unit: hundred million USD)


Year,2020,2021,2022,2023,2024
SimFinId,0.0,0.0,0.0,0.0,0.0
Currency,,,,,
Fiscal Year,0.0,0.0,0.0,0.0,0.0
Fiscal Period,,,,,
Publish Date,,,,,
Restated Date,,,,,
Shares (Basic),42.95,43.18,43.28,43.23,43.09
Shares (Diluted),43.23,43.44,43.5,43.39,43.2
"Cash, Cash Equivalents & Short Term Investments",85.66,126.25,116.31,136.63,145.71
Accounts & Notes Receivable,54.92,35.12,34.87,34.1,35.69


KO last 5 years cash flow statement (unit: hundred million USD)


Year,2020,2021,2022,2023,2024
SimFinId,0.0,0.0,0.0,0.0,0.0
Currency,,,,,
Fiscal Year,0.0,0.0,0.0,0.0,0.0
Fiscal Period,,,,,
Publish Date,,,,,
Restated Date,,,,,
Shares (Basic),42.95,43.18,43.28,43.23,43.09
Shares (Diluted),43.23,43.44,43.5,43.39,43.2
Net Income/Starting Line,77.68,98.04,95.71,107.03,106.49
Depreciation & Amortization,15.36,14.52,12.6,11.28,10.75


In [5]:
# To reduce KeyErrors, print the index first
print("Income rows:", income_hm.index.tolist())
print("Balance rows:", balance_hm.index.tolist())
print("Cashflow rows:", cashflow_hm.index.tolist())

# Naming Accounting Accounts
rev = income_hm.loc["Revenue"]                             # Revenue
gp  = income_hm.loc["Gross Profit"]                        # Gross profit
op  = income_hm.loc["Operating Income (Loss)"]             # Operating income
ni  = income_hm.loc["Net Income"]                          # Net income
ta  = balance_hm.loc["Total Assets"]                       # Total assets
te  = balance_hm.loc["Total Equity"]                       # Total equity
tl  = balance_hm.loc["Total Liabilities"]                  # Total liabilities
tca = balance_hm.loc["Total Current Assets"]               # Current assets
tcl = balance_hm.loc["Total Current Liabilities"]          # Current liabilities
inv = balance_hm.loc["Inventories"]                        # Inventories
int_exp = income_hm.loc["Interest Expense, Net"]           # Interest expense (negative value)
cor = income_hm.loc["Cost of Revenue"]                     # Cost of revenue

# ===== Profitability ratios =====
gross_margin      = gp / rev                              # Gross margin
operating_margin  = op / rev                              # Operating margin
net_margin        = ni / rev                              # Net margin
roa               = ni / ta                               # ROA (using ending total assets)
roe               = ni / te                               # ROE (using ending total equity)

# ===== Leverage ratios =====
debt_ratio        = tl / ta                               # Debt ratio (liabilities/assets)
equity_multiplier = ta / te                               # Equity multiplier
interest_coverage = op / abs(int_exp)                     # Interest coverage ratio

# ===== Efficiency ratios =====
asset_turnover    = rev / ta                              # Asset turnover
inventory_turnover= cor / inv                             # Inventory turnover (using cost of revenue)

# ===== Liquidity ratios =====
current_ratio     = tca / tcl                             # Current ratio
quick_ratio       = (tca - inv) / tcl                     # Quick ratio

# ===== Growth rates (YoY) =====
revenue_growth    = rev.pct_change()                      # Revenue growth rate
net_income_growth = ni.pct_change()                       # Net income growth rate

# ===== Aggregate into a DataFrame, rows = indicators, columns = years =====
ratios = pd.DataFrame({
    "Gross Margin": gross_margin,
    "Operating Margin": operating_margin,
    "Net Margin": net_margin,
    "ROA": roa,
    "ROE": roe,
    "Debt Ratio": debt_ratio,
    "Equity Multiplier": equity_multiplier,
    "Interest Coverage": interest_coverage,
    "Asset Turnover": asset_turnover,
    "Inventory Turnover": inventory_turnover,
    "Current Ratio": current_ratio,
    "Quick Ratio": quick_ratio,
    "Revenue Growth": revenue_growth,
    "Net Income Growth": net_income_growth
}).T.round(4)

display(ratios)

Income rows: ['SimFinId', 'Currency', 'Fiscal Year', 'Fiscal Period', 'Publish Date', 'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Revenue', 'Cost of Revenue', 'Gross Profit', 'Operating Expenses', 'Selling, General & Administrative', 'Research & Development', 'Depreciation & Amortization', 'Operating Income (Loss)', 'Non-Operating Income (Loss)', 'Interest Expense, Net', 'Pretax Income (Loss), Adj.', 'Abnormal Gains (Losses)', 'Pretax Income (Loss)', 'Income Tax (Expense) Benefit, Net', 'Income (Loss) from Continuing Operations', 'Net Extraordinary Gains (Losses)', 'Net Income', 'Net Income (Common)']
Balance rows: ['SimFinId', 'Currency', 'Fiscal Year', 'Fiscal Period', 'Publish Date', 'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Cash, Cash Equivalents & Short Term Investments', 'Accounts & Notes Receivable', 'Inventories', 'Total Current Assets', 'Property, Plant & Equipment, Net', 'Long Term Investments & Receivables', 'Other Long Term Assets', 'Total Noncurre

Year,2020,2021,2022,2023,2024
Gross Margin,0.5931,0.6027,0.5814,0.5952,0.6106
Operating Margin,0.2725,0.2667,0.2537,0.2472,0.2123
Net Margin,0.2347,0.2528,0.2219,0.2342,0.2259
ROA,0.0887,0.1036,0.1029,0.1097,0.1057
ROE,0.364,0.393,0.3695,0.3899,0.4031
Debt Ratio,0.7562,0.7365,0.7216,0.7187,0.7377
Equity Multiplier,4.1015,3.7954,3.5918,3.5554,3.8127
Interest Coverage,8.4321,7.8032,25.194,18.2435,14.9581
Asset Turnover,0.3782,0.4097,0.4636,0.4683,0.468
Inventory Turnover,-4.113,-4.4982,-4.2523,-4.1863,-3.8756


In [6]:
# Retrieve the required accounting accounts from the existing three tables
rev      = income_hm.loc["Revenue"]                            # Revenue
ni       = income_hm.loc["Net Income"]                         # Net income

cfo      = cashflow_hm.loc["Net Cash from Operating Activities"]   # Net cash from operating activities (CFO)
capex_ln = cashflow_hm.loc["Change in Fixed Assets & Intangibles"] # Change in fixed assets & intangibles (usually negative, i.e. capex)
div_paid = cashflow_hm.loc["Dividends Paid"]                       # Dividends paid (negative cash outflow)

cash     = balance_hm.loc["Cash, Cash Equivalents & Short Term Investments"]  # Cash and cash equivalents
std      = balance_hm.loc["Short Term Debt"]                                   # Short-term debt
ltd      = balance_hm.loc["Long Term Debt"]                                    # Long-term debt
equity   = balance_hm.loc["Total Equity"]                                      # Total equity


# FCF ≈ CFO + “Change in Fixed Assets & Intangibles”
fcf = cfo + capex_ln

# Cash flow quality: operating cash flow / net income
cfo_ni = cfo / ni

# FCF ratios
fcf_to_rev    = fcf / rev          # FCF / Revenue
fcf_to_equity = fcf / equity       # FCF / Equity

# Dividend-related
dividend = -div_paid               # Convert negative dividend cash outflow to a positive amount

div_payout_ni  = dividend / ni     # Dividend payout ratio relative to net income
div_payout_cfo = dividend / cfo    # Dividend payout ratio relative to operating cash flow

# Net debt and net debt ratio
total_debt = std + ltd
net_debt   = total_debt - cash         # Net debt = interest-bearing debt − cash
net_debt_eq= net_debt / equity         # Net debt to equity ratio


# Merge into the original ratios table
extra_ratios = pd.DataFrame({
    "CFO / Net Income":         cfo_ni,
    "FCF":                      fcf,
    "FCF / Revenue":            fcf_to_rev,
    "FCF / Equity":             fcf_to_equity,
    "Dividend Payout (NI)":     div_payout_ni,
    "Dividend Payout (CFO)":    div_payout_cfo,
    "Net Debt":                 net_debt,
    "Net Debt / Equity":        net_debt_eq
}).T

all_ratios = pd.concat([ratios, extra_ratios], axis=0).round(4)

display(all_ratios)

Year,2020,2021,2022,2023,2024
Gross Margin,0.5931,0.6027,0.5814,0.5952,0.6106
Operating Margin,0.2725,0.2667,0.2537,0.2472,0.2123
Net Margin,0.2347,0.2528,0.2219,0.2342,0.2259
ROA,0.0887,0.1036,0.1029,0.1097,0.1057
ROE,0.364,0.393,0.3695,0.3899,0.4031
Debt Ratio,0.7562,0.7365,0.7216,0.7187,0.7377
Equity Multiplier,4.1015,3.7954,3.5918,3.5554,3.8127
Interest Coverage,8.4321,7.8032,25.194,18.2435,14.9581
Asset Turnover,0.3782,0.4097,0.4636,0.4683,0.468
Inventory Turnover,-4.113,-4.4982,-4.2523,-4.1863,-3.8756


In [7]:
base_year = fcf.index.max()  # Use the latest year in the FCF index as the base year
shares_diluted = income_hm.loc["Shares (Diluted)", base_year]  # Get diluted shares outstanding (in hundred millions) for the base year


# FCF Growth Rate = CAGR over the past 5 years
fcf_non_null = fcf.dropna()                   # Remove any years with missing FCF values
start_year = fcf_non_null.index.min()         # Get the earliest year with valid FCF
end_year   = fcf_non_null.index.max()         # Get the latest year with valid FCF
n_years    = end_year - start_year            # Calculate the number of years between start and end

fcf_start = fcf_non_null.loc[start_year]      # FCF value in the first year of the period
fcf_end   = fcf_non_null.loc[end_year]        # FCF value in the last year of the period


# CAGR = (Final value/Initial value)^(1/n) - 1
growth_rate_5y = (fcf_end / fcf_start) ** (1 / n_years) - 1   # Compute FCF CAGR over the period from start_year to end_year
print(f"FCF {start_year}-{end_year} CAGR: {growth_rate_5y:.4f}")  # Print the FCF CAGR with 4 decimal places

# Compare the average growth rates:
fcf_growth_hist = fcf_non_null.pct_change().dropna()          # Calculate year-over-year FCF growth rates and drop missing values
print("FCF average growth rates::", fcf_growth_hist.mean())   # Print the average of the yearly FCF growth rates



# WACC = E/V * Re + D/V * Rd * (1 - T)
# 1) Use the latest year (base_year) balance sheet to estimate D and E
equity = balance_hm.loc["Total Equity", base_year]                 # Equity (hundred million USD)
std    = balance_hm.loc["Short Term Debt", base_year]              # Short-term debt
ltd    = balance_hm.loc["Long Term Debt", base_year]               # Long-term debt
debt   = std + ltd                                                 # Interest-bearing debt D (hundred million USD)
V      = equity + debt                                             # Total capital (D + E)

E_weight = equity / V                                              # Equity weight in capital structure
D_weight = debt / V                                                # Debt weight in capital structure

# 2) Tax rate T: approximate as income tax expense / pretax income for the latest year
tax_exp   = income_hm.loc["Income Tax (Expense) Benefit, Net", base_year]  # Negative value means tax expense
pretax    = income_hm.loc["Pretax Income (Loss)", base_year]               # Pretax income
tax_rate  = -tax_exp / pretax if pretax != 0 else 0.21   # Fallback to 21% corporate tax rate if pretax is zero

# 3) Cost of debt Rd: interest expense / interest-bearing debt
int_exp = income_hm.loc["Interest Expense, Net", base_year]        # Typically a negative number
if debt > 0:
    Rd = -int_exp / debt                                           # Convert to positive and divide by total debt
else:
    Rd = 0.03  # If there is no debt, assume a small dummy rate (has little impact)

# 4) Cost of equity Re: using CAPM
risk_free_rate  = 0.04   # Current U.S. 10-year Treasury yield
market_premium  = 0.09   # Equity market risk premium (Aswath Damodaran “Country Default Spreads and Risk Premiums”)
beta            = 0.39   # 5-year monthly beta from Yahoo Finance

Re = risk_free_rate + beta * market_premium  # Cost of equity

# 5) WACC
WACC = E_weight * Re + D_weight * Rd * (1 - tax_rate)  # Weighted average cost of capital

print(f"\nWeights: E/V={E_weight:.3f}, D/V={D_weight:.3f}")
print(f"Tax rate T ≈ {tax_rate:.3f}")
print(f"Costs: Re={Re:.3f}, Rd={Rd:.3f}")
print(f"WACC = {WACC:.4f}")


# Use FCF CAGR + WACC to run a DCF valuation
discount_rate   = WACC                 # Discount rate = WACC
terminal_growth = 0.025                # Perpetual growth rate (can be adjusted based on judgement)
projection_years = 5                   # Number of years in the explicit forecast period

fcf_0 = fcf.loc[base_year]             # Base-year FCF (hundred million USD, at base_year)

years = np.arange(1, projection_years + 1)  # 1, 2, ..., projection_years

# 1) Forecast FCF for the next 5 years using CAGR
fcf_forecast = [fcf_0 * ((1 + growth_rate_5y) ** t) for t in years]  # Projected FCF each year

# 2) Discount the next 5 years of FCF back to present value
discount_factors = [(1 / ((1 + discount_rate) ** t)) for t in years]  # Discount factor for each year
pv_fcf = [fcf_forecast[t-1] * discount_factors[t-1] for t in years]   # Present value of each year's FCF

# 3) Terminal value
fcf_terminal   = fcf_forecast[-1] * (1 + terminal_growth)             # FCF in year 6 (first year of terminal period)
terminal_value = fcf_terminal / (discount_rate - terminal_growth)     # Gordon growth model terminal value
pv_terminal    = terminal_value * discount_factors[-1]                # Present value of terminal value

enterprise_value = sum(pv_fcf) + pv_terminal   # Using equity FCF, approximate this as equity value
equity_value     = enterprise_value            # Adjust here if you want to add/subtract net debt

intrinsic_value_per_share = equity_value / shares_diluted  # Intrinsic value per share (USD)

print("\n=== DCF results ===")
print(f"Present value of FCF over next 5 years (hundred million USD): {sum(pv_fcf):.2f}")
print(f"Present value of terminal value (hundred million USD): {pv_terminal:.2f}")
print(f"Estimated equity value (hundred million USD): {equity_value:.2f}")
print(f"Estimated intrinsic value per share (USD): {intrinsic_value_per_share:.2f}")

FCF 2020-2024 CAGR: -0.1428
FCF average growth rates:: -0.09057088983160608

Weights: E/V=0.372, D/V=0.628
Tax rate T ≈ 0.186
Costs: Re=0.075, Rd=0.015
WACC = 0.0356

=== DCF results ===
Present value of FCF over next 5 years (hundred million USD): 140.45
Present value of terminal value (hundred million USD): 1795.31
Estimated equity value (hundred million USD): 1935.76
Estimated intrinsic value per share (USD): 44.81


In [8]:
# Retrieve the required accounting accounts from the existing three tables
years = income_hm.columns  # 2020–2024

net_income = income_hm.loc["Net Income", years]      
revenue    = income_hm.loc["Revenue", years]         
shares     = income_hm.loc["Shares (Diluted)", years]  
total_debt = balance_hm.loc["Short Term Debt", years] + balance_hm.loc["Long Term Debt", years]
cash       = balance_hm.loc["Cash, Cash Equivalents & Short Term Investments", years]

# EBITDA
interest   = -income_hm.loc["Interest Expense, Net", years]                
tax        = -income_hm.loc["Income Tax (Expense) Benefit, Net", years]      
try:
    d_and_a = cashflow_hm.loc["Depreciation & Amortization", years]
except KeyError:
    d_and_a = 0  

ebitda     = net_income + interest + tax + d_and_a

# Stock prices are based on year-end closing prices.
price_dict = {
    2020: 54.84,
    2021: 59.21,
    2022: 63.61,
    2023: 58.93,
    2024: 62.26,
}
price = pd.Series(price_dict).reindex(years)

# Calculate the Multiple Indicator
market_cap = price * shares
ev = market_cap + total_debt - cash
eps = net_income / shares
pe  = price / eps
ps  = market_cap / revenue         
ev_ebitda = ev / ebitda             

multiples_hist = pd.DataFrame({
    "Price": price,
    "P/E": pe,
    "P/S": ps,
    "EV/EBITDA": ev_ebitda
})
print("Historical Multiple Indicator：")
display(multiples_hist.round(2))

# Take the average multiple over the past 5 years
pe_mean        = pe.mean()
ps_mean        = ps.mean()
ev_ebitda_mean = ev_ebitda.mean()

print(" 5-year average multiple：")
print(f"average P/E: {pe_mean:.2f}")
print(f"average P/S: {ps_mean:.2f}")
print(f"average EV/EBITDA: {ev_ebitda_mean:.2f}")

# Use 2024 income × average multiple to determine valuation
year_val = years.max() 

ni_2024   = net_income[year_val]
rev_2024  = revenue[year_val]
ebitda_24 = ebitda[year_val]
shares_24 = shares[year_val]
debt_24   = total_debt[year_val]
cash_24   = cash[year_val]

# PE valuation
mc_pe = ni_2024 * pe_mean

# PS valuation
mc_ps = rev_2024 * ps_mean

# EV/EBITDA valuation
ev_target = ebitda_24 * ev_ebitda_mean
net_debt_24 = debt_24 - cash_24
mc_ev = ev_target - net_debt_24

# Stock Prices Under Three Valuation Methods
price_pe = mc_pe / shares_24
price_ps = mc_ps / shares_24
price_ev = mc_ev / shares_24

summary = pd.DataFrame({
    "Method": ["P/E", "P/S", "EV/EBITDA"],
    "Implied Market Cap (hundred million USD)": [mc_pe, mc_ps, mc_ev],
    "Implied Price (USD/share)": [price_pe, price_ps, price_ev]
})
print(f"\nvaluation based on income of {year_val} & 5-year average multiple：")
display(summary.round(2))


Historical Multiple Indicator：


Unnamed: 0_level_0,Price,P/E,P/S,EV/EBITDA
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,54.84,30.6,7.18,22.0
2021,59.21,26.32,6.65,18.95
2022,63.61,29.0,6.43,22.79
2023,58.93,23.87,5.59,19.31
2024,62.26,25.3,5.72,20.18


 5-year average multiple：
average P/E: 27.02
average P/S: 6.31
average EV/EBITDA: 20.65

valuation based on income of 2024 & 5-year average multiple：


Unnamed: 0,Method,Implied Market Cap (hundred million USD),Implied Price (USD/share)
0,P/E,2872.28,66.49
1,P/S,2971.72,68.79
2,EV/EBITDA,2758.41,63.85


In [None]:
!pip install --upgrade openai
import os
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.environ["AZURE_API_KEY"]



In [54]:
# Construct 5-year key financial indicators as financials_5y
years = income_hm.columns

financials = pd.DataFrame({
    "Year": years,
    "Revenue": income_hm.loc["Revenue", years].values,
    "Net Income": income_hm.loc["Net Income", years].values,
    "FCF": fcf[years].values,
    "Total Assets": balance_hm.loc["Total Assets", years].values,
    "Total Equity": balance_hm.loc["Total Equity", years].values,
    "Total Debt": (
        balance_hm.loc["Short Term Debt", years] +
        balance_hm.loc["Long Term Debt", years]
    ).values,
}).set_index("Year")

# Construct ratios
ratios_5y_df = all_ratios

# Combine DCF and Multiple Indicator into a valuation dictionary
dcf_block = {
    "base_year": int(base_year),
    "fcf_cagr_5y": float(growth_rate_5y),
    "discount_rate_wacc": float(WACC),
    "terminal_growth": float(terminal_growth),
    "projection_years": int(projection_years),
    "equity_value_hundred_million_usd": float(enterprise_value),
    "intrinsic_value_per_share_usd": float(intrinsic_value_per_share),
}

multiples_block = {

    "historical_multiples_5y": multiples_hist.round(4).to_dict(orient="index"),

    "average_multiples_5y": {
        "pe_mean": float(pe_mean),
        "ps_mean": float(ps_mean),
        "ev_ebitda_mean": float(ev_ebitda_mean),
    },

    "implied_prices_2024": summary.round(4).to_dict(orient="records"),
}

valuation = {
    "dcf": dcf_block,
    "multiples": multiples_block,
}

# Implement build_llm_input and include valuation in it.
def build_llm_input(financials, ratios, valuation):
    return {
        "company": "The Coca-Cola Company",
        "ticker": "KO",
        "financials_5y": financials.to_dict(orient="records"),
        "ratios_5y": ratios.to_dict(orient="records"),
        "valuation": valuation,
        "data_sources": [
            "SEC EDGAR companyfacts API",
            "US-GAAP 10-K filings"
        ]
    }

# Final input to the LLM
llm_input = build_llm_input(
    financials=financials,
    ratios=ratios_5y_df.T, 
    valuation=valuation
)

llm_input

{'company': 'The Coca-Cola Company',
 'ticker': 'KO',
 'financials_5y': [{'Revenue': 330.14,
   'Net Income': 77.47,
   'FCF': 88.56,
   'Total Assets': 872.96,
   'Total Equity': 212.84,
   'Total Debt': 427.93},
  {'Revenue': 386.55,
   'Net Income': 97.71,
   'FCF': 113.66,
   'Total Assets': 943.54,
   'Total Equity': 248.6,
   'Total Debt': 427.61},
  {'Revenue': 430.04,
   'Net Income': 95.42,
   'FCF': 96.09,
   'Total Assets': 927.63,
   'Total Equity': 258.26,
   'Total Debt': 391.49},
  {'Revenue': 457.54,
   'Net Income': 107.14,
   'FCF': 98.21,
   'Total Assets': 977.03,
   'Total Equity': 274.8,
   'Total Debt': 420.64000000000004},
  {'Revenue': 470.61,
   'Net Income': 106.31,
   'FCF': 47.81,
   'Total Assets': 1005.49,
   'Total Equity': 263.72,
   'Total Debt': 445.22}],
 'ratios_5y': [{'Gross Margin': 0.5931,
   'Operating Margin': 0.2725,
   'Net Margin': 0.2347,
   'ROA': 0.0887,
   'ROE': 0.364,
   'Debt Ratio': 0.7562,
   'Equity Multiplier': 4.1015,
   'Interes

In [55]:
# LLM Policy
INVESTMENT_MEMO_POLICY = """
1) Abstract: summary of the investment memo, including rationale and results. Keep this section under 200 words.
2) Key Financial Metrics and Ratios Analysis:Select the five metrics you deem most critical and representative of the company's overall performance for analysis. Explain how the values of these metrics reflect the company's situation. Keep this section under 400 words.
3) Valuation:The results of the two valuation methods are explained separately, along with the respective conclusions drawn from them. Keep this section under 400 words.
4) Investment Recommendation:You can only choose buy/hold/sell, and explain step-by-step why you reached this conclusion and which indicators were used. Keep this section under 400 words.
5) Risks and limitations:Describe the risks and limitations associated with the investment conclusions derived from this method. Keep this section under 300 words.
"""

# Generate Investment Memo (LLM Agent)
client = OpenAI()
json.dumps(llm_input)

def generate_investment_memo(llm_input):
    """
    Sends the packaged data to GPT-4o-mini and returns the professional memo.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini", 
        messages=[
            {"role": "system", "content": INVESTMENT_MEMO_POLICY},
            {"role": "user", "content": json.dumps(llm_input, indent=2)}
        ],
        temperature=0.2 
    )
    return response.choices[0].message.content

In [59]:
# Export the investment memo to Markdown format
markdown_content = f"# Investment Memo\n\n{memo_content}"
desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
markdown_file_path = os.path.join(desktop_path, 'investment_memo.md')
with open(markdown_file_path, 'w', encoding='utf-8') as file:
    file.write(markdown_content)

print(f"The file has been saved to: {markdown_file_path}")

The file has been saved to: /Users/aurora/Desktop/investment_memo.md
