In [1]:
pip install requests pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m26.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
import pandas as pd
SEC_HEADERS = {
    "User-Agent": "Guyu (UCL MSc Coursework) zhangguyu2021@163.com",
    "Accept": "application/json,text/plain,*/*",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
}


In [3]:
# Defines a function that takes a stock ticker as a string
def get_cik_from_ticker(ticker: str) -> str:
    url = "https://www.sec.gov/files/company_tickers.json"
    r = requests.get(url, headers=SEC_HEADERS, timeout=30)
    r.raise_for_status()
    data = r.json()

    ticker = ticker.upper()
    for _, row in data.items():
        if row["ticker"].upper() == ticker: 
            return str(int(row["cik_str"])).zfill(10)  
    raise ValueError(f"Ticker {ticker} not found.")

In [4]:
def get_company_facts(cik10: str) -> dict:
    url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik10}.json"
    r = requests.get(url, headers=SEC_HEADERS, timeout=30)
    r.raise_for_status()
    return r.json()

In [5]:
def extract_10k_annual_series(facts: dict, tag: str) -> pd.Series:
    try:
        units = facts["facts"]["us-gaap"][tag]["units"]
    except KeyError:
        return pd.Series(dtype="float64")

    unit_key = "USD" if "USD" in units else list(units.keys())[0] #Prioritises values reported in USD
    items = units[unit_key]

    rows = []

# retains only those originating from annual 10-K filings, excluding interim reports such as 10-Q.
    for it in items:
        if it.get("form") != "10-K":
            continue
        # Extracts the fiscal year (fy) and the reported value (val) from each XBRL record.
        fy = it.get("fy")
        val = it.get("val")
        if fy is not None and val is not None:
            rows.append((int(fy), float(val)))

    if not rows:
        return pd.Series(dtype="float64") # Returns an empty Series if no valid annual observations are found, ensuring robustness in downstream analysis.

    # Converts the results into a Series; if multiple filings exist for the same fiscal year (e.g. amendments), the most recent value is retained.
    s = pd.Series({fy: v for fy, v in rows}).sort_index()
    s.index = s.index.astype(str)
    s.name = tag
    return s


In [6]:
# income statement
INCOME_TAGS = {
    "Total Revenue": ["Revenues", "SalesRevenueNet"],
    "COGS": ["CostOfGoodsAndServicesSold"],
    "Operating Income": ["OperatingIncomeLoss"],
    "Net Income": ["NetIncomeLoss"],
}

# balance sheet
BALANCE_TAGS = {
    "Total Assets": ["Assets"],
    "Total Liabilities": ["Liabilities"],
    "Equity": ["StockholdersEquity"],
    "Cash & Cash Equivalents": ["CashAndCashEquivalentsAtCarryingValue"],
    "Long Term Debt": ["LongTermDebt"],
    "Short Term Debt": ["DebtCurrent"],
}

# cash flow statement
CASHFLOW_TAGS = {
    "Operating Cash Flow": ["NetCashProvidedByUsedInOperatingActivities"],
    "CapEx": ["PaymentsToAcquirePropertyPlantAndEquipment"],
}


In [7]:
def first_available(facts: dict, candidates: list[str]) -> pd.Series:
    for tag in candidates:
        s = extract_10k_annual_series(facts, tag)#extraction of a 10-K annual time series for the given tag
        if not s.empty:
            return s
    return pd.Series(dtype="float64")

In [8]:
def build_5y_statements_from_sec(ticker: str):
    cik10 = get_cik_from_ticker(ticker)
    facts = get_company_facts(cik10)


    base = first_available(facts, ["Revenues", "SalesRevenueNet", "NetIncomeLoss"])
    if base.empty:
        raise ValueError("Cannot infer fiscal years from SEC facts (base series empty).")
# Takes the last 5 fiscal years from the base series (string index) and uses them as statement columns.
    last5_years = list(base.index[-5:]) 
    cols = last5_years

    income = pd.DataFrame(index=INCOME_TAGS.keys(), columns=cols, dtype="float64")
    balance = pd.DataFrame(index=BALANCE_TAGS.keys(), columns=cols, dtype="float64")
    cashflow = pd.DataFrame(index=CASHFLOW_TAGS.keys(), columns=cols, dtype="float64")

# income statement
    for k, tags in INCOME_TAGS.items():
        s = first_available(facts, tags).reindex(cols)
        income.loc[k, cols] = s.values #For each standard item, finds the first available annual series among candidate tags, aligns it to the target years, then writes values into the income statement row.

# balance sheet
    for k, tags in BALANCE_TAGS.items():
        s = first_available(facts, tags).reindex(cols)
        balance.loc[k, cols] = s.values

# cash flow statement
    for k, tags in CASHFLOW_TAGS.items():
        s = first_available(facts, tags).reindex(cols)
        cashflow.loc[k, cols] = s.values

    # Total Debt = LT + ST
    if "Long Term Debt" in balance.index and "Short Term Debt" in balance.index:
        balance.loc["Total Debt", cols] = (
            balance.loc["Long Term Debt", cols].fillna(0)
            + balance.loc["Short Term Debt", cols].fillna(0)
        )
# Returns the three standardised statements aligned to the most recent five fiscal years.
    return income, balance, cashflow


In [9]:
income_5y, balance_5y, cashflow_5y = build_5y_statements_from_sec("KO")

print("Income (5Y):")
print(income_5y)

print("\nBalance (5Y):")
print(balance_5y)

print("\nCashflow (5Y):")
print(cashflow_5y)

Income (5Y):
                          2020          2021          2022          2023  \
Total Revenue     3.301400e+10  3.865500e+10  4.300400e+10  4.575400e+10   
COGS              1.343300e+10  1.535700e+10  1.800000e+10  1.852000e+10   
Operating Income  8.997000e+09  1.030800e+10  1.090900e+10  1.131100e+10   
Net Income        7.747000e+09  9.771000e+09  9.542000e+09  1.071400e+10   

                          2024  
Total Revenue     4.706100e+10  
COGS              1.832400e+10  
Operating Income  9.992000e+09  
Net Income        1.063100e+10  

Balance (5Y):
                                 2020          2021          2022  \
Total Assets             8.729600e+10  9.435400e+10  9.276300e+10   
Total Liabilities                 NaN           NaN           NaN   
Equity                   1.929900e+10  2.299900e+10  2.410500e+10   
Cash & Cash Equivalents  6.795000e+09  9.684000e+09  9.519000e+09   
Long Term Debt           4.061000e+10  3.945400e+10  3.677600e+10   
Short Term D

In [10]:
# Assigns the 5-year income statement to income as the base input for profitability ratio analysis.
income = income_5y
ratios = pd.DataFrame(index=income.columns)

# Gross margin
ratios["Gross Margin"] = (
    (income.loc["Total Revenue"] - income.loc["COGS"]) 
    / income.loc["Total Revenue"]
)

# Operating Margin
ratios["Operating Margin"] = (
    income.loc["Operating Income"] / income.loc["Total Revenue"]
)

# Net Margin
ratios["Net Margin"] = (
    income.loc["Net Income"] / income.loc["Total Revenue"]
)

In [11]:
balance = balance_5y.copy()

ratios["Asset Turnover"] = (
    income.loc["Total Revenue"] / balance.loc["Total Assets"]
)


In [12]:


# Assets = Liabilities + Equity
balance.loc["Total Liabilities"] = (
    balance.loc["Total Assets"] - balance.loc["Equity"]
)


# ROA
ratios["ROA"] = (
    income.loc["Net Income"] / balance.loc["Total Assets"]
)

# ROE
ratios["ROE"] = (
    income.loc["Net Income"] / balance.loc["Equity"]
)

In [13]:
# Debt-to-Equity Ratio
ratios["Debt / Equity"] = (
    balance.loc["Total Debt"] / balance.loc["Equity"]
)

# Debt-to-Assets Ratio
ratios["Debt / Assets"] = (
    balance.loc["Total Debt"] / balance.loc["Total Assets"]
)

In [14]:
# Revenue Growth
ratios["Revenue Growth"] = income.loc["Total Revenue"].pct_change()

# Net Income Growth
ratios["Net Income Growth"] = income.loc["Net Income"].pct_change()

ratios

Unnamed: 0,Gross Margin,Operating Margin,Net Margin,Asset Turnover,ROA,ROE,Debt / Equity,Debt / Assets,Revenue Growth,Net Income Growth
2020,0.593112,0.272521,0.234658,0.378185,0.088744,0.40142,2.104254,0.465199,,
2021,0.602716,0.266667,0.252775,0.409681,0.103557,0.424845,1.715466,0.418149,0.170867,0.261262
2022,0.581434,0.253674,0.221886,0.46359,0.102864,0.395851,1.525659,0.396451,0.112508,-0.023437
2023,0.595227,0.247213,0.234165,0.468297,0.109659,0.413014,1.445858,0.383888,0.063948,0.122825
2024,0.610633,0.21232,0.225898,0.46804,0.10573,0.427704,0.0,0.0,0.028566,-0.007747


In [15]:
# Creates a copy of the 5-year cash flow statement
cashflow = cashflow_5y.copy()

fcf = pd.DataFrame(index=cashflow.columns)

# FCF=Operating Cash Flowâˆ’Capital Expenditure
fcf["Free Cash Flow"] = (
    cashflow.loc["Operating Cash Flow"] - cashflow.loc["CapEx"]
)
fcf = fcf.sort_index()
fcf["Free Cash Flow"] = fcf["Free Cash Flow"].astype(float)

fcf


Unnamed: 0,Free Cash Flow
2020,8667000000.0
2021,11258000000.0
2022,9534000000.0
2023,9747000000.0
2024,4741000000.0


In [16]:
# Computes the compound annual growth rate (CAGR) of free cash flow
fcf_cagr = (
    (fcf.iloc[-1] / fcf.iloc[0]) ** (1/4) - 1
)

fcf_cagr # fcf_cagr represents the average annual growth rate of free cash flow over the past five years.

Free Cash Flow   -0.139996
dtype: float64

In [17]:
import numpy as np

latest_year = balance_5y.columns[-1] # Selects the most recent fiscal year as the valuation date for capital structure estimation.

E = balance_5y.loc["Equity", latest_year]
D = balance_5y.loc["Total Debt", latest_year] # Extracts equity and total debt from the balance sheet.


if D <= 0 or np.isnan(D):
    w_e = 1.0
    w_d = 0.0
else:
    w_e = E / (E + D)
    w_d = D / (E + D)


# reference: Vicente, C. D. S. (2024). Equity valuation: Coca-Cola Company (Master's thesis).
rf = 0.0418      
beta = 0.5878   
mrp = 0.0460     

Re = rf + beta * mrp



if D <= 0 or np.isnan(D):
    Rd = 0.0
else:
    interest_expense = (
        income_5y.loc["Operating Income", latest_year]
        - income_5y.loc["Net Income", latest_year]
    )
    Rd = abs(interest_expense) / D # When explicit interest expense is unavailable, interest cost is approximated using operating income minus net income, scaled by total debt.



tax_rate = 0.1871   # use reference: Vicente, C. D. S. (2024). Equity valuation: Coca-Cola Company (Master's thesis).


wacc = w_e * Re + w_d * Rd * (1 - tax_rate)

wacc


0.06883879999999999

In [18]:
import numpy as np

fcf_series = fcf["Free Cash Flow"].dropna().astype(float)

if len(fcf_series) < 2:
    raise ValueError("Not enough FCF observations to compute CAGR.")

start, end = fcf_series.iloc[0], fcf_series.iloc[-1]
n = len(fcf_series) - 1

if start <= 0:
    raise ValueError("FCF start value must be > 0 for CAGR.")

fcf_cagr = (end / start) ** (1 / n) - 1 # Computes the compound annual growth rate of free cash flow over the sample period.

g = min(float(fcf_cagr), 0.04)  


In [19]:

fcf_forecast = []
fcf_last = float(fcf["Free Cash Flow"].iloc[-1])


for t in range(1, 6):
    fcf_t = fcf_last * (1 + g) ** t
    fcf_forecast.append(fcf_t) # Assumes free cash flow grows at a constant sustainable rate g over the next five years and projects it forward using compound growth.

fcf_forecast

g_terminal = 0.0264  #use reference: Vicente, C. D. S. (2024). Equity valuation: Coca-Cola Company (Master's thesis).

last_fcf = float(fcf_forecast[-1])  # Takes the final-year forecasted FCF as the base for terminal value calculation.

if wacc <= g_terminal: # Ensures that the discount rate exceeds the terminal growth rate, a necessary condition for the Gordon Growth Model.
    raise ValueError("Invalid assumptions: wacc must be greater than g_terminal for terminal value.")

terminal_value = last_fcf * (1 + g_terminal) / (wacc - g_terminal) #Computes terminal value using the Gordon Growth Model
terminal_value


53941740056.17159

In [20]:
# Discounts each forecasted free cash flow over the five-year explicit forecast period using WACC as the discount rate.
pv_fcf = sum(
    fcf_forecast[t-1] / (1 + wacc) ** t
    for t in range(1, 6)
)

pv_terminal = terminal_value / (1 + wacc) ** 5 # Discounts the terminal value

# Enterprise value
enterprise_value = pv_fcf + pv_terminal

enterprise_value

51608726793.18436

In [21]:
latest_year = balance.columns[-1]

# Net debt
net_debt = (
    balance.loc["Total Debt", latest_year]
    - balance.loc["Cash & Cash Equivalents", latest_year]
)

equity_value = enterprise_value - net_debt
equity_value

np.float64(62436726793.18436)

In [22]:
shares_outstanding = 4_300_000_000   # use references: Vicente, C. D. S. (2024). Equity valuation: Coca-Cola Company (Master's thesis).
# intrinsic value
intrinsic_value_per_share = equity_value / shares_outstanding

In [23]:
# use the website source
price = 70.44   
upside = (intrinsic_value_per_share / price) - 1

# margin of safety thresholds
buy_threshold = 0.20     # +20%
sell_threshold = -0.12   # -12%

if upside >= buy_threshold:
    rec = "Buy"
elif upside <= sell_threshold:
    rec = "Sell"
else:
    rec = "Hold"

rec


'Sell'

In [24]:
pip install openai reportlab



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m26.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.environ["AZURE_API_KEY"]

In [27]:

import os
import json
from openai import OpenAI
from reportlab.lib.pagesizes import LETTER
from reportlab.pdfbase.pdfmetrics import stringWidth
from reportlab.pdfgen import canvas


api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise EnvironmentError("OPENAI_API_KEY is not set in environment variables.")

client = OpenAI(api_key=api_key)

# generate memo
def generate_memo_text(payload: dict) -> str:
    prompt = f"""
You are a buy-side equity analyst.

Write a ~1-page investment memo in English with the following sections:

1) Recommendation & Upside
2) Valuation Summary
3) Latest Financial Snapshot
4) Interpretation
5) Risks & Next Steps

Rules:
- Use ONLY the JSON data below.
- If something is missing, explicitly say it is missing.
- Do NOT add external facts.

JSON:
{json.dumps(payload, ensure_ascii=False, indent=2)}
""".strip()

    response = client.responses.create(
        model="gpt-4.1-mini",
        input=prompt,
        max_output_tokens=900
    )

    # SDK convenience field
    return response.output_text

# PDF
def text_to_pdf(
    text: str,
    pdf_path: str,
    pagesize=LETTER,
    font_name="Helvetica",
    font_size=11,
    margin=54
):
    c = canvas.Canvas(pdf_path, pagesize=pagesize)
    width, height = pagesize

    x = margin
    y = height - margin
    line_height = font_size * 1.35
    max_width = width - 2 * margin

    c.setFont(font_name, font_size)

    def wrap_line(line: str):
        words = line.split()
        if not words:
            return [""]

        lines = []
        cur = words[0]
        for w in words[1:]:
            test = cur + " " + w
            if stringWidth(test, font_name, font_size) <= max_width:
                cur = test
            else:
                lines.append(cur)
                cur = w
        lines.append(cur)
        return lines

    for raw in text.replace("\r\n", "\n").split("\n"):
        if raw.strip() == "":
            y -= line_height
            continue

        for wl in wrap_line(raw):
            if y < margin:
                c.showPage()
                c.setFont(font_name, font_size)
                y = height - margin
            c.drawString(x, y, wl)
            y -= line_height

    c.save()
    return pdf_path


price = 70.44
upside = float(intrinsic_value_per_share) / price - 1

buy_threshold = 0.20
sell_threshold = -0.12

if upside >= buy_threshold:
    rec = "Buy"
elif upside <= sell_threshold:
    rec = "Sell"
else:
    rec = "Hold"

payload = {
    "ticker": "KO",
    "market_price": float(price),
    "intrinsic_value_per_share": float(intrinsic_value_per_share),
    "upside": float(upside),
    "recommendation_rule": {
        "buy_threshold": buy_threshold,
        "sell_threshold": sell_threshold
    },
    "recommendation": rec,
    "valuation": {
        "method": "DCF (Two-stage)",
        "assumptions": {
            "wacc": float(wacc),
            "terminal_growth": float(g_terminal),
        },
        "outputs": {
            "intrinsic_value_per_share": float(intrinsic_value_per_share)
        }
    }
}

memo = generate_memo_text(payload)
pdf = text_to_pdf(memo, "KO_memo GUYU ZHANG.pdf")

print("Saved:", pdf)
print("Working dir:", os.getcwd())
print("rec used:", rec, "upside used:", upside)


Saved: KO_memo GUYU ZHANG.pdf
Working dir: /Users/skywalker
rec used: Sell upside used: -0.7938647214413574
