In [1]:
# ============================================================
# 014 TSE IPO Revenue Analysis
# ============================================================
#
# Overview
# --------
# This notebook analyzes pre-IPO revenue scale and trajectories of
# companies listed on the Tokyo Stock Exchange (TSE) Growth Market.
#
# Using historical IPO disclosure data, we construct empirical revenue
# benchmarks for the five fiscal years prior to IPO (N-5 to N-1).
# All revenue figures are normalized to Oku-yen (¥100 million),
# assuming a fixed FX rate of 1 USD = 150 JPY.
#
# The primary use case is to evaluate whether a prospective investment
# candidate’s projected pre-IPO revenue trajectory is broadly
# consistent with, above, or below historical TSE Growth IPO outcomes.
#
# Structure
# ---------
# 1. Data Loading and Normalization
#    - Load normalized IPO revenue data (NASDAQ / NYSE / TSE mixed)
#    - Filter to TSE Growth IPOs only
#    - Convert revenues to Oku-yen using a fixed FX assumption
#
# 2. Pre-IPO Year Alignment
#    - Define N-1 as the last completed fiscal year before IPO
#    - Align revenues to relative pre-IPO years (N-5 to N-1)
#
# 3. Benchmark Construction
#    - Compute empirical p25 / median / p75 revenue benchmarks
#    - Track sample sizes for each pre-IPO year
#
# 4. Forward-Looking Comparison
#    - Manually input projected pre-IPO revenues for a target company
#    - Evaluate percentile positioning vs. TSE Growth benchmarks
#    - Visualize the target revenue trajectory against benchmark
#      ranges to support interpretation and discussion
#
# Notes
# -----
# - Benchmarks reflect realized IPO outcomes and should be interpreted
#   as reference ranges rather than targets or guarantees.
# - Sample sizes decrease for earlier pre-IPO years (e.g., N-5), so
#   benchmarks for those years should be interpreted with appropriate
#   caution.
#
# ============================================================


In [2]:
# ------------------------------------------------------------
# 1. Data Loading and Normalization
# ------------------------------------------------------------
#
# Load the normalized IPO revenue dataset and restrict the
# analysis universe to companies listed on the TSE Growth Market.
#
# All revenue figures are converted from USD to Japanese Yen
# using a fixed exchange rate (1 USD = 150 JPY) and scaled to
# Oku-yen (¥100 million) for consistency with TSE disclosures.
#
# ------------------------------------------------------------

import pandas as pd
import numpy as np

# Settings used throughout the notebook
csv_path = "data/outputs/20251225_073815/filtered_revenue_long.csv"
USD_JPY = 150.0   # fixed FX assumption: JPY per 1 USD
KEY  = "company_key"
YEAR = "fiscal_year"

# Read the long-format revenue table (one row per company / fiscal year)
df = pd.read_csv(csv_path)

# Canonical market label: upper-case with spaces turned into underscores,
# so spelling variants of "TSE Growth" can be matched with a plain isin().
df["market_norm"] = df["market"].astype(str).str.upper().str.replace(" ", "_")

# Keep TSE Growth rows, derive JPY and Oku-yen revenue, drop rows that lack
# a company key / fiscal year / revenue, and store the fiscal year as int.
# NOTE(review): the FX conversion is applied to every row regardless of the
# `unit` column — this assumes all revenues are denominated in USD; confirm.
tse = (
    df.loc[df["market_norm"].isin(["TSE_GROWTH", "TSE_GROWTH_MARKET", "TSEGROWTH"])]
    .assign(
        revenue_jpy=lambda frame: frame["revenue"] * USD_JPY,
        revenue_oku=lambda frame: frame["revenue_jpy"] / 1e8,   # 1 Oku = 1e8 JPY
    )
    .dropna(subset=[KEY, YEAR, "revenue_oku"])
    .astype({YEAR: int})
)

tse.head()


Unnamed: 0,company_key,market,company_name,ticker,cik,fiscal_year,revenue,unit,market_norm,revenue_jpy,revenue_oku
252,TSE Growth||338A||（株）ＺｅｎｍｕＴｅｃｈ,TSE Growth,（株）ＺｅｎｍｕＴｅｃｈ,338A,,2019,900806.7,USD,TSE_GROWTH,135121000.0,1.35121
253,TSE Growth||334A||（株）ビジュアル・プロセッシング・ジャパン,TSE Growth,（株）ビジュアル・プロセッシング・ジャパン,334A,,2019,5020740.0,USD,TSE_GROWTH,753111000.0,7.53111
254,TSE Growth||480A||（株）リブ・コンサルティング,TSE Growth,（株）リブ・コンサルティング,480A,,2020,14738850.0,USD,TSE_GROWTH,2210827000.0,22.10827
255,TSE Growth||479A||PRONI（株）,TSE Growth,PRONI（株）,479A,,2020,5818660.0,USD,TSE_GROWTH,872799000.0,8.72799
256,TSE Growth||472A||（株）ミラティブ,TSE Growth,（株）ミラティブ,472A,,2020,13279910.0,USD,TSE_GROWTH,1991987000.0,19.91987


In [3]:
# ------------------------------------------------------------
# 2. Pre-IPO Year Alignment
# ------------------------------------------------------------
#
# Align revenue observations to relative pre-IPO years in order
# to make revenue trajectories comparable across companies.
#
# We define N-1 as the last completed fiscal year prior to IPO.
# In this simplified implementation, N-1 is approximated as the
# most recent fiscal year available for each company in the
# historical IPO dataset.
#
# Revenue years are then expressed in relative terms (N-5 to N-1),
# enabling cross-sectional benchmarking of pre-IPO revenue scale
# and growth patterns.
#
# ------------------------------------------------------------

# N-1 proxy: the latest fiscal year on record for each company
nminus1 = (
    tse.groupby(KEY, as_index=False)[YEAR]
    .max()
    .rename(columns={YEAR: "n_minus_1"})
)

# Attach N-1 to every observation and express each fiscal year as an
# offset from it (rel_year 0 = N-1, -1 = N-2, ..., -4 = N-5)
panel = (
    tse.merge(nminus1, on=KEY, how="left")
    .assign(rel_year=lambda frame: frame[YEAR] - frame["n_minus_1"])
)

# Keep only offsets -4..0 (both ends inclusive), i.e. N-5 through N-1
panel_preipo = panel.loc[
    (panel["rel_year"] >= -4) & (panel["rel_year"] <= 0)
].copy()

panel_preipo.head()


Unnamed: 0,company_key,market,company_name,ticker,cik,fiscal_year,revenue,unit,market_norm,revenue_jpy,revenue_oku,n_minus_1,rel_year
0,TSE Growth||338A||（株）ＺｅｎｍｕＴｅｃｈ,TSE Growth,（株）ＺｅｎｍｕＴｅｃｈ,338A,,2019,900806.7,USD,TSE_GROWTH,135121000.0,1.35121,2023,-4
1,TSE Growth||334A||（株）ビジュアル・プロセッシング・ジャパン,TSE Growth,（株）ビジュアル・プロセッシング・ジャパン,334A,,2019,5020740.0,USD,TSE_GROWTH,753111000.0,7.53111,2023,-4
2,TSE Growth||480A||（株）リブ・コンサルティング,TSE Growth,（株）リブ・コンサルティング,480A,,2020,14738850.0,USD,TSE_GROWTH,2210827000.0,22.10827,2024,-4
3,TSE Growth||479A||PRONI（株）,TSE Growth,PRONI（株）,479A,,2020,5818660.0,USD,TSE_GROWTH,872799000.0,8.72799,2024,-4
4,TSE Growth||472A||（株）ミラティブ,TSE Growth,（株）ミラティブ,472A,,2020,13279910.0,USD,TSE_GROWTH,1991987000.0,19.91987,2024,-4


In [4]:
# ------------------------------------------------------------
# 3. Benchmark Construction
# ------------------------------------------------------------
#
# Construct empirical revenue benchmarks for TSE Growth IPOs
# based on realized pre-IPO outcomes.
#
# For each relative pre-IPO year (N-5 to N-1), we compute the
# 25th percentile, median (50th percentile), and 75th percentile
# of revenue in Oku-yen. These statistics represent the
# cross-sectional distribution of revenue scale at which
# companies have historically gone public on the TSE Growth
# Market.
#
# Sample sizes are also tracked for each relative year to
# provide transparency on data coverage, particularly for
# earlier pre-IPO years.
#
# ------------------------------------------------------------

# One row per relative pre-IPO year with the 25th / 50th / 75th percentile
# of revenue (Oku-yen) and the number of non-null observations behind them.
bench_df = (
    panel_preipo[["rel_year", "revenue_oku"]]
    .groupby("rel_year")["revenue_oku"]
    .agg(**{
        "p25": lambda revenues: np.percentile(revenues, 25),
        "median": lambda revenues: np.percentile(revenues, 50),
        "p75": lambda revenues: np.percentile(revenues, 75),
        "n": "count",
    })
    .sort_index()      # chronological order: -4 (N-5) first, 0 (N-1) last
    .reset_index()
)

bench_df


Unnamed: 0,rel_year,p25,median,p75,n
0,-4,5.947777,10.89034,22.062748,30
1,-3,5.915125,12.80105,32.243387,32
2,-2,6.047087,15.223405,33.272245,34
3,-1,9.63054,21.26036,37.560975,34
4,0,11.235435,24.15368,52.485213,34


In [5]:
# ------------------------------------------------------------
# 4. Forward-Looking Comparison
# ------------------------------------------------------------
#
# Manually input a prospective company's projected pre-IPO
# revenues (N-5 to N-1, unit: Oku-yen / ¥100 million) and
# compare them against historical TSE Growth IPO benchmarks.
#
# Clicking "Update" refreshes the results and immediately shows:
# (1) a table with each year's percentile position vs. the
#     empirical TSE Growth distribution, and
# (2) a chart overlaying the target trajectory on the benchmark
#     band (p25–p75) and median.
#
# ------------------------------------------------------------


import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Free-text field for the target company's name (also used as the chart legend label)
company_name = widgets.Text(
    description="Company:",
    placeholder="Target company name",
    layout=widgets.Layout(width="400px"),
)

# Heading that states the unit expected by the numeric fields below
unit_label = widgets.HTML("<b>Revenue Input (Unit: Oku-yen / ¥100 million)</b>")

# One numeric field per relative year, keyed by rel_year:
# -4 -> "N-5", -3 -> "N-4", -2 -> "N-3", -1 -> "N-2", 0 -> "N-1"
revenue_inputs = {
    rel: widgets.FloatText(description=f"N-{1 - rel}")
    for rel in range(-4, 1)
}

# Trigger button and the output area that the callback renders into
run_button = widgets.Button(description="Update", button_style="primary")
output = widgets.Output()

def compute_percentile(rel_year, value, panel=None):
    """Return the percentile rank (0-100) of `value` within one pre-IPO year.

    The rank is the share of historical observations for `rel_year` whose
    revenue is less than or equal to `value`, expressed in percent.

    Parameters
    ----------
    rel_year : int
        Relative pre-IPO year to compare against (0 = N-1, -4 = N-5).
    value : float or None
        Revenue to position, in the same unit as the `revenue_oku` column.
    panel : pandas.DataFrame, optional
        Frame with `rel_year` and `revenue_oku` columns. Defaults to the
        notebook-level `panel_preipo`.

    Returns
    -------
    float
        Percentile in [0, 100], or NaN when `value` is missing (None / NaN)
        or when there are no observations for `rel_year`.
    """
    if panel is None:
        panel = panel_preipo

    # A NaN input would compare False against every observation and be
    # reported as the 0th percentile; treat it as missing instead.
    if value is None or pd.isna(value):
        return np.nan

    dist = panel.loc[panel["rel_year"] == rel_year, "revenue_oku"].dropna().values
    if len(dist) == 0:
        return np.nan

    return (dist <= value).mean() * 100

def rel_to_label(rel):
    """Translate a relative-year offset into its "N-k" label (0 -> "N-1", -4 -> "N-5")."""
    years_before_ipo = abs(rel) + 1
    return "N-" + str(years_before_ipo)

def render_plot(input_df):
    """
    Draw the target company's revenue path on top of the benchmark band.

    input_df: columns = ["rel_year", "revenue_oku"] (already sorted)
    Reads the notebook-level bench_df for p25 / median / p75 and the
    company_name widget for the legend label. Prints a hint and draws
    nothing when input_df has no rows.
    """
    if input_df.empty:
        print("Please enter at least one revenue value.")
        return

    # Left join: benchmarks are drawn only for the years present in input_df
    viz_df = input_df.merge(bench_df, on="rel_year", how="left").sort_values("rel_year")
    years = viz_df["rel_year"]
    target_label = company_name.value.strip() or "Target Company"

    fig, ax = plt.subplots(figsize=(9, 4))

    # Benchmark: interquartile band plus median line
    ax.fill_between(years, viz_df["p25"], viz_df["p75"], alpha=0.2, label="p25–p75 range")
    ax.plot(years, viz_df["median"], linewidth=2, label="Median (TSE Growth)")

    # Target company trajectory
    ax.plot(years, viz_df["revenue_oku"], marker="o", linewidth=2, label=target_label)

    # Show N-5..N-1 on the x axis instead of the raw offsets
    ax.set_xticks(years)
    ax.set_xticklabels([rel_to_label(int(r)) for r in years])

    ax.set_ylabel("Revenue (Oku-yen)")
    ax.set_title("Pre-IPO Revenue Trajectory vs. TSE Growth Benchmarks")
    ax.legend()
    ax.grid(axis="y", alpha=0.3)
    plt.show()

def run_comparison(_):
    """Button callback: rebuild the percentile table and chart from the inputs.

    Reads the five `revenue_inputs` widgets, shows a table with each year's
    revenue and its percentile position versus the historical distribution,
    and then plots the entered years against the benchmark band. Everything
    is rendered inside the `output` widget, replacing the previous result.

    A FloatText field that was never filled in holds 0.0 (not None / NaN),
    so non-positive or non-finite values are treated as "not entered": they
    appear as NaN in the table and are left out of the chart. Without this,
    blank years would be scored and plotted as real zero-revenue years and
    the "enter at least one value" hint could never be shown.

    Parameters
    ----------
    _ : object
        The button instance passed by `on_click`; unused.
    """
    with output:
        clear_output(wait=True)

        # Build one table row per relative year
        rows = []
        for rel, w in revenue_inputs.items():
            raw = w.value
            entered = raw is not None and np.isfinite(raw) and raw > 0
            rows.append({
                "rel_year": rel,
                "Year": rel_to_label(rel),
                "Revenue (Oku-yen)": float(raw) if entered else np.nan,
                # Only score years that were actually filled in
                "Percentile (%)": compute_percentile(rel, raw) if entered else np.nan,
            })

        # Chronological order: N-5 first, N-1 last
        result_df = pd.DataFrame(rows).sort_values("rel_year")

        # Display table (rel_year is an internal key, not shown)
        display(result_df.drop(columns=["rel_year"]))

        # Plot only the years that were entered
        input_df = (
            result_df[["rel_year", "Revenue (Oku-yen)"]]
            .rename(columns={"Revenue (Oku-yen)": "revenue_oku"})
            .dropna(subset=["revenue_oku"])
            .sort_values("rel_year")
        )

        render_plot(input_df)

# Re-run the comparison each time "Update" is clicked
run_button.on_click(run_comparison)

# Form layout: name, unit heading, two rows of year inputs
# (N-5..N-3, then N-2..N-1), the button, and the results area.
ui = widgets.VBox([
    company_name,
    unit_label,
    widgets.HBox([revenue_inputs[rel] for rel in (-4, -3, -2)]),
    widgets.HBox([revenue_inputs[rel] for rel in (-1, 0)]),
    run_button,
    output,
])

display(ui)

VBox(children=(Text(value='', description='Company:', layout=Layout(width='400px'), placeholder='Target compan…