# Titre 1 : Audit via données randomisées 

In [1]:
# STEP 1 — Setup (10Y version)
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

ticker = "AAPL"
tkr = yf.Ticker(ticker)

# ✅ Quarterly statements (gives a longer history than annual)
income = tkr.quarterly_financials
bs = tkr.quarterly_balance_sheet
cf = tkr.quarterly_cashflow

print("✅ Loaded yfinance QUARTERLY statements for:", ticker)
print("Income shape:", income.shape)
print("Balance sheet shape:", bs.shape)
print("Cashflow shape:", cf.shape)


  end = pd.Timestamp.utcnow().ceil("D")
  end = pd.Timestamp.utcnow().ceil("D")
  end = pd.Timestamp.utcnow().ceil("D")


✅ Loaded yfinance QUARTERLY statements for: AAPL
Income shape: (33, 5)
Balance sheet shape: (65, 6)
Cashflow shape: (46, 7)


In [2]:
# STEP 2 — Inspect available row names (10Y / quarterly)
# (same idea, just keep it clean + show how many rows/cols you have)

print("=== INCOME STATEMENT rows (first 40) ===")
print(income.index.tolist()[:40])
print("Columns (dates):", list(income.columns[:8]), "...")

print("\n=== BALANCE SHEET rows (first 40) ===")
print(bs.index.tolist()[:40])
print("Columns (dates):", list(bs.columns[:8]), "...")

print("\n=== CASHFLOW rows (first 40) ===")
print(cf.index.tolist()[:40])
print("Columns (dates):", list(cf.columns[:8]), "...")


=== INCOME STATEMENT rows (first 40) ===
['Tax Effect Of Unusual Items', 'Tax Rate For Calcs', 'Normalized EBITDA', 'Net Income From Continuing Operation Net Minority Interest', 'Reconciled Depreciation', 'Reconciled Cost Of Revenue', 'EBITDA', 'EBIT', 'Normalized Income', 'Net Income From Continuing And Discontinued Operation', 'Total Expenses', 'Total Operating Income As Reported', 'Diluted Average Shares', 'Basic Average Shares', 'Diluted EPS', 'Basic EPS', 'Diluted NI Availto Com Stockholders', 'Net Income Common Stockholders', 'Net Income', 'Net Income Including Noncontrolling Interests', 'Net Income Continuous Operations', 'Tax Provision', 'Pretax Income', 'Other Income Expense', 'Other Non Operating Income Expenses', 'Operating Income', 'Operating Expense', 'Research And Development', 'Selling General And Administration', 'Gross Profit', 'Cost Of Revenue', 'Total Revenue', 'Operating Revenue']
Columns (dates): [Timestamp('2025-12-31 00:00:00'), Timestamp('2025-09-30 00:00:00'), 

In [5]:
# STEP 3 FIX — You don't have 10Y because yfinance quarterly fundamentals are LIMITED.
# Let's CHECK how many quarters you actually received, then build "as much history as available".

print("Income quarters:", len(income.columns))
print("Balance sheet quarters:", len(bs.columns))
print("Cashflow quarters:", len(cf.columns))

print("\nIncome date range:", min(income.columns), "→", max(income.columns))
print("BS date range:", min(bs.columns), "→", max(bs.columns))
print("CF date range:", min(cf.columns), "→", max(cf.columns))


Income quarters: 5
Balance sheet quarters: 6
Cashflow quarters: 7

Income date range: 2024-12-31 00:00:00 → 2025-12-31 00:00:00
BS date range: 2024-09-30 00:00:00 → 2025-12-31 00:00:00
CF date range: 2024-06-30 00:00:00 → 2025-12-31 00:00:00


In [6]:
# STEP 3 (robust) — build annual from whatever quarters exist (no forced 10Y)

def pick_q(df, row_name, col_name):
    s = df.loc[row_name]
    s.index = pd.to_datetime(s.index)
    s = pd.to_numeric(s, errors="coerce").sort_index()
    s.name = col_name
    return s

def fiscal_year_from_date(dt):
    return dt.year + (1 if dt.month >= 10 else 0)

q = pd.concat([
    pick_q(income, "Total Revenue", "revenue"),
    pick_q(income, "Net Income", "net_income"),
    pick_q(income, "EBIT", "ebit"),
    pick_q(income, "Diluted Average Shares", "diluted_shares"),

    pick_q(bs, "Total Assets", "assets"),
    pick_q(bs, "Stockholders Equity", "equity"),
    pick_q(bs, "Total Debt", "total_debt"),

    pick_q(cf, "Free Cash Flow", "fcf"),
    pick_q(cf, "Repurchase Of Capital Stock", "buybacks"),
    pick_q(cf, "Capital Expenditure", "capex"),
    pick_q(cf, "Cash Dividends Paid", "dividends_paid"),
], axis=1)

q["fiscal_year"] = [fiscal_year_from_date(d) for d in q.index]

flow_cols = ["revenue","net_income","ebit","fcf","buybacks","capex","dividends_paid"]
stock_cols = ["assets","equity","total_debt","diluted_shares"]

annual_flow = q.groupby("fiscal_year")[flow_cols].sum(min_count=1)
annual_stock = q.groupby("fiscal_year")[stock_cols].last()

data = annual_flow.join(annual_stock, how="inner").sort_index()

# drop any fully-empty fiscal year (like your 2024 NaN row)
data = data.dropna(subset=["revenue","net_income","assets","equity"], how="any")

# derived metrics
data["net_margin"] = data["net_income"] / data["revenue"]
data["roe"] = data["net_income"] / data["equity"]
data["leverage"] = data["assets"] / data["equity"]
data["eps"] = data["net_income"] / data["diluted_shares"]
data["buybacks_to_fcf"] = data["buybacks"] / data["fcf"]
data["asset_turnover"] = data["revenue"] / data["assets"]

print("✅ Fiscal years available:", data.index.tolist())
data


✅ Fiscal years available: [2025, 2026]


  q = pd.concat([


Unnamed: 0_level_0,revenue,net_income,ebit,fcf,buybacks,capex,dividends_paid,assets,equity,total_debt,diluted_shares,net_margin,roe,leverage,eps,buybacks_to_fcf,asset_turnover
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2025,416161000000.0,112010000000.0,133050000000.0,98767000000.0,-90711000000.0,-12715000000.0,-15421000000.0,359241000000.0,73733000000.0,98657000000.0,14863610000.0,0.269151,1.51913,4.872187,7.535855,-0.918434,1.158445
2026,143756000000.0,42097000000.0,50852000000.0,51552000000.0,-24701000000.0,-2373000000.0,-3921000000.0,379297000000.0,88190000000.0,90509000000.0,14810360000.0,0.292836,0.477344,4.300907,2.842403,-0.479147,0.379006


In [1]:
! pip install matplotlib seaborn

Collecting matplotlib
  Downloading matplotlib-3.10.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (52 kB)
Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.61.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (114 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (6.3 kB)
Collecting pillow>=8 (from matplotlib)
  Downloading pillow-12.1.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.8 kB)
Collecting pyparsing>=3 (from matp

In [2]:
#première cellule : est la cellule des impprts de packages
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt #ggplot2 en R
import seaborn as sns


In [3]:
#cell 2 is about configuration
sns.set_theme(style="whitegrid")

In [None]:
#pour l'exercice d'un dataset
#on utilise NUMPY