In [1]:
# Install required libraries for PDF generation / progress display
!pip -q install reportlab tqdm


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/2.0 MB[0m [31m5.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━[0m [32m1.7/2.0 MB[0m [31m24.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# --- Imports & params
import os, zipfile, textwrap
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from datetime import datetime
from tqdm import tqdm

# ReportLab for PDFs
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch
from reportlab.lib.utils import ImageReader

# Colab file download helper (optional).
# Only a failed import means "not running in Colab"; catching ImportError
# (instead of a bare except) keeps KeyboardInterrupt / SystemExit and
# unrelated errors from being silently swallowed.
try:
    from google.colab import files as colab_files
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# ---------- User-editable parameter ----------
CANDIDATE = "ds_RishabhVerma"   # <- CHANGE this before final packaging to your name/ID

# ---------- Paths ----------
# Colab's default working dir is /content. When that directory does not exist
# (e.g. a local Jupyter session) fall back to the current working directory so
# the notebook does not try to create its output tree under the filesystem root.
WORKDIR = "/content" if os.path.isdir("/content") else os.getcwd()
FG_PATH = os.path.join(WORKDIR, "fear_greed_index.csv")
HIST_PATH = os.path.join(WORKDIR, "historical_data.csv")

# Every generated artifact lives under OUTPUT_ROOT, one sub-folder per artifact type.
OUTPUT_ROOT = os.path.join(WORKDIR, "outputs_final")
FIG_GLOBAL   = os.path.join(OUTPUT_ROOT, "global_figs")
FIG_TRADERS  = os.path.join(OUTPUT_ROOT, "trader_figs")
REPORTS_MD   = os.path.join(OUTPUT_ROOT, "reports_md")
REPORTS_PDF  = os.path.join(OUTPUT_ROOT, "reports_pdf")
MASTER_DIR   = os.path.join(OUTPUT_ROOT, "master")

# exist_ok=True makes this cell safe to re-run.
for d in [OUTPUT_ROOT, FIG_GLOBAL, FIG_TRADERS, REPORTS_MD, REPORTS_PDF, MASTER_DIR]:
    os.makedirs(d, exist_ok=True)

# Default look for every figure produced below.
plt.rcParams["figure.figsize"] = (10,4)
plt.rcParams["axes.grid"] = True

print("Working dir:", WORKDIR)
print("Expecting files:", FG_PATH, HIST_PATH)
print("Outputs:", OUTPUT_ROOT)


Working dir: /content
Expecting files: /content/fear_greed_index.csv /content/historical_data.csv
Outputs: /content/outputs_final


In [3]:
# --- LOAD with checks
assert os.path.exists(FG_PATH), f"Missing {FG_PATH} — upload fear_greed_index.csv to Colab files."
assert os.path.exists(HIST_PATH), f"Missing {HIST_PATH} — upload historical_data.csv to Colab files."

# Read both raw CSVs
fear_greed = pd.read_csv(FG_PATH)
historical = pd.read_csv(HIST_PATH)

# Fear & Greed dataset date: yyyy-mm-dd.
# Prefer a 'date' / 'Date' column; otherwise parse the first column without a fixed format.
# Unparseable values become NaT (errors='coerce').
if 'date' in fear_greed.columns:
    fear_greed['date'] = pd.to_datetime(fear_greed['date'], format='%Y-%m-%d', errors='coerce')
elif 'Date' in fear_greed.columns:
    fear_greed['date'] = pd.to_datetime(fear_greed['Date'], format='%Y-%m-%d', errors='coerce')
else:
    fear_greed['date'] = pd.to_datetime(fear_greed.iloc[:,0], errors='coerce')

# Historical dataset timestamp parse (explicit format dd-mm-YYYY HH:MM)
ts_candidates = ['Timestamp IST','Timestamp','timestamp','time','Time']
TS_COL = next((c for c in ts_candidates if c in historical.columns), None)
if TS_COL is None:
    raise ValueError("No timestamp column found in historical_data.csv. Expected 'Timestamp IST' or similar.")

# Keep the raw values around: once a column has been coerced, the failed rows
# are already NaT, so a fallback that re-parses the coerced column can never
# recover them. The flexible fallback therefore has to start from the raw data.
raw_ts = historical[TS_COL]
parsed_ts = pd.to_datetime(raw_ts, format='%d-%m-%Y %H:%M', errors='coerce')
# fallback flexible parse if many NaT
if parsed_ts.isna().mean() > 0.5:
    parsed_ts = pd.to_datetime(raw_ts, errors='coerce')
historical[TS_COL] = parsed_ts

# create merge date (date only)
historical['date'] = historical[TS_COL].dt.date
historical['date'] = pd.to_datetime(historical['date'], format='%Y-%m-%d', errors='coerce')

# map sentiment from fear_greed (use provided 'classification' or 'Classification' column,
# otherwise the second column of the file)
fg_sent_col = 'classification' if 'classification' in fear_greed.columns else ('Classification' if 'Classification' in fear_greed.columns else fear_greed.columns[1])
# Series.map needs a uniquely-indexed lookup: drop rows whose date failed to
# parse and keep a single (the last) row per date, otherwise duplicate or NaT
# dates make the map call raise.
sentiment_by_date = (
    fear_greed.dropna(subset=['date'])
              .drop_duplicates(subset='date', keep='last')
              .set_index('date')[fg_sent_col]
)
historical['sentiment'] = historical['date'].map(sentiment_by_date)

# Column picks - require at least Account, Closed PnL
def pick_col(df, choices):
    """Return the first entry of `choices` that is a column of `df`, or None if none match."""
    return next((name for name in choices if name in df.columns), None)

# Resolve the column names actually present in this export.
COL_ACCOUNT = pick_col(historical, ["Account","account","ACCOUNT","Trader","Client","Account ID"])
COL_PNL     = pick_col(historical, ["Closed PnL","closedPnL","ClosedPnL","closed_pnl","PnL","pnl"])
COL_SIZE    = pick_col(historical, ["Size USD","size_usd","sizeUSD","Size","Notional"])
COL_COIN    = pick_col(historical, ["Coin","symbol","Symbol","Ticker"])
COL_TS      = TS_COL
COL_SENT    = "sentiment"

# Account, PnL and timestamp are mandatory; size and coin are optional.
if None in (COL_ACCOUNT, COL_PNL, COL_TS):
    raise ValueError("Missing required columns: need Account, Closed PnL, and Timestamp.")

# Clean numeric columns: strip thousands separators / dollar signs, then
# convert to numbers (anything unparseable becomes NaN).
for numeric_col in (COL_PNL, COL_SIZE):
    if not numeric_col:
        continue  # optional column not present in this file
    stripped = historical[numeric_col].astype(str).str.replace('[,$]', '', regex=True)
    historical[numeric_col] = pd.to_numeric(stripped, errors='coerce')

# Base engineered columns, on a frame ordered by account and then time.
df = historical.sort_values(by=[COL_ACCOUNT, COL_TS]).reset_index(drop=True)
df['Win'] = df[COL_PNL].gt(0).astype(int)               # 1 only for strictly positive PnL
df['Hour'] = df[COL_TS].dt.hour.fillna(-1).astype(int)  # -1 marks rows without a timestamp
df['Day'] = df[COL_TS].dt.date

print("Loaded: rows=", len(df), "unique traders=", df[COL_ACCOUNT].nunique())
display(df.head(3))


Loaded: rows= 211224 unique traders= 32


Unnamed: 0,Account,Coin,Execution Price,Size Tokens,Size USD,Side,Timestamp IST,Start Position,Direction,Closed PnL,...,Order ID,Crossed,Fee,Trade ID,Timestamp,date,sentiment,Win,Hour,Day
0,0x083384f897ee0f19899168e3b1bec365f52a9012,ETH,3209.6,0.7145,2293.26,SELL,2024-11-11 08:28:00,0.0,Open Short,0.0,...,45991568323,True,0.80264,526000000000000.0,1730000000000.0,2024-11-11,Extreme Greed,0,8,2024-11-11
1,0x083384f897ee0f19899168e3b1bec365f52a9012,ETH,3209.6,2.0,6419.2,SELL,2024-11-11 08:28:00,-0.7145,Open Short,0.0,...,45991568323,True,2.24672,578000000000000.0,1730000000000.0,2024-11-11,Extreme Greed,0,8,2024-11-11
2,0x083384f897ee0f19899168e3b1bec365f52a9012,ETH,3209.6,4.6273,14851.78,SELL,2024-11-11 08:28:00,-2.7145,Open Short,0.0,...,45991568323,True,5.198123,549000000000000.0,1730000000000.0,2024-11-11,Extreme Greed,0,8,2024-11-11


In [4]:
# Engineering per trader
def engineer_trader(t, window=30):
    """Add equity, drawdown and rolling-performance columns to one trader's trades.

    Parameters
    ----------
    t : DataFrame holding a single account's trades; must contain the
        COL_TS, COL_PNL and 'Win' columns.
    window : number of trades in each rolling window (default 30).

    Returns
    -------
    A time-sorted copy of `t` with these extra columns:
    CumPnL / Equity (running sum of COL_PNL), Peak (running maximum of Equity),
    Drawdown (Equity - Peak), DrawdownPct, RollingWinRate (percent) and
    RollingSharpe (rolling mean PnL divided by rolling population std).
    """
    # mergesort is stable: trades sharing a timestamp keep their incoming
    # order, so the cumulative equity path does not depend on how the sort
    # algorithm happens to break ties.
    t = t.sort_values(COL_TS, kind='mergesort').copy()
    t['CumPnL'] = t[COL_PNL].cumsum()
    t['Equity'] = t['CumPnL']
    t['Peak'] = t['Equity'].cummax()
    t['Drawdown'] = t['Equity'] - t['Peak']

    # A percentage drawdown is only meaningful relative to a positive peak.
    # Dividing by a negative peak would flip the sign and report positive
    # "drawdowns", so zero and negative peaks are both reported as 0.0.
    # Rows whose peak is NaN (missing PnL) stay NaN.
    peak = t['Peak']
    has_positive_peak = peak > 0
    drawdown_pct = t['Drawdown'] / peak.where(has_positive_peak) * 100
    t['DrawdownPct'] = drawdown_pct.where(has_positive_peak | peak.isna(), 0.0)

    t['RollingWinRate'] = t['Win'].rolling(window).mean() * 100
    roll_mean = t[COL_PNL].rolling(window).mean()
    roll_std  = t[COL_PNL].rolling(window).std(ddof=0)
    # The small epsilon avoids division by zero when every PnL in the window is identical.
    t['RollingSharpe'] = roll_mean / (roll_std + 1e-9)
    return t

# Run the per-trader engineering once for each account and key the results by account.
trader_dfs = {
    account: engineer_trader(trades)
    for account, trades in df.groupby(COL_ACCOUNT)
}

print("Built trader_dfs for", len(trader_dfs), "traders.")


Built trader_dfs for 32 traders.


In [5]:
# Helper to save figures
def savefig(p):
    """Write the current matplotlib figure to path `p` (tight bounding box, 150 dpi), then close it."""
    plt.savefig(p, dpi=150, bbox_inches='tight')
    plt.close()

# 1) Aggregate equity curve (sum of PnL over time)
plt.figure()
agg = df.groupby(COL_TS)[COL_PNL].sum().cumsum()
plt.plot(agg.index, agg.values)
plt.title("Aggregate Equity Curve (All Traders)")
plt.xlabel("Time"); plt.ylabel("Cum PnL")
savefig(os.path.join(FIG_GLOBAL, "01_agg_equity.png"))

# 2) Trade frequency by day
plt.figure()
df.groupby('Day').size().plot(kind='bar')
plt.title("Trade Frequency by Day")
plt.xlabel("Day"); plt.ylabel("Trades")
savefig(os.path.join(FIG_GLOBAL, "02_trade_freq_day.png"))

# 3) PnL distribution
plt.figure()
plt.hist(df[COL_PNL].dropna().values, bins=80)
plt.title("PnL Distribution (All Trades)")
plt.xlabel("PnL"); plt.ylabel("Frequency")
savefig(os.path.join(FIG_GLOBAL, "03_pnl_hist.png"))

# 4) PnL by sentiment (boxplot)
plt.figure()
# Labels and data are taken from the same groupby pass. groupby yields keys in
# sorted order whereas unique() yields them in order of appearance, so building
# the two lists separately would attach the wrong label to each box.
sent_groups = [(str(s), g[COL_PNL].dropna().values) for s, g in df.groupby(COL_SENT)]
if not sent_groups:
    # No sentiment could be mapped at all: show a single box for everything.
    sent_groups = [("All", df[COL_PNL].dropna().values)]
labels = [name for name, _ in sent_groups]
groups = [values for _, values in sent_groups]
plt.boxplot(groups, showfliers=False)
plt.xticks(range(1, len(labels)+1), labels, rotation=30, ha='right')
plt.title("PnL by Sentiment (All Trades)")
savefig(os.path.join(FIG_GLOBAL, "04_pnl_by_sentiment.png"))

# 5) PnL by hour (box)
plt.figure()
byh = df.groupby('Hour')[COL_PNL].apply(lambda x: x.dropna().values)
# Always 24 boxes; hours without trades get an empty array.
data = [np.array(byh.get(h, np.array([]))) for h in range(24)]
plt.boxplot(data, showfliers=False)
plt.xticks(range(1,25), list(range(24)))
plt.title("PnL by Hour of Day")
savefig(os.path.join(FIG_GLOBAL, "05_pnl_by_hour.png"))

print("Saved global EDA figures to:", FIG_GLOBAL)


Saved global EDA figures to: /content/outputs_final/global_figs


In [6]:
from reportlab.lib.utils import ImageReader

TRADER_LIMIT = None   # set to an integer for testing (e.g., 5), set None for all
# Slicing with a stop of None keeps every account, so one expression covers both cases.
accounts = list(trader_dfs.keys())[:TRADER_LIMIT]

def plot_10_for_trader(acc, t):
    """Render and save the ten per-trader diagnostic figures.

    Parameters
    ----------
    acc : account identifier; shown in every title and, with "/" replaced by
        "_", used as the file-name prefix.
    t : that account's DataFrame as returned by engineer_trader.

    Returns
    -------
    dict mapping figure number (1..10) to the PNG path written under FIG_TRADERS.
    """
    tag = str(acc).replace("/","_")
    figs = {}

    def _save(num, stem):
        # Save and close the current figure as "<tag>_<stem>.png" and record its path.
        path = os.path.join(FIG_TRADERS, f"{tag}_{stem}.png")
        savefig(path)
        figs[num] = path

    # 1 Cum PnL
    plt.figure()
    plt.plot(t[COL_TS], t['Equity'])
    plt.title(f"{acc} - Cumulative PnL")
    _save(1, "01_cum_pnl")

    # 2 Histogram
    plt.figure()
    plt.hist(t[COL_PNL].dropna().values, bins=50)
    plt.title(f"{acc} - PnL Histogram")
    _save(2, "02_pnl_hist")

    # 3 Rolling WinRate
    plt.figure()
    plt.plot(t['RollingWinRate'].values)
    plt.title(f"{acc} - Rolling WinRate (30)")
    _save(3, "03_winrate")

    # 4 Size vs PnL scatter colored by sentiment codes
    sent_codes = pd.Categorical(t[COL_SENT].astype(str)).codes
    plt.figure()
    if COL_SIZE and COL_SIZE in t.columns:
        plt.scatter(t[COL_SIZE].fillna(0).values, t[COL_PNL].fillna(0).values, c=sent_codes, alpha=0.7)
        plt.xlabel("Size USD")
    else:
        # No size column: plot PnL against the trade's position in the frame instead.
        plt.scatter(np.arange(len(t)), t[COL_PNL].fillna(0).values, c=sent_codes, alpha=0.7)
        plt.xlabel("Trade Index")
    plt.title(f"{acc} - Size vs PnL (colored by sentiment)")
    _save(4, "04_size_vs_pnl")

    # 5 PnL by Sentiment boxplot
    # Labels and data come from the same groupby pass: groupby yields sorted
    # keys while unique() yields order of appearance, so building them
    # separately would put the wrong label under each box.
    sent_groups = [(str(s), g[COL_PNL].dropna().values) for s, g in t.groupby(COL_SENT)]
    if not sent_groups:
        sent_groups = [("All", t[COL_PNL].dropna().values)]
    plt.figure()
    plt.boxplot([values for _, values in sent_groups], showfliers=False)
    plt.xticks(range(1, len(sent_groups)+1), [name for name, _ in sent_groups], rotation=30, ha='right')
    plt.title(f"{acc} - PnL by Sentiment")
    _save(5, "05_pnl_by_sent")

    # 6 Coin heatmap (mean PnL per coin x sentiment)
    plt.figure()
    if COL_COIN and COL_COIN in t.columns:
        pv = t.pivot_table(index=COL_COIN, columns=COL_SENT, values=COL_PNL, aggfunc='mean')
        if pv.size>0:
            plt.imshow(pv.values, aspect='auto')
            plt.xticks(range(pv.shape[1]), [str(c) for c in pv.columns], rotation=30, ha='right')
            plt.yticks(range(pv.shape[0]), [str(i) for i in pv.index])
            plt.title(f"{acc} - Mean PnL Coin x Sentiment")
        else:
            plt.text(0.5,0.5,"Insufficient coin x sentiment data", ha='center'); plt.axis('off')
    else:
        plt.text(0.5,0.5,"No coin column", ha='center'); plt.axis('off')
    _save(6, "06_coin_sent_heatmap")

    # 7 Drawdown curve
    plt.figure()
    plt.plot(t[COL_TS], t['DrawdownPct'])
    plt.title(f"{acc} - Drawdown %")
    _save(7, "07_drawdown")

    # 8 Trade freq timeline (daily)
    plt.figure()
    t.groupby(t[COL_TS].dt.date).size().plot(kind='bar')
    plt.title(f"{acc} - Trade Freq by Day")
    _save(8, "08_trade_freq")

    # 9 Rolling Sharpe
    plt.figure()
    plt.plot(t['RollingSharpe'].values)
    plt.title(f"{acc} - Rolling Sharpe (30)")
    _save(9, "09_sharpe")

    # 10 PnL by hour (box); always 24 boxes, empty for hours without trades
    plt.figure()
    byh = t.groupby('Hour')[COL_PNL].apply(lambda x: x.dropna().values)
    data = [np.array(byh.get(h, np.array([]))) for h in range(24)]
    plt.boxplot(data, showfliers=False)
    plt.xticks(range(1,25), list(range(24)))
    plt.title(f"{acc} - PnL by Hour")
    _save(10, "10_pnl_by_hour")

    return figs

def write_trader_md_and_pdf(acc, t, figs):
    """Write the Markdown and PDF report for one trader.

    Parameters
    ----------
    acc : account identifier (used in headings and, with "/" replaced by "_",
        as the report file name).
    t : that account's DataFrame as returned by engineer_trader.
    figs : dict mapping figure number (1..10) to a PNG path, as returned by
        plot_10_for_trader.

    Returns
    -------
    (md_path, pdf_path) : paths of the two files written under REPORTS_MD and
    REPORTS_PDF.
    """
    tag = str(acc).replace('/','_')

    # Build text metrics
    total_trades = len(t)
    wins = int(t['Win'].sum())
    losses = total_trades - wins
    winrate = 100*wins/max(1,total_trades)   # max(1, ...) guards an empty frame
    total_pnl = float(t[COL_PNL].sum(skipna=True))
    max_dd = float(t['DrawdownPct'].min())
    valid_sharpe = t['RollingSharpe'].dropna()
    sharpe_last = float(valid_sharpe.iloc[-1]) if len(valid_sharpe) else np.nan
    sent_stats = t.groupby(COL_SENT)[COL_PNL].agg(['count','mean','sum']).reset_index()

    # Markdown
    md_lines = []
    md_lines.append(f"# Trader Report – {acc}\n")
    md_lines.append(f"**Total Trades:** {total_trades}  ")
    md_lines.append(f"**Wins / Losses:** {wins} / {losses} (Win Rate: {winrate:.2f}%)  ")
    md_lines.append(f"**Total PnL:** {total_pnl:.2f}  ")
    md_lines.append(f"**Max Drawdown (%):** {max_dd:.2f}  ")
    md_lines.append(f"**Last Rolling Sharpe (30):** {sharpe_last:.3f}\n")
    md_lines.append("## Sentiment Breakdown")
    for _, r in sent_stats.iterrows():
        md_lines.append(f"- **{r[COL_SENT]}** → count: {int(r['count'])}, mean: {r['mean']:.4f}, sum: {r['sum']:.4f}")
    md_lines.append("\n## Figures\n")
    for i in range(1,11):
        # Relative links keep the report portable together with the figure folder.
        md_lines.append(f"![fig{i}]({os.path.relpath(figs[i], start=REPORTS_MD)})")
    md_path = os.path.join(REPORTS_MD, f"{tag}.md")
    with open(md_path, "w", encoding="utf-8") as f:
        f.write("\n\n".join(md_lines))

    # PDF (simple)
    pdf_path = os.path.join(REPORTS_PDF, f"{tag}.pdf")
    c = canvas.Canvas(pdf_path, pagesize=A4)
    W, H = A4
    c.setFont("Helvetica-Bold", 16); c.drawCentredString(W/2, H-1.0*inch, f"Trader Report – {acc}")
    y = H-1.6*inch
    c.setFont("Helvetica", 11)
    # md_lines[1:6] are the five headline metrics. The title line (index 0) is
    # already drawn above, and the markdown bold markers would otherwise show
    # up literally in the PDF, so they are stripped here.
    for ln in md_lines[1:6]:
        plain = ln.replace("**", "").strip()
        for chunk in textwrap.wrap(plain, width=95):
            c.drawString(1*inch, y, chunk); y -= 14

    # Place figures two per page
    idxs = list(range(1,11))
    for i in range(0, len(idxs), 2):
        c.showPage()
        c.setFont("Helvetica-Bold", 14); c.drawCentredString(W/2, H-1.0*inch, f"{acc} - Figures")
        y_img = H-1.4*inch
        for j in idxs[i:i+2]:
            p = figs[j]
            if not os.path.exists(p):
                continue
            try:
                img = ImageReader(p)
                iw, ih = img.getSize()
                # Target 3in height; shrink to the printable width if too wide.
                aspect = iw/ih; h = 3.0*inch; w = h*aspect
                if w > (W-2*inch): w = W-2*inch; h = w/aspect
                x = (W-w)/2
                # Reuse the already-decoded ImageReader instead of re-reading the file.
                c.drawImage(img, x, y_img-h, w, h, preserveAspectRatio=True, mask='auto')
                y_img -= (h + 0.4*inch)
            except Exception as exc:
                # Best effort: a broken image must not abort the report, but
                # the omission should be visible rather than silent.
                print(f"Warning: could not embed {p} in {pdf_path}: {exc}")
    c.save()
    return md_path, pdf_path

# Produce the figures and both report formats for every selected account,
# collecting (account, md_path, pdf_path) for each one.
generated = []
for account in tqdm(accounts, desc="Traders"):
    trades = trader_dfs[account]
    figure_paths = plot_10_for_trader(account, trades)
    report_md, report_pdf = write_trader_md_and_pdf(account, trades, figure_paths)
    generated.append((account, report_md, report_pdf))

print("Per-trader artifacts generated:", len(generated))


Traders: 100%|██████████| 32/32 [02:34<00:00,  4.84s/it]

Per-trader artifacts generated: 32





In [7]:
# Build master summary table
def _summary_row(account, trades):
    """Headline metrics for one trader, ordered like the master-table columns."""
    n_trades = len(trades)
    n_wins = int(trades['Win'].sum())
    sharpe_values = trades['RollingSharpe'].dropna()
    return (
        account,
        n_trades,
        100*n_wins/max(1,n_trades),
        float(trades[COL_PNL].sum(skipna=True)),
        float(trades['DrawdownPct'].min()),
        # last available rolling Sharpe, NaN when the trader never filled a window
        float(sharpe_values.iloc[-1]) if len(sharpe_values) else np.nan,
    )

summary_rows = [_summary_row(account, trades) for account, trades in trader_dfs.items()]

summary = pd.DataFrame(summary_rows, columns=["Account","Trades","WinRate%","TotalPnL","MaxDD%","Sharpe30"])
summary_path = os.path.join(OUTPUT_ROOT, "master_trader_summary.csv")
summary.to_csv(summary_path, index=False)

# Create Master PDF: one module-level canvas shared by the drawing helpers below
master_pdf = os.path.join(MASTER_DIR, f"{CANDIDATE}_Master_Report.pdf")
c = canvas.Canvas(master_pdf, pagesize=A4)
W, H = A4

def draw_title(txt, y):
    """Draw `txt` centred on the page width at height `y`, in 18pt Helvetica-Bold, on the module-level canvas `c`."""
    c.setFont("Helvetica-Bold", 18)
    c.drawCentredString(W/2, y, txt)

def draw_text_block(lines, start_y):
    """Draw `lines` top-down from `start_y` on the module-level canvas `c`.

    Each entry is wrapped at 95 characters and drawn in 11pt Helvetica at a
    1 inch left margin, moving down 14 points per drawn line. Returns the y
    position just below the last line drawn.
    """
    c.setFont("Helvetica", 11)
    cursor_y = start_y
    for paragraph in lines:
        for piece in textwrap.wrap(paragraph, width=95):
            c.drawString(1*inch, cursor_y, piece)
            cursor_y -= 14
    return cursor_y

# Title page.
# A new canvas already starts on its first page and showPage() *ends* the
# current page, so it is only called after a page has been drawn. Calling it
# before drawing would insert a blank page.
draw_title("Trader Behavioural Analysis – Executive Report", H-1.0*inch)
c.setFont("Helvetica", 11)
c.drawCentredString(W/2, H-1.4*inch, f"Candidate: {CANDIDATE}   |   Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
c.showPage()

# Executive summary
draw_title("Executive Summary", H-1.0*inch)
summary_stats = [
    f"Traders analyzed: {len(trader_dfs)}",
    f"Total trades: {len(df)}",
    f"Aggregate PnL: {df[COL_PNL].sum():.2f}",
    f"Overall win rate: {df['Win'].mean()*100:.2f}%",
    f"Median Rolling Sharpe (30): {summary['Sharpe30'].median():.3f}"
]
y = draw_text_block(summary_stats, H-1.6*inch)
c.showPage()

# Add global EDA figures, one per page (sorted so the numbered files keep their order)
for fn in sorted(os.listdir(FIG_GLOBAL)):
    path = os.path.join(FIG_GLOBAL, fn)
    draw_title("Global EDA Snapshot", H-1.0*inch)
    try:
        img = ImageReader(path); iw, ih = img.getSize()
        # Target 5in height; shrink to the printable width if too wide.
        aspect = iw/ih; h = 5.0*inch; w = h*aspect
        if w > (W-2*inch): w = W-2*inch; h = w/aspect
        c.drawImage(path, (W-w)/2, (H-h)/2, w, h, preserveAspectRatio=True, mask='auto')
    except Exception as exc:
        # Best effort: keep building the report, but make the omission visible.
        print(f"Warning: could not embed {path} in master PDF: {exc}")
    c.showPage()   # finish this figure's page

# Top 10 table by Sharpe (ties broken by total PnL)
draw_title("Top 10 Traders (by Sharpe30)", H-1.0*inch)
top10 = summary.sort_values(['Sharpe30','TotalPnL'], ascending=[False, False]).head(10)
y = H-1.4*inch
c.setFont("Helvetica", 10)
for _, r in top10.iterrows():
    line = f"{str(r['Account'])[:20]:<22} Trades:{int(r['Trades']):<5} WinRate:{r['WinRate%']:.1f}%  PnL:{r['TotalPnL']:.0f}  MaxDD:{r['MaxDD%']:.1f}%  Sharpe:{r['Sharpe30']:.3f}"
    for chunk in textwrap.wrap(line, width=95):
        c.drawString(1*inch, y, chunk); y -= 12

c.save()
print("Master PDF:", master_pdf)
print("Master summary CSV:", summary_path)


Master PDF: /content/outputs_final/master/ds_RishabhVerma_Master_Report.pdf
Master summary CSV: /content/outputs_final/master_trader_summary.csv


In [8]:
# Create final folder structure and zip it
import shutil  # imported once up front instead of conditionally and then again later

# Start from an empty delivery folder so stale files from a previous run are not shipped.
final_root = os.path.join(WORKDIR, f"{CANDIDATE}")
if os.path.exists(final_root):
    shutil.rmtree(final_root)
os.makedirs(final_root, exist_ok=True)

# Copy selected artifacts
shutil.copy(summary_path, final_root)
shutil.copy(master_pdf, final_root)
# create subfolders and copy
for src_dir, sub_name in [
    (FIG_GLOBAL, "global_figs"),
    (FIG_TRADERS, "trader_figs"),
    (REPORTS_MD, "reports_md"),
    (REPORTS_PDF, "reports_pdf"),
]:
    shutil.copytree(src_dir, os.path.join(final_root, sub_name))

# Zip
zipname = f"{CANDIDATE}.zip"
zip_path = os.path.join(WORKDIR, zipname)
if os.path.exists(zip_path):
    os.remove(zip_path)
# make_archive appends ".zip" to base_name itself; base_dir keeps the
# CANDIDATE folder as the top-level entry inside the archive.
shutil.make_archive(base_name=os.path.join(WORKDIR, CANDIDATE), format='zip', root_dir=WORKDIR, base_dir=CANDIDATE)
print("Final zip created:", zip_path)

# Auto-download if in Colab
if IN_COLAB:
    colab_files.download(zip_path)
else:
    print("Not in Colab; download zip from notebook file explorer:", zip_path)


Final zip created: /content/ds_RishabhVerma.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>