# PDF Chart & Table Rebuilder (Vector‑First with Vision Fallback)

This notebook **detects tables and charts** (bar & pie) on a single PDF page, **extracts their data**, and **recreates** them as CSV files and freshly plotted images.

### Why this works reliably
- **Vector-first**: we read the page's vector drawing operations (rectangles, curves) with **PyMuPDF**, and its words and embedded images with **pdfplumber**. Vector PDFs carry exact geometry, so they can be parsed more reliably than scans.
- **Vision fallback**: when a page is scanned/non-vector, we use **OpenCV** (morphology & Hough) and **OCR** to recover tables and chart values.

### Outputs
- `detections.csv` — list of detected regions with type and bounding boxes
- For **tables**: CSV files with reconstructed cell text
- For **bar charts**: CSV of values (+ optional x labels), plus a recreated bar chart PNG
- For **pie charts**: CSV of slice percentages, plus a recreated pie chart PNG
- Annotated page preview PNG of all detections
- (Optional) A PPTX summarizing the recreations

### Install (run locally if needed)
- `pip install pymupdf pdfplumber opencv-python pytesseract pdf2image pandas numpy matplotlib python-pptx`
- OS deps: **tesseract** OCR, **poppler** (for pdf2image), optionally **ghostscript** for some table cases


## 1) Configure Inputs

In [None]:
from pathlib import Path

# --- User configuration ---------------------------------------------------
PDF_PATH = Path('your_file.pdf')    # input document; point this at your own PDF
PAGE_INDEX = 0                      # page to analyse (the first page is 0)
DPI = 400                           # raster resolution; larger values help the OCR fallback
OUT_DIR = Path('rebuilder_output')  # every artifact is written below this folder
OUT_DIR.mkdir(exist_ok=True, parents=True)

# Echo the resolved settings as the cell result.
(PDF_PATH.resolve(), PAGE_INDEX, OUT_DIR.resolve())

## 2) Rasterize Page (for Vision & Preview)

In [None]:
import importlib
def rasterize(pdf_path, page_index, dpi):
    """Render one PDF page to a raster image.

    PyMuPDF is preferred; pdf2image is used when PyMuPDF is not installed.

    Args:
        pdf_path: Path (str or ``pathlib.Path``) of the PDF file.
        page_index: 0-based index of the page to render.
        dpi: Target resolution in dots per inch.

    Returns:
        A ``fitz.Pixmap`` when PyMuPDF is available, otherwise the PIL image
        returned by ``pdf2image.convert_from_path``. Both expose ``.save()``.

    Raises:
        RuntimeError: If neither PyMuPDF nor pdf2image is installed.
    """
    # `import importlib` alone does not guarantee that the `util` submodule
    # has been loaded, so import it explicitly before calling find_spec.
    import importlib.util

    if importlib.util.find_spec('fitz') is not None:
        import fitz
        zoom = dpi / 72  # the source scales by dpi/72 to reach the requested DPI
        # The context manager closes the document once the page is rendered.
        with fitz.open(str(pdf_path)) as doc:
            page = doc[page_index]
            return page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)

    if importlib.util.find_spec('pdf2image') is not None:
        from pdf2image import convert_from_path
        # pdf2image numbers pages from 1.
        pages = convert_from_path(
            str(pdf_path), dpi=dpi,
            first_page=page_index + 1, last_page=page_index + 1,
        )
        return pages[0]

    raise RuntimeError('Install PyMuPDF or pdf2image for rasterization')

# Render the configured page and persist it as PNG for the vision stages.
pix = rasterize(PDF_PATH, PAGE_INDEX, DPI)
from PIL import Image
# Output name uses the 1-based page number, zero-padded to three digits.
page_png = OUT_DIR / f'page_{PAGE_INDEX+1:03d}.png'
if not hasattr(pix, 'samples'):
    # No `samples` attribute: this is the PIL image from the pdf2image path.
    pix.save(page_png)
else:
    # `samples` is present on the PyMuPDF pixmap; it is saved via a string path.
    import numpy as np
    import fitz  # type: ignore
    pix.save(page_png.as_posix())
page_png

## 3) Vector-First Signals (pdfplumber + PyMuPDF)

In [None]:
import cv2, numpy as np, json, importlib
import importlib.util  # `import importlib` alone does not guarantee `.util` is loaded

# Load the rasterized page; all pixel coordinates below refer to this image.
page_bgr = cv2.imread(str(page_png))
if page_bgr is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f'Could not read rasterized page image: {page_png}')
H, W = page_bgr.shape[:2]
scale = DPI/72.0  # factor applied to PDF coordinates to get raster pixels
vector = {'text_boxes':[], 'images':[], 'rect_hits':None, 'circ_hits':None}

# pdfplumber: words (text boxes) + embedded images
if importlib.util.find_spec('pdfplumber') is not None:
    import pdfplumber
    with pdfplumber.open(PDF_PATH) as pdf:
        pg = pdf.pages[PAGE_INDEX]
        words = pg.extract_words(use_text_flow=True) or []
        # Group words into lines by snapping their vertical centre to a 6-unit grid.
        lines = {}
        for w in words:
            yc = (w['top']+w['bottom'])/2
            key = round(yc/6)*6
            lines.setdefault(key, []).append(w)
        # One bounding box per line, converted to pixels.
        for ws in lines.values():
            x0 = min(w['x0'] for w in ws); y0 = min(w['top'] for w in ws)
            x1 = max(w['x1'] for w in ws); y1 = max(w['bottom'] for w in ws)
            vector['text_boxes'].append((int(x0*scale), int(y0*scale), int(x1*scale), int(y1*scale)))
        for im in pg.images:
            x0,y0,x1,y1 = im['x0'],im['top'],im['x1'],im['bottom']
            vector['images'].append((int(x0*scale), int(y0*scale), int(x1*scale), int(y1*scale)))

# PyMuPDF: drawings → rectangle/circle heatmaps (hints for bars/pies)
if importlib.util.find_spec('fitz') is not None:
    import fitz
    rect_hits = np.zeros((H,W), dtype=np.uint8)
    circ_hits = np.zeros((H,W), dtype=np.uint8)
    # The context manager closes the document when the scan is finished.
    with fitz.open(str(PDF_PATH)) as doc:
        pg = doc[PAGE_INDEX]
        for d in pg.get_drawings():
            for item in d['items']:
                op = item[0]
                if op == 're':
                    # Rectangle item: item[1] is the rectangle (x0, y0, x1, y1).
                    x0,y0,x1,y1 = item[1]
                    hits = rect_hits
                elif op == 'c':
                    # Bézier curve item: item[1:5] are its four control points.
                    pts = item[1:5]
                    xs = [p[0] for p in pts]; ys = [p[1] for p in pts]
                    x0,x1 = min(xs), max(xs)
                    y0,y1 = min(ys), max(ys)
                    hits = circ_hits
                else:
                    continue
                X0,X1 = int(x0*scale), int(x1*scale)
                Y0,Y1 = int(y0*scale), int(y1*scale)
                # Mark the bounding box of the primitive, clipped to the page.
                hits[max(0,Y0):min(H,Y1), max(0,X0):min(W,X1)] = 1
    vector['rect_hits'] = rect_hits
    vector['circ_hits'] = circ_hits

len(vector['text_boxes']), len(vector['images'])

## 4) Vision Proposals + Classification (table / bar / pie)

In [None]:
# Edge map of the page, thickened so nearby strokes fuse into single blobs.
gray = cv2.cvtColor(page_bgr, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 50, 150)
dil = cv2.dilate(
    edges,
    cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)),
    iterations=2,
)
cnts, _ = cv2.findContours(dil, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Keep (x, y, w, h) boxes that are neither tiny nor nearly page-sized.
cands = []
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    if w*h <= 8000 or w <= 60 or h <= 60:
        continue
    if w >= W*0.98 or h >= H*0.98:
        continue
    cands.append((x, y, w, h))

def merge_xyxy(boxes, pad=8):
    """Pad (x, y, w, h) boxes and fuse every overlapping pair.

    Boxes are sorted, grown by *pad* on each side and converted to
    (x0, y0, x1, y1). Passes over the list repeat until one completes without
    any merge. The result is clipped to the page using the module-level
    ``W`` and ``H``.

    Returns:
        List of (x0, y0, x1, y1) tuples; ``[]`` for empty input.
    """
    if not boxes:
        return []

    pending = [
        (x - pad, y - pad, x + w + pad, y + h + pad)
        for (x, y, w, h) in sorted(boxes)
    ]

    merged_any = True
    while merged_any:
        merged_any = False
        survivors = []
        while pending:
            cur = pending.pop(0)
            cx0, cy0, cx1, cy1 = cur
            for pos, (ox0, oy0, ox1, oy1) in enumerate(pending):
                # Touching edges count as overlap.
                if cx1 >= ox0 and ox1 >= cx0 and cy1 >= oy0 and oy1 >= cy0:
                    cur = (min(cx0, ox0), min(cy0, oy0), max(cx1, ox1), max(cy1, oy1))
                    del pending[pos]
                    merged_any = True
                    # At most one merge per box per pass; the next pass
                    # re-tests the enlarged box against the rest.
                    break
            survivors.append(cur)
        pending = survivors

    return [
        (max(0, x0), max(0, y0), min(W - 1, x1), min(H - 1, y1))
        for (x0, y0, x1, y1) in pending
    ]

# Fuse overlapping candidate boxes into page-clipped (x0, y0, x1, y1) regions.
regions = merge_xyxy(cands)

def table_mask(gray, min_joints=4):
    """Return a binary mask of ruled-grid regions in a grayscale page image.

    Long horizontal and vertical strokes are isolated with morphological
    opening. Their union forms the grid; their intersection marks the joints
    where rulings cross. A connected grid component is kept (drawn filled)
    only when it contains at least *min_joints* joints.

    The previous version returned the intersection alone. Those isolated
    joint blobs are far too small to pass the caller's size filter, so
    ruled tables were never reported.

    Args:
        gray: Single-channel uint8 image.
        min_joints: Minimum number of line crossings a component needs.

    Returns:
        uint8 mask, same shape as *gray*; 255 inside accepted components.

    Note:
        Kernel lengths derive from the module-level page size ``W`` / ``H``.
    """
    binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    ink = 255 - binary  # invert the Otsu result so the opening acts on the dark strokes
    h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(10, W // 60), 1))
    v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(10, H // 60)))
    h_lines = cv2.morphologyEx(ink, cv2.MORPH_OPEN, h_kernel)
    v_lines = cv2.morphologyEx(ink, cv2.MORPH_OPEN, v_kernel)

    grid = cv2.bitwise_or(h_lines, v_lines)     # all rulings
    joints = cv2.bitwise_and(h_lines, v_lines)  # crossings only

    mask = np.zeros_like(grid)
    contours, _ = cv2.findContours(grid, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Count distinct joint blobs inside this component's bounding box.
        patch = np.ascontiguousarray(joints[y:y + h, x:x + w])
        joint_cnts, _ = cv2.findContours(patch, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if len(joint_cnts) >= min_joints:
            cv2.drawContours(mask, [contour], -1, 255, thickness=cv2.FILLED)
    return mask

def bar_score(roi):
    """Score how much a BGR region looks like a vertical bar chart.

    Args:
        roi: BGR image crop.

    Returns:
        ``(score, bars)``. ``bars`` is a list of (x, y, w, h) boxes of tall
        blobs. ``score`` is 0.0 when fewer than three bars are found;
        otherwise it grows with the bar count and shrinks as the bar bottoms
        become less aligned.
    """
    import numpy as np
    g = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    t = cv2.threshold(cv2.GaussianBlur(g, (3, 3), 0), 0, 255,
                      cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    # findContours traces white (non-zero) blobs, so the marks - the minority
    # class - must be white. The old test inverted when black was the
    # majority, which left the background as the only traced blob.
    if (t == 255).sum() > (t == 0).sum():
        t = cv2.bitwise_not(t)
    t = cv2.morphologyEx(t, cv2.MORPH_CLOSE,
                         cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)), iterations=1)
    cnts, _ = cv2.findContours(t, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    bars = []
    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)
        # Tall-ish, not tiny: candidate vertical bar.
        if h / max(1.0, w) > 1.3 and w * h > 200 and h > 20:
            bars.append((x, y, w, h))
    if len(bars) < 3:
        return 0.0, bars
    # Bars of one chart share a baseline, so a low spread of bottoms raises the score.
    bottoms = [y + h for (x, y, w, h) in bars]
    std = np.std(bottoms)
    return float(len(bars) / (1 + std / 5.0)), bars

def pie_score(roi):
    """Score how much a BGR region looks like a pie chart.

    Returns ``(score, circle, radials)``. When no Hough circle is found the
    result is ``(0.0, None, [])``. Otherwise ``circle`` is ``(cx, cy, r)`` of
    the largest circle, ``radials`` lists the Hough segments passing within
    0.08*r of its centre, and the score is 1.0 plus 0.3 per radial.
    """
    import numpy as np, math

    def seg_dist(px, py, a, b):
        # Distance from point (px, py) to the finite segment a-b.
        ax, ay = a; bx, by = b
        seg_len = math.hypot(bx-ax, by-ay)
        if seg_len == 0:
            return math.hypot(px-ax, py-ay)
        t = max(0, min(1, ((px-ax)*(bx-ax)+(py-ay)*(by-ay))/(seg_len*seg_len)))
        qx = ax+t*(bx-ax); qy = ay+t*(by-ay)
        return math.hypot(px-qx, py-qy)

    blurred = cv2.medianBlur(cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY), 5)
    found = cv2.HoughCircles(blurred, cv2.HOUGH_GRADIENT, dp=1.2, minDist=30,
                             param1=120, param2=40, minRadius=20, maxRadius=0)
    if found is None:
        return 0.0, None, []

    # Largest radius wins.
    biggest = max(np.uint16(np.around(found))[0], key=lambda z: z[2])
    cx, cy, r = int(biggest[0]), int(biggest[1]), int(biggest[2])

    segs = cv2.HoughLinesP(cv2.Canny(blurred, 60, 180), 1, np.pi/180, threshold=60,
                           minLineLength=int(r*0.6), maxLineGap=10)
    radials = []
    if segs is not None:
        for x1, y1, x2, y2 in segs[:, 0, :]:
            if seg_dist(cx, cy, (x1, y1), (x2, y2)) < r*0.08:
                radials.append((x1, y1, x2, y2))

    return float(1.0 + 0.3*len(radials)), (cx, cy, r), radials

# Table candidates: sufficiently large blobs of the table mask, as (x0, y0, x1, y1).
tmask = table_mask(gray)
tcnts,_ = cv2.findContours(tmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
table_regions=[]
for c in tcnts:
    x,y,w,h = cv2.boundingRect(c)
    if w*h>5000 and w>80 and h>60:
        table_regions.append((x,y,x+w,y+h))

# Raster mask of the vector text lines, used to measure text coverage per region.
text_mask = np.zeros((H,W), dtype=np.uint8)
for (x0,y0,x1,y1) in vector['text_boxes']:
    cv2.rectangle(text_mask,(x0,y0),(x1,y1),255,-1)

rect_hits = vector['rect_hits']; circ_hits = vector['circ_hits']
detections=[]
for (x0,y0,x1,y1) in regions:
    roi = page_bgr[y0:y1, x0:x1]
    # 1) Table: the region intersects a table candidate by more than 10 px both ways.
    is_table=False
    for (tx0,ty0,tx1,ty1) in table_regions:
        if not (x1<tx0 or tx1<x0 or y1<ty0 or ty1<y0):
            iw=min(x1,tx1)-max(x0,tx0); ih=min(y1,ty1)-max(y0,ty0)
            if iw>10 and ih>10: is_table=True; break
    if is_table:
        detections.append({'type':'table','x0':x0,'y0':y0,'x1':x1,'y1':y1}); continue
    # 2) Text: more than 35% of the region is covered by vector text boxes.
    tr = text_mask[y0:y1,x0:x1].mean()/255.0 if (y1-y0)*(x1-x0)>0 else 0.0
    if tr>0.35:
        detections.append({'type':'text','x0':x0,'y0':y0,'x1':x1,'y1':y1}); continue
    # 3) Chart: vision scores, backed by the vector rectangle/curve heatmaps.
    bs,_ = bar_score(roi)
    ps,_,_ = pie_score(roi)
    rsum=int(rect_hits[y0:y1,x0:x1].sum()) if rect_hits is not None else 0
    csum=int(circ_hits[y0:y1,x0:x1].sum()) if circ_hits is not None else 0
    if ps >= max(1.2, bs*1.3) or (csum>rsum and csum>1000):
        detections.append({'type':'pie_chart','x0':x0,'y0':y0,'x1':x1,'y1':y1})
    elif bs >= max(1.0, ps*1.2) or (rsum>csum and rsum>1000):
        detections.append({'type':'bar_chart','x0':x0,'y0':y0,'x1':x1,'y1':y1})
    else:
        detections.append({'type':'image_other','x0':x0,'y0':y0,'x1':x1,'y1':y1})

import pandas as pd
# Explicit columns keep the frame well-formed when nothing was detected;
# without them an empty list yields no 'type' column and the lookup below fails.
det_df = pd.DataFrame(detections, columns=['type','x0','y0','x1','y1'])
DET_CSV = OUT_DIR / 'detections.csv'
det_df.to_csv(DET_CSV, index=False)
DET_CSV, det_df['type'].value_counts().to_dict()

## 5) Extractors: Table → CSV, Bar → Values, Pie → Percentages + Rebuild Plots

In [None]:
import csv, os, pandas as pd, numpy as np, cv2, importlib, math
# Folder that receives every per-detection crop, CSV and plot.
EX_DIR = OUT_DIR / 'extracts'
EX_DIR.mkdir(exist_ok=True)


def crop_save(label, idx, x0,y0,x1,y1):
    """Cut a box out of the page image and write it under ``EX_DIR``.

    The file is named ``{label}_{idx}.png``.

    Returns:
        ``(path, roi)``: the PNG path and the cropped pixel array.
    """
    target = EX_DIR / f'{label}_{idx}.png'
    crop = page_bgr[y0:y1, x0:x1]
    cv2.imwrite(target.as_posix(), crop)
    return target, crop

def extract_table(idx, x0,y0,x1,y1):
    """OCR a table region and write its text as a rectangular CSV.

    Words are grouped into rows by snapping their top coordinate to a 10 px
    grid, then ordered left-to-right within each row. Shorter rows are padded
    with empty cells so every CSV line has the same number of fields; ragged
    files are not reliably readable with ``pandas.read_csv(header=None)``.

    Args:
        idx: Number used in the output file names.
        x0, y0, x1, y1: Region bounds in page pixels.

    Returns:
        Path of the written CSV. When pytesseract is missing or no text is
        recognised, the file holds a single explanatory cell.
    """
    # `import importlib` alone does not guarantee that `.util` is loaded.
    import importlib.util

    _, roi = crop_save('table', idx, x0,y0,x1,y1)
    out_csv = EX_DIR / f'table_{idx}.csv'
    if importlib.util.find_spec('pytesseract') is None:
        rows=[["(Install pytesseract for OCR table extraction)"]]
    else:
        import pytesseract
        g=cv2.cvtColor(roi,cv2.COLOR_BGR2GRAY)
        g=cv2.threshold(g,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
        data=pytesseract.image_to_data(g, output_type=pytesseract.Output.DICT, config='--psm 6')
        line_bins={}
        for i,txt in enumerate(data['text']):
            s=txt.strip()
            if not s: continue
            key=round(data['top'][i]/10)*10
            line_bins.setdefault(key,[]).append((data['left'][i], s))
        # Rows top-to-bottom, cells left-to-right.
        rows=[[t for _,t in sorted(cells)] for _,cells in sorted(line_bins.items())]
        if not rows:
            rows=[["(No OCR text found in table region)"]]
    # Pad to a uniform width so the CSV is rectangular.
    width=max(len(r) for r in rows)
    rows=[r + ['']*(width-len(r)) for r in rows]
    with open(out_csv,'w',newline='',encoding='utf-8') as f:
        csv.writer(f).writerows(rows)
    return out_csv

def ocr_ticks(roi, side='left', margin=50):
    """OCR numeric axis tick labels from one vertical edge strip of a chart.

    Args:
        roi: BGR chart crop.
        side: ``'left'`` reads the first *margin* columns; any other value
            reads the last *margin* columns.
        margin: Strip width in pixels.

    Returns:
        List of ``{'y': row_pixel, 'val': float}`` sorted by ``y``, with
        entries closer than 7 px to the previously kept one dropped.
        Empty when pytesseract is not installed.
    """
    # `import importlib` alone does not guarantee that `.util` is loaded.
    import importlib.util
    import re

    if importlib.util.find_spec('pytesseract') is None:
        return []
    import pytesseract
    h,w=roi.shape[:2]
    sub = roi[:, :min(margin,w)] if side=='left' else roi[:, max(0,w-margin):]
    g=cv2.cvtColor(sub,cv2.COLOR_BGR2GRAY)
    g=cv2.threshold(g,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
    data=pytesseract.image_to_data(g, output_type=pytesseract.Output.DICT, config='--psm 6')

    # ASCII-only decimal with optional sign. str.isdigit() also accepts
    # characters such as superscript digits, on which float() raises.
    number = re.compile(r'[-+]?(?:\d+\.?\d*|\.\d+)', re.ASCII)
    ticks=[]
    for i,s in enumerate(data['text']):
        # Drop thousands separators; normalise the Unicode minus sign.
        v=s.strip().replace(',','').replace('\u2212','-')
        if number.fullmatch(v):
            y=data['top'][i]+data['height'][i]//2  # vertical centre of the word box
            ticks.append({'y':y, 'val':float(v)})
    ticks.sort(key=lambda d:d['y'])
    uniq=[]
    for t in ticks:
        if not uniq or abs(t['y']-uniq[-1]['y'])>6: uniq.append(t)
    return uniq

def find_bars(roi):
    """Find tall blobs (candidate vertical bars) in a BGR chart crop.

    Returns:
        List of (x, y, w, h) boxes sorted left-to-right.
    """
    g=cv2.cvtColor(roi,cv2.COLOR_BGR2GRAY)
    t=cv2.threshold(cv2.GaussianBlur(g,(3,3),0),0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
    # findContours traces white (non-zero) blobs, so the marks - the minority
    # class - must be white. The old test inverted when black was the
    # majority, which left the background as the only traced blob.
    if (t==255).sum()>(t==0).sum(): t=cv2.bitwise_not(t)
    t=cv2.morphologyEx(t, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_RECT,(3,3)), iterations=1)
    cnts,_=cv2.findContours(t, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    bars=[]
    for c in cnts:
        x,y,w,h=cv2.boundingRect(c)
        # Taller than wide by 1.3x, area above 200 px, height above 20 px.
        if h/max(1.0,w)>1.3 and w*h>200 and h>20:
            bars.append((x,y,w,h))
    return sorted(bars, key=lambda b:b[0])

def map_heights(bars, ypix_base, ypix_top, ymin, ymax):
    """Convert bar tops from pixel rows to axis values by linear interpolation.

    ``ypix_base`` maps to ``ymin`` and ``ypix_top`` to ``ymax``. A pixel span
    below 1 is treated as 1.

    Returns:
        One dict per bar, numbered from 1, with keys ``index``, ``x``,
        ``y_top``, ``w``, ``h_px`` and ``value``.
    """
    pixel_span = max(1, ypix_base - ypix_top)
    value_range = ymax - ymin

    def to_record(number, box):
        left, top, width, height = box
        ratio = (ypix_base - top) / pixel_span
        return {
            'index': number,
            'x': left,
            'y_top': top,
            'w': width,
            'h_px': height,
            'value': ymin + ratio * value_range,
        }

    return [to_record(n, box) for n, box in enumerate(bars, start=1)]

def extract_bar(idx, x0,y0,x1,y1):
    """Extract bar values from a chart region, save them and redraw the chart.

    Values are calibrated from the top-most and bottom-most OCR'd axis ticks
    when at least two are found. Otherwise they are normalised to 0..1 over
    the pixel extent of the detected bars. X labels are OCR'd from the band
    below the baseline on a best-effort basis.

    Args:
        idx: Number used in output file names and the plot title.
        x0, y0, x1, y1: Region bounds in page pixels.

    Returns:
        ``(csv_path, png_path, mapping)``, where ``mapping`` describes the
        pixel-to-value calibration that was used.
    """
    _, roi = crop_save('bar', idx, x0,y0,x1,y1)
    bars = find_bars(roi)
    ticks = ocr_ticks(roi,'left') or ocr_ticks(roi,'right')
    if len(ticks)>=2:
        t1, t2 = ticks[0], ticks[-1]
        ypix_base = max(t1['y'], t2['y']); ypix_top = min(t1['y'], t2['y'])
        ymin, ymax = min(t1['val'],t2['val']), max(t1['val'],t2['val'])
        mapped = map_heights(bars, ypix_base, ypix_top, ymin, ymax)
        mapping = {'source':'ocr_ticks','ypix_base':ypix_base,'ypix_top':ypix_top,'ymin':ymin,'ymax':ymax}
    else:
        if bars:
            ypix_base = max(y+h for (x,y,w,h) in bars); ypix_top = min(y for (x,y,w,h) in bars)
        else:
            ypix_base = roi.shape[0]-5; ypix_top = 5
        mapped = map_heights(bars, ypix_base, ypix_top, 0.0, 1.0)
        mapping = {'source':'normalized','ypix_base':ypix_base,'ypix_top':ypix_top,'ymin':0.0,'ymax':1.0}
    # Explicit columns keep the frame usable when no bars were found;
    # otherwise the 'index'/'value' lookups below raise KeyError.
    df = pd.DataFrame(mapped, columns=['index','x','y_top','w','h_px','value'])
    out_csv = EX_DIR / f'bar_{idx}.csv'
    df.to_csv(out_csv, index=False)

    # Best effort: OCR x labels from the band just beneath the baseline.
    try:
        import pytesseract
        baseline = mapping['ypix_base']
        band = roi[min(roi.shape[0]-1, baseline+3): min(roi.shape[0], baseline+50), :]
        if bars and band.size:
            g=cv2.cvtColor(band,cv2.COLOR_BGR2GRAY)
            g=cv2.threshold(g,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
            data=pytesseract.image_to_data(g, output_type=pytesseract.Output.DICT, config='--psm 6')
            items=[]
            for i,s in enumerate(data['text']):
                t=s.strip()
                if not t: continue
                items.append((data['left'][i]+data['width'][i]//2, t))
            items.sort()
            # Only attach labels when OCR produced some; a column of empty
            # strings would be worse than falling back to bar numbers.
            if items:
                centers=[x+w//2 for (x,y,w,h) in bars]
                df['label']=[min(items, key=lambda it: abs(it[0]-c))[1] for c in centers]
                df.to_csv(out_csv, index=False)
    except Exception as exc:
        # Labels are optional, so keep going, but say why they are missing.
        print(f'bar {idx}: x-label OCR skipped ({exc})')

    # Recreate. Bars are placed by position so that duplicate labels do not
    # collapse onto a single categorical tick.
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(figsize=(8,5))
    positions = list(range(len(df)))
    tick_labels = (df['label'] if 'label' in df.columns else df['index'].astype(str)).tolist()
    ax.bar(positions, df['value'].astype(float).values)
    ax.set_xticks(positions)
    ax.set_xticklabels(tick_labels)
    ax.set_ylabel('Value')
    ax.set_title(f'Bar {idx}')
    fig.tight_layout()
    png = EX_DIR / f'bar_{idx}.png'
    fig.savefig(png.as_posix(), dpi=200)
    plt.show()
    plt.close(fig)  # release the figure so repeated calls do not accumulate
    return out_csv, png, mapping

def pie_geometry(roi):
    """Locate the largest Hough circle in a BGR crop.

    Returns:
        ``(cx, cy, r)`` as ints, or ``None`` when no circle is detected.
    """
    import numpy as np
    smoothed = cv2.medianBlur(cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY), 5)
    found = cv2.HoughCircles(smoothed, cv2.HOUGH_GRADIENT, dp=1.2, minDist=30,
                             param1=120, param2=40, minRadius=20, maxRadius=0)
    if found is None:
        return None
    rounded = np.uint16(np.around(found))[0]
    best = max(rounded, key=lambda z: z[2])  # largest radius wins
    return int(best[0]), int(best[1]), int(best[2])

def pie_slices(roi, cx,cy,r, angle_tol_deg=3):
    """Estimate pie slice sizes from the radial boundary lines in a crop.

    Hough segments passing within 0.08*r of the centre are treated as slice
    boundaries. The angular gaps between consecutive boundaries become the
    slices.

    Args:
        roi: BGR chart crop.
        cx, cy, r: Circle centre and radius in crop pixels.
        angle_tol_deg: Boundaries closer than this many degrees are merged.

    Returns:
        List of ``{'slice', 'angle_deg', 'pct'}`` dicts. A single 100% slice
        is returned when fewer than two distinct boundaries are found.
    """
    # Imported at the top of the function. The old import sat inside the loop
    # after the helper's first call; it made `math` a local of this function,
    # so the helper raised NameError.
    import math

    def dist_point_line(px,py,a,b):
        # Distance from point (px, py) to the finite segment a-b.
        ax,ay=a; bx,by=b
        lab=math.hypot(bx-ax,by-ay)
        if lab==0: return math.hypot(px-ax,py-ay)
        t=max(0,min(1,((px-ax)*(bx-ax)+(py-ay)*(by-ay))/(lab*lab)))
        qx=ax+t*(bx-ax); qy=ay+t*(by-ay)
        return math.hypot(px-qx,py-qy)

    edges=cv2.Canny(cv2.cvtColor(roi,cv2.COLOR_BGR2GRAY),60,180)
    lines=cv2.HoughLinesP(edges,1,np.pi/180,threshold=60,minLineLength=int(r*0.6),maxLineGap=10)
    raw=set()
    if lines is not None:
        for x1,y1,x2,y2 in lines[:,0,:]:
            if dist_point_line(cx,cy,(x1,y1),(x2,y2)) >= r*0.08:
                continue
            for ex,ey in ((x1,y1),(x2,y2)):
                # An endpoint near the centre has no meaningful direction;
                # only endpoints well away from it define a boundary angle.
                if math.hypot(ex-cx,ey-cy) > r*0.3:
                    deg=math.degrees(math.atan2(ey-cy,ex-cx))%360
                    raw.add(int(round(deg))%360)  # 359.6 rounds to 360 -> wrap to 0

    # Collapse near-duplicate detections of the same boundary.
    angles=[]
    for a in sorted(raw):
        if not angles or a-angles[-1] > angle_tol_deg:
            angles.append(a)
    # The first and last boundary may also be duplicates across 0/360.
    if len(angles)>1 and angles[0]+360-angles[-1] <= angle_tol_deg:
        angles.pop()

    # Two boundaries already describe a valid two-slice pie.
    if len(angles)<2:
        return [{'slice':1,'angle_deg':360,'pct':100.0}]
    n=len(angles)
    diffs=[(angles[(i+1)%n]-angles[i])%360 for i in range(n)]
    total=sum(diffs)
    return [{'slice':i+1,'angle_deg':d,'pct':(d/total*100.0 if total else 0)} for i,d in enumerate(diffs)]

def extract_pie(idx, x0,y0,x1,y1):
    """Extract slice percentages from a pie region, save them and redraw the pie.

    Falls back to a single 100% slice when no circle is found in the crop.

    Args:
        idx: Number used in output file names and the plot title.
        x0, y0, x1, y1: Region bounds in page pixels.

    Returns:
        ``(csv_path, png_path)``.
    """
    _, roi = crop_save('pie', idx, x0,y0,x1,y1)
    geom = pie_geometry(roi)
    if geom is None:
        data=[{'slice':1,'angle_deg':360,'pct':100.0}]
    else:
        cx,cy,r=geom
        data=pie_slices(roi, cx,cy,r)
    df=pd.DataFrame(data)
    out_csv = EX_DIR / f'pie_{idx}.csv'
    df.to_csv(out_csv, index=False)

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(figsize=(5,5))
    ax.pie(df['pct'].astype(float).values, labels=df['slice'].astype(str).values, autopct='%1.1f%%')
    ax.set_title(f'Pie {idx}')
    fig.tight_layout()
    png = EX_DIR / f'pie_{idx}.png'
    fig.savefig(png.as_posix(), dpi=200)
    plt.show()
    plt.close(fig)  # release the figure so repeated calls do not accumulate
    return out_csv, png

# Run the matching extractor for each detection and record what it wrote.
# Detections of any other type produce no summary entry.
summary = []
for i, row in det_df.iterrows():
    x0, y0, x1, y1 = (int(row[k]) for k in ('x0', 'y0', 'x1', 'y1'))
    if row.type == 'pie_chart':
        csvp, pngp = extract_pie(i+1, x0, y0, x1, y1)
        summary.append(dict(type='pie_chart', index=i+1, csv=str(csvp), png=str(pngp)))
    elif row.type == 'bar_chart':
        csvp, pngp, mapping = extract_bar(i+1, x0, y0, x1, y1)
        summary.append(dict(type='bar_chart', index=i+1, csv=str(csvp), png=str(pngp), mapping=mapping))
    elif row.type == 'table':
        csvp = extract_table(i+1, x0, y0, x1, y1)
        summary.append(dict(type='table', index=i+1, csv=str(csvp)))

# Persist the summary next to the other outputs.
SUM_JSON = OUT_DIR / 'extraction_summary.json'
with open(SUM_JSON, 'w', encoding='utf-8') as f:
    json.dump(summary, f, indent=2, ensure_ascii=False)
(SUM_JSON, len(summary))

## 6) Annotated Preview & Optional PPTX

In [None]:
# Draw every detection on a copy of the page, one colour per type.
annot = page_bgr.copy()
colors = dict(
    text=(255, 0, 0),
    table=(0, 255, 255),
    bar_chart=(0, 255, 0),
    pie_chart=(0, 165, 255),
    image_other=(0, 0, 255),
)
for _, r in det_df.iterrows():
    x0, y0, x1, y1 = map(int, (r.x0, r.y0, r.x1, r.y1))
    bgr = colors.get(r.type, (255, 255, 255))  # white for unknown types
    cv2.rectangle(annot, (x0, y0), (x1, y1), bgr, 2)
    # Label sits just above the box, clamped so it stays on the image.
    cv2.putText(annot, r.type, (x0, max(0, y0-5)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, bgr, 2, cv2.LINE_AA)
ANN = OUT_DIR / 'annotated_preview.png'
cv2.imwrite(ANN.as_posix(), annot)
ANN

### (Optional) Build PPTX

In [None]:
import importlib
import importlib.util  # `import importlib` alone does not guarantee `.util` is loaded
if importlib.util.find_spec('pptx') is None:
    print('python-pptx not installed; skipping PPTX')
else:
    from pptx import Presentation
    from pptx.util import Inches
    prs = Presentation()
    # Title slide.
    s = prs.slides.add_slide(prs.slide_layouts[0])
    s.shapes.title.text = 'Chart & Table Rebuilder Summary'
    s.placeholders[1].text = f'{PDF_PATH.name} — Page {PAGE_INDEX+1}'
    # Annotated page preview.
    s2 = prs.slides.add_slide(prs.slide_layouts[6])
    s2.shapes.add_picture(ANN.as_posix(), Inches(0.5), Inches(0.8), width=Inches(9))
    bars=[x for x in summary if x['type']=='bar_chart']
    pies=[x for x in summary if x['type']=='pie_chart']
    tbls=[x for x in summary if x['type']=='table']
    # Only the first bar chart, pie chart and table get a slide.
    if bars:
        s3 = prs.slides.add_slide(prs.slide_layouts[6])
        s3.shapes.add_picture(bars[0]['png'], Inches(1), Inches(1), width=Inches(8))
    if pies:
        s4 = prs.slides.add_slide(prs.slide_layouts[6])
        s4.shapes.add_picture(pies[0]['png'], Inches(2.5), Inches(1.5), width=Inches(5))
    if tbls:
        # Read with the csv module: OCR rows can differ in length, which
        # pandas.read_csv may reject, and pandas would render empty cells as 'nan'.
        import csv
        with open(tbls[0]['csv'], newline='', encoding='utf-8') as fh:
            cells = list(csv.reader(fh))
        rows = len(cells)
        cols = max((len(r) for r in cells), default=0)
        if rows and cols:
            s5 = prs.slides.add_slide(prs.slide_layouts[6])
            table = s5.shapes.add_table(rows+1, cols, Inches(0.5), Inches(1), Inches(9), Inches(5)).table
            for j in range(cols): table.cell(0,j).text = f'Col {j+1}'
            for i, line in enumerate(cells):
                for j in range(cols):
                    # Missing trailing cells in short rows are left blank.
                    table.cell(i+1,j).text = line[j] if j < len(line) else ''
    PPTX = OUT_DIR / 'rebuilder_summary.pptx'
    prs.save(PPTX.as_posix())
    PPTX