In [1]:
import pdfplumber
import pandas as pd
import os

sections = []
current_title = None
current_content = ""
root_title = None  # ilk başlığı tutacak

def is_close_color(c, target=(0.1882, 0.3176, 0.3922), tol=0.01):
    if not c or len(c) != 3:
        return False
    return all(abs(ci - ti) < tol for ci, ti in zip(c, target))

# PDF'i işle
with pdfplumber.open("ornek.pdf") as pdf:
    for page in pdf.pages:
        lines = {}
        for char in page.chars:
            top = round(char["top"], 1)
            if top not in lines:
                lines[top] = []
            lines[top].append(char)
        
        for top in sorted(lines.keys()):
            line_chars = lines[top]
            text = "".join(c["text"] for c in line_chars).strip()
            sizes = set(c["size"] for c in line_chars)  # size yine çıkarılıyor
            colors = set(c["non_stroking_color"] for c in line_chars)

            # sadece color kontrolü
            correct_color = any(is_close_color(c) for c in colors)

            if correct_color:
                # ilk başlık geldiğinde root_title olarak sakla
                if root_title is None:
                    root_title = text
                else:
                    text = f"{root_title}>{text}"

                # önceki başlık varsa kaydet
                if current_title:
                    sections.append({
                        "title": current_title,
                        "content": current_content.strip()
                    })
                current_title = text
                current_content = ""
            else:
                current_content += text + " "

# Son başlığı ekle
if current_title:
    sections.append({"title": current_title, "content": current_content.strip()})

# Yeni dataframe
new_df = pd.DataFrame(sections)

# Eski CSV varsa oku, yoksa boş DataFrame
if os.path.exists("data.csv"):
    old_df = pd.read_csv("data.csv")
    combined_df = pd.concat([old_df, new_df], ignore_index=True)
else:
    combined_df = new_df

# Kaydet
combined_df.to_csv("data.csv", index=False, encoding="utf-8")
print(combined_df)

                                                title  \
0                                     Getting Started   
1   Getting Started>Setting Up Your 3DEXPERIENCE P...   
2   Getting Started>Starting the 3DEXPERIENCE Plat...   
3   Getting Started>Discovering Roles, Apps, Busin...   
4               Getting Started>Accessing a Dashboard   
5     Getting Started>Opening an App from the Compass   
6   Getting Started>Opening a Native App from a De...   
7   Getting Started>Selecting a User Profile for D...   
8   Getting Started>Selecting a Mouse Profile for ...   
9                Getting Started>Selecting a Language   
10                       Getting Started>Getting Help   
11                       Getting Started>Getting Help   
12  Getting Started>Displaying Tooltips and Long H...   
13            Getting Started>Getting Contextual Help   
14    Getting Started>Using the User Assistance Panel   
15                Getting Started>Accessing Tutorials   
16  Getting Started>Accessing I

In [2]:
import pdfplumber
import pandas as pd
import os

# ✅ Ayarlar
PDF_ROOT_DIR = "pdf_folder"
OUTPUT_CSV = "data.csv"
TARGET_COLOR = (0.1882, 0.3176, 0.3922)
MIN_TITLE_GAP = 15  # başlıklar arası dikey mesafe farkı (yani çok yakınsa iptal edilecek)

# 📄 CSV birleştirme için
all_sections = []

# 📂 Klasördeki tüm PDF dosyalarını tara
for root, _, files in os.walk(PDF_ROOT_DIR):
    for file in sorted(files):
        if file.lower().endswith(".pdf"):
            pdf_path = os.path.join(root, file)
            print(f"🔍 İşleniyor: {pdf_path}")

            sections = []
            root_title = None
            current_title = None
            current_content = ""
            current_size = None

            with pdfplumber.open(pdf_path) as pdf:
                for page in pdf.pages:
                    lines = {}
                    for char in page.chars:
                        top = round(char["top"], 1)
                        lines.setdefault(top, []).append(char)

                    sorted_tops = sorted(lines.keys())

                    # 🔍 Başlık adayı satırları topla
                    candidates = []
                    for idx, top in enumerate(sorted_tops):
                        line_chars = lines[top]
                        text = "".join(c["text"] for c in line_chars).strip()
                        sizes = set(c["size"] for c in line_chars)
                        colors = set(c["non_stroking_color"] for c in line_chars)

                        has_correct_color = TARGET_COLOR in colors
                        has_correct_size = any(s > 10 for s in sizes)

                        is_title = has_correct_color and has_correct_size
                        candidates.append({
                            "top": top,
                            "text": text,
                            "line_chars": line_chars,
                            "is_title": is_title
                        })

                    # 🔁 Gerçek başlıkları belirle
                    valid_titles = []
                    for i, item in enumerate(candidates):
                        if not item["is_title"]:
                            continue

                        prev = candidates[i - 1] if i > 0 else None
                        next = candidates[i + 1] if i + 1 < len(candidates) else None

                        too_close_prev = prev and prev["is_title"] and abs(item["top"] - prev["top"]) < MIN_TITLE_GAP
                        too_close_next = next and next["is_title"] and abs(next["top"] - item["top"]) < MIN_TITLE_GAP

                        if too_close_prev or too_close_next:
                            item["is_title"] = False  # başlık olarak işleme
                        else:
                            valid_titles.append(item["top"])

                    # 🔁 Satırları işle
                    for top in sorted_tops:
                        line_chars = lines[top]
                        text = "".join(c["text"] for c in line_chars).strip()
                        sizes = set(c["size"] for c in line_chars)
                        colors = set(c["non_stroking_color"] for c in line_chars)

                        has_correct_color = TARGET_COLOR in colors
                        has_correct_size = any(s > 10 for s in sizes)

                        is_title = has_correct_color and has_correct_size and top in valid_titles

                        if is_title:
                            if root_title is None:
                                root_title = text
                            else:
                                text = f"{root_title}>{text}"

                            if current_title:
                                sections.append({
                                    "title": current_title,
                                    "content": current_content.strip(),
                                    "correct_color": True,
                                    "correct_size": True,
                                    "source_pdf": file,
                                    "font_size": current_size if current_size else ""
                                })

                            current_title = text
                            current_content = ""
                            size_vals = [c["size"] for c in line_chars]
                            current_size = sum(size_vals) / len(size_vals) if size_vals else ""
                        else:
                            current_content += text + " "

            if current_title:
                sections.append({
                    "title": current_title,
                    "content": current_content.strip(),
                    "correct_color": True,
                    "correct_size": True,
                    "source_pdf": file,
                    "font_size": current_size if current_size else ""
                })

            print(f"📘 {file} içinden çıkarılan topic sayısı: {len(sections)}")
            all_sections.extend(sections)

# 📊 DataFrame oluştur ve kaydet
new_df = pd.DataFrame(all_sections)

if os.path.exists(OUTPUT_CSV):
    old_df = pd.read_csv(OUTPUT_CSV)
    combined_df = pd.concat([old_df, new_df], ignore_index=True)
else:
    combined_df = new_df

combined_df.to_csv(OUTPUT_CSV, index=False, encoding="utf-8")
print(f"✅ Tüm veriler kaydedildi: {OUTPUT_CSV}")

🔍 İşleniyor: pdf_folder\ornek10.pdf
📘 ornek10.pdf içinden çıkarılan topic sayısı: 17
🔍 İşleniyor: pdf_folder\ornek100.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek100.pdf içinden çıkarılan topic sayısı: 551
🔍 İşleniyor: pdf_folder\ornek101.pdf
📘 ornek101.pdf içinden çıkarılan topic sayısı: 32
🔍 İşleniyor: pdf_folder\ornek102.pdf
📘 ornek102.pdf içinden çıkarılan topic sayısı: 289
🔍 İşleniyor: pdf_folder\ornek103.pdf
📘 ornek103.pdf içinden çıkarılan topic sayısı: 7
🔍 İşleniyor: pdf_folder\ornek104.pdf
📘 ornek104.pdf içinden çıkarılan topic sayısı: 29
🔍 İşleniyor: pdf_folder\ornek105.pdf
📘 ornek105.pdf içinden çıkarılan topic sayısı: 32
🔍 İşleniyor: pdf_folder\ornek106.pdf
📘 ornek106.pdf içinden çıkarılan topic sayısı: 30
🔍 İşleniyor: pdf_folder\ornek107.pdf
📘 ornek107.pdf içinden çıkarılan topic sayısı: 16
🔍 İşleniyor: pdf_folder\ornek108.pdf
📘 ornek108.pdf içinden çıkarılan topic sayısı: 13
🔍 İşleniyor: pdf_folder\ornek109.pdf
📘 ornek109.pdf içinden çıkarılan topic sayısı: 20
🔍 İşleniyor: pdf_folder\ornek11.pdf
📘 ornek11.pdf içinden çıkarılan topic sayısı: 98
🔍 İşleniyor: pdf_folder\ornek110.pdf
📘 ornek110.pdf içinden çıkarılan topic sayıs

Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek13.pdf içinden çıkarılan topic sayısı: 447
🔍 İşleniyor: pdf_folder\ornek130.pdf
📘 ornek130.pdf içinden çıkarılan topic sayısı: 39
🔍 İşleniyor: pdf_folder\ornek131.pdf
📘 ornek131.pdf içinden çıkarılan topic sayısı: 31
🔍 İşleniyor: pdf_folder\ornek132.pdf
📘 ornek132.pdf içinden çıkarılan topic sayısı: 16
🔍 İşleniyor: pdf_folder\ornek133.pdf
📘 ornek133.pdf içinden çıkarılan topic sayısı: 131
🔍 İşleniyor: pdf_folder\ornek134.pdf
📘 ornek134.pdf içinden çıkarılan topic sayısı: 6
🔍 İşleniyor: pdf_folder\ornek135.pdf
📘 ornek135.pdf içinden çıkarılan topic sayısı: 254
🔍 İşleniyor: pdf_folder\ornek136.pdf
📘 ornek136.pdf içinden çıkarılan topic sayısı: 710
🔍 İşleniyor: pdf_folder\ornek137.pdf
📘 ornek137.pdf içinden çıkarılan topic sayısı: 10
🔍 İşleniyor: pdf_folder\ornek138.pdf
📘 ornek138.pdf içinden çıkarılan topic sayısı: 294
🔍 İşleniyor: pdf_folder\ornek139.pdf
📘 ornek139.pdf içinden çıkarılan topic sayısı: 293
🔍 İşleniyor: pdf_folder\ornek14.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek14.pdf içinden çıkarılan topic sayısı: 223
🔍 İşleniyor: pdf_folder\ornek140.pdf
📘 ornek140.pdf içinden çıkarılan topic sayısı: 232
🔍 İşleniyor: pdf_folder\ornek141.pdf
📘 ornek141.pdf içinden çıkarılan topic sayısı: 313
🔍 İşleniyor: pdf_folder\ornek142.pdf
📘 ornek142.pdf içinden çıkarılan topic sayısı: 263
🔍 İşleniyor: pdf_folder\ornek143.pdf
📘 ornek143.pdf içinden çıkarılan topic sayısı: 214
🔍 İşleniyor: pdf_folder\ornek144.pdf
📘 ornek144.pdf içinden çıkarılan topic sayısı: 246
🔍 İşleniyor: pdf_folder\ornek145.pdf
📘 ornek145.pdf içinden çıkarılan topic sayısı: 208
🔍 İşleniyor: pdf_folder\ornek146.pdf
📘 ornek146.pdf içinden çıkarılan topic sayısı: 193
🔍 İşleniyor: pdf_folder\ornek147.pdf
📘 ornek147.pdf içinden çıkarılan topic sayısı: 421
🔍 İşleniyor: pdf_folder\ornek148.pdf
📘 ornek148.pdf içinden çıkarılan topic sayısı: 1300
🔍 İşleniyor: pdf_folder\ornek149.pdf
📘 ornek149.pdf içinden çıkarılan topic sayısı: 108
🔍 İşleniyor: pdf_folder\ornek15.pdf
📘 ornek15.pdf içinden çıkarılan t

Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek154.pdf içinden çıkarılan topic sayısı: 1099
🔍 İşleniyor: pdf_folder\ornek155.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek155.pdf içinden çıkarılan topic sayısı: 274
🔍 İşleniyor: pdf_folder\ornek156.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek156.pdf içinden çıkarılan topic sayısı: 814
🔍 İşleniyor: pdf_folder\ornek157.pdf
📘 ornek157.pdf içinden çıkarılan topic sayısı: 1
🔍 İşleniyor: pdf_folder\ornek158.pdf
📘 ornek158.pdf içinden çıkarılan topic sayısı: 4
🔍 İşleniyor: pdf_folder\ornek159.pdf
📘 ornek159.pdf içinden çıkarılan topic sayısı: 5
🔍 İşleniyor: pdf_folder\ornek16.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek16.pdf içinden çıkarılan topic sayısı: 452
🔍 İşleniyor: pdf_folder\ornek160.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek160.pdf içinden çıkarılan topic sayısı: 114
🔍 İşleniyor: pdf_folder\ornek161.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek161.pdf içinden çıkarılan topic sayısı: 89
🔍 İşleniyor: pdf_folder\ornek162.pdf
📘 ornek162.pdf içinden çıkarılan topic sayısı: 4
🔍 İşleniyor: pdf_folder\ornek163.pdf
📘 ornek163.pdf içinden çıkarılan topic sayısı: 9
🔍 İşleniyor: pdf_folder\ornek164.pdf
📘 ornek164.pdf içinden çıkarılan topic sayısı: 1
🔍 İşleniyor: pdf_folder\ornek165.pdf
📘 ornek165.pdf içinden çıkarılan topic sayısı: 1
🔍 İşleniyor: pdf_folder\ornek166.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek166.pdf içinden çıkarılan topic sayısı: 11
🔍 İşleniyor: pdf_folder\ornek167.pdf
📘 ornek167.pdf içinden çıkarılan topic sayısı: 10
🔍 İşleniyor: pdf_folder\ornek168.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek168.pdf içinden çıkarılan topic sayısı: 8
🔍 İşleniyor: pdf_folder\ornek169.pdf
📘 ornek169.pdf içinden çıkarılan topic sayısı: 4
🔍 İşleniyor: pdf_folder\ornek17.pdf
📘 ornek17.pdf içinden çıkarılan topic sayısı: 32
🔍 İşleniyor: pdf_folder\ornek170.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek170.pdf içinden çıkarılan topic sayısı: 3
🔍 İşleniyor: pdf_folder\ornek171.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek171.pdf içinden çıkarılan topic sayısı: 6
🔍 İşleniyor: pdf_folder\ornek172.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek172.pdf içinden çıkarılan topic sayısı: 19
🔍 İşleniyor: pdf_folder\ornek173.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek173.pdf içinden çıkarılan topic sayısı: 4
🔍 İşleniyor: pdf_folder\ornek174.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek174.pdf içinden çıkarılan topic sayısı: 1
🔍 İşleniyor: pdf_folder\ornek175.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek175.pdf içinden çıkarılan topic sayısı: 11
🔍 İşleniyor: pdf_folder\ornek176.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek176.pdf içinden çıkarılan topic sayısı: 3
🔍 İşleniyor: pdf_folder\ornek177.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek177.pdf içinden çıkarılan topic sayısı: 4
🔍 İşleniyor: pdf_folder\ornek178.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek178.pdf içinden çıkarılan topic sayısı: 9
🔍 İşleniyor: pdf_folder\ornek179.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek179.pdf içinden çıkarılan topic sayısı: 10
🔍 İşleniyor: pdf_folder\ornek18.pdf
📘 ornek18.pdf içinden çıkarılan topic sayısı: 58
🔍 İşleniyor: pdf_folder\ornek180.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek180.pdf içinden çıkarılan topic sayısı: 233
🔍 İşleniyor: pdf_folder\ornek181.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek181.pdf içinden çıkarılan topic sayısı: 516
🔍 İşleniyor: pdf_folder\ornek182.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek182.pdf içinden çıkarılan topic sayısı: 296
🔍 İşleniyor: pdf_folder\ornek183.pdf
📘 ornek183.pdf içinden çıkarılan topic sayısı: 40
🔍 İşleniyor: pdf_folder\ornek184.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek184.pdf içinden çıkarılan topic sayısı: 639
🔍 İşleniyor: pdf_folder\ornek185.pdf
📘 ornek185.pdf içinden çıkarılan topic sayısı: 13
🔍 İşleniyor: pdf_folder\ornek186.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek186.pdf içinden çıkarılan topic sayısı: 28
🔍 İşleniyor: pdf_folder\ornek187.pdf
📘 ornek187.pdf içinden çıkarılan topic sayısı: 29
🔍 İşleniyor: pdf_folder\ornek188.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek188.pdf içinden çıkarılan topic sayısı: 284
🔍 İşleniyor: pdf_folder\ornek189.pdf
📘 ornek189.pdf içinden çıkarılan topic sayısı: 79
🔍 İşleniyor: pdf_folder\ornek19.pdf
📘 ornek19.pdf içinden çıkarılan topic sayısı: 106
🔍 İşleniyor: pdf_folder\ornek190.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek190.pdf içinden çıkarılan topic sayısı: 250
🔍 İşleniyor: pdf_folder\ornek191.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek191.pdf içinden çıkarılan topic sayısı: 325
🔍 İşleniyor: pdf_folder\ornek192.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek192.pdf içinden çıkarılan topic sayısı: 587
🔍 İşleniyor: pdf_folder\ornek193.pdf
📘 ornek193.pdf içinden çıkarılan topic sayısı: 1
🔍 İşleniyor: pdf_folder\ornek194.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek194.pdf içinden çıkarılan topic sayısı: 205
🔍 İşleniyor: pdf_folder\ornek195.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek195.pdf içinden çıkarılan topic sayısı: 362
🔍 İşleniyor: pdf_folder\ornek196.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek196.pdf içinden çıkarılan topic sayısı: 372
🔍 İşleniyor: pdf_folder\ornek197.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek197.pdf içinden çıkarılan topic sayısı: 332
🔍 İşleniyor: pdf_folder\ornek198.pdf
📘 ornek198.pdf içinden çıkarılan topic sayısı: 17
🔍 İşleniyor: pdf_folder\ornek199.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek199.pdf içinden çıkarılan topic sayısı: 36
🔍 İşleniyor: pdf_folder\ornek2.pdf
📘 ornek2.pdf içinden çıkarılan topic sayısı: 108
🔍 İşleniyor: pdf_folder\ornek20.pdf
📘 ornek20.pdf içinden çıkarılan topic sayısı: 42
🔍 İşleniyor: pdf_folder\ornek200.pdf
📘 ornek200.pdf içinden çıkarılan topic sayısı: 5
🔍 İşleniyor: pdf_folder\ornek201.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek201.pdf içinden çıkarılan topic sayısı: 176
🔍 İşleniyor: pdf_folder\ornek202.pdf
📘 ornek202.pdf içinden çıkarılan topic sayısı: 7
🔍 İşleniyor: pdf_folder\ornek203.pdf
📘 ornek203.pdf içinden çıkarılan topic sayısı: 118
🔍 İşleniyor: pdf_folder\ornek204.pdf
📘 ornek204.pdf içinden çıkarılan topic sayısı: 158
🔍 İşleniyor: pdf_folder\ornek205.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek205.pdf içinden çıkarılan topic sayısı: 36
🔍 İşleniyor: pdf_folder\ornek206.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek206.pdf içinden çıkarılan topic sayısı: 28
🔍 İşleniyor: pdf_folder\ornek207.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek207.pdf içinden çıkarılan topic sayısı: 103
🔍 İşleniyor: pdf_folder\ornek208.pdf
📘 ornek208.pdf içinden çıkarılan topic sayısı: 5
🔍 İşleniyor: pdf_folder\ornek209.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek209.pdf içinden çıkarılan topic sayısı: 18
🔍 İşleniyor: pdf_folder\ornek21.pdf
📘 ornek21.pdf içinden çıkarılan topic sayısı: 172
🔍 İşleniyor: pdf_folder\ornek210.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek210.pdf içinden çıkarılan topic sayısı: 201
🔍 İşleniyor: pdf_folder\ornek211.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek211.pdf içinden çıkarılan topic sayısı: 67
🔍 İşleniyor: pdf_folder\ornek212.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek212.pdf içinden çıkarılan topic sayısı: 82
🔍 İşleniyor: pdf_folder\ornek213.pdf
📘 ornek213.pdf içinden çıkarılan topic sayısı: 10
🔍 İşleniyor: pdf_folder\ornek214.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek214.pdf içinden çıkarılan topic sayısı: 5
🔍 İşleniyor: pdf_folder\ornek215.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek215.pdf içinden çıkarılan topic sayısı: 15
🔍 İşleniyor: pdf_folder\ornek216.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek216.pdf içinden çıkarılan topic sayısı: 42
🔍 İşleniyor: pdf_folder\ornek217.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek217.pdf içinden çıkarılan topic sayısı: 13
🔍 İşleniyor: pdf_folder\ornek218.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek218.pdf içinden çıkarılan topic sayısı: 52
🔍 İşleniyor: pdf_folder\ornek219.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek219.pdf içinden çıkarılan topic sayısı: 7
🔍 İşleniyor: pdf_folder\ornek22.pdf
📘 ornek22.pdf içinden çıkarılan topic sayısı: 221
🔍 İşleniyor: pdf_folder\ornek220.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek220.pdf içinden çıkarılan topic sayısı: 83
🔍 İşleniyor: pdf_folder\ornek221.pdf
📘 ornek221.pdf içinden çıkarılan topic sayısı: 3
🔍 İşleniyor: pdf_folder\ornek222.pdf
📘 ornek222.pdf içinden çıkarılan topic sayısı: 25
🔍 İşleniyor: pdf_folder\ornek223.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek223.pdf içinden çıkarılan topic sayısı: 20
🔍 İşleniyor: pdf_folder\ornek224.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek224.pdf içinden çıkarılan topic sayısı: 20
🔍 İşleniyor: pdf_folder\ornek225.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek225.pdf içinden çıkarılan topic sayısı: 105
🔍 İşleniyor: pdf_folder\ornek226.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek226.pdf içinden çıkarılan topic sayısı: 46
🔍 İşleniyor: pdf_folder\ornek227.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek227.pdf içinden çıkarılan topic sayısı: 48
🔍 İşleniyor: pdf_folder\ornek228.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek228.pdf içinden çıkarılan topic sayısı: 49
🔍 İşleniyor: pdf_folder\ornek229.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek229.pdf içinden çıkarılan topic sayısı: 19
🔍 İşleniyor: pdf_folder\ornek23.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek23.pdf içinden çıkarılan topic sayısı: 42
🔍 İşleniyor: pdf_folder\ornek230.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek230.pdf içinden çıkarılan topic sayısı: 10
🔍 İşleniyor: pdf_folder\ornek231.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek231.pdf içinden çıkarılan topic sayısı: 27
🔍 İşleniyor: pdf_folder\ornek232.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek232.pdf içinden çıkarılan topic sayısı: 6
🔍 İşleniyor: pdf_folder\ornek233.pdf
📘 ornek233.pdf içinden çıkarılan topic sayısı: 4
🔍 İşleniyor: pdf_folder\ornek234.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek234.pdf içinden çıkarılan topic sayısı: 5
🔍 İşleniyor: pdf_folder\ornek235.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek235.pdf içinden çıkarılan topic sayısı: 38
🔍 İşleniyor: pdf_folder\ornek236.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek236.pdf içinden çıkarılan topic sayısı: 19
🔍 İşleniyor: pdf_folder\ornek237.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek237.pdf içinden çıkarılan topic sayısı: 6
🔍 İşleniyor: pdf_folder\ornek238.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek238.pdf içinden çıkarılan topic sayısı: 52
🔍 İşleniyor: pdf_folder\ornek239.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek239.pdf içinden çıkarılan topic sayısı: 23
🔍 İşleniyor: pdf_folder\ornek24.pdf
📘 ornek24.pdf içinden çıkarılan topic sayısı: 62
🔍 İşleniyor: pdf_folder\ornek240.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek240.pdf içinden çıkarılan topic sayısı: 80
🔍 İşleniyor: pdf_folder\ornek241.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek241.pdf içinden çıkarılan topic sayısı: 18
🔍 İşleniyor: pdf_folder\ornek242.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek242.pdf içinden çıkarılan topic sayısı: 12
🔍 İşleniyor: pdf_folder\ornek243.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek243.pdf içinden çıkarılan topic sayısı: 9
🔍 İşleniyor: pdf_folder\ornek244.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek244.pdf içinden çıkarılan topic sayısı: 4
🔍 İşleniyor: pdf_folder\ornek245.pdf
📘 ornek245.pdf içinden çıkarılan topic sayısı: 6
🔍 İşleniyor: pdf_folder\ornek246.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek246.pdf içinden çıkarılan topic sayısı: 82
🔍 İşleniyor: pdf_folder\ornek247.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek247.pdf içinden çıkarılan topic sayısı: 18
🔍 İşleniyor: pdf_folder\ornek248.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBB

📘 ornek248.pdf içinden çıkarılan topic sayısı: 164
🔍 İşleniyor: pdf_folder\ornek249.pdf
📘 ornek249.pdf içinden çıkarılan topic sayısı: 10
🔍 İşleniyor: pdf_folder\ornek25.pdf
📘 ornek25.pdf içinden çıkarılan topic sayısı: 512
🔍 İşleniyor: pdf_folder\ornek250.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek250.pdf içinden çıkarılan topic sayısı: 114
🔍 İşleniyor: pdf_folder\ornek251.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek251.pdf içinden çıkarılan topic sayısı: 47
🔍 İşleniyor: pdf_folder\ornek252.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek252.pdf içinden çıkarılan topic sayısı: 148
🔍 İşleniyor: pdf_folder\ornek253.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek253.pdf içinden çıkarılan topic sayısı: 19
🔍 İşleniyor: pdf_folder\ornek254.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek254.pdf içinden çıkarılan topic sayısı: 44
🔍 İşleniyor: pdf_folder\ornek255.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek255.pdf içinden çıkarılan topic sayısı: 57
🔍 İşleniyor: pdf_folder\ornek256.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek256.pdf içinden çıkarılan topic sayısı: 27
🔍 İşleniyor: pdf_folder\ornek257.pdf
📘 ornek257.pdf içinden çıkarılan topic sayısı: 6
🔍 İşleniyor: pdf_folder\ornek258.pdf
📘 ornek258.pdf içinden çıkarılan topic sayısı: 9
🔍 İşleniyor: pdf_folder\ornek259.pdf
📘 ornek259.pdf içinden çıkarılan topic sayısı: 123
🔍 İşleniyor: pdf_folder\ornek26.pdf
📘 ornek26.pdf içinden çıkarılan topic sayısı: 1398
🔍 İşleniyor: pdf_folder\ornek260.pdf
📘 ornek260.pdf içinden çıkarılan topic sayısı: 5
🔍 İşleniyor: pdf_folder\ornek261.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek261.pdf içinden çıkarılan topic sayısı: 95
🔍 İşleniyor: pdf_folder\ornek262.pdf
📘 ornek262.pdf içinden çıkarılan topic sayısı: 26
🔍 İşleniyor: pdf_folder\ornek263.pdf
📘 ornek263.pdf içinden çıkarılan topic sayısı: 145
🔍 İşleniyor: pdf_folder\ornek264.pdf
📘 ornek264.pdf içinden çıkarılan topic sayısı: 727
🔍 İşleniyor: pdf_folder\ornek265.pdf
📘 ornek265.pdf içinden çıkarılan topic sayısı: 21
🔍 İşleniyor: pdf_folder\ornek266.pdf
📘 ornek266.pdf içinden çıkarılan topic sayısı: 26
🔍 İşleniyor: pdf_folder\ornek267.pdf
📘 ornek267.pdf içinden çıkarılan topic sayısı: 31
🔍 İşleniyor: pdf_folder\ornek268.pdf
📘 ornek268.pdf içinden çıkarılan topic sayısı: 950
🔍 İşleniyor: pdf_folder\ornek269.pdf
📘 ornek269.pdf içinden çıkarılan topic sayısı: 213
🔍 İşleniyor: pdf_folder\ornek27.pdf
📘 ornek27.pdf içinden çıkarılan topic sayısı: 1065
🔍 İşleniyor: pdf_folder\ornek270.pdf
📘 ornek270.pdf içinden çıkarılan topic sayısı: 35
🔍 İşleniyor: pdf_folder\ornek271.pdf
📘 ornek271.pdf içinden çıkarılan topic 

Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek30.pdf içinden çıkarılan topic sayısı: 206
🔍 İşleniyor: pdf_folder\ornek300.pdf
📘 ornek300.pdf içinden çıkarılan topic sayısı: 30
🔍 İşleniyor: pdf_folder\ornek301.pdf
📘 ornek301.pdf içinden çıkarılan topic sayısı: 78
🔍 İşleniyor: pdf_folder\ornek302.pdf
📘 ornek302.pdf içinden çıkarılan topic sayısı: 20
🔍 İşleniyor: pdf_folder\ornek303.pdf
📘 ornek303.pdf içinden çıkarılan topic sayısı: 34
🔍 İşleniyor: pdf_folder\ornek304.pdf
📘 ornek304.pdf içinden çıkarılan topic sayısı: 84
🔍 İşleniyor: pdf_folder\ornek305.pdf
📘 ornek305.pdf içinden çıkarılan topic sayısı: 7
🔍 İşleniyor: pdf_folder\ornek306.pdf
📘 ornek306.pdf içinden çıkarılan topic sayısı: 48
🔍 İşleniyor: pdf_folder\ornek307.pdf
📘 ornek307.pdf içinden çıkarılan topic sayısı: 24
🔍 İşleniyor: pdf_folder\ornek308.pdf
📘 ornek308.pdf içinden çıkarılan topic sayısı: 804
🔍 İşleniyor: pdf_folder\ornek309.pdf
📘 ornek309.pdf içinden çıkarılan topic sayısı: 652
🔍 İşleniyor: pdf_folder\ornek31.pdf
📘 ornek31.pdf içinden çıkarılan topic sayıs

Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek32.pdf içinden çıkarılan topic sayısı: 219
🔍 İşleniyor: pdf_folder\ornek320.pdf
📘 ornek320.pdf içinden çıkarılan topic sayısı: 240
🔍 İşleniyor: pdf_folder\ornek321.pdf
📘 ornek321.pdf içinden çıkarılan topic sayısı: 25
🔍 İşleniyor: pdf_folder\ornek322.pdf
📘 ornek322.pdf içinden çıkarılan topic sayısı: 81
🔍 İşleniyor: pdf_folder\ornek323.pdf
📘 ornek323.pdf içinden çıkarılan topic sayısı: 40
🔍 İşleniyor: pdf_folder\ornek324.pdf
📘 ornek324.pdf içinden çıkarılan topic sayısı: 62
🔍 İşleniyor: pdf_folder\ornek325.pdf
📘 ornek325.pdf içinden çıkarılan topic sayısı: 39
🔍 İşleniyor: pdf_folder\ornek326.pdf
📘 ornek326.pdf içinden çıkarılan topic sayısı: 62
🔍 İşleniyor: pdf_folder\ornek327.pdf
📘 ornek327.pdf içinden çıkarılan topic sayısı: 77
🔍 İşleniyor: pdf_folder\ornek328.pdf
📘 ornek328.pdf içinden çıkarılan topic sayısı: 66
🔍 İşleniyor: pdf_folder\ornek329.pdf
📘 ornek329.pdf içinden çıkarılan topic sayısı: 41
🔍 İşleniyor: pdf_folder\ornek33.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek33.pdf içinden çıkarılan topic sayısı: 162
🔍 İşleniyor: pdf_folder\ornek330.pdf
📘 ornek330.pdf içinden çıkarılan topic sayısı: 29
🔍 İşleniyor: pdf_folder\ornek331.pdf
📘 ornek331.pdf içinden çıkarılan topic sayısı: 25
🔍 İşleniyor: pdf_folder\ornek332.pdf
📘 ornek332.pdf içinden çıkarılan topic sayısı: 51
🔍 İşleniyor: pdf_folder\ornek333.pdf
📘 ornek333.pdf içinden çıkarılan topic sayısı: 37
🔍 İşleniyor: pdf_folder\ornek334.pdf
📘 ornek334.pdf içinden çıkarılan topic sayısı: 9
🔍 İşleniyor: pdf_folder\ornek335.pdf
📘 ornek335.pdf içinden çıkarılan topic sayısı: 26
🔍 İşleniyor: pdf_folder\ornek336.pdf
📘 ornek336.pdf içinden çıkarılan topic sayısı: 17
🔍 İşleniyor: pdf_folder\ornek337.pdf
📘 ornek337.pdf içinden çıkarılan topic sayısı: 35
🔍 İşleniyor: pdf_folder\ornek338.pdf
📘 ornek338.pdf içinden çıkarılan topic sayısı: 25
🔍 İşleniyor: pdf_folder\ornek339.pdf
📘 ornek339.pdf içinden çıkarılan topic sayısı: 24
🔍 İşleniyor: pdf_folder\ornek34.pdf
📘 ornek34.pdf içinden çıkarılan topic sayısı:

Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek364.pdf içinden çıkarılan topic sayısı: 514
🔍 İşleniyor: pdf_folder\ornek365.pdf
📘 ornek365.pdf içinden çıkarılan topic sayısı: 11
🔍 İşleniyor: pdf_folder\ornek366.pdf
📘 ornek366.pdf içinden çıkarılan topic sayısı: 440
🔍 İşleniyor: pdf_folder\ornek367.pdf


Cannot set gray stroke color because /'P4063' is an invalid float value
Cannot set gray non-stroke color because /'P4063' is an invalid float value
Cannot set gray stroke color because /'P4064' is an invalid float value
Cannot set gray non-stroke color because /'P4064' is an invalid float value
Cannot set gray stroke color because /'P4065' is an invalid float value
Cannot set gray non-stroke color because /'P4065' is an invalid float value
Cannot set gray stroke color because /'P4066' is an invalid float value
Cannot set gray non-stroke color because /'P4066' is an invalid float value
Cannot set gray stroke color because /'P4067' is an invalid float value
Cannot set gray non-stroke color because /'P4067' is an invalid float value
Cannot set gray stroke color because /'P4068' is an invalid float value
Cannot set gray non-stroke color because /'P4068' is an invalid float value
Cannot set gray stroke color because /'P4114' is an invalid float value
Cannot set gray non-stroke color because

📘 ornek367.pdf içinden çıkarılan topic sayısı: 1138
🔍 İşleniyor: pdf_folder\ornek368.pdf
📘 ornek368.pdf içinden çıkarılan topic sayısı: 15
🔍 İşleniyor: pdf_folder\ornek369.pdf
📘 ornek369.pdf içinden çıkarılan topic sayısı: 133
🔍 İşleniyor: pdf_folder\ornek37.pdf
📘 ornek37.pdf içinden çıkarılan topic sayısı: 342
🔍 İşleniyor: pdf_folder\ornek370.pdf


Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek370.pdf içinden çıkarılan topic sayısı: 331
🔍 İşleniyor: pdf_folder\ornek371.pdf
📘 ornek371.pdf içinden çıkarılan topic sayısı: 25
🔍 İşleniyor: pdf_folder\ornek372.pdf
📘 ornek372.pdf içinden çıkarılan topic sayısı: 217
🔍 İşleniyor: pdf_folder\ornek373.pdf
📘 ornek373.pdf içinden çıkarılan topic sayısı: 3
🔍 İşleniyor: pdf_folder\ornek374.pdf
📘 ornek374.pdf içinden çıkarılan topic sayısı: 377
🔍 İşleniyor: pdf_folder\ornek375.pdf
📘 ornek375.pdf içinden çıkarılan topic sayısı: 83
🔍 İşleniyor: pdf_folder\ornek376.pdf
📘 ornek376.pdf içinden çıkarılan topic sayısı: 133
🔍 İşleniyor: pdf_folder\ornek377.pdf
📘 ornek377.pdf içinden çıkarılan topic sayısı: 161
🔍 İşleniyor: pdf_folder\ornek378.pdf
📘 ornek378.pdf içinden çıkarılan topic sayısı: 106
🔍 İşleniyor: pdf_folder\ornek379.pdf
📘 ornek379.pdf içinden çıkarılan topic sayısı: 70
🔍 İşleniyor: pdf_folder\ornek38.pdf
📘 ornek38.pdf içinden çıkarılan topic sayısı: 1
🔍 İşleniyor: pdf_folder\ornek380.pdf
📘 ornek380.pdf içinden çıkarılan topic sa

Could get FontBBox from font descriptor because None cannot be parsed as 4 floats
Could get FontBBox from font descriptor because None cannot be parsed as 4 floats


📘 ornek54.pdf içinden çıkarılan topic sayısı: 290
🔍 İşleniyor: pdf_folder\ornek55.pdf
📘 ornek55.pdf içinden çıkarılan topic sayısı: 79
🔍 İşleniyor: pdf_folder\ornek56.pdf
📘 ornek56.pdf içinden çıkarılan topic sayısı: 309
🔍 İşleniyor: pdf_folder\ornek57.pdf
📘 ornek57.pdf içinden çıkarılan topic sayısı: 110
🔍 İşleniyor: pdf_folder\ornek58.pdf
📘 ornek58.pdf içinden çıkarılan topic sayısı: 18
🔍 İşleniyor: pdf_folder\ornek59.pdf
📘 ornek59.pdf içinden çıkarılan topic sayısı: 33
🔍 İşleniyor: pdf_folder\ornek6.pdf
📘 ornek6.pdf içinden çıkarılan topic sayısı: 371
🔍 İşleniyor: pdf_folder\ornek60.pdf
📘 ornek60.pdf içinden çıkarılan topic sayısı: 105
🔍 İşleniyor: pdf_folder\ornek61.pdf
📘 ornek61.pdf içinden çıkarılan topic sayısı: 129
🔍 İşleniyor: pdf_folder\ornek62.pdf
📘 ornek62.pdf içinden çıkarılan topic sayısı: 217
🔍 İşleniyor: pdf_folder\ornek63.pdf
📘 ornek63.pdf içinden çıkarılan topic sayısı: 253
🔍 İşleniyor: pdf_folder\ornek64.pdf
📘 ornek64.pdf içinden çıkarılan topic sayısı: 205
🔍 İşleniy

In [None]:
import pandas as pd

# CSV dosyasını oku
df = pd.read_csv("data.csv")

# Aynı title ve content'e sahip tekrar eden satırları grupla
duplicate_rows = df.duplicated(subset=["title", "content"], keep="false")

# Bu satırların sayısını yazdır
count = duplicate_rows.sum()
print(f"🔁 Aynı title ve content'e sahip satır sayısı: {count}")

🔁 Aynı title ve content'e sahip satır sayısı: 0


In [9]:
# İstenmeyen sütunları çıkar
columns_to_drop = ["correct_color", "correct_size", "source_pdf", "font_size"]
df = df.drop(columns=columns_to_drop, errors='ignore')  # errors='ignore' eksik sütun varsa hata vermez

In [None]:
import pandas as pd

# veri.csv'yi oku
df_veri = pd.read_csv("veri.csv")

# mevcut df ile birleştir
df_birlesik = pd.concat([df, df_veri], ignore_index=True)

# Sonucu yeni dosyaya kaydet
df_birlesik.to_csv("birlesik_veri.csv", index=False)
print("✅ df ile veri.csv başarıyla birleştirildi ve birlesik_veri.csv olarak kaydedildi.")


✅ df ile veri.csv başarıyla birleştirildi ve birlesik_veri.csv olarak kaydedildi.


In [17]:
import pandas as pd
df=pd.read_csv("birlesik_veri.csv")
df.head()

Unnamed: 0,title,content
0,Getting Started,3DEXPERIENCE provides an easy-to-use interface...
1,Getting Started>Setting Up Your 3DEXPERIENCE P...,If you are the platform administrator you need...
2,Getting Started>Starting the 3DEXPERIENCE Plat...,Once you have been granted the required access...
3,"Getting Started>Discovering Roles, Apps, Busin...","You can discover all the roles, apps, business..."
4,Getting Started>Accessing a Dashboard,Dashboards let you gather and reuse content fr...


In [18]:
print(df.iloc[-10]["title"])

Installation and Setup > 3DEXPERIENCE Platform > 3DEXPERIENCE Configuration and Customization > 3DSpace Data Model > Unified Typing Reference Dictionary > Packages > XCADModeler Types


In [1]:
import pandas as pd

# Örneğin mevcut veri çerçevesi
new_df = pd.read_csv("data.csv")

# Yeni df oluştur: son 991 veriyi al
#new_df = combined_df.tail(991)


In [3]:
new_df.tail()

Unnamed: 0,title,content,correct_color,correct_size,source_pdf,font_size
57760,Systems Architecture>Object Naming and Types,The objects created in Preliminary Risk Assess...,True,True,ornek99.pdf,10.665
57761,Systems Architecture>Labels Configuration,Readers can refer to specific components in th...,True,True,ornek99.pdf,10.665
57762,Systems Architecture>Defining Time-Out Value,https://help.3ds.com/2024x/english/DSDoc/Front...,True,True,ornek99.pdf,10.665
57763,Systems Architecture>Attributes on Occurrences,Readers may need to define attributes on occur...,True,True,ornek99.pdf,10.665
57764,Systems Architecture>Custom Attributes for RAM...,"As an Administrator, you can use the Attribute...",True,True,ornek99.pdf,10.665


In [55]:
import pandas as pd

# Boşluk karakterlerini de boş sayabilmek için content'i temizle
new_df["content"] = new_df["content"].fillna("").astype(str).str.strip()

# Yeni DataFrame'e sadece şu satırları al:
# - Eğer title ilk kez görülüyorsa: her durumda dahil et
# - Eğer daha önce görüldüyse ama content boş değilse: dahil et
# - Eğer daha önce görüldüyse ve content boşsa: dahil etme

seen_titles = set()
clean_rows = []

for _, row in new_df.iterrows():
    title = row["title"]
    content = row["content"]

    if title not in seen_titles:
        clean_rows.append(row)
        seen_titles.add(title)
    else:
        if content.strip():  # boş değilse
            clean_rows.append(row)

# Yeni temiz DataFrame
clean_df = pd.DataFrame(clean_rows)

# Sonuç
print(f"🧹 Temizlenmiş veri satır sayısı: {clean_df.shape[0]}")

# CSV'ye kaydetmek istersen:
# clean_df.to_csv("veri_pdf_cleaned.csv", index=False)


🧹 Temizlenmiş veri satır sayısı: 682


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["content"] = new_df["content"].fillna("").astype(str).str.strip()


In [62]:
# content'i boş olan ilk 100 satırı filtrele
empty_content_rows = new_df[new_df["content"].fillna("").str.strip() == ""]

# İlk 100 satırı al
empty_content_rows_head = empty_content_rows

# CSV olarak kaydet
empty_content_rows_head.to_csv("a.csv", index=False, encoding="utf-8")

print("✅ İlk 100 boş içerikli satır a.csv dosyasına kaydedildi.")


✅ İlk 100 boş içerikli satır a.csv dosyasına kaydedildi.


In [32]:
# Aynı title’dan birden fazla varsa tümünü sil (tamamen benzersizler kalsın)
df_unique = new_df[~new_df["title"].duplicated(keep=False)]

# Sonuç
print(f"✅ Eşsiz title'lara sahip kalan satır sayısı: {df_unique.shape[0]}")

# Yeni CSV'ye istersen kaydet
# df_unique.to_csv("veri_pdf_unique.csv", index=False)

✅ Eşsiz title'lara sahip kalan satır sayısı: 317


In [None]:
# 🎯 Hedef font size
hedef_size = 10.665000408824994

# 📊 Bu font size'a sahip satırları filtrele
new_df = new_df[new_df["font_size"] == hedef_size].copy()

# ✅ Sonucu göster
print(f"🎯 Font size {hedef_size} olan satır sayısı: {hedef_df.shape[0]}")

In [24]:
filtered_df.tail

<bound method NDFrame.tail of                                                   title  \
6039                      Information Intelligence Apps   
6040      Information Intelligence Apps>B.I. Essentials   
6041           Information Intelligence Apps>What's New   
6042  Information Intelligence Apps>About B.I. Essen...   
6044  Information Intelligence Apps>Social and Colla...   
...                                                 ...   
7019            Information Intelligence Apps>Detecting   
7020              Information Intelligence Apps>Savings   
7021              Information Intelligence Apps>Tracked   
7022                 Information Intelligence Apps>Task   
7023          Information Intelligence Apps>Progression   

                                                content correct_color  \
6039  Powered by the 3DEXPERIENCE platform, Informat...          True   
6040  B.I. Essentials is an Information Intelligence...          True   
6041         There are no enhancements in 

In [14]:
import pandas as pd

# Boş veya sadece boşluk olan content satırlarını filtrele
new_df = new_df[new_df['content'].notna()]  # NaN olmayanlar
new_df = new_df[new_df['content'].str.strip() != ""]  # sadece boşluk içerenleri de çıkar

print("✅ Boş content'ler silindi ve dosya güncellendi.")


✅ Boş content'ler silindi ve dosya güncellendi.


In [16]:
# "Information Intelligence Apps>" ile başlayan title'ları filtrele
filtered = new_df[new_df['title'].str.startswith("Information Intelligence Apps>")]

# Sonucu yazdır
print(f"🔢 'Information Intelligence Apps>' ile başlayan satır sayısı: {len(filtered)}")

🔢 'Information Intelligence Apps>' ile başlayan satır sayısı: 550


In [8]:
combined_df.info

<bound method DataFrame.info of                                                   title  \
0                                       Getting Started   
1     Getting Started>Setting Up Your 3DEXPERIENCE P...   
2     Getting Started>Starting the 3DEXPERIENCE Plat...   
3     Getting Started>Discovering Roles, Apps, Busin...   
4                 Getting Started>Accessing a Dashboard   
...                                                 ...   
4629                       Fluid Systems>Specifications   
4630                       Fluid Systems>Administration   
4631  Fluid Systems>Data Setup: Fluidic Engineering ...   
4632  Fluid Systems>Accessing Engineering Specificat...   
4633  Fluid Systems>Types for Engineering Specificat...   

                                                content correct_color  \
0     3DEXPERIENCE provides an easy-to-use interface...           NaN   
1     If you are the platform administrator you need...           NaN   
2     Once you have been granted the req

In [None]:
import pandas as pd
df=pd.read_csv("veri_pdf.csv")
print(df.iloc[-2]["content"])