<a href="https://colab.research.google.com/github/sarahibadi/Frankenstein/blob/main/Frankenstein.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install transformers sentencepiece sacrebleu requests -q

from transformers import pipeline
import requests
import textwrap
import sacrebleu

book_url = "https://www.gutenberg.org/cache/epub/84/pg84.txt"

response = requests.get(book_url)
full_text = response.text

print("Raw text preview:\n")
print(full_text[:800])

lines = full_text.splitlines()

clean_lines = lines[50:-200]
clean_text = "\n".join(clean_lines)

print("\nCleaned text preview:\n")
print(clean_text[:800])

paragraphs = [p.strip() for p in clean_text.split("\n\n") if p.strip()]

print(f"\nTotal paragraphs extracted: {len(paragraphs)}")
print("\nExample paragraph:\n")
print(paragraphs[0])

MAX_PARAGRAPHS = 200
paragraphs = paragraphs[:MAX_PARAGRAPHS]
print(f"\nUsing first {len(paragraphs)} paragraphs for translation.")

MAX_CHARS_PER_PARAGRAPH = 400

shortened_paragraphs = []
for p in paragraphs:
    if len(p) > MAX_CHARS_PER_PARAGRAPH:
        shortened_paragraphs.append(p[:MAX_CHARS_PER_PARAGRAPH])
    else:
        shortened_paragraphs.append(p)

paragraphs = shortened_paragraphs

print("\nLoading translation pipeline (this may take a moment)...")

translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-en-ar"
)

print("Translation pipeline loaded.")


translated_paragraphs = []

print("\nTranslating paragraphs...")

for idx, p in enumerate(paragraphs):
    if not p.strip():
        translated_paragraphs.append("")
        continue

    result = translator(p)[0]["translation_text"]
    translated_paragraphs.append(result)

    if idx < 3:
        print("\n----------------------------------------")
        print(f"Paragraph {idx+1} (English):\n")
        print(textwrap.fill(p, width=80))
        print("\nParagraph {idx+1} (Arabic):\n")
        print(textwrap.fill(result, width=80))

print("\nTranslation finished.")
print(f"Total translated paragraphs: {len(translated_paragraphs)}")


print("\n===== SAMPLE TRANSLATIONS (EN -> AR) =====\n")
for i in [0, 1, 2, 10, 20]:
    if i < len(paragraphs):
        print(f"--- Paragraph {i+1} ---")
        print("English:\n")
        print(textwrap.fill(paragraphs[i], width=80))
        print("\nArabic:\n")
        print(textwrap.fill(translated_paragraphs[i], width=80))
        print("\n" + "="*80 + "\n")


test_sentences_en = [
    "You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings.",
    "I am already far north of London, and as I walk in the streets of Petersburgh, I feel a cold northern breeze play upon my cheeks.",
    "I have no ambition to lose my life on the post road between St. Petersburgh and Archangel."
]

print("Translating test sentences for BLEU evaluation...\n")

preds_ar = []
for s in test_sentences_en:
    ar = translator(s)[0]["translation_text"]
    preds_ar.append(ar)
    print("EN:", s)
    print("AR (model):", ar)
    print("-" * 80)


references_ar = [
    "سيسرّك أن تسمعي أنه لم تصب أي كارثة بداية هذه الرحلة التي كنت تنظرين إليها بكل هذا التشاؤم.",
    "أنا الآن في شمال لندن، وعندما أمشي في شوارع بطرسبرغ أشعر بنسيم شمالي بارد يلامس خدي.",
    "ليست لدي أي رغبة في أن أفقد حياتي على الطريق البري بين سانت بطرسبرغ وأرخانغيلسك."
]

bleu = sacrebleu.corpus_bleu(preds_ar, [references_ar])

print("\n===== BLEU SCORE (Toy Example) =====")
print(f"BLEU score: {bleu.score:.2f}")
print("\nNote: This BLEU score is computed on only 3 sentences with approximate human references.")
print("It is for demonstration purposes only and is not a rigorous evaluation of the model.")


Raw text preview:

﻿The Project Gutenberg eBook of Frankenstein; Or, The Modern Prometheus
    
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms
of the Project Gutenberg License included with this ebook or online
at www.gutenberg.org. If you are not located in the United States,
you will have to check the laws of the country where you are located
before using this eBook.

Title: Frankenstein; Or, The Modern Prometheus

Author: Mary Wollstonecraft Shelley

Release date: October 1, 1993 [eBook #84]
                Most recently updated: September 8, 2025

Language: English

Credits: Judith Boss, Christy Phillips, Lynn Hanninen an

Cleaned text preview:

 Chapter 13
 Chapter 14
 Chapter 15
 Chapter 16
 Chapter 17
 Chapter 18
 Chapter 19
 Chapter 20
 Chapter 21
 Chapter 22
 Chapter 23
 Chapter 24




Letter 1

Device set to use cpu


Translation pipeline loaded.

Translating paragraphs...

----------------------------------------
Paragraph 1 (English):

Chapter 13  Chapter 14  Chapter 15  Chapter 16  Chapter 17  Chapter 18  Chapter
19  Chapter 20  Chapter 21  Chapter 22  Chapter 23  Chapter 24

Paragraph {idx+1} (Arabic):

الفصل 13 الفصل 13 الفصل 13 الفصل 13 الفصل 14 الفصل 15 الفصل 16 الفصل 17 الفصل 18
الفصل 18

----------------------------------------
Paragraph 2 (English):

Letter 1

Paragraph {idx+1} (Arabic):

١ م م م م م م م م م م م م م م م

----------------------------------------
Paragraph 3 (English):

_To Mrs. Saville, England._

Paragraph {idx+1} (Arabic):

إلى السّيدة سافل، إنجلترا.

Translation finished.
Total translated paragraphs: 200

===== SAMPLE TRANSLATIONS (EN -> AR) =====

--- Paragraph 1 ---
English:

Chapter 13  Chapter 14  Chapter 15  Chapter 16  Chapter 17  Chapter 18  Chapter
19  Chapter 20  Chapter 21  Chapter 22  Chapter 23  Chapter 24

Arabic:

الفصل 13 الفصل 13 الفصل 13 الفصل 13 الفصل 1