In [4]:
!pip install transformers datasets

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.3.2-py3-none-any.whl (485 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.4/485.4 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading

In [5]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, MarianMTModel, MarianTokenizer
from google.colab import files

In [9]:
# Summarization checkpoint: the same "t5-large" name is used to fetch both the
# seq2seq weights and the matching tokenizer.
model_name = "t5-large"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

def summarize_text(text, max_length=100, min_length=20):
    """Summarize input text using the module-level T5 ``tokenizer`` and ``model``.

    Args:
        text: Text to summarize. Only the first 512 tokens (after the
            "summarize: " prefix is added) are fed to the model; anything
            beyond that is truncated by the tokenizer.
        max_length: Upper bound passed to ``model.generate`` for the summary.
        min_length: Lower bound passed to ``model.generate`` for the summary.

    Returns:
        The decoded summary string, or ``""`` when ``text`` is empty or
        contains only whitespace.
    """
    # With nothing to summarize, min_length would still force the model to
    # produce tokens, i.e. an invented summary. Return early instead.
    if not text or not text.strip():
        return ""

    # T5 is a text-to-text model; the task is selected by this prefix.
    input_text = "summarize: " + text
    input_ids = tokenizer.encode(
        input_text, return_tensors="pt", max_length=512, truncation=True
    )
    # Beam search with 4 beams; length_penalty=2.0 is applied when scoring beams.
    summary_ids = model.generate(
        input_ids,
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Loaded MarianMT (tokenizer, model) pairs keyed by checkpoint name, so each
# language pair is loaded at most once instead of on every translate_text call.
_TRANSLATOR_CACHE = {}


def _load_translator(target_language):
    """Return the (tokenizer, model) pair for English -> ``target_language``, loading it on first use."""
    model_name = f"Helsinki-NLP/opus-mt-en-{target_language}"
    if model_name not in _TRANSLATOR_CACHE:
        _TRANSLATOR_CACHE[model_name] = (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )
    return _TRANSLATOR_CACHE[model_name]


# Function to translate text using MarianMT
def translate_text(text, target_language="fr"):
    """Translate input text to the specified language using a MarianMT model.

    Args:
        text: Text to translate. Only the first 512 tokens are fed to the
            model; anything beyond that is truncated by the tokenizer.
        target_language: Language code appended to
            "Helsinki-NLP/opus-mt-en-" to pick the checkpoint.

    Returns:
        The decoded translation string, or ``""`` when ``text`` is empty or
        contains only whitespace.

    Any error raised by ``from_pretrained`` for an unknown checkpoint name
    propagates to the caller unchanged.
    """
    # Nothing to translate: skip loading a checkpoint altogether.
    if not text or not text.strip():
        return ""

    translator_tokenizer, translator_model = _load_translator(target_language)

    input_ids = translator_tokenizer.encode(
        text, return_tensors="pt", max_length=512, truncation=True
    )
    translation_ids = translator_model.generate(
        input_ids, max_length=512, num_beams=4, early_stopping=True
    )
    return translator_tokenizer.decode(translation_ids[0], skip_special_tokens=True)

def summarize_and_translate(text, target_language="ar"):
    """Condense ``text`` with ``summarize_text``, then translate that summary.

    Returns a ``(summary, translation)`` tuple: the English summary and the
    result of passing it to ``translate_text`` with ``target_language``.
    """
    condensed = summarize_text(text)
    return condensed, translate_text(condensed, target_language)

# Ask user for task
print("Choose a task: 1 for Summarization  2 for Translation  3 for Summarization & Translation")
# Strip so stray whitespace around the digit is not treated as an invalid choice.
task = input("Enter 1, 2, or 3: ").strip()

if task not in ("1", "2", "3"):
    # Reject a bad choice up front, before the user has gone through an upload.
    print("Invalid choice. Please restart the script and select a valid option.")
else:
    # Upload text file
    uploaded = files.upload()

    # Ask for the target language once (not once per uploaded file), and only
    # when there is something to translate.
    target_lang = None
    if uploaded and task in ("2", "3"):
        prompt = (
            "Enter target language (e.g., fr, de, es, ar): "
            if task == "2"
            else "Enter target language for translation (e.g., fr, de, es, ar): "
        )
        target_lang = input(prompt).strip()

    for filename in uploaded.keys():
        # The file only needs to be open while reading; the model calls below
        # work on the in-memory string.
        with open(filename, "r", encoding="utf-8") as file:
            text = file.read()

        # Preview at most the first 500 characters; print the "..." marker
        # only when the text was actually cut off.
        preview = [text[:500]] + (["..."] if len(text) > 500 else [])
        print("\nOriginal Text:\n", *preview)

        if task == "1":
            result = summarize_text(text)
            print("\nSummarized Text:\n", result)
        elif task == "2":
            result = translate_text(text, target_lang)
            print(f"\nTranslated Text ({target_lang}):\n", result)
        else:  # task == "3"
            summary, translation = summarize_and_translate(text, target_lang)
            print("\nSummarized Text:\n", summary)
            print(f"\nTranslated Summary ({target_lang}):\n", translation)


Choose a task: 1 for Summarization  2 for Translation  3 for Summarization & Translation
Enter 1, 2, or 3: 3


Saving test.txt to test (4).txt
Enter target language for translation (e.g., fr, de, es, ar): ar

Original Text:
 snow, the solid form of water that crystallizes in the atmosphere and, falling to the Earth, covers, permanently or temporarily, about 23 percent of the Earth’s surface.

Snow cover has a significant effect on climate and on plant, animal, and human life. By increasing the reflection of solar radiation and interfering with the conduction of heat from the ground, it induces a cold climate. The low heat conduction protects small plants from the effects of the lowest winter temperatures; on the oth ...

Summarized Text:
 snow cover has a significant effect on climate and on plant, animal, and human life . by increasing the reflection of solar radiation, it induces a cold climate . the late disappearance of snow in the spring delays the growth of plants . heavy snowfalls can severely hinder transporation in variable moderate climates .

Translated Summary (ar):
 فالغطاء الثلجي 