<a href="https://colab.research.google.com/github/manikantapolaki/AI-Agent-Project/blob/main/code/wikipedia_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install wikipedia transformers pandas langdetect sentencepiece --quiet

In [None]:
from transformers import pipeline
import wikipedia
import pandas as pd
from langdetect import detect, LangDetectException
!pip install diffusers
import torch
from diffusers import DiffusionPipeline
import os

import requests


# Choose the device at runtime: a hard-coded "cuda" raises on CPU-only
# runtimes, which would abort the cell before the text models are loaded.
_device = "cuda" if torch.cuda.is_available() else "cpu"

# Stable Diffusion pipeline used by the optional image-generation step.
gen = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
gen.to(_device)

# Summarization pipeline: try the preferred model first, then the smaller
# fallback. `summarizer` stays None when neither can be loaded; the failure
# reason is printed instead of being silently discarded.
summarizer = None
for _model_name in ("sshleifer/distilbart-cnn-12-6", "t5-small"):
    try:
        summarizer = pipeline("summarization", model=_model_name)
        break
    except Exception as e:
        print(f"Could not load summarization model '{_model_name}': {e}")

# English -> Hindi translation pipeline. `translator_en_to_target` stays None
# when loading fails so later cells can skip translation.
translator_en_to_target = None
try:
    translator_en_to_target = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
except Exception as e:
    print(f"Could not load translation model 'Helsinki-NLP/opus-mt-en-hi': {e}")

In [None]:
def get_user_input():
    """Prompt for the search topic and the processing options.

    Surrounding whitespace is stripped from every answer, and the yes/no
    answers are lower-cased with the shorthands ``y``/``n`` expanded, so
    that the ``== 'yes'`` checks made by the callers behave as the user
    intended.

    Returns:
        A tuple ``(topic, summarize_choice, translate_choice,
        target_language)``. ``target_language`` is ``None`` unless the user
        answered yes to the translation question.
    """
    shorthands = {"y": "yes", "n": "no"}

    def ask_yes_no(prompt):
        # Normalise the answer; anything unrecognised is returned as typed
        # (lower-cased, stripped) and is therefore treated as "not yes".
        answer = input(prompt).strip().lower()
        return shorthands.get(answer, answer)

    topic = input("Enter the topic you want to search on WEB: ").strip()
    summarize_choice = ask_yes_no("Do you want to summarize the data? (yes/no): ")
    translate_choice = ask_yes_no("Do you want to translate the data? (yes/no): ")

    target_language = None
    if translate_choice == 'yes':
        target_language = input("Enter the target language code for translation (e.g., 'hi' for Hindi, 'fr' for French, 'es' for Spanish): ").strip().lower()
    return topic, summarize_choice, translate_choice, target_language

In [None]:
def get_wikipedia_data(topic):
    """Fetch a ten-sentence Wikipedia summary for ``topic``.

    When the title is ambiguous, the suggestions are printed and the user
    is asked for a more specific title, which is then looked up in turn.

    Args:
        topic: Title or search phrase to look up.

    Returns:
        The summary text, or ``None`` when the topic is blank, the page is
        not found, or any other error occurs during the lookup.
    """
    # A blank query can never match a page; bail out before calling the
    # library. This also ends the disambiguation dialogue cleanly when the
    # user just presses Enter.
    if not topic or not topic.strip():
        print("Error: No topic provided for the Wikipedia search.")
        return None

    try:
        print(f"\nSearching Wikipedia for: '{topic}'...")
        wiki_summary = wikipedia.summary(topic, sentences=10)
        print("Wikipedia summary retrieved.")
        return wiki_summary
    except wikipedia.exceptions.PageError:
        print(f"Error: Wikipedia page for '{topic}' not found.")
        return None
    except wikipedia.exceptions.DisambiguationError as e:
        print(f"Disambiguation Error: '{topic}' could refer to multiple things. Suggestions: {e.options}")
        choice = input("Please be more specific or choose one from the suggestions (enter full suggestion): ")
        # Retry with the user's refined title.
        return get_wikipedia_data(choice)
    except Exception as e:
        print(f"An unexpected error occurred while fetching Wikipedia data: {e}")
        return None

In [None]:
# On-demand English -> <language> translation pipelines, keyed by language code.
_TRANSLATOR_CACHE = {}


def _get_translator(target_language):
    """Return an English-to-``target_language`` pipeline, or None if unavailable."""
    # No explicit target: fall back to whatever translator was preloaded.
    if not target_language:
        return translator_en_to_target
    # The preloaded pipeline is the English -> Hindi model.
    if target_language == 'hi' and translator_en_to_target:
        return translator_en_to_target
    if target_language in _TRANSLATOR_CACHE:
        return _TRANSLATOR_CACHE[target_language]

    # The code is interpolated into a model identifier, so only accept
    # plain language codes (letters/digits, optionally '-' or '_').
    if not target_language.replace('-', '').replace('_', '').isalnum():
        print(f"Invalid target language code: '{target_language}'.")
        return None

    model_name = f"Helsinki-NLP/opus-mt-en-{target_language}"
    try:
        translator = pipeline("translation", model=model_name)
    except Exception as e:
        print(f"Could not load translation model '{model_name}': {e}")
        return None
    _TRANSLATOR_CACHE[target_language] = translator
    return translator


def process_data(text, summarize_choice, translate_choice, target_language):
    """Optionally summarize and translate ``text``.

    Args:
        text: The original text.
        summarize_choice: ``'yes'`` to add a summary.
        translate_choice: ``'yes'`` to add a translation.
        target_language: Language code used both to pick the translation
            model and to label the translated entry.

    Returns:
        A dict that always contains ``"Original Text"`` and, depending on
        the choices, ``"Summarized Text"`` and
        ``"Translated Text (<target_language>)"``. When a step cannot be
        carried out, the corresponding value is an explanatory message
        rather than the processed text.
    """
    processed_data = {"Original Text": text}

    if summarize_choice == 'yes':
        if summarizer:
            print("\nSummarizing data...")
            try:
                # truncation=True keeps over-long inputs within the model's
                # input limit instead of failing.
                summary = summarizer(text, min_length=50, max_length=200, truncation=True)[0]['summary_text']
                processed_data["Summarized Text"] = summary
                print("Data summarized.")
            except Exception as e:
                print(f"Error during summarization: {e}")
                processed_data["Summarized Text"] = "Summarization failed."
        else:
            processed_data["Summarized Text"] = "Summarizer model not loaded, skipping summarization."

    if translate_choice == 'yes':
        translation_key = f"Translated Text ({target_language})"
        # Use a translator that actually targets the requested language, so
        # the label on the result matches its content.
        translator = _get_translator(target_language)
        if not translator:
            processed_data[translation_key] = "Translation model not loaded, skipping translation."
            return processed_data

        print(f"\nDetecting language of the original text for translation...")
        try:
            detected_lang = detect(text)
        except LangDetectException as e:
            print(f"Error detecting language: {e}. Skipping translation.")
            processed_data[translation_key] = "Language detection failed, skipping translation."
            return processed_data
        print(f"Detected language: {detected_lang}")

        # The translation models used here expect English source text.
        if detected_lang != 'en':
            print(f"Skipping translation: Translator is set for English source, but detected language is {detected_lang}.")
            processed_data[translation_key] = f"Skipped (source language not English)."
            return processed_data

        print(f"Translating data to {target_language}...")
        try:
            translation_result = translator(text)[0]['translation_text']
            processed_data[translation_key] = translation_result
            print("Data translated.")
        except Exception as e:
            print(f"Error during translation: {e}")
            processed_data[translation_key] = "Translation failed."

    return processed_data

In [None]:
def generate_output_file(data, topic):
    """Save ``data`` as ``<topic>_data.txt`` and ``<topic>_data.csv``.

    Both files are written to the current working directory. Spaces and
    path separators in ``topic`` are replaced with underscores, so topics
    such as "AC/DC" produce a valid file name and a topic can never direct
    the output into another directory.

    Args:
        data: Mapping of section title to text.
        topic: Topic used to build the file names.

    Errors while writing are reported with ``print``; the two files are
    attempted independently, so a failure of one does not prevent the other.
    """
    safe_topic = topic.replace(' ', '_')
    for separator in (os.sep, os.altsep):
        if separator:  # os.altsep is None on platforms with one separator
            safe_topic = safe_topic.replace(separator, '_')

    # Plain-text report: one "--- title ---" header per entry.
    file_name = f"{safe_topic}_data.txt"
    try:
        with open(file_name, 'w', encoding='utf-8') as f:
            for key, value in data.items():
                f.write(f"--- {key} ---\n")
                f.write(f"{value}\n\n")
        print(f"\nData successfully saved to '{file_name}'")
    except Exception as e:
        print(f"Error saving data to file: {e}")

    # CSV export: a single row whose columns are the section titles.
    csv_file_name = f"{safe_topic}_data.csv"
    try:
        df = pd.DataFrame([data])
        df.to_csv(csv_file_name, index=False, encoding='utf-8')
        print(f"Data also saved to '{csv_file_name}' as CSV.")
    except Exception as e:
        print(f"Error saving data to CSV: {e}")

In [None]:
# Driver cell: collect the options, fetch the article summary, process it,
# save the results and optionally generate an image for the topic.
topic, summarize_choice, translate_choice, target_language = get_user_input()

wikipedia_text = get_wikipedia_data(topic)

if wikipedia_text:
    processed_data = process_data(wikipedia_text, summarize_choice, translate_choice, target_language)
    generate_output_file(processed_data, topic)

    # Numeric answers keep their meaning (0 = no, non-zero = yes). A
    # non-numeric answer no longer raises ValueError: "yes"/"y" count as
    # yes and anything else (including an empty answer) as no.
    answer = input("Would u like to generate a img (0/1): ").strip().lower()
    try:
        ig = int(answer)
    except ValueError:
        ig = 1 if answer in ("yes", "y") else 0

    if ig:
        img = gen(topic).images[0]
        display(img)
    else:
        print("No image generated")
else:
    print("No data collected from Wikipedia. Exiting.")

In [None]:


# Print the saved text report for the current topic.
try:
    # Build the report name from the topic. Path separators are replaced
    # along with spaces so the name always refers to a file in the current
    # working directory rather than to a nested path.
    safe_topic = topic.replace(' ', '_')
    for _separator in (os.sep, os.altsep):
        if _separator:  # os.altsep is None on platforms with one separator
            safe_topic = safe_topic.replace(_separator, '_')
    file_to_display = f"{safe_topic}_data.txt"
    print(f"\n--- Displaying the content of '{file_to_display}' ---")
    print()
    with open(file_to_display, 'r', encoding='utf-8') as f:
        print(f.read())
    print()
    print(f"--- End of file '{file_to_display}' ---")
except NameError:
    print("The 'topic' variable is not defined. Please run the previous cell first to get the topic.")
except FileNotFoundError:
    print(f"File '{file_to_display}' not found. Make sure you ran the previous cell and it successfully generated the file.")
except Exception as e:
    print(f"An error occurred while displaying the file: {e}")


