<a href="https://colab.research.google.com/github/pranee31/Knowledge-Alignment-Module/blob/main/dsp_lab1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import wikipedia
import requests
import time
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

def fetch_wikipedia_knowledge(target_string):
    """Return the Wikipedia summary text for ``target_string``.

    The exact title is looked up first with auto-suggestion disabled,
    because the library's suggestion step can silently replace the query
    with a different article (e.g. "mango" resolving to "Manga"). Only when
    the exact title does not exist is the suggestion-based lookup used as a
    fallback.

    Args:
        target_string: Title or topic to look up.

    Returns:
        The summary string on success; ``"No Wikipedia summary found."`` when
        no page matches; otherwise a string starting with
        ``"Error fetching Wikipedia summary: "`` describing the failure
        (this includes ambiguous titles and network errors).
    """
    try:
        try:
            # Exact-title lookup: never let the search suggestion rewrite the query.
            return wikipedia.summary(target_string, auto_suggest=False)
        except wikipedia.exceptions.PageError:
            # No page under that exact title: fall back to the suggested title.
            return wikipedia.summary(target_string, auto_suggest=True)
    except wikipedia.exceptions.PageError:
        return "No Wikipedia summary found."
    except Exception as e:  # Best-effort: report any other failure as text
        return f"Error fetching Wikipedia summary: {e}"

def fetch_google_search_results(target_string, max_retries=3, delay=5, timeout=10):
    """Scrape result snippets for ``target_string`` from a Google search page.

    Only the HTTP request is retried; parsing the returned HTML is
    deterministic, so a page without the expected markup yields an empty
    list immediately instead of triggering further requests.

    Args:
        target_string: Search query. It is passed via ``params`` so that
            spaces and special characters are URL-encoded.
        max_retries: Maximum number of request attempts.
        delay: Seconds to wait between failed attempts.
        timeout: Per-request timeout in seconds.

    Returns:
        A list of snippet strings; empty when the request ultimately fails
        or no snippet could be extracted.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'}

    response = None
    for attempt in range(max_retries):
        try:
            response = requests.get(
                "https://www.google.com/search",
                params={"q": target_string},
                headers=headers,
                timeout=timeout,
            )
            # Treat HTTP error statuses (e.g. rate limiting) as retryable failures.
            response.raise_for_status()
            break
        except requests.RequestException as e:
            response = None
            if attempt < max_retries - 1:
                print(f"Error fetching Google Search results: {e}. Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                print(f"Failed to fetch Google Search results after {max_retries} attempts.")

    if response is None:
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # NOTE(review): the 'g' / 's3' class names are kept from the original
    # code; Google's markup changes over time -- confirm they still match.
    snippets = []
    for result in soup.find_all('div', class_='g'):
        snippet_node = result.find('div', class_='s3')
        # A result block without a snippet element is skipped rather than
        # raising AttributeError on ``None.text``.
        if snippet_node is None:
            continue
        snippet_text = snippet_node.text
        if snippet_text.strip():
            snippets.append(snippet_text)
    return snippets

# Loaded sentence-embedding models, keyed by model name, so that repeated
# similarity computations reuse one instance instead of reloading it.
_SIMILARITY_MODEL_CACHE = {}


def _get_similarity_model(model_name='all-MiniLM-L6-v2'):
    """Return the cached SentenceTransformer for ``model_name``, loading it once."""
    model = _SIMILARITY_MODEL_CACHE.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        _SIMILARITY_MODEL_CACHE[model_name] = model
    return model


def calculate_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The similarity as a plain ``float``. If loading the model, encoding,
        or the similarity computation raises, the error is printed and
        ``0.0`` is returned as a default score.
    """
    try:
        # Model lookup stays inside the try so a load failure also falls
        # back to the default score.
        model = _get_similarity_model()
        embeddings = model.encode([text1, text2])
        similarity_score = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
        return float(similarity_score)
    except Exception as e:  # Best-effort: any failure yields the default score
        print(f"Error calculating similarity: {e}")
        return 0.0  # Return 0 as a default similarity score

def semantic_similarity_verification(target_string, wikipedia_summary, google_snippets):
    """Pick the text most semantically similar to ``target_string``.

    Args:
        target_string: The query the candidate texts are compared against.
        wikipedia_summary: Candidate text obtained from Wikipedia.
        google_snippets: Candidate snippet strings from Google; may be empty.

    Returns:
        A ``(source_name, text)`` tuple. ``source_name`` is ``"Wikipedia"``
        when there are no snippets or when the summary scores strictly
        higher than every snippet; otherwise it is ``"Google Snippets"`` and
        ``text`` is the highest-scoring snippet.
    """
    # Handle empty google_snippets list before doing any similarity work:
    # with nothing to compare against, the summary wins by default.
    if not google_snippets:
        print("No Google Search results found. Using Wikipedia summary only.")
        return "Wikipedia", wikipedia_summary

    wiki_similarity = calculate_similarity(target_string, wikipedia_summary)

    # Keep each score paired with its snippet so the best snippet itself
    # (not just its score) can be returned.
    scored_snippets = [
        (calculate_similarity(target_string, snippet), snippet)
        for snippet in google_snippets
    ]
    best_score, best_snippet = max(scored_snippets, key=lambda pair: pair[0])

    if wiki_similarity > best_score:
        return "Wikipedia", wikipedia_summary
    return "Google Snippets", best_snippet

def main():
    """Prompt for a target string and print the best-matching knowledge text.

    Reads the query from standard input, gathers the Wikipedia summary and
    the Google snippets for it, asks the similarity check which source fits
    best, and prints that source's name followed by its text.
    """
    query = input("Enter the target string: ")

    summary = fetch_wikipedia_knowledge(query)
    snippets = fetch_google_search_results(query)

    verdict = semantic_similarity_verification(query, summary, snippets)
    source_label, best_text = verdict

    print(f"Most relevant source: {source_label}")
    # One call emits the heading and the text on consecutive lines.
    print("\nRelevant Knowledge:", best_text, sep="\n")


if __name__ == "__main__":
    main()

Enter the target string: mango
Error fetching Google Search results: 'NoneType' object has no attribute 'text'. Retrying in 5 seconds...
Error fetching Google Search results: 'NoneType' object has no attribute 'text'. Retrying in 5 seconds...
Failed to fetch Google Search results after 3 attempts.
No Google Search results found. Using Wikipedia summary only.
Most relevant source: Wikipedia

Relevant Knowledge:
Manga (漫画, IPA: [maŋga] ) are comics or graphic novels originating from Japan. Most manga conform to a style developed in Japan in the late 19th century, and the form has a long history in earlier Japanese art. The term manga is used in Japan to refer to both comics and cartooning. Outside of Japan, the word is typically used to refer to comics originally published in Japan.
In Japan, people of all ages and walks of life read manga. The medium includes works in a broad range of genres: action, adventure, business and commerce, comedy, detective, drama, historical, horror, mystery

In [None]:
!pip install wikipedia requests beautifulsoup4 sentence-transformers scikit-learn

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11679 sha256=9bbcab6a7b1087eff06e94586101a5f7292b2bb8578182c2f11e9f846a301a5e
  Stored in directory: /root/.cache/pip/wheels/5e/b6/c5/93f3dec388ae76edc830cb42901bb0232504dfc0df02fc50de
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0
