In [68]:
# !pip install wikipedia

In [69]:
# pip install wikipedia requests beautifulsoup4 googlesearch-python

In [89]:
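# pip install concurrent-log-handler  # not required here: the code below only imports concurrent.futures, which is part of the standard library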

In [65]:
import wikipedia
import requests
from tkinter import Tk, Frame, Entry, Button, Text, Scrollbar, END, Radiobutton, StringVar, Label, messagebox
from tkinter.ttk import Progressbar
import webbrowser
from bs4 import BeautifulSoup
import threading
from googlesearch import search
import re

class ContentFetcher:
    """Tkinter window that looks a topic up on Wikipedia and on the web.

    The user types a topic, picks one of three modes ("verified",
    "wiki_summary", "full") and presses Search.  The lookup runs in a
    background thread; the finished text is handed back to the UI thread,
    which is the only thread that touches Tk widgets.
    """

    # Background colour shared by the window and its frames.
    _BACKGROUND = "#f0f2f5"
    # Delay (ms) between checks for a finished background search.
    _POLL_INTERVAL_MS = 100
    # One-element tuple holding the finished result text, or None while
    # nothing is waiting to be displayed.  Written by the worker thread,
    # consumed by `_poll_result` on the UI thread.
    _pending_result = None

    def __init__(self, root):
        """Attach the fetcher UI to *root* (a Tk root window)."""
        self.root = root
        self.setup_gui()

    def setup_gui(self):
        """Create and lay out every widget of the main window."""
        bg = self._BACKGROUND
        self.root.title("Smart Content Fetcher")
        self.root.geometry("800x600")
        self.root.resizable(True, True)
        self.root.configure(bg=bg)

        # Main container
        main_frame = Frame(self.root, bg=bg, padx=20, pady=20)
        main_frame.pack(fill="both", expand=True)

        # Search section
        search_frame = Frame(main_frame, bg=bg)
        search_frame.pack(fill="x", pady=(0, 15))

        Label(search_frame, text="Search Topic:", bg=bg,
              font=("Arial", 12)).pack(side="left", padx=(0, 10))

        self.search_entry = Entry(search_frame, font=("Arial", 12), width=40)
        self.search_entry.pack(side="left", fill="x", expand=True, padx=(0, 10))

        self.search_btn = Button(search_frame, text="Search", command=self.start_search,
                                 bg="#4CAF50", fg="white", font=("Arial", 10, "bold"))
        self.search_btn.pack(side="left")

        # Options frame: one radio button per content mode
        options_frame = Frame(main_frame, bg=bg)
        options_frame.pack(fill="x", pady=(0, 15))

        self.content_type = StringVar(value="verified")
        modes = (
            ("Verified Summary", "verified"),
            ("Wikipedia Summary", "wiki_summary"),
            ("Full Content", "full"),
        )
        for label, value in modes:
            Radiobutton(options_frame, text=label, variable=self.content_type,
                        value=value, bg=bg,
                        font=("Arial", 10)).pack(side="left", padx=(0, 10))

        # Results section
        results_frame = Frame(main_frame, bg="#ffffff", bd=2, relief="solid")
        results_frame.pack(fill="both", expand=True)

        self.results_text = Text(results_frame, font=("Arial", 11), wrap="word",
                                 padx=10, pady=10, state="disabled")
        scrollbar = Scrollbar(results_frame, command=self.results_text.yview)
        self.results_text.configure(yscrollcommand=scrollbar.set)

        scrollbar.pack(side="right", fill="y")
        self.results_text.pack(fill="both", expand=True)

        # Progress bar
        self.progress = Progressbar(main_frame, mode="indeterminate")
        self.progress.pack(fill="x", pady=(10, 0))

        # Bottom buttons
        bottom_frame = Frame(main_frame, bg=bg)
        bottom_frame.pack(fill="x", pady=(15, 0))

        Button(bottom_frame, text="Search Web", command=self.search_web,
               bg="#2196F3", fg="white", font=("Arial", 10)).pack(side="left", padx=(0, 10))
        Button(bottom_frame, text="Clear", command=self.clear_results,
               bg="#f44336", fg="white", font=("Arial", 10)).pack(side="left")

    def start_search(self):
        """Validate the query, then run the lookup in a background thread."""
        query = self.search_entry.get().strip()
        if not query:
            messagebox.showwarning("Warning", "Please enter a search term")
            return

        self.progress.start()
        self.search_btn.config(state="disabled")
        self.display_results("Searching... Please wait.")

        # Read the Tk variable here, on the UI thread, so the worker never
        # has to touch Tk state.
        content_type = self.content_type.get()
        self._pending_result = None

        # Run search in separate thread to prevent GUI freezing
        threading.Thread(target=self.perform_search, args=(query, content_type),
                         daemon=True).start()
        self.root.after(self._POLL_INTERVAL_MS, self._poll_result)

    def perform_search(self, query, content_type=None):
        """Fetch content for *query* and publish the resulting text.

        Args:
            query: Topic to look up.
            content_type: "verified", "wiki_summary" or "full".  When omitted
                the current radio-button selection is read instead.

        When called on the main thread the result is displayed immediately.
        On any other thread the text is only stored in `_pending_result`;
        the poller scheduled by `start_search` displays it.  No exception
        escapes: failures become an "Error: ..." result text.
        """
        try:
            if content_type is None:
                content_type = self.content_type.get()
            fetchers = {
                "verified": self.get_verified_content,
                "wiki_summary": self.get_wikipedia_summary,
                "full": self.get_full_content,
            }
            fetch = fetchers.get(content_type)
            if fetch is None:
                text = f"Error: unknown content type '{content_type}'"
            else:
                text = fetch(query)
        except Exception as e:
            text = f"Error: {str(e)}"

        if threading.current_thread() is threading.main_thread():
            self._finish_search(text)
        else:
            self._pending_result = (text,)

    def _poll_result(self):
        """UI-thread timer callback: display the worker's result once it exists."""
        pending = self._pending_result
        if pending is None:
            self.root.after(self._POLL_INTERVAL_MS, self._poll_result)
            return
        self._pending_result = None
        self._finish_search(pending[0])

    def _finish_search(self, text):
        """Show *text* and return the progress bar and Search button to idle."""
        try:
            self.display_results(text)
        finally:
            self.progress.stop()
            self.search_btn.config(state="normal")

    def _terms(self, text):
        """Return the set of lower-cased word tokens in *text* (punctuation dropped)."""
        return set(re.findall(r"\w+", text.lower()))

    def get_verified_content(self, query):
        """Get content from multiple sources and verify consistency.

        Returns the longer of the Wikipedia summary and the scraped web text
        when more than half of the query's words occur in both; otherwise a
        warning followed by both texts.  Any exception is returned as a
        "Verification failed: ..." string.
        """
        try:
            # Get results from multiple sources
            wiki_summary = self.get_wikipedia_summary(query)
            web_content = self.search_web_content(query)

            # Simple verification - check if key terms appear in both.
            # Tokens are extracted with \w+ so "python," still matches "python".
            key_terms = self._terms(query)
            shared = key_terms & self._terms(wiki_summary) & self._terms(web_content)

            # `key_terms` is empty for a query without word characters; treat
            # that as "not verified" instead of dividing by zero.
            if key_terms and len(shared) / len(key_terms) > 0.5:
                # Return the more detailed version
                return web_content if len(web_content) > len(wiki_summary) else wiki_summary
            return f"Warning: Sources disagree on this topic.\n\nWikipedia says:\n{wiki_summary}\n\nWeb sources say:\n{web_content}"

        except Exception as e:
            return f"Verification failed: {str(e)}"

    def get_wikipedia_summary(self, query):
        """Return a five-sentence English Wikipedia summary, or a message describing why none is available."""
        try:
            wikipedia.set_lang("en")
            return wikipedia.summary(query, sentences=5)
        except wikipedia.exceptions.DisambiguationError as e:
            return f"Multiple matches found. Please be more specific. Options: {', '.join(e.options[:5])}..."
        except wikipedia.exceptions.PageError:
            return "No Wikipedia page found for this topic."
        except Exception as e:
            return f"Error fetching Wikipedia summary: {str(e)}"

    def _top_urls(self, query, limit):
        """Return up to *limit* result URLs for *query* from `googlesearch.search`.

        Two different packages provide `googlesearch.search` with
        incompatible keyword arguments, so the `num_results` form is tried
        first and the `num`/`stop`/`pause` form is used when that call is
        rejected with a TypeError.
        """
        try:
            hits = search(query, num_results=limit)
        except TypeError:
            hits = search(query, num=limit, stop=limit, pause=1)
        return list(hits)[:limit]

    def search_web_content(self, query):
        """Scrape the top three search hits and return their text, each prefixed with its URL."""
        try:
            contents = []
            for url in self._top_urls(query, 3):
                # scrape_website reports its own failures as text, so no
                # additional exception handling is needed per URL.
                content = self.scrape_website(url)
                if content:
                    contents.append(f"From {url}:\n{content}\n\n")

            return "\n".join(contents) if contents else "No web content found."

        except Exception as e:
            return f"Error searching web: {str(e)}"

    def scrape_website(self, url):
        """Download *url* and return at most 2000 characters of its visible text.

        Failures (network errors, HTTP error statuses, parse errors) are
        returned as a "Could not retrieve content from ..." string.
        """
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=10)
            # Do not present the body of a 4xx/5xx error page as content.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove unwanted elements
            for element in soup(['script', 'style', 'nav', 'footer', 'iframe']):
                element.decompose()

            # Get text and collapse all whitespace runs to single spaces
            text = re.sub(r'\s+', ' ', soup.get_text()).strip()

            return text[:2000] + "..." if len(text) > 2000 else text

        except Exception as e:
            return f"Could not retrieve content from {url}: {str(e)}"

    def get_full_content(self, query):
        """Return the Wikipedia article body for *query*, truncated to 5000 characters."""
        try:
            wikipedia.set_lang("en")
            page = wikipedia.page(query)
            return page.content[:5000] + "..." if len(page.content) > 5000 else page.content
        except Exception as e:
            return f"Error fetching full content: {str(e)}"

    def display_results(self, text):
        """Replace the contents of the read-only results box with *text*."""
        self.results_text.config(state="normal")
        self.results_text.delete(1.0, END)
        self.results_text.insert(END, text)
        self.results_text.config(state="disabled")

    def search_web(self):
        """Open a Google search for the current query in a new browser tab."""
        from urllib.parse import quote_plus

        query = self.search_entry.get().strip()
        if not query:
            messagebox.showwarning("Warning", "Please enter a search term")
            return

        try:
            # quote_plus escapes characters such as '&', '#' and '+' that
            # would otherwise corrupt the query string.
            webbrowser.open_new_tab(f"https://www.google.com/search?q={quote_plus(query)}")
        except Exception as e:
            messagebox.showerror("Error", f"Could not open browser: {str(e)}")

    def clear_results(self):
        """Empty both the results box and the search entry."""
        self.results_text.config(state="normal")
        self.results_text.delete(1.0, END)
        self.results_text.config(state="disabled")
        self.search_entry.delete(0, END)

if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the
    # ContentFetcher UI to it, then block in the Tk event loop.
    root = Tk()
    app = ContentFetcher(root)
    root.mainloop()

In [87]:
import wikipedia
import requests
from tkinter import Tk, Frame, Entry, Button, Text, Scrollbar, END, Radiobutton, StringVar, Label, messagebox
from tkinter.ttk import Progressbar
import webbrowser
from bs4 import BeautifulSoup
import threading
from googlesearch import search
import re
import concurrent.futures

class ContentFetcher:
    """Tkinter window that looks a topic up on Wikipedia and on the web.

    The user types a topic, picks one of three modes ("verified",
    "wiki_summary", "full") and presses Search.  The lookup runs in a
    background thread; the finished text is handed back to the UI thread,
    which is the only thread that touches Tk widgets.
    """

    # Background colour shared by the window and its frames.
    _BACKGROUND = "#f0f2f5"
    # Delay (ms) between checks for a finished background search.
    _POLL_INTERVAL_MS = 100
    # One-element tuple holding the finished result text, or None while
    # nothing is waiting to be displayed.  Written by the worker thread,
    # consumed by `_poll_result` on the UI thread.
    _pending_result = None
    # Leading text of every status/failure message produced by the fetch
    # methods of this class; used to tell those messages from real content.
    _FAILURE_PREFIXES = (
        "Error fetching Wikipedia summary:",
        "Error searching web:",
        "Multiple matches found.",
        "No Wikipedia page found",
        "No reliable web content found.",
        "Could not retrieve content from",
    )

    def __init__(self, root):
        """Attach the fetcher UI to *root* (a Tk root window)."""
        self.root = root
        self.setup_gui()

    def setup_gui(self):
        """Create and lay out every widget of the main window."""
        bg = self._BACKGROUND
        self.root.title("Smart Content Fetcher")
        self.root.geometry("900x700")
        self.root.resizable(True, True)
        self.root.configure(bg=bg)

        # Main container
        main_frame = Frame(self.root, bg=bg, padx=20, pady=20)
        main_frame.pack(fill="both", expand=True)

        # Search section
        search_frame = Frame(main_frame, bg=bg)
        search_frame.pack(fill="x", pady=(0, 15))

        Label(search_frame, text="Search Topic:", bg=bg,
              font=("Arial", 12)).pack(side="left", padx=(0, 10))

        self.search_entry = Entry(search_frame, font=("Arial", 12), width=40)
        self.search_entry.pack(side="left", fill="x", expand=True, padx=(0, 10))

        self.search_btn = Button(search_frame, text="Search", command=self.start_search,
                                 bg="#4CAF50", fg="white", font=("Arial", 10, "bold"))
        self.search_btn.pack(side="left")

        # Options frame: one radio button per content mode
        options_frame = Frame(main_frame, bg=bg)
        options_frame.pack(fill="x", pady=(0, 15))

        self.content_type = StringVar(value="verified")
        modes = (
            ("Verified Summary", "verified"),
            ("Wikipedia Summary", "wiki_summary"),
            ("Full Content", "full"),
        )
        for label, value in modes:
            Radiobutton(options_frame, text=label, variable=self.content_type,
                        value=value, bg=bg,
                        font=("Arial", 10)).pack(side="left", padx=(0, 10))

        # Results section
        results_frame = Frame(main_frame, bg="#ffffff", bd=2, relief="solid")
        results_frame.pack(fill="both", expand=True)

        self.results_text = Text(results_frame, font=("Arial", 11), wrap="word",
                                 padx=10, pady=10, state="disabled")
        scrollbar = Scrollbar(results_frame, command=self.results_text.yview)
        self.results_text.configure(yscrollcommand=scrollbar.set)

        scrollbar.pack(side="right", fill="y")
        self.results_text.pack(fill="both", expand=True)

        # Progress bar
        self.progress = Progressbar(main_frame, mode="indeterminate")
        self.progress.pack(fill="x", pady=(10, 0))

        # Bottom buttons
        bottom_frame = Frame(main_frame, bg=bg)
        bottom_frame.pack(fill="x", pady=(15, 0))

        Button(bottom_frame, text="Search Web", command=self.search_web,
               bg="#2196F3", fg="white", font=("Arial", 10)).pack(side="left", padx=(0, 10))
        Button(bottom_frame, text="Clear", command=self.clear_results,
               bg="#f44336", fg="white", font=("Arial", 10)).pack(side="left")

    def start_search(self):
        """Validate the query, then run the lookup in a background thread."""
        query = self.search_entry.get().strip()
        if not query:
            messagebox.showwarning("Warning", "Please enter a search term")
            return

        self.progress.start()
        self.search_btn.config(state="disabled")
        self.display_results("Searching... Please wait.")

        # Read the Tk variable here, on the UI thread, so the worker never
        # has to touch Tk state.
        content_type = self.content_type.get()
        self._pending_result = None

        # Run search in separate thread to prevent GUI freezing
        threading.Thread(target=self.perform_search, args=(query, content_type),
                         daemon=True).start()
        self.root.after(self._POLL_INTERVAL_MS, self._poll_result)

    def perform_search(self, query, content_type=None):
        """Fetch content for *query* and publish the resulting text.

        Args:
            query: Topic to look up.
            content_type: "verified", "wiki_summary" or "full".  When omitted
                the current radio-button selection is read instead.

        When called on the main thread the result is displayed immediately.
        On any other thread the text is only stored in `_pending_result`;
        the poller scheduled by `start_search` displays it.  No exception
        escapes: failures become an "Error: ..." result text.
        """
        try:
            if content_type is None:
                content_type = self.content_type.get()
            fetchers = {
                "verified": self.get_verified_content,
                "wiki_summary": self.get_wikipedia_summary,
                "full": self.get_full_content,
            }
            fetch = fetchers.get(content_type)
            if fetch is None:
                text = f"Error: unknown content type '{content_type}'"
            else:
                text = fetch(query)
        except Exception as e:
            text = f"Error: {str(e)}"

        if threading.current_thread() is threading.main_thread():
            self._finish_search(text)
        else:
            self._pending_result = (text,)

    def _poll_result(self):
        """UI-thread timer callback: display the worker's result once it exists."""
        pending = self._pending_result
        if pending is None:
            self.root.after(self._POLL_INTERVAL_MS, self._poll_result)
            return
        self._pending_result = None
        self._finish_search(pending[0])

    def _finish_search(self, text):
        """Show *text* and return the progress bar and Search button to idle."""
        try:
            self.display_results(text)
        finally:
            self.progress.stop()
            self.search_btn.config(state="normal")

    def _is_failure_message(self, text):
        """Return True when *text* starts with one of this class's failure messages.

        Matching on the known message prefixes (rather than on the bare word
        "Error" anywhere in the text) keeps genuine content that happens to
        mention errors from being classified as a failure.
        """
        return text.startswith(self._FAILURE_PREFIXES)

    def get_verified_content(self, query):
        """Get content from multiple sources and verify consistency.

        Wikipedia and the web are queried concurrently.  If either lookup
        produced a failure message both texts are returned side by side.
        Otherwise the texts are merged when their similarity exceeds 0.6 and
        shown separately when it does not.  Any exception is returned as a
        "Verification failed: ..." string.
        """
        try:
            with concurrent.futures.ThreadPoolExecutor() as executor:
                # Get results from multiple sources concurrently
                wiki_future = executor.submit(self.get_wikipedia_summary, query)
                web_future = executor.submit(self.search_web_content, query)

                wiki_summary = wiki_future.result()
                web_content = web_future.result()

            if self._is_failure_message(wiki_summary) or self._is_failure_message(web_content):
                return f"Wikipedia:\n{wiki_summary}\n\nWeb Results:\n{web_content}"

            # NOTE(review): the score is a Jaccard index over the two full
            # vocabularies, which tends to be low when the texts differ a lot
            # in length - confirm that 0.6 is a reachable threshold.
            similarity_score = self.calculate_similarity(wiki_summary, web_content)

            if similarity_score > 0.6:  # If sources mostly agree
                combined = self.combine_contents(wiki_summary, web_content)
                return f"Verified Information:\n\n{combined}"
            return f"Sources show different information:\n\nWikipedia:\n{wiki_summary}\n\nWeb Results:\n{web_content}"

        except Exception as e:
            return f"Verification failed: {str(e)}"

    def calculate_similarity(self, text1, text2):
        """Return the Jaccard index (shared words / all words) of two texts.

        Words are case-insensitive ``\\w+`` tokens.  Returns 0 when neither
        text contains a word.
        """
        words1 = set(re.findall(r'\w+', text1.lower()))
        words2 = set(re.findall(r'\w+', text2.lower()))

        total_words = words1 | words2
        if not total_words:
            return 0
        return len(words1 & words2) / len(total_words)

    def combine_contents(self, wiki_content, web_content):
        """Return the first three Wikipedia sentences plus extra web sentences.

        A web sentence is appended when it has more than five words and does
        not occur verbatim in *wiki_content*.  Appending stops once the
        combined text exceeds 150 words (the sentence that crosses the limit
        is kept).
        """
        sentence_break = r'(?<=[.!?])\s+'
        head = ' '.join(re.split(sentence_break, wiki_content)[:3])

        extras = []
        word_total = len(head.split())
        for sentence in re.split(sentence_break, web_content):
            word_count = len(sentence.split())
            if word_count <= 5 or sentence in wiki_content:
                continue
            extras.append(sentence)
            word_total += word_count
            if word_total > 150:  # Limit to reasonable length
                break

        return (head + "\n\n" + ' '.join(extras)).strip()

    def get_wikipedia_summary(self, query):
        """Return a five-sentence English Wikipedia summary, or a message describing why none is available."""
        try:
            wikipedia.set_lang("en")
            return wikipedia.summary(query, sentences=5)
        except wikipedia.exceptions.DisambiguationError as e:
            return f"Multiple matches found. Please be more specific. Options: {', '.join(e.options[:5])}..."
        except wikipedia.exceptions.PageError:
            return "No Wikipedia page found for this topic."
        except Exception as e:
            return f"Error fetching Wikipedia summary: {str(e)}"

    def _top_urls(self, query, limit):
        """Return up to *limit* result URLs for *query* from `googlesearch.search`.

        Two different packages provide `googlesearch.search` with
        incompatible keyword arguments, so the `num_results` form is tried
        first and the `num`/`stop`/`pause` form is used when that call is
        rejected with a TypeError.
        """
        try:
            hits = search(query, num_results=limit)
        except TypeError:
            hits = search(query, num=limit, stop=limit, pause=1)
        return list(hits)[:limit]

    def search_web_content(self, query):
        """Scrape search hits for *query* and return their merged text.

        Five URLs are requested, at most the first three are scraped, and
        scraping stops once two pages yielded usable text.
        """
        try:
            contents = []
            for url in self._top_urls(query, 5)[:3]:  # Only use top 3 to be faster
                content = self.scrape_website(url)
                # scrape_website returns None for too-short pages and a
                # "Could not retrieve ..." message on failure.
                if not content or self._is_failure_message(content):
                    continue
                contents.append(content)
                if len(contents) >= 2:  # Stop after getting 2 good sources
                    break

            if not contents:
                return "No reliable web content found."

            return self.combine_web_contents(contents)

        except Exception as e:
            return f"Error searching web: {str(e)}"

    def combine_web_contents(self, contents):
        """Merge several page texts, listing sentences shared by all pages first.

        Sentences keep the order in which they appear in their source text
        (repeats within one text are dropped).  After the shared sentences,
        the remaining sentences of the first source that has any are added;
        other sources are skipped to avoid duplication.

        Args:
            contents: List of page texts.

        Returns:
            The merged text; the single text unchanged when only one is
            given; an empty string for an empty list.
        """
        if not contents:
            return ""
        if len(contents) == 1:
            return contents[0]

        # dict.fromkeys de-duplicates while preserving first-seen order.
        per_source = [
            list(dict.fromkeys(re.split(r'(?<=[.!?])\s+', content)))
            for content in contents
        ]
        shared = set(per_source[0]).intersection(*per_source[1:])

        result = []
        if shared:
            result.append("Common information from multiple sources:")
            result.extend(s for s in per_source[0] if s in shared)
            result.append("\nAdditional information:")

        # Add unique information
        for sentences in per_source:
            unique = [s for s in sentences if s not in shared]
            if unique:
                result.extend(unique)
                break  # Just add from one source to avoid duplication

        return ' '.join(result)

    def scrape_website(self, url):
        """Download *url* and return up to 3000 characters of its main text.

        Returns None when fewer than 20 words could be extracted, and a
        "Could not retrieve content from ..." string on any failure
        (network error, HTTP error status, parse error).
        """
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=10)
            # Do not present the body of a 4xx/5xx error page as content.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove unwanted elements
            for element in soup(['script', 'style', 'nav', 'footer', 'iframe', 'header']):
                element.decompose()

            # select() understands CSS selectors such as div[class*="content"];
            # find_all() with a list only compares tag names.
            candidates = soup.select('article, main, div[class*="content"], p')
            # Keep only the outermost matches so that text inside a matched
            # container is not emitted a second time for its nested matches.
            matched = {id(tag) for tag in candidates}
            content_elements = [
                tag for tag in candidates
                if not any(id(parent) in matched for parent in tag.parents)
            ]
            text = ' '.join(elem.get_text() for elem in content_elements)

            # Drop bracketed citations like [1], [2] first so the whitespace
            # clean-up below also closes the gaps they leave behind.
            text = re.sub(r'\[[^\]]+\]', '', text)
            text = re.sub(r'\s+', ' ', text).strip()

            if len(text.split()) < 20:  # Skip if too short
                return None

            return text[:3000]  # Return first 3000 characters

        except Exception as e:
            return f"Could not retrieve content from {url}: {str(e)}"

    def get_full_content(self, query):
        """Return the complete Wikipedia article body for *query*, or an error message."""
        try:
            wikipedia.set_lang("en")
            page = wikipedia.page(query)
            return page.content  # Return full content without truncation
        except Exception as e:
            return f"Error fetching full content: {str(e)}"

    def display_results(self, text):
        """Replace the contents of the read-only results box with *text*."""
        self.results_text.config(state="normal")
        self.results_text.delete(1.0, END)
        self.results_text.insert(END, text)
        self.results_text.config(state="disabled")

    def search_web(self):
        """Open a Google search for the current query in a new browser tab."""
        from urllib.parse import quote_plus

        query = self.search_entry.get().strip()
        if not query:
            messagebox.showwarning("Warning", "Please enter a search term")
            return

        try:
            # quote_plus escapes characters such as '&', '#' and '+' that
            # would otherwise corrupt the query string.
            webbrowser.open_new_tab(f"https://www.google.com/search?q={quote_plus(query)}")
        except Exception as e:
            messagebox.showerror("Error", f"Could not open browser: {str(e)}")

    def clear_results(self):
        """Empty both the results box and the search entry."""
        self.results_text.config(state="normal")
        self.results_text.delete(1.0, END)
        self.results_text.config(state="disabled")
        self.search_entry.delete(0, END)

if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the
    # ContentFetcher UI to it, then block in the Tk event loop.
    root = Tk()
    app = ContentFetcher(root)
    root.mainloop()