In [1]:
## Google Search Example

### 1. **Set up the Google search loader:**

import requests
from bs4 import BeautifulSoup
#from langchain.loaders import BaseLoader
from langchain.document_loaders.base import BaseLoader



class GoogleSearchLoader(BaseLoader):
    """Fetch a Google results page for a query and scrape title/link/snippet records.

    `load()` returns the raw HTML text; `parse()` turns that HTML into a list
    of dictionaries with the keys 'title', 'link' and 'snippet'.
    """

    SEARCH_URL = "https://www.google.com/search"
    # requests applies no timeout by default; without one a stalled connection
    # would block the notebook cell indefinitely.
    REQUEST_TIMEOUT = 10  # seconds

    def __init__(self, query):
        """Remember the search phrase.

        Args:
            query: Free-text search phrase. It is URL-encoded when the request
                is built, so it may contain spaces and reserved characters.
        """
        self.query = query

    def load(self):
        """Download the results page for `self.query`.

        Returns:
            The response body as text, regardless of the HTTP status code.

        Raises:
            requests.RequestException: if the connection fails or times out.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
        # Hand the query to requests via `params` so it is percent-encoded;
        # pasting it into the URL let characters such as '&', '#' or '+'
        # truncate or alter the search phrase.
        response = requests.get(
            self.SEARCH_URL,
            params={"q": self.query},
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.text

    def parse(self, html):
        """Extract search hits from a results page.

        Args:
            html: HTML text as returned by `load()`.

        Returns:
            A list of dicts with 'title', 'link' and 'snippet'. When a title
            has no enclosing anchor (or the anchor has no href) the link is
            "No link found"; when it has no following sibling <div> the
            snippet is "No snippet found".
        """
        soup = BeautifulSoup(html, "html.parser")
        results = []
        # NOTE(review): these class names are taken from Google's generated
        # markup and will stop matching if that markup changes.
        for g in soup.find_all('div', class_='BNeawe vvjwJb AP7Wnd'):
            # Both lookups can come back as None; indexing / calling on None
            # used to abort the whole parse on the first incomplete hit.
            link_tag = g.find_parent('a')
            if link_tag is not None:
                link = link_tag.get('href', "No link found")
            else:
                link = "No link found"

            snippet_tag = g.find_next_sibling('div')
            if snippet_tag is not None:
                snippet = snippet_tag.get_text()
            else:
                snippet = "No snippet found"

            results.append({
                'title': g.get_text(),
                'link': link,
                'snippet': snippet
            })
        return results
if __name__ == "__main__":
    # Demo run: fetch one results page for a fixed query, parse it, and print
    # every extracted record on its own line.
    loader = GoogleSearchLoader("Python web scraping")
    html = loader.load()
    parsed_results = loader.parse(html)
    for result in parsed_results:
        print(result)



In [2]:
import requests
from bs4 import BeautifulSoup
from langchain.document_loaders.base import BaseLoader

class GoogleSearchLoader(BaseLoader):
    """Fetch a Google results page for a query and scrape title/link/snippet records.

    `load()` returns the raw HTML text (or "" on a non-200 response);
    `parse()` turns that HTML into a list of dictionaries with the keys
    'title', 'link' and 'snippet'.
    """

    SEARCH_URL = "https://www.google.com/search"
    # requests applies no timeout by default; without one a stalled connection
    # would block the notebook cell indefinitely.
    REQUEST_TIMEOUT = 10  # seconds

    def __init__(self, query):
        """Remember the search phrase.

        Args:
            query: Free-text search phrase. It is URL-encoded when the request
                is built, so it may contain spaces and reserved characters.
        """
        self.query = query

    def load(self):
        """Download the results page for `self.query`.

        Returns:
            The response body as text when the server answers 200; otherwise
            an empty string, after printing the status code.

        Raises:
            requests.RequestException: if the connection fails or times out.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
        # Hand the query to requests via `params` so it is percent-encoded;
        # pasting it into the URL let characters such as '&', '#' or '+'
        # truncate or alter the search phrase.
        response = requests.get(
            self.SEARCH_URL,
            params={"q": self.query},
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Failed to retrieve search results. Status code: {response.status_code}")
            return ""
        return response.text

    def parse(self, html):
        """Extract search hits from a results page.

        Args:
            html: HTML text as returned by `load()`.

        Returns:
            A list of dicts with 'title', 'link' and 'snippet'. Missing links
            and snippets are reported as "No link found" / "No snippet found".
        """
        soup = BeautifulSoup(html, "html.parser")
        results = []
        # NOTE(review): these class names are taken from Google's generated
        # markup and will stop matching if that markup changes.
        for g in soup.find_all('div', class_='BNeawe vvjwJb AP7Wnd'):
            # The title <div> is expected to sit inside the result's anchor.
            link_tag = g.find_parent('a')
            if link_tag is not None:
                link = link_tag.get('href', "No link found")
            else:
                link = "No link found"

            # The snippet is taken from the next sibling <div>, when present.
            snippet_tag = g.find_next_sibling('div')
            if snippet_tag is not None:
                snippet = snippet_tag.get_text()
            else:
                snippet = "No snippet found"

            results.append({
                'title': g.get_text(),
                'link': link,
                'snippet': snippet
            })
        return results

if __name__ == "__main__":
    # Demo run: fetch one results page for a fixed query and print each
    # parsed record, or report that nothing came back from the request.
    loader = GoogleSearchLoader("Python web scraping")
    html = loader.load()
    if not html:
        print("No HTML content to parse.")
    else:
        parsed_results = loader.parse(html)
        for result in parsed_results:
            print(result)


In [1]:
import tkinter as tk
from tkinter import simpledialog
import requests
from bs4 import BeautifulSoup
from langchain.document_loaders.base import BaseLoader

class GoogleSearchLoader(BaseLoader):
    """Fetch a Google results page for a query and scrape title/link/snippet records.

    `load()` returns the raw HTML text (or "" on a non-200 response);
    `parse()` turns that HTML into a list of dictionaries with the keys
    'title', 'link' and 'snippet'.
    """

    SEARCH_URL = "https://www.google.com/search"
    # requests applies no timeout by default; without one a stalled connection
    # would block the notebook cell indefinitely.
    REQUEST_TIMEOUT = 10  # seconds

    def __init__(self, query):
        """Remember the search phrase.

        Args:
            query: Free-text search phrase. It is URL-encoded when the request
                is built, so it may contain spaces and reserved characters.
        """
        self.query = query

    def load(self):
        """Download the results page for `self.query`.

        Returns:
            The response body as text when the server answers 200; otherwise
            an empty string, after printing the status code.

        Raises:
            requests.RequestException: if the connection fails or times out.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
        # Hand the query to requests via `params` so it is percent-encoded;
        # pasting it into the URL let characters such as '&', '#' or '+'
        # truncate or alter the search phrase.
        response = requests.get(
            self.SEARCH_URL,
            params={"q": self.query},
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Failed to retrieve search results. Status code: {response.status_code}")
            return ""
        return response.text

    def parse(self, html):
        """Extract search hits from a results page.

        Args:
            html: HTML text as returned by `load()`.

        Returns:
            A list of dicts with 'title', 'link' and 'snippet'. Missing links
            and snippets are reported as "No link found" / "No snippet found".
        """
        soup = BeautifulSoup(html, "html.parser")
        results = []
        # NOTE(review): these class names are taken from Google's generated
        # markup and will stop matching if that markup changes.
        for g in soup.find_all('div', class_='BNeawe vvjwJb AP7Wnd'):
            # The title <div> is expected to sit inside the result's anchor.
            link_tag = g.find_parent('a')
            if link_tag is not None:
                link = link_tag.get('href', "No link found")
            else:
                link = "No link found"

            # The snippet is taken from the next sibling <div>, when present.
            snippet_tag = g.find_next_sibling('div')
            if snippet_tag is not None:
                snippet = snippet_tag.get_text()
            else:
                snippet = "No snippet found"

            results.append({
                'title': g.get_text(),
                'link': link,
                'snippet': snippet
            })
        return results

def main():
    """Ask for a search topic in a dialog, run the search and print each result.

    Prints "No query provided." when the dialog yields no text, and
    "No HTML content to parse." when the loader returns no HTML.
    """
    # Create the GUI
    root = tk.Tk()
    root.withdraw()  # Hide the root window
    try:
        # Ask the user for a search topic
        query = simpledialog.askstring("Input", "Enter the topic to search for:")
    finally:
        # Dispose of the hidden root once the dialog is done; otherwise every
        # call leaves a live Tk interpreter behind, which accumulates when the
        # cell is re-run in the same kernel.
        root.destroy()

    if not query:
        print("No query provided.")
        return

    loader = GoogleSearchLoader(query=query)
    html = loader.load()
    if not html:
        print("No HTML content to parse.")
        return

    for result in loader.parse(html):
        print(result)


if __name__ == "__main__":
    main()


In [7]:
### Complete Example with LangChain Integration

# To integrate this with LangChain, you can use the custom loaders created above. Here’s how to use one of them with LangChain:

#```python
#from langchain.loaders import BaseLoader
from langchain.document_loaders.base import BaseLoader

class CustomLoader(BaseLoader):
    """Facade that forwards `load` / `parse` to the loader of a named search engine.

    Recognised engine names are "google", "duckduckgo" and "bing"; any other
    value makes `load` and `parse` raise ValueError.
    """

    def __init__(self, engine, query):
        """Store the engine name and the search phrase; nothing is validated here."""
        self.query = query
        self.engine = engine

    def _engine_loader(self):
        """Build a fresh engine-specific loader for `self.query`.

        Raises:
            ValueError: if `self.engine` is not one of the recognised names.
        """
        engine = self.engine
        if engine == "google":
            return GoogleSearchLoader(self.query)
        if engine == "duckduckgo":
            return DuckDuckGoSearchLoader(self.query)
        if engine == "bing":
            return BingSearchLoader(self.query)
        raise ValueError("Unsupported search engine")

    def load(self):
        """Return whatever the selected engine's `load()` returns."""
        delegate = self._engine_loader()
        return delegate.load()

    def parse(self, html):
        """Return whatever the selected engine's `parse(html)` returns."""
        delegate = self._engine_loader()
        return delegate.parse(html)

if __name__ == "__main__":
    # Demo run: route a fixed query through the "google" engine, parse the
    # returned page and print every extracted record.
    loader = CustomLoader("google", "Python web scraping")
    html = loader.load()
    parsed_results = loader.parse(html)
    for result in parsed_results:
        print(result)
#```

# This example demonstrates how to perform searches on Google, DuckDuckGo, and Bing using web scraping techniques, parse the results, and integrate them with LangChain through a custom loader. Make sure to comply with each search engine's terms of service and usage policies regarding web scraping.

In [9]:
import tkinter as tk
from tkinter import simpledialog
import requests
from bs4 import BeautifulSoup
from langchain.document_loaders.base import BaseLoader

class GoogleSearchLoader(BaseLoader):
    """Fetch a Google results page for a query and scrape title/link/snippet records.

    `load()` returns the raw HTML text (or "" on a non-200 response);
    `parse()` turns that HTML into a list of dictionaries with the keys
    'title', 'link' and 'snippet'.
    """

    SEARCH_URL = "https://www.google.com/search"
    # requests applies no timeout by default; without one a stalled connection
    # would block the notebook cell indefinitely.
    REQUEST_TIMEOUT = 10  # seconds

    def __init__(self, query):
        """Remember the search phrase.

        Args:
            query: Free-text search phrase. It is URL-encoded when the request
                is built, so it may contain spaces and reserved characters.
        """
        self.query = query

    def load(self):
        """Download the results page for `self.query`.

        Returns:
            The response body as text when the server answers 200; otherwise
            an empty string, after printing the status code.

        Raises:
            requests.RequestException: if the connection fails or times out.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        }
        # Hand the query to requests via `params` so it is percent-encoded;
        # pasting it into the URL let characters such as '&', '#' or '+'
        # truncate or alter the search phrase.
        response = requests.get(
            self.SEARCH_URL,
            params={"q": self.query},
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Failed to retrieve search results. Status code: {response.status_code}")
            return ""
        return response.text

    def parse(self, html):
        """Extract search hits from a results page.

        Args:
            html: HTML text as returned by `load()`.

        Returns:
            A list of dicts with 'title', 'link' and 'snippet'. Missing links
            and snippets are reported as "No link found" / "No snippet found".
        """
        soup = BeautifulSoup(html, "html.parser")
        results = []
        # NOTE(review): these class names are taken from Google's generated
        # markup and will stop matching if that markup changes.
        for g in soup.find_all('div', class_='BNeawe vvjwJb AP7Wnd'):
            # The title <div> is expected to sit inside the result's anchor.
            link_tag = g.find_parent('a')
            if link_tag is not None:
                link = link_tag.get('href', "No link found")
            else:
                link = "No link found"

            # The snippet is taken from the next sibling <div>, when present.
            snippet_tag = g.find_next_sibling('div')
            if snippet_tag is not None:
                snippet = snippet_tag.get_text()
            else:
                snippet = "No snippet found"

            results.append({
                'title': g.get_text(),
                'link': link,
                'snippet': snippet
            })
        return results

def main():
    """Ask for a search topic in a dialog, run the search and print each result.

    Prints "No query provided." when the dialog yields no text, and
    "No HTML content to parse." when the loader returns no HTML.
    """
    # Create the GUI
    root = tk.Tk()
    root.withdraw()  # Hide the root window
    try:
        # Ask the user for a search topic
        query = simpledialog.askstring("Input", "Enter the topic to search for:")
    finally:
        # Dispose of the hidden root once the dialog is done; otherwise every
        # call leaves a live Tk interpreter behind, which accumulates when the
        # cell is re-run in the same kernel.
        root.destroy()

    if not query:
        print("No query provided.")
        return

    loader = GoogleSearchLoader(query=query)
    html = loader.load()
    if not html:
        print("No HTML content to parse.")
        return

    for result in loader.parse(html):
        print(result)


if __name__ == "__main__":
    main()


In [10]:
import tkinter as tk
from tkinter import simpledialog
import requests
from bs4 import BeautifulSoup
from langchain.document_loaders.base import BaseLoader

class GoogleSearchLoader(BaseLoader):
    """Fetch a Google results page for a query and scrape title/link/snippet records.

    `load()` returns the raw HTML text (or "" on a non-200 response);
    `parse()` turns that HTML into a list of dictionaries with the keys
    'title', 'link' and 'snippet', using CSS selectors on each result container.
    """

    SEARCH_URL = "https://www.google.com/search"
    # requests applies no timeout by default; without one a stalled connection
    # would block the notebook cell indefinitely.
    REQUEST_TIMEOUT = 10  # seconds

    def __init__(self, query):
        """Remember the search phrase.

        Args:
            query: Free-text search phrase. It is URL-encoded when the request
                is built, so it may contain spaces and reserved characters.
        """
        self.query = query

    def load(self):
        """Download the results page for `self.query`.

        Returns:
            The response body as text when the server answers 200; otherwise
            an empty string, after printing the status code.

        Raises:
            requests.RequestException: if the connection fails or times out.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        }
        # Hand the query to requests via `params` so it is percent-encoded;
        # pasting it into the URL let characters such as '&', '#' or '+'
        # truncate or alter the search phrase.
        response = requests.get(
            self.SEARCH_URL,
            params={"q": self.query},
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Failed to retrieve search results. Status code: {response.status_code}")
            return ""
        return response.text

    def parse(self, html):
        """Extract search hits from a results page.

        Args:
            html: HTML text as returned by `load()`.

        Returns:
            A list of dicts with 'title', 'link' and 'snippet'. Any piece that
            cannot be located is reported as "No title found",
            "No link found" or "No snippet found" respectively.
        """
        soup = BeautifulSoup(html, "html.parser")
        results = []
        # NOTE(review): the selectors below are taken from Google's generated
        # markup and will stop matching if that markup changes.
        for result in soup.select('.tF2Cxc'):
            title_tag = result.select_one('.DKV0Md')
            link_tag = result.select_one('a')
            snippet_tag = result.select_one('.VwiC3b')

            title = title_tag.get_text() if title_tag is not None else "No title found"
            # An <a> without an href used to raise KeyError and abort the
            # whole parse; fall back to the placeholder instead.
            if link_tag is not None:
                link = link_tag.get('href', "No link found")
            else:
                link = "No link found"
            snippet = snippet_tag.get_text() if snippet_tag is not None else "No snippet found"

            results.append({
                'title': title,
                'link': link,
                'snippet': snippet
            })
        return results

def main():
    """Ask for a search topic in a dialog, run the search and print each result.

    Prints "No query provided." when the dialog yields no text, and
    "No HTML content to parse." when the loader returns no HTML.
    """
    # Create the GUI
    root = tk.Tk()
    root.withdraw()  # Hide the root window
    try:
        # Ask the user for a search topic
        query = simpledialog.askstring("Input", "Enter the topic to search for:")
    finally:
        # Dispose of the hidden root once the dialog is done; otherwise every
        # call leaves a live Tk interpreter behind, which accumulates when the
        # cell is re-run in the same kernel.
        root.destroy()

    if not query:
        print("No query provided.")
        return

    loader = GoogleSearchLoader(query=query)
    html = loader.load()
    if not html:
        print("No HTML content to parse.")
        return

    for result in loader.parse(html):
        print(result)


if __name__ == "__main__":
    main()
