<a href="https://colab.research.google.com/github/snehjn1407/ChatReview/blob/main/chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def scrape_amazon(query="laptops", timeout=10):
    """Scrape the first page of Amazon India search results.

    Args:
        query: Search term, sent as the ``k`` query parameter.
        timeout: Seconds to wait for the server before the request is
            abandoned; without it ``requests.get`` may block indefinitely.

    Returns:
        pandas.DataFrame with the columns ``Name``, ``Price``, ``Rating`` and
        ``Description``, one row per result card. The columns are present
        even when the request fails or no card is found.
    """
    columns = ["Name", "Price", "Rating", "Description"]
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # Passing the term through ``params`` lets requests URL-encode it.
        response = requests.get(
            "https://www.amazon.in/s",
            params={"k": query},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()  # Raise HTTP errors if any
    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses alike.
        print(f"Error: {e}")
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or(node, tag, css_class, default):
        """Return the stripped text of the first matching child, or *default*."""
        match = node.find(tag, class_=css_class)
        return match.text.strip() if match is not None else default

    # Iterate over the individual result cards. Looping over the
    # ``s-main-slot`` wrapper instead produces a single row for the whole page.
    # NOTE(review): assumes every card is a <div> carrying
    # data-component-type="s-search-result" -- confirm against the live markup.
    cards = soup.find_all("div", attrs={"data-component-type": "s-search-result"})

    products = []
    for card in cards:
        name = text_or(card, "span", "a-text-normal", "Unknown")
        products.append({
            "Name": name,
            "Price": text_or(card, "span", "a-price-whole", "Unknown"),
            "Rating": text_or(card, "span", "a-icon-alt", "No ratings"),
            "Description": name,  # Use name as fallback description
        })

    # Explicit columns keep the schema stable when no card was found.
    return pd.DataFrame(products, columns=columns)

def scrape_flipkart(query="laptop", timeout=10):
    """Scrape the first page of Flipkart search results.

    Args:
        query: Search term, sent as the ``q`` query parameter.
        timeout: Seconds to wait for the server before the request is
            abandoned; without it ``requests.get`` may block indefinitely.

    Returns:
        pandas.DataFrame with the columns ``Name``, ``Price``, ``Rating``,
        ``Reviews`` and ``Description``, one row per matched product
        container. The columns are present even when the request fails or
        nothing is matched.
    """
    columns = ["Name", "Price", "Rating", "Reviews", "Description"]
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
    params = {
        "q": query,
        "otracker": "search",
        "otracker1": "search",
        "marketplace": "FLIPKART",
        "as-show": "on",
        "as": "off",
    }
    try:
        response = requests.get(
            "https://www.flipkart.com/search",
            params=params,
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # A failed fetch must not crash the chatbot; report it and return no rows.
        print(f"Error fetching Flipkart data: {e}")
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or(node, tag, css_class, default):
        """Return the stripped text of the first matching child, or *default*."""
        match = node.find(tag, class_=css_class)
        return match.text.strip() if match is not None else default

    # Extract product details.
    # NOTE(review): the class names below replace the "<...>" placeholders
    # (which can never match) with the ones used by the later revisions of
    # this scraper in the notebook -- confirm against the live markup.
    products = []
    for item in soup.find_all("div", class_="_2kHMtA"):
        name = text_or(item, "div", "_4rR01T", "Unknown")
        products.append({
            "Name": name,
            "Price": text_or(item, "div", "_30jeq3 _1_WHN1", "Unknown"),
            "Rating": text_or(item, "div", "_3LWZlK", "No ratings"),
            "Reviews": text_or(item, "span", "_2_R_DZ", "No reviews"),
            # A constant description would give every row the same TF-IDF
            # vector, so the name is used as the text to rank on.
            "Description": name,
        })

    # Explicit columns keep the schema stable when nothing was matched.
    return pd.DataFrame(products, columns=columns)

def chatbot_logic(user_query, scraped_data, top_n=5):
    """Rank scraped products by TF-IDF cosine similarity to a query.

    Args:
        user_query: Free-text request typed by the user (a string).
        scraped_data: DataFrame of products; it is ranked on its
            ``Description`` column.
        top_n: Maximum number of recommendations to return.

    Returns:
        Up to ``top_n`` rows of ``scraped_data`` ordered from most to least
        similar. Rows with zero similarity are left out, so the result is
        empty when nothing matches. An empty DataFrame is also returned when
        there is no data, no ``Description`` column, or a blank query.
    """
    # Check if the DataFrame is empty or if the 'Description' column is missing
    if scraped_data.empty or 'Description' not in scraped_data.columns:
        print("Error: No valid data available for recommendations.")
        return pd.DataFrame()  # Return an empty DataFrame

    no_match = scraped_data.iloc[0:0]  # Same columns, zero rows
    if not user_query or not user_query.strip():
        return no_match

    # Missing descriptions become empty strings so the vectorizer accepts them.
    descriptions = scraped_data['Description'].fillna("").astype(str)
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        product_vectors = vectorizer.fit_transform(descriptions)
    except ValueError:
        # Raised when no description contains a usable (non stop-word) term.
        return no_match
    query_vector = vectorizer.transform([user_query])

    scores = cosine_similarity(query_vector, product_vectors)[0]
    # A stable sort on the negated scores keeps tied products in input order.
    ranked_indices = (-scores).argsort(kind="stable")
    # Drop products that share no term with the query instead of padding the
    # result with unrelated rows.
    ranked_indices = ranked_indices[scores[ranked_indices] > 0][:max(top_n, 0)]

    return scraped_data.iloc[ranked_indices]

def chatbot():
    """Run the interactive product-recommendation loop on the console.

    Scrapes Amazon and Flipkart once, prints the shape and columns of each
    result for diagnostics, merges them, and then answers queries read from
    standard input until the user types ``exit`` or ``quit`` or the input
    stream ends.
    """
    print("Welcome to the Multi-Platform Product Recommendation Chatbot!")
    print("Fetching product data...")

    amazon_data = scrape_amazon()
    print(f"Amazon data shape: {amazon_data.shape}")
    print(f"Amazon data columns: {amazon_data.columns}")

    flipkart_data = scrape_flipkart()
    print(f"Flipkart data shape: {flipkart_data.shape}")
    print(f"Flipkart data columns: {flipkart_data.columns}")

    # Only non-empty frames are concatenated; pd.concat raises on an empty
    # list, hence the fallback to an empty DataFrame.
    frames = [frame for frame in (amazon_data, flipkart_data) if not frame.empty]
    combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    print(f"Combined data shape: {combined_data.shape}")
    print(f"Combined data columns: {combined_data.columns}")

    if combined_data.empty:
        print("No data could be fetched from the platforms. Please check your connections or URLs.")
        return

    print("Data fetched successfully! Ask for product recommendations.")
    print("Type 'exit' to quit.")

    while True:
        try:
            user_query = input("You: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Input closed or interrupted: leave the loop instead of crashing.
            print("\nThank you for using the chatbot. Goodbye!")
            break
        if user_query in ('exit', 'quit'):
            print("Thank you for using the chatbot. Goodbye!")
            break
        if not user_query:
            continue  # Nothing to search for; prompt again

        recommendations = chatbot_logic(user_query, combined_data)
        if recommendations.empty:
            print("Sorry, no matching products found.")
            continue

        print("Here are some recommendations for you:")
        for _, product in recommendations.iterrows():
            # Rows from a platform without reviews hold NaN after the concat,
            # and .get() only falls back when the key itself is missing.
            reviews = product.get('Reviews')
            if pd.isna(reviews):
                reviews = 'No reviews available'
            print(f"Name: {product['Name']}")
            print(f"Price: {product['Price']}")
            print(f"Rating: {product['Rating']}")
            print(f"Reviews: {reviews}")
            print(f"Description: {product['Description']}")
            print("---")

# Run the chatbot: fetch product data from both platforms, then start the
# interactive prompt loop (blocks on input() until the user types 'exit').
# Note: Replace placeholders with actual scraping logic based on inspected HTML structures
chatbot()


Welcome to the Multi-Platform Product Recommendation Chatbot!
Fetching product data...
Amazon data shape: (1, 4)
Amazon data columns: Index(['Name', 'Price', 'Rating', 'Description'], dtype='object')
Flipkart data shape: (0, 0)
Flipkart data columns: RangeIndex(start=0, stop=0, step=1)
Combined data shape: (1, 4)
Combined data columns: Index(['Name', 'Price', 'Rating', 'Description'], dtype='object')
Data fetched successfully! Ask for product recommendations.
Type 'exit' to quit.
You: laptop
Here are some recommendations for you:
Name: Check each product page for other buying options.
Price: 48,990
Rating: 4.6 out of 5 stars.
Reviews: No reviews available
Description: Check each product page for other buying options.
---
You: phone
Here are some recommendations for you:
Name: Check each product page for other buying options.
Price: 48,990
Rating: 4.6 out of 5 stars.
Reviews: No reviews available
Description: Check each product page for other buying options.
---
You: quit
Here are some 

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def scrape_amazon(query="laptops", timeout=10):
    """Scrape the first page of Amazon India search results.

    Args:
        query: Search term, sent as the ``k`` query parameter.
        timeout: Seconds to wait for the server before the request is
            abandoned; without it ``requests.get`` may block indefinitely.

    Returns:
        pandas.DataFrame with the columns ``Name``, ``Price``, ``Rating`` and
        ``Description``, one row per result card. The columns are present
        even when the request fails or no card is found.
    """
    columns = ["Name", "Price", "Rating", "Description"]
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # Passing the term through ``params`` lets requests URL-encode it.
        response = requests.get(
            "https://www.amazon.in/s",
            params={"k": query},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()  # Raise HTTP errors if any
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return pd.DataFrame(columns=columns)  # Return empty DataFrame with required columns

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or(node, tag, css_class, default):
        """Return the stripped text of the first matching child, or *default*."""
        match = node.find(tag, class_=css_class)
        return match.text.strip() if match is not None else default

    # Iterate over the individual result cards rather than the
    # "s-main-slot ..." wrapper, which would give one row for the whole page.
    # NOTE(review): assumes every card is a <div> carrying
    # data-component-type="s-search-result" -- confirm against the live markup.
    cards = soup.find_all("div", attrs={"data-component-type": "s-search-result"})

    products = []
    for card in cards:
        name = text_or(card, "span", "a-text-normal", "Unknown")
        products.append({
            "Name": name,
            "Price": text_or(card, "span", "a-price-whole", "Unknown"),
            "Rating": text_or(card, "span", "a-icon-alt", "No ratings"),
            "Description": name,  # Use name as fallback description
        })

    # Explicit columns keep the schema stable when no card was found.
    return pd.DataFrame(products, columns=columns)

def scrape_flipkart(query="jeans", timeout=10):
    """Scrape the first page of Flipkart search results.

    Args:
        query: Search term, sent as the ``q`` query parameter.
            NOTE(review): the default is "jeans" while scrape_amazon in this
            notebook searches for laptops -- confirm the mismatch is intended.
        timeout: Seconds to wait for the server before the request is
            abandoned; without it ``requests.get`` may block indefinitely.

    Returns:
        pandas.DataFrame with the columns ``Name``, ``Price``, ``Rating``,
        ``Reviews`` and ``Description``, one row per matched product
        container. The columns are present even when the request fails or
        nothing is matched.
    """
    columns = ["Name", "Price", "Rating", "Reviews", "Description"]
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
    try:
        # Passing the term through ``params`` lets requests URL-encode it.
        response = requests.get(
            "https://www.flipkart.com/search",
            params={"q": query},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return pd.DataFrame(columns=columns)  # Return empty DataFrame

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or(node, tag, css_class, default):
        """Return the stripped text of the first matching child, or *default*."""
        match = node.find(tag, class_=css_class)
        return match.text.strip() if match is not None else default

    products = []
    for item in soup.find_all("div", class_="_2kHMtA"):  # Flipkart product container class
        name = text_or(item, "div", "_4rR01T", "Unknown")
        products.append({
            "Name": name,
            "Price": text_or(item, "div", "_30jeq3 _1_WHN1", "Unknown"),
            "Rating": text_or(item, "div", "_3LWZlK", "No ratings"),
            "Reviews": text_or(item, "span", "_2_R_DZ", "No reviews"),
            "Description": name,  # Use name as fallback description
        })

    # Explicit columns keep the schema stable when nothing was matched.
    return pd.DataFrame(products, columns=columns)

def chatbot_logic(user_query, scraped_data, top_n=5):
    """Rank scraped products by TF-IDF cosine similarity to a query.

    Args:
        user_query: Free-text request typed by the user (a string).
        scraped_data: DataFrame of products; it is ranked on its
            ``Description`` column.
        top_n: Maximum number of recommendations to return.

    Returns:
        Up to ``top_n`` rows of ``scraped_data`` ordered from most to least
        similar. Rows with zero similarity are left out, so the result is
        empty when nothing matches. An empty DataFrame is also returned when
        there is no data, no ``Description`` column, or a blank query.
    """
    if scraped_data.empty or 'Description' not in scraped_data.columns:
        return pd.DataFrame()  # Return empty DataFrame if no data available

    no_match = scraped_data.iloc[0:0]  # Same columns, zero rows
    if not user_query or not user_query.strip():
        return no_match

    # Missing descriptions become empty strings so the vectorizer accepts them.
    descriptions = scraped_data['Description'].fillna("").astype(str)
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        product_vectors = vectorizer.fit_transform(descriptions)
    except ValueError:
        # Raised when no description contains a usable (non stop-word) term.
        return no_match
    query_vector = vectorizer.transform([user_query])

    scores = cosine_similarity(query_vector, product_vectors)[0]
    # A stable sort on the negated scores keeps tied products in input order.
    ranked_indices = (-scores).argsort(kind="stable")
    # Drop products that share no term with the query instead of padding the
    # result with unrelated rows.
    ranked_indices = ranked_indices[scores[ranked_indices] > 0][:max(top_n, 0)]

    return scraped_data.iloc[ranked_indices]

def chatbot():
    """Run the interactive product-recommendation loop on the console.

    Scrapes Amazon and Flipkart once, merges the results, and then answers
    queries read from standard input until the user types ``exit`` or
    ``quit`` or the input stream ends.
    """
    print("Welcome to the Multi-Platform Product Recommendation Chatbot!")
    print("Fetching product data...")

    amazon_data = scrape_amazon()
    flipkart_data = scrape_flipkart()

    # Only non-empty frames are concatenated; pd.concat raises on an empty
    # list, hence the fallback to an empty DataFrame.
    frames = [frame for frame in (amazon_data, flipkart_data) if not frame.empty]
    combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if combined_data.empty:
        print("No data could be fetched from the platforms. Please check your connections or URLs.")
        return

    print("Data fetched successfully! Ask for product recommendations.")
    print("Type 'exit' to quit.")

    while True:
        try:
            user_query = input("You: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Input closed or interrupted: leave the loop instead of crashing.
            print("\nThank you for using the chatbot. Goodbye!")
            break
        if user_query in ('exit', 'quit'):
            print("Thank you for using the chatbot. Goodbye!")
            break
        if not user_query:
            continue  # Nothing to search for; prompt again

        recommendations = chatbot_logic(user_query, combined_data)
        if recommendations.empty:
            print("Sorry, no matching products found.")
            continue

        print("Here are some recommendations for you:")
        for _, product in recommendations.iterrows():
            # Rows from a platform without reviews hold NaN after the concat,
            # and .get() only falls back when the key itself is missing.
            reviews = product.get('Reviews')
            if pd.isna(reviews):
                reviews = 'No reviews available'
            print(f"Name: {product['Name']}")
            print(f"Price: {product['Price']}")
            print(f"Rating: {product['Rating']}")
            print(f"Reviews: {reviews}")
            print(f"Description: {product['Description']}")
            print("---")

# Run the chatbot: fetch product data from both platforms, then start the
# interactive prompt loop (blocks on input() until the user types 'exit').
chatbot()


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def scrape_amazon(query="laptops", timeout=10):
    """Scrape the first page of Amazon India search results.

    Args:
        query: Search term, sent as the ``k`` query parameter.
        timeout: Seconds to wait for the server before the request is
            abandoned; without it ``requests.get`` may block indefinitely.

    Returns:
        pandas.DataFrame with the columns ``Name``, ``Price``, ``Rating`` and
        ``Description``, one row per result card. The columns are present
        even when the request fails or no card is found.
    """
    columns = ["Name", "Price", "Rating", "Description"]
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # Query the search endpoint with a well-formed "https" scheme; the
        # previous home-page URL was misspelled ("hhttps") and carried no
        # search term. ``params`` lets requests URL-encode the term.
        response = requests.get(
            "https://www.amazon.in/s",
            params={"k": query},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching Amazon data: {e}")
        return pd.DataFrame(columns=columns)  # Empty DataFrame with required columns

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or(node, tag, css_class, default):
        """Return the stripped text of the first matching child, or *default*."""
        match = node.find(tag, class_=css_class)
        return match.text.strip() if match is not None else default

    # Iterate over the individual result cards rather than the
    # "s-main-slot ..." wrapper, which would give one row for the whole page.
    # NOTE(review): assumes every card is a <div> carrying
    # data-component-type="s-search-result" -- confirm against the live markup.
    cards = soup.find_all("div", attrs={"data-component-type": "s-search-result"})

    products = []
    for card in cards:
        name = text_or(card, "span", "a-text-normal", "Unknown")
        products.append({
            "Name": name,
            "Price": text_or(card, "span", "a-price-whole", "Unknown"),
            "Rating": text_or(card, "span", "a-icon-alt", "No ratings"),
            "Description": name,  # Use name as fallback description
        })

    # Explicit columns keep the schema stable when no card was found.
    return pd.DataFrame(products, columns=columns)

def scrape_flipkart(query="laptops", timeout=10):
    """Scrape the first page of Flipkart search results.

    Args:
        query: Search term, sent as the ``q`` query parameter. The default
            matches the term searched by scrape_amazon in this notebook.
        timeout: Seconds to wait for the server before the request is
            abandoned; without it ``requests.get`` may block indefinitely.

    Returns:
        pandas.DataFrame with the columns ``Name``, ``Price``, ``Rating``,
        ``Reviews`` and ``Description``, one row per matched product
        container. The columns are present even when the request fails or
        nothing is matched.
    """
    columns = ["Name", "Price", "Rating", "Reviews", "Description"]
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # Query the search endpoint; the previous URL was the home page with
        # ad-tracking parameters and no search term.
        response = requests.get(
            "https://www.flipkart.com/search",
            params={"q": query},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching Flipkart data: {e}")
        return pd.DataFrame(columns=columns)  # Empty DataFrame

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or(node, tag, css_class, default):
        """Return the stripped text of the first matching child, or *default*."""
        match = node.find(tag, class_=css_class)
        return match.text.strip() if match is not None else default

    products = []
    for item in soup.find_all("div", class_="_2kHMtA"):  # Flipkart product container class
        name = text_or(item, "div", "_4rR01T", "Unknown")
        products.append({
            "Name": name,
            "Price": text_or(item, "div", "_30jeq3 _1_WHN1", "Unknown"),
            "Rating": text_or(item, "div", "_3LWZlK", "No ratings"),
            "Reviews": text_or(item, "span", "_2_R_DZ", "No reviews"),
            "Description": name,  # Use name as fallback description
        })

    # Explicit columns keep the schema stable when nothing was matched.
    return pd.DataFrame(products, columns=columns)

def chatbot_logic(user_query, scraped_data, top_n=5):
    """Rank scraped products by TF-IDF cosine similarity to a query.

    Args:
        user_query: Free-text request typed by the user (a string).
        scraped_data: DataFrame of products; it is ranked on its
            ``Description`` column.
        top_n: Maximum number of recommendations to return.

    Returns:
        Up to ``top_n`` rows of ``scraped_data`` ordered from most to least
        similar. Rows with zero similarity are left out, so the result is
        empty when nothing matches. An empty DataFrame is also returned when
        there is no data, no ``Description`` column, or a blank query.
    """
    if scraped_data.empty or 'Description' not in scraped_data.columns:
        return pd.DataFrame()  # Return empty DataFrame if no data is available

    no_match = scraped_data.iloc[0:0]  # Same columns, zero rows
    if not user_query or not user_query.strip():
        return no_match

    # Missing descriptions become empty strings so the vectorizer accepts them.
    descriptions = scraped_data['Description'].fillna("").astype(str)
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        product_vectors = vectorizer.fit_transform(descriptions)
    except ValueError:
        # Raised when no description contains a usable (non stop-word) term.
        return no_match
    query_vector = vectorizer.transform([user_query])

    scores = cosine_similarity(query_vector, product_vectors)[0]
    # A stable sort on the negated scores keeps tied products in input order.
    ranked_indices = (-scores).argsort(kind="stable")
    # Drop products that share no term with the query instead of padding the
    # result with unrelated rows.
    ranked_indices = ranked_indices[scores[ranked_indices] > 0][:max(top_n, 0)]

    return scraped_data.iloc[ranked_indices]

def chatbot():
    """Run the interactive product-recommendation loop on the console.

    Scrapes Amazon and Flipkart once, merges the results, and then answers
    queries read from standard input until the user types ``exit`` or
    ``quit`` or the input stream ends.
    """
    print("Welcome to the Multi-Platform Product Recommendation Chatbot!")
    print("Fetching product data...")

    amazon_data = scrape_amazon()
    flipkart_data = scrape_flipkart()

    # Only non-empty frames are concatenated; pd.concat raises on an empty
    # list, hence the fallback to an empty DataFrame.
    frames = [frame for frame in (amazon_data, flipkart_data) if not frame.empty]
    combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if combined_data.empty:
        print("No data could be fetched from the platforms. Please check your connections or URLs.")
        return

    print("Data fetched successfully! Ask for product recommendations.")
    print("Type 'exit' to quit.")

    while True:
        try:
            user_query = input("You: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Input closed or interrupted: leave the loop instead of crashing.
            print("\nThank you for using the chatbot. Goodbye!")
            break
        if user_query in ('exit', 'quit'):
            print("Thank you for using the chatbot. Goodbye!")
            break
        if not user_query:
            continue  # Nothing to search for; prompt again

        recommendations = chatbot_logic(user_query, combined_data)
        if recommendations.empty:
            print("Sorry, no matching products found. Try another query.")
            continue

        print("Here are some recommendations for you:")
        for _, product in recommendations.iterrows():
            # Rows from a platform without reviews hold NaN after the concat,
            # and .get() only falls back when the key itself is missing.
            reviews = product.get('Reviews')
            if pd.isna(reviews):
                reviews = 'No reviews available'
            print(f"Name: {product['Name']}")
            print(f"Price: {product['Price']}")
            print(f"Rating: {product['Rating']}")
            print(f"Reviews: {reviews}")
            print(f"Description: {product['Description']}")
            print("---")

# Run the chatbot: fetch product data from both platforms, then start the
# interactive prompt loop (blocks on input() until the user types 'exit').
chatbot()


In [None]:
import time

def scrape_amazon(query="laptops", timeout=10):
    """Scrape the first page of Amazon India search results.

    Args:
        query: Search term, sent as the ``k`` query parameter.
        timeout: Seconds to wait for the server before the request is
            abandoned; without it ``requests.get`` may block indefinitely.

    Returns:
        pandas.DataFrame with the columns ``Name``, ``Price``, ``Rating`` and
        ``Description``, one row per result card. The columns are present
        even when the request fails or no card is found.
    """
    columns = ["Name", "Price", "Rating", "Description"]
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # Passing the term through ``params`` lets requests URL-encode it.
        response = requests.get(
            "https://www.amazon.in/s",
            params={"k": query},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()  # Raise HTTP errors if any
    except requests.exceptions.RequestException as e:
        print(f"Error fetching Amazon data: {e}")
        return pd.DataFrame(columns=columns)  # Return empty DataFrame

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or(node, tag, css_class, default):
        """Return the stripped text of the first matching child, or *default*."""
        match = node.find(tag, class_=css_class)
        return match.text.strip() if match is not None else default

    # Iterate over the individual result cards rather than the
    # "s-main-slot ..." wrapper, which would give one row for the whole page.
    # NOTE(review): assumes every card is a <div> carrying
    # data-component-type="s-search-result" -- confirm against the live markup.
    cards = soup.find_all("div", attrs={"data-component-type": "s-search-result"})

    products = []
    for card in cards:
        name = text_or(card, "span", "a-text-normal", "Unknown")
        products.append({
            "Name": name,
            "Price": text_or(card, "span", "a-price-whole", "Unknown"),
            "Rating": text_or(card, "span", "a-icon-alt", "No ratings"),
            "Description": name,  # The name doubles as the text to rank on
        })

    # Explicit columns keep the schema stable when no card was found.
    return pd.DataFrame(products, columns=columns)

def scrape_flipkart(query="jeans", timeout=10, retries=3, retry_delay=5):
    """Scrape the first page of Flipkart search results, retrying on failure.

    Args:
        query: Search term, sent as the ``q`` query parameter.
            NOTE(review): the default is "jeans" while scrape_amazon in this
            notebook searches for laptops -- confirm the mismatch is intended.
        timeout: Seconds to wait for the server on each attempt; without it
            ``requests.get`` may block indefinitely.
        retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to sleep between two attempts.

    Returns:
        pandas.DataFrame with the columns ``Name``, ``Price``, ``Rating``,
        ``Reviews`` and ``Description``, one row per matched product
        container. The columns are present even when every attempt fails or
        nothing is matched.
    """
    columns = ["Name", "Price", "Rating", "Reviews", "Description"]
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    for attempt in range(1, retries + 1):  # Retry logic
        try:
            response = requests.get(
                "https://www.flipkart.com/search",
                params={"q": query},
                headers=headers,
                timeout=timeout,
            )
            response.raise_for_status()
            break
        except requests.exceptions.RequestException as e:
            print(f"Error fetching Flipkart data: {e}")
            # Wait only when another attempt follows; sleeping after the
            # last failure would just delay the empty result.
            if attempt < retries:
                time.sleep(retry_delay)
    else:
        # The loop ended without ``break``: every attempt failed.
        return pd.DataFrame(columns=columns)  # Empty DataFrame if failed

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or(node, tag, css_class, default):
        """Return the stripped text of the first matching child, or *default*."""
        match = node.find(tag, class_=css_class)
        return match.text.strip() if match is not None else default

    products = []
    for item in soup.find_all("div", class_="_2kHMtA"):
        name = text_or(item, "div", "_4rR01T", "Unknown")
        products.append({
            "Name": name,
            "Price": text_or(item, "div", "_30jeq3 _1_WHN1", "Unknown"),
            "Rating": text_or(item, "div", "_3LWZlK", "No ratings"),
            "Reviews": text_or(item, "span", "_2_R_DZ", "No reviews"),
            "Description": name,  # The name doubles as the text to rank on
        })

    # Explicit columns keep the schema stable when nothing was matched.
    return pd.DataFrame(products, columns=columns)

# Run the chatbot. chatbot() is not defined in this cell, so this call relies
# on a definition executed in an earlier cell; it looks up scrape_amazon and
# scrape_flipkart when it is called, so the versions defined above are used.
chatbot()
