# Installing Required Packages

In [1]:
!pip install langchain beautifulsoup4 flask sentence-transformers faiss-cpu

Collecting langchain
  Downloading langchain-0.3.4-py3-none-any.whl.metadata (7.1 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.2.1-py3-none-any.whl.metadata (10 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting langchain-core<0.4.0,>=0.3.12 (from langchain)
  Downloading langchain_core-0.3.12-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain)
  Downloading langchain_text_splitters-0.3.0-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.136-py3-none-any.whl.metadata (13 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.4.0,>=0.3.12->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting httpx<1,>=0.23.0 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
C

In [3]:
pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.3-py3-none-any.whl.metadata (2.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.6.0-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.23.0-py3-none-any.whl.metadata (7.6 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloa

# Extracting Data from URL using Langchain URL Loaders and BeautifulSoup

In [4]:
from langchain.document_loaders import WebBaseLoader
from bs4 import BeautifulSoup
import requests

# Define the URL
url = "https://brainlox.com/courses/category/technical"

# Create a loader for the URL
loader = WebBaseLoader([url])

# Load the HTML content
docs = loader.load()

# Using BeautifulSoup to extract course details
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# Extracting course titles, descriptions, and prices
courses = []
for course in soup.find_all('div', class_='single-courses-box'):
    title = course.find('h3').get_text(strip=True)
    description = course.find('p').get_text(strip=True)
    price = course.find('span', class_='price-per-session').get_text(strip=True)
    courses.append({'title': title, 'description': description, 'price': price})

# Print the extracted data
print(f"Extracted {len(courses)} courses.")




Extracted 67 courses.


# Creating Embeddings and Store them in a Vector Store (FAISS)

In [7]:
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
from langchain.docstore.document import Document
import faiss
import numpy as np

# Load the model for creating embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# Extract the course descriptions for embedding
course_descriptions = [course['description'] for course in courses]

# Generate embeddings for the course descriptions
embeddings = model.encode(course_descriptions)

# Create a FAISS index for L2 distance (Euclidean distance)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings))

# Create documents for the FAISS vector store
documents = [Document(page_content=desc, metadata={"title": course["title"], "price": course["price"]}) for desc, course in zip(course_descriptions, courses)]

# Create an InMemoryDocstore to map embeddings to documents
docstore = InMemoryDocstore({i: doc for i, doc in enumerate(documents)})

# Create the FAISS vector store with index, docstore, and mapping
index_to_docstore_id = {i: i for i in range(len(documents))}  # Index to document ID mapping
vector_store = FAISS(embedding_function=model.encode, index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id)

# Save the FAISS index to disk
faiss.write_index(index, 'courses_vector_store.index')


  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]



# Creating a Flask RESTful API to Handle Conversation

In [8]:
from flask import Flask, request, jsonify
from threading import Thread, Event
import faiss
import numpy as np

# Load the FAISS index from disk
index = faiss.read_index('courses_vector_store.index')

# Create the Flask app
app = Flask(__name__)

@app.route('/chatbot', methods=['POST'])
def chatbot():
    # Get the user query from the POST request
    user_input = request.json.get('query')

    # Generate embeddings for the user query and reshape it to (1, embedding_dim)
    user_embedding = model.encode([user_input])
    user_embedding = np.array(user_embedding).reshape(1, -1)

    # Search for the most relevant course in the FAISS index
    distances, indices = index.search(user_embedding, k=1)
    matched_course = courses[indices[0][0]]

    # Return the course information as JSON
    return jsonify({
        'course_title': matched_course['title'],
        'description': matched_course['description'],
        'price': matched_course['price']
    })

# Event to signal the server to stop
stop_event = Event()

# Function to run the Flask app
def run_app():
    # Run the app until the stop_event is set
    app.run(port=5001)

# Start the Flask app in a separate thread
thread = Thread(target=run_app)
thread.start()

# Function to stop the server
def stop_server():
    stop_event.set()
    thread.join()

# Call stop_server() when you want to stop the server

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5001
INFO:werkzeug:[33mPress CTRL+C to quit[0m


# Testing the Chatbot API Using curl

In [9]:
!curl -X POST http://127.0.0.1:5001/chatbot -H "Content-Type: application/json" -d '{"query": "I want to learn JavaScript"}'

INFO:werkzeug:127.0.0.1 - - [22/Oct/2024 12:08:15] "POST /chatbot HTTP/1.1" 200 -


{"course_title":"LEARN JAVASCRIPT","description":"JavaScript is the most popular programming language in the world. It powers the entire modern web.","price":"$20"}
