In [None]:
from langchain.vectorstores import Qdrant
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
import qdrant_client
import os
from langchain_community.llms import LlamaCpp
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader, PyMuPDFLoader
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import HumanMessage
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from torch.utils.data import DataLoader, TensorDataset
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

In [None]:
# Hugging Face checkpoint used to embed the document chunks.
embedding_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# batch_size is handed to the underlying encoder through encode_kwargs.
embedding_model = HuggingFaceEmbeddings(
    encode_kwargs=dict(batch_size=16),
    model_name=embedding_model_id,
)

In [None]:
# Connect to the Qdrant server expected at localhost:6333.
client = QdrantClient("localhost", port=6333)

# Single source of truth for the collection name: it is used both to create
# the collection and to bind the LangChain vector store to it, so the two can
# never drift apart when the name is changed.
collection_name = 'pulkitcollection'

# Drop and re-create the collection so every run of this cell starts from an
# empty index. size=384 must equal the embedding dimension of
# `embedding_model` (all-MiniLM-L6-v2 produces 384-dimensional vectors).
# NOTE(review): recent qdrant-client releases mark recreate_collection as
# deprecated in favour of collection_exists / delete_collection /
# create_collection -- confirm against the installed version before migrating.
client.recreate_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

# LangChain wrapper that embeds texts with `embedding_model` and stores them
# in the collection created just above.
vectorstore = Qdrant(
    client=client,
    collection_name=collection_name,
    embeddings=embedding_model,
)

In [None]:
# Chunking setup: chunk_size of 500 with an overlap of 100 between
# consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=500,
)

# Load every file matching *.pdf in the "documents" directory via PyMuPDFLoader.
loader = DirectoryLoader(
    "documents",
    loader_cls=PyMuPDFLoader,
    glob="*.pdf",
)
docs = loader.load()

# Split the loaded documents into chunks, then add the chunks to the
# Qdrant-backed vector store.
splits = text_splitter.split_documents(docs)
vectorstore.add_documents(documents=splits)