# Openshift with WatsonX and PosgreSQL for RAG
### Openshift

In [1]:
#from IPython.display import clear_output
!pip install --upgrade pip
!pip install "langchain==0.0.345" 
!pip install wget 
!pip install sentence-transformers 
!pip install "chromadb==0.3.26" 
!pip install ibm-watson-machine-learning==1.0.359
!pip install pydantic==1.10.11
!pip install python-dotenv
!pip install typing-inspect==0.8.0
!pip install typing_extensions==4.8.0
!pip install psycopg2-binary
!pip install pypdf
!pip install pgvector
!pip install sentence-transformers
#!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
#clear_output()



In [2]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores.pgvector import PGVector
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
import os 
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
from langchain.llms import WatsonxLLM
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
from typing import Any, List, Mapping, Optional, Union, Dict
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from pydantic import BaseModel, Extra
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from dotenv import load_dotenv    
import wget
import psycopg2
from langchain.chains import RetrievalQA
from langchain.document_loaders import WebBaseLoader
import getpass



# PostgreSQL Server Setup

In [3]:
## The first step we need load the credentials posgrgre server
# Load the .env file
load_dotenv()
# Get the values from the .env file, or use default values if not set
user = os.getenv("user", "testuser")
password = os.getenv("password", "testpwd")
database = os.getenv("database", "vectordb")
server = os.getenv("server", "localhost")

print("User:", user)
print("Database:", database)

User: vectordb
Database: vectordb


In [6]:
# Construct the connection string
#CONNECTION_STRING = f"postgresql://{user}:{password}@{server}/{database}"
CONNECTION_STRING = f"postgresql+psycopg://{user}:{password}@{server}:5432/{database}"
# Print the connection string
print(CONNECTION_STRING)

postgresql+psycopg://vectordb:vectordb@ab92d13fd3c9e4e2897f4464a09f2500-694822135.us-east-2.elb.amazonaws.com:5432/vectordb


In [7]:
## Testing the Server connection

conn = psycopg2.connect(
    host=server,
    database=database,
    user=user,
    password=password
)

cur = conn.cursor()
cur.execute("SELECT 1")
print(cur.fetchone())  # Should print (1,)
conn.close()
####

(1,)


In [8]:
### We coonnect to create the db 
# Construct the connection string
CONNECTION_STRING = f"postgresql://{user}:{password}@{server}/{database}"
# Create a connection to the database
conn = psycopg2.connect(CONNECTION_STRING)
# Create a cursor object to execute queries
cur = conn.cursor()
# Execute the SQL command
cur.execute("""
    CREATE EXTENSION IF NOT EXISTS vector;
    CREATE TABLE IF NOT EXISTS embeddings (
      id SERIAL PRIMARY KEY,
      embedding vector,
      text text,
      created_at timestamptz DEFAULT now()
    );
""")

# Commit the changes
conn.commit()

# Close the cursor and connection
cur.close()
conn.close()

# Create a connection to the database
conn = psycopg2.connect(CONNECTION_STRING)

# Create a cursor object to execute queries
cur = conn.cursor()

# Check if the table exists
cur.execute("SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'embeddings')")
table_exists = cur.fetchone()[0]

if table_exists:
    print("Table 'embeddings' exists!")
else:
    print("Table 'embeddings' does not exist.")

# Get the schema of the table
cur.execute("SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'embeddings'")
schema = cur.fetchall()

print("Schema of table 'embeddings':")
for column in schema:
    print(f"  {column[0]}: {column[1]}")

# Close the cursor and connection
cur.close()
conn.close()

Table 'embeddings' exists!
Schema of table 'embeddings':
  id: integer
  embedding: USER-DEFINED
  text: text
  created_at: timestamp with time zone


# WatsonX with  Retrieval-Augmented Generation with PostreSQL

In [9]:
pdf_folder_path = './rhods-doc'
filename = 'Vector_database.pdf'
url = 'https://github.com/ruslanmv/WatsonX-with-Langchain-PostgreSQL-with-pgvector/raw/master/rhods-doc/Vector_database.pdf'

# Create the directory if it doesn't exist
if not os.path.exists(pdf_folder_path):
    os.makedirs(pdf_folder_path)

full_path = os.path.join(pdf_folder_path, filename)

if not os.path.isfile(full_path):
    wget.download(url, out=full_path)

loader = PyPDFDirectoryLoader(pdf_folder_path)
docs = loader.load()    
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024,
                                               chunk_overlap=40)
all_splits_pdfs = text_splitter.split_documents(docs)

print(all_splits_pdfs[0])
for doc in all_splits_pdfs:
    doc.page_content = doc.page_content.replace('\x00', '')

embeddings = HuggingFaceEmbeddings()

COLLECTION_NAME = "documents_test"

db = PGVector.from_documents(
    documents=all_splits_pdfs,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,)    


model_id ='ibm/granite-13b-chat-v2'

# Create an instance of WatsonxLLM

# WatsonxLLM initialization
## Fist Type of parameters
parameters = {
    GenParams.DECODING_METHOD: DecodingMethods.SAMPLE.value,
    GenParams.MAX_NEW_TOKENS: 1000,
    GenParams.MIN_NEW_TOKENS: 50,
    GenParams.TEMPERATURE: 0.7,
    GenParams.TOP_K: 50,
    GenParams.TOP_P: 1
}

## Second Type of parameters
parameters = {
    GenParams.DECODING_METHOD: DecodingMethods.GREEDY,
    GenParams.MIN_NEW_TOKENS: 1,
    GenParams.MAX_NEW_TOKENS: 200,
    GenParams.STOP_SEQUENCES: ["<|endoftext|>"]
}


load_dotenv()

load_dotenv()
project_id = os.getenv("PROJECT_ID", None)
credentials = {
    #"url":  "https://eu-de.ml.cloud.ibm.com",
    "url": "https://us-south.ml.cloud.ibm.com",
    "apikey": os.getenv("API_KEY", None)
}




#!pip install ibm_watsonx_ai
model_id ='ibm/granite-13b-chat-v2'


watsonx_granite = WatsonxLLM(
    model_id='ibm/granite-13b-chat-v2',
    url=credentials.get("url"),
    apikey=credentials.get("apikey"),
    project_id=project_id,
    params=parameters
)

qa = RetrievalQA.from_chain_type(llm=watsonx_granite, chain_type="stuff", retriever=db.as_retriever())

query = "What is vector database?"
qa.run(query)

data = loader.load()


text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024,
                                               chunk_overlap=40)
all_splits = text_splitter.split_documents(data)
for doc in all_splits:
    doc.page_content = doc.page_content.replace('\x00', '')

embeddings = HuggingFaceEmbeddings()
store = PGVector(
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings)    


store.add_documents(all_splits_pdfs);

query = "What is  Retrieval-Augmented Generation?"
docs_with_score = store.similarity_search_with_score(query)



for doc, score in docs_with_score[:1]:
    print("-" * 80)
    print("Score: ", score)
    print(doc.page_content)
    print("-" * 80)


qa = RetrievalQA.from_chain_type(llm=watsonx_granite, chain_type="stuff", retriever=store.as_retriever())    


query = "What is Prompt?"
qa.run(query)


query = "What ist Retrieval-Augmented Generation?"
qa.run(query)

page_content="Vector database\nA vector database management system (VDBMS) or simply vector database or vector store is a\ndatabase that can store vectors (fixed-length lists of numbers) along with other data items. Vector databases\ntypically implement one or more Approximate Nearest Neighbor  (ANN) algorithms,[1][2] so that one can\nsearch the database with a query vector to retrieve the closest matching da tabase records.\nVectors are mathematical representations of data in a high-dimensional space. In this space, each dimension\ncorresponds  to a feature of the data, and tens of thous ands of dimensions might be used to represent\nsophisticated data. A vector's position in this space represents its characteristics. Words, phrases, or entire\ndocuments, and images, audio, and ot her types of data can all be vectorized.[3]\nThese feature vectors may be computed from the raw data using machine learning methods such as feature\nextraction algorithms, word embeddings[4] or deep learning

Downloading .gitattributes:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

Downloading 1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

--------------------------------------------------------------------------------
Score:  0.4634260491640626
data items receive feature vectors that are close to each other.
Vector databases can be used for similarity search, multi-modal search, recommendations engines, large
langua ges models (LLMs), etc.[5]
Vector databases are also used to implement Retrieval-Augmented Generation (RAG), a method to improve
domain-specific respons es of large language models. Text documents describing the domain of interest are
collected and for each document a feature vector (know n as an "embedding") is computed, typically using a
deep learning network, and stored in a vector database. Given a user prompt, the feature vector of the
--------------------------------------------------------------------------------


' Retrieval-Augmented Generation (RAG) is a method to improve domain-specific responses of large language models. It works by collecting text documents describing the domain of interest and computing, typically using a deep learning network, a feature vector (known as an "embedding") for each document. These embeddings are then stored in a vector database. Given a user prompt, the feature vector of the relevant document is retrieved from the vector database and used to generate a more accurate and relevant response.'