In [10]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain.embeddings.base import Embeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from typing import List
from langchain_community.document_loaders import PyPDFLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import faiss
from typing import List
from langchain_core.runnables.config import run_in_executor
import os
from typing import Any, Dict, Iterator, List, Mapping, Optional
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

In [3]:
tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M")

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M").to(device)

In [6]:
from langchain.llms.base import LLM
from typing import Any, List, Mapping, Optional
from pydantic import Field

class T5Embeddings(Embeddings):

    def __init__(self, model_name: str = "MBZUAI/LaMini-T5-738M"):
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

    def _embed_text(self, text: str) -> List[float]:
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        input_ids = inputs.input_ids.to(self.device)
        
        with torch.no_grad():
            embeddings = self.model.shared(input_ids).mean(dim=1)
        
        return embeddings.squeeze().cpu().numpy().tolist()
        
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search docs."""
        return [self._embed_text(text) for text in texts]


    def embed_query(self, text: str) -> List[float]:
        """Embed query text."""
        return self._embed_text(text)

class CustomLLM(LLM):
    n: int
    device = "cuda" if torch.cuda.is_available() else "cpu"
    """The number of characters from the last message of the prompt to be echoed."""

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:

        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")
            
        input_ids = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        input_ids = input_ids.input_ids.to(self.device)
        
        outputs = model.generate(input_ids, max_length=512)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters."""
        return {
            # The model name allows users to specify custom token counting
            # rules in LLM monitoring applications (e.g., in LangSmith users
            # can provide per token pricing for their model and monitor
            # costs for the given LLM.)
            "model_name": "CustomChatModel",
        }

    @property
    def _llm_type(self) -> str:
        """Get the type of language model used by this chat model. Used for logging purposes only."""
        return "custom"
        

### T5 model with T5 embeddings

In [7]:
embeddings = T5Embeddings("MBZUAI/LaMini-T5-738M")

def get_embeddings_from_pdf(pdf_folder_path, embeddings):
    documents = []
    for file in os.listdir(pdf_folder_path):
        if file.endswith('.pdf'):
            pdf_path = os.path.join(pdf_folder_path, file)
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load())
            
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)
    faiss_index = FAISS.from_documents(chunked_documents, embeddings)
          
    return faiss_index
    
faiss_index = get_embeddings_from_pdf("./prof_1/", embeddings)
retriever = faiss_index.as_retriever()
llm = CustomLLM(n=5)

# Create a prompt template
template = """You are an helpful assistant and you try to use the following information to answer the questions given at the end:  
Context: {context}

Question: {question}

Answer:"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# Create the RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [8]:

"T5 MODEL WITH T5 EMBEDDINGS" 

query = "how to utilize a graph neural network (GNN) to encode structural infor-"\
"mation from neighboring nodes into a graph prompt?"

result = qa_chain({"query": query})
print(result['result'])

The text does not provide information about which of the two key strategies is not used in the first step of GPEFT.


### T5 model with llama embeddings

In [11]:

embeddings = (
    OllamaEmbeddings()
)
llm = CustomLLM(n=5)

def get_embeddings_from_pdf(pdf_folder_path, embeddings):
    documents = []
    for file in os.listdir(pdf_folder_path):
        if file.endswith('.pdf'):
            pdf_path = os.path.join(pdf_folder_path, file)
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load())
            
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)
    faiss_index = FAISS.from_documents(chunked_documents, embeddings)
          
    return faiss_index
    
faiss_index = get_embeddings_from_pdf("./prof_1/", embeddings)
retriever = faiss_index.as_retriever()


# Create a prompt template
template = """You are an helpful assistant and you try to use the following information to answer the questions given at the end:  
Context: {context}

Question: {question}

Answer:"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# Create the RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)

In [12]:

"T5 MODEL WITH LLAMA EMBEDDINGS"

query = "how to utilize a graph neural network (GNN) to encode structural infor-"\
"mation from neighboring nodes into a graph prompt?"

result = qa_chain({"query": query})
print(result['result'])

The article does not provide information about the year when the article was published.


### Llama 2 with T5 embeddings

In [13]:


llm = Ollama(model="llama2")
embeddings = T5Embeddings("MBZUAI/LaMini-T5-738M")

def get_embeddings_from_pdf(pdf_folder_path, embeddings):
    documents = []
    for file in os.listdir(pdf_folder_path):
        if file.endswith('.pdf'):
            pdf_path = os.path.join(pdf_folder_path, file)
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load())
            
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)
    faiss_index = FAISS.from_documents(chunked_documents, embeddings)
          
    return faiss_index
    
faiss_index = get_embeddings_from_pdf("./prof_1/", embeddings)
retriever = faiss_index.as_retriever()


# Create a prompt template
template = """You are an helpful assistant and you try to use the following information to answer the questions given at the end:  
Context: {context}

Question: {question}

Answer:"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# Create the RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [14]:

"LLAMA 2 7B with T5 embeddings" 

query = "how to utilize a graph neural network (GNN) to encode structural infor-"\
"mation from neighboring nodes into a graph prompt?"

result = qa_chain({"query": query})
print(result['result'])

1. Use of GNNs for Graph Prompt Encoding: The use of GNNs for encoding structural information from neighboring nodes into a graph prompt is a crucial aspect of SemPool. By incorporating the node features and neighborhood structures, GNNs can learn to encode the structural information effectively.
2. Multi-Layer Language Model (MLLM) Pre-training: To pre-train the language model, SemPool utilizes a multi-layer MLLM architecture that is fine-tuned for downstream tasks. The pre-training step enables the language model to learn high-level semantic representations of text data, which are then used for graph prompt encoding.
3. Fusion of Semantic Information: SemPool aggregates the semantic information from the whole graph into a single vector through a fusion module. This module combines the node features and neighborhood structures to generate a unified representation of the graph.
4. Efficient Representation Learning: To learn efficient representations, SemPool employs an efficient traini

### Llama 2 with llama embeddings

In [15]:
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

llm = Ollama(model="llama2")
embeddings = (
    OllamaEmbeddings()
)

def get_embeddings_from_pdf(pdf_folder_path, embeddings):
    documents = []
    for file in os.listdir(pdf_folder_path):
        if file.endswith('.pdf'):
            pdf_path = os.path.join(pdf_folder_path, file)
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load())
            
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)
    faiss_index = FAISS.from_documents(chunked_documents, embeddings)
          
    return faiss_index
    
faiss_index = get_embeddings_from_pdf("./prof_1/", embeddings)
retriever = faiss_index.as_retriever()


# Create a prompt template
template = """You are an helpful assistant and you try to use the following information to answer the questions given at the end:  
Context: {context}

Question: {question}

Answer:"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# Create the RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)

In [16]:

"LLAMA 2 7b with llama embeddings" 

query = "how to utilize a graph neural network (GNN) to encode structural infor-"\
"mation from neighboring nodes into a graph prompt?"

result = qa_chain({"query": query})
print(result['result'])

The passage discusses the effectiveness of large language models (LLMs) at graph representation learning, particularly with the GPEFT pre-training phase. The author provides several observations based on experiments conducted on two different domains:

1. Masked language models (e.g., Sentence-BERT) were popularly used for sequence representation, but large language models (e.g., PEFT-LLaMA) have shown improved performance over masked language models in graph representation learning.
2. LLMs yield more than 10% improvement over Sentence-BERT in terms of graph representation learning.

The author also discusses the limitations of current QA GNN-based approaches, which use external node embeddings to represent the nodes' information. The findings suggest that GNNs rely on the underlying graph statistics to discriminate between correct and incorrect answers.

To address these limitations, the author proposes a novel approach called InstructGLM-embeddings, which utilizes a graph neural net