In [1]:
%%capture
!pip install --upgrade langchain transformers faiss-cpu sentence-transformers langchain_community langchain-huggingface datasets

# **1- Import necessary modules**

In [2]:
from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from langchain_huggingface import HuggingFacePipeline
from langchain.chains import LLMChain

import warnings
warnings.filterwarnings('ignore')

# **2- Loading Data**

In [3]:
data = ['Artificial intelligence (AI) is rapidly transforming industries, making AI skills some of the most sought-after in the global job market. AI encompasses technologies and methodologies that allow machines to mimic human intelligence, enabling them to perform tasks like decision-making, speech recognition, and visual perception. The demand for AI skills is skyrocketing as businesses leverage these technologies to streamline operations, reduce costs, and gain competitive advantages.'
'AI skills are required across various industries, from healthcare and finance to retail and technology. These skills are essential for building intelligent systems that can analyze data, automate processes, and generate insights. As industries race to adopt AI-driven solutions, professionals with a solid understanding of AI technologies like machine learning, natural language processing (NLP), and computer vision are in high demand .']

# **3- Document Transformers**

In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = text_splitter.create_documents(data)

# **4- Text Embedding**

In [5]:
def setup_embeddings():
    print(">> setup_embeddings")
    model_name = "sentence-transformers/all-mpnet-base-v2"
    return HuggingFaceEmbeddings(model_name=model_name)

In [6]:
embeddings = setup_embeddings()

>> setup_embeddings


# **5- Vector Stores**

In [7]:
db = FAISS.from_documents(docs, embeddings)

In [8]:
question = "What is artificial intelligence?"
searchDocs = db.similarity_search(question)
print(searchDocs[0].page_content)

Artificial intelligence (AI) is rapidly transforming industries, making AI skills some of the most sought-after in the global job market. AI encompasses technologies and methodologies that allow machines to mimic human intelligence, enabling them to perform tasks like decision-making, speech recognition, and visual perception. The demand for AI skills is skyrocketing as businesses leverage these technologies to streamline operations, reduce costs, and gain competitive advantages.AI skills are required across various industries, from healthcare and finance to retail and technology. These skills are essential for building intelligent systems that can analyze data, automate processes, and generate insights. As industries race to adopt AI-driven solutions, professionals with a solid understanding of AI technologies like machine learning, natural language processing (NLP), and computer vision are in high demand .


# **6- Preparing the LLM Model**

In [9]:
def getFlan():
    model_id = "google/flan-t5-large"  # Use the correct model ID
    print(f">> getLlama {model_id}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

        # Ensure the tokenizer uses the correct padding token
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.model_max_length = 512  # Set a reasonable max length for the model

        text_generation_pipeline = pipeline(
            "text2text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=256,  # Set max_new_tokens to control the length of the generated text
            truncation=True,  # Explicitly set truncation
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )
        llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
        return llm
    except Exception as e:
        print(f"An error occurred while loading the model: {e}")
        return None

In [10]:
llm = getFlan()

>> getLlama google/flan-t5-large


Device set to use cpu


# **7- Define a prompt template for generating responses**

In [11]:
prompt_sample = """
    I am a helpful AI that answers questions.
    When I don't know the answer I say I don't know.
    I know context: {context}
    when asked: {question}
    my response using only information in the context is:
"""
prompt = PromptTemplate(template=prompt_sample, input_variables=["context", "question"])

# **8- Create an LLM chain using the prompt template and language model**

In [12]:
if llm is not None:

    llm_chain = LLMChain(prompt=prompt, llm=llm)
    try:
        question = "What is artificial intelligence?"

        similar_doc = db.similarity_search(question)
        doc_context = similar_doc[0].page_content

        # Generate response
        response = llm_chain.invoke({"context": doc_context, "question": question})

        print(response["text"])
    except Exception as e:
        print(f"An error occurred duringor response generation: {e}")
else:
    print("Failed to create the language model pipeline.")

AI encompasses technologies and methodologies that allow machines to mimic human intelligence
