In [None]:
!pip install langchain_community langchain transformers faiss-cpu

Collecting langchain_community
  Downloading langchain_community-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Collecting langchain
  Downloading langchain-0.3.2-py3-none-any.whl.metadata (7.1 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting langchain-core<0.4.0,>=0.3.6 (from langchain_community)
  Downloading langchain_core-0.3.9-py3-none-any.whl.metadata (6.3 kB)
Collecting langsmith<0.2.0,>=0.1.125 (from langchain_community)
  Downloading langsmith-0.1.131-py3-none-any.whl.metadata (13 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.5.2-py3-none-any.whl.metadata (3.5 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain_community)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1

Renaming columns to help the model interpret the data.

In [None]:
import pandas as pd

# Load the raw reviews and relabel the two columns with descriptive names.
column_labels = {'title': 'product_name', 'text': 'product_review'}
df = pd.read_csv('rev2.csv').rename(columns=column_labels)

In [None]:
# Write the renamed frame to rev3.csv; index=False leaves the row index out of the file.
df.to_csv('rev3.csv',index=False)

## RAG approach with GPT-2

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from pathlib import Path
import os

In [None]:
from transformers import GenerationConfig

# Sampling settings for GPT-2 generation.
# temperature greater than 0.6 was resulting in hallucinations.
generation_config = GenerationConfig(
    max_new_tokens=300,
    do_sample=True,
    temperature=0.6,
)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Fetch the pretrained GPT-2 tokenizer and causal language model.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")



In [None]:
# Wrap the GPT-2 model in a transformers text-generation pipeline for LangChain.
# return_full_text=False makes the pipeline return only the newly generated
# text. With the default (True) the whole prompt is prepended, so the chain's
# answer starts with the system prompt and retrieved reviews instead of the
# new review.
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config,
    return_full_text=False,
)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

In [None]:
# Index the renamed CSV (rev3.csv) so each document's text carries the
# product_name / product_review labels. Loading rev2.csv here would ignore the
# rename step and feed the raw title / text labels to the model.
loader = CSVLoader(file_path='rev3.csv')
docs = loader.load_and_split()

In [None]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain.embeddings import SentenceTransformerEmbeddings

# Sentence-transformer model used as the store's embedding function.
embedding_model = SentenceTransformerEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Embed a throwaway query to learn the vector size, then create a flat L2
# index of that dimension.
embedding_dim = len(embedding_model.embed_query(" "))
index = faiss.IndexFlatL2(embedding_dim)

# Create the FAISS vector store with an empty docstore and id mapping.
vector_store = FAISS(
    embedding_function=embedding_model,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

  embedding_model = SentenceTransformerEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


In [None]:
# Add every loaded document to the vector store. The returned list of ids is
# captured so the cell shows a count instead of printing every id.
added_ids = vector_store.add_documents(documents=docs)
len(added_ids)

['6390cce3-8caf-4174-ae72-6d43396ee11a',
 'fcd08a45-cb3e-4b89-bb50-f93661fb440c',
 '4a3ea3cf-be68-47f0-8c50-e7dc626d930d',
 '8eb9aef9-0ab5-43f1-83d2-ed7b84f835c1',
 'b3cc1b35-6ec2-47b7-905f-fafcdd7fb1ed',
 'da686902-b513-484e-b544-b94c6f5f9c92',
 '2413cbdd-e6fa-426b-ab50-8b742c5105c0',
 '642ab7b1-069c-40e2-b18e-49b65e1cc84d',
 '4962887f-d9df-4acd-8c7a-d981a0fa26c9',
 'e53c1e5d-d002-4c00-b5a2-1196a885e90a',
 'b66570c4-4d18-4840-b5bc-6f48ae6bec35',
 '7438209f-e94b-48cd-964e-4ca4c03125c7',
 '598067f0-0bbb-45a6-9d81-35b10bab58b7',
 '46d66848-8456-4209-8e47-7a34ec1401ac',
 'ef5f684d-024b-4886-83b0-bafaef4a9e0d',
 '019a73b0-aeb6-44da-8b68-a895ffd224a4',
 '83d067e7-e760-4dcd-ac18-5c3f28e0f6de',
 '11fd037f-9561-49b1-88c5-6af750b1c4e4',
 '0def4018-4ca4-4442-96a8-b7ae85f13d70',
 'f6c29964-9415-4cb8-9769-ac1afb0fb474',
 '40e99735-b0bc-415d-a9fe-535945d8482e',
 'bc6c2a09-e981-4906-8d58-c20ba132a460',
 '4550a1ca-ad0a-4447-8167-53e2e9b01261',
 '9af4fec6-92e4-4514-8996-2e4029bc5d81',
 'cfd6c653-dad4-

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# System message: detailed writing guidelines for new product reviews, ending
# with a {context} placeholder under the "Retrieved Reviews:" heading.
system_prompt = """
You are an expert product reviewer and creative writer. Your task is to write new product reviews that matches the style, tone, and sentiment of the retrieved product reviews. Pay close attention to the following:

1. **Tone**: Observe the tone of the retrieved reviews. If the reviews are positive, maintain an enthusiastic and appreciative tone. If they are negative, reflect a critical yet constructive tone. Keep the tone consistent throughout the new reviews.

2. **Style**: Match the writing style of the retrieved reviews. If the reviews are informal, use casual language, contractions, and personal experiences. If they are formal, maintain a professional and structured approach. Pay attention to sentence length, use of adjectives, and whether the reviewers focus on features or personal opinions.

3. **Content**: Include common themes or topics mentioned in the retrieved reviews, such as specific features of the product (e.g., design, quality, functionality). Highlight these aspects in a similar way, adding some creative details or hypothetical experiences to make the reviews feel unique yet aligned with previous reviews.

4. **Length**: Make sure the length of the new reviews are similar to the retrieved reviews. If the retrieved reviews are brief and to the point, avoid writing lengthy reviews. Conversely, if they are more detailed, elaborate accordingly.

5. **Personalization**: Include hypothetical personal experiences or insights, as seen in the retrieved reviews, to make the new reviews feel authentic. However, do not simply copy existing reviews—create original content while using the retrieved reviews as inspiration.

Write different well-crafted product reviews that follow these guidelines and are indistinguishable from real reviews.
Only provide the **new product review**. Do not include the prompt, query, or retrieved reviews in your response. Focus purely on writing the review.

Here are the retrieved reviews:

Retrieved Reviews:
{context}


"""

# Two-message chat template: the guidelines above, then the user's request.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

# Create the question-answer chain and put the vector-store retriever in front of it.
retriever = vector_store.as_retriever()
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Ask the chain for new reviews of one product and display the generated text.
request = {"input": "Write new reviews for ACTIF Bariatric Probiotic Maximum Strength"}
answer = rag_chain.invoke(request)
answer['answer']

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'System: \nYou are an expert product reviewer and creative writer. Your task is to write new product reviews that matches the style, tone, and sentiment of the retrieved product reviews. Pay close attention to the following:\n\n1. **Tone**: Observe the tone of the retrieved reviews. If the reviews are positive, maintain an enthusiastic and appreciative tone. If they are negative, reflect a critical yet constructive tone. Keep the tone consistent throughout the new reviews.\n   \n2. **Style**: Match the writing style of the retrieved reviews. If the reviews are informal, use casual language, contractions, and personal experiences. If they are formal, maintain a professional and structured approach. Pay attention to sentence length, use of adjectives, and whether the reviewers focus on features or personal opinions.\n\n3. **Content**: Include common themes or topics mentioned in the retrieved reviews, such as specific features of the product (e.g., design, quality, functionality). Highli

## RAG with an instruct model

In [None]:
!pip install sentence_transformers langchain_community langchain transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence_transformers
Successfully installed sentence_transformers-3.1.1


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import torch
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores.faiss import FAISS
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
from pathlib import Path

# Read the review CSV through LangChain's CSV loader.
reviews_path = Path('rev2.csv')
csv_loader = CSVLoader(file_path=reviews_path)
documents = csv_loader.load()






In [None]:
!pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [None]:
!pip install langchain_huggingface

Collecting langchain_huggingface
  Downloading langchain_huggingface-0.1.0-py3-none-any.whl.metadata (1.3 kB)
Downloading langchain_huggingface-0.1.0-py3-none-any.whl (20 kB)
Installing collected packages: langchain_huggingface
Successfully installed langchain_huggingface-0.1.0


In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Create the FAISS vector store from the loaded documents, embedding on CUDA.
embedding_model = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={'device': 'cuda'},
)
faiss_index = FAISS.from_documents(documents, embedding_model)

# Retriever setup: threshold-based similarity search over the index.
retriever = faiss_index.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.7},
)

# Hugging Face LLM for response generation: tokenizer plus model moved to CUDA.
instruct_checkpoint = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(instruct_checkpoint)
model = AutoModelForCausalLM.from_pretrained(instruct_checkpoint).to('cuda')



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [None]:
from transformers import GenerationConfig

# max_new_tokens budgets only the generated continuation. max_length would
# also count the prompt tokens, and the prompt used here (instructions + query
# + retrieved reviews) can consume a 300-token limit by itself, leaving little
# or nothing for the new review.
generation_config = GenerationConfig(
    max_new_tokens=300
)


In [None]:
# Setting up the HuggingFacePipeline around the instruct model on GPU 0.
# return_full_text=False makes the pipeline return only the newly generated
# text. With the default (True) the response begins with a copy of the whole
# prompt, including the instructions and retrieved reviews.
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0,
    generation_config=generation_config,
    return_full_text=False,
)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

In [None]:
# Draft of a longer instruction prompt. The string is not assigned to any
# variable, so this cell only displays it.
"""
You are an expert product reviewer and creative writer. Your task is to write new product reviews that matches the style, tone, and sentiment of the retrieved product reviews. Pay close attention to the following:

1. **Tone**: Observe the tone of the retrieved reviews. If the reviews are positive, maintain an enthusiastic and appreciative tone. If they are negative, reflect a critical yet constructive tone. Keep the tone consistent throughout the new reviews.

2. **Style**: Match the writing style of the retrieved reviews. If the reviews are informal, use casual language, contractions, and personal experiences. If they are formal, maintain a professional and structured approach. Pay attention to sentence length, use of adjectives, and whether the reviewers focus on features or personal opinions.

3. **Content**: Include common themes or topics mentioned in the retrieved reviews, such as specific features of the product (e.g., design, quality, functionality). Highlight these aspects in a similar way, adding some creative details or hypothetical experiences to make the reviews feel unique yet aligned with previous reviews.

4. **Length**: Make sure the length of the new reviews are similar to the retrieved reviews. If the retrieved reviews are brief and to the point, avoid writing lengthy reviews. Conversely, if they are more detailed, elaborate accordingly.

5. **Personalization**: Include hypothetical personal experiences or insights, as seen in the retrieved reviews, to make the new reviews feel authentic. However, do not simply copy existing reviews—create original content while using the retrieved reviews as inspiration.

Write 5 different well-crafted product reviews that follow these guidelines and are indistinguishable from real reviews.
Only provide the **new product review**. Do not include the prompt, query, or retrieved reviews in your response. Focus purely on writing the review.


Here is the query for new reviews and the retrieved reviews:

"""


In [None]:
# Retrieve reviews similar to the query, build a prompt from them, and ask the
# LLM for new reviews. The generated reviews should be similar in style and
# tone to the retrieved ones.

query = """ Write a review for product_name: ACTIF Bariatric Probiotic Maximum Strength with 20 Billion CFU, Immunity and Gut Support, for Bariatric and Gastric Bypass Surgery """

# .invoke() replaces the deprecated get_relevant_documents() entry point.
relevant_docs = retriever.invoke(query)

# Keep only the review body: everything after the first 'text:' label in each
# document. If a document has no such label, keep its full text instead of
# raising ValueError as str.index() would.
retrieved_reviews = []
for doc in relevant_docs:
    _, label, review_body = doc.page_content.partition('text:')
    retrieved_reviews.append(review_body if label else doc.page_content)

instruction = """
You are an expert product reviewer and creative writer.
Your task is to write new product reviews for the given product that matches the style, tone, and sentiment of the retrieved list of product reviews.
However, do not simply copy existing reviews—create original content while using the retrieved reviews as inspiration.
Here is the query for new reviews and the retrieved reviews:

"""

# Combine the query and instructions into a formatted prompt
formatted_prompt = f"{instruction}\nQuery: {query}\nRetrieved Reviews: {retrieved_reviews}"

# .invoke() replaces the deprecated direct call llm(prompt).
response = llm.invoke(formatted_prompt)
print(response)


You are an expert product reviewer and creative writer. 
Your task is to write new product reviews for the given product that matches the style, tone, and sentiment of the retrieved list of product reviews. 
However, do not simply copy existing reviews—create original content while using the retrieved reviews as inspiration.
Here is the query for new reviews and the retrieved reviews:


Query:  Write a review for product_name: ACTIF Bariatric Probiotic Maximum Strength with 20 Billion CFU, Immunity and Gut Support, for Bariatric and Gastric Bypass Surgery 
Retrieved Reviews: [' Great Price!', ' I buy these every month and very happy with them', ' I order these supplements from Amazon because they are always available, and because I get the best prices on Amazon. And the delivery service is great. They are a really great product for women. Provides more energy when needed, and I sleep better at night. I feel better about my overall health. I encourage women to try it for themselves', '

### Results from both models have been less than satisfactory.