### Load and Prepare Data(Sephora Reviews)
- This is the same dataset I used in my assignment part1 & part2; 
- loading, preprocessing, and evaluating a base model before fine-tuning.

In [5]:
import numpy as np
import kagglehub
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import os
import pandas as pd

In [6]:
# Downloading from the Kaggle 
path = kagglehub.dataset_download("nadyinky/sephora-products-and-skincare-reviews")
print("Path to dataset files:", path)

Path to dataset files: /nfs/home/vlr3588/.cache/kagglehub/datasets/nadyinky/sephora-products-and-skincare-reviews/versions/2


In [7]:
# Getting all the datasets from the Kaggle
dataset_dir = path 
review_files = [
    "reviews_0-250.csv",
    "reviews_250-500.csv",
    "reviews_500-750.csv",
    "reviews_750-1250.csv",
    "reviews_1250-end.csv"
]

review_dfs = [pd.read_csv(os.path.join(dataset_dir, file)) for file in review_files] # Loading and combine all review files into one DataFrame
reviews = pd.concat(review_dfs, ignore_index=True)

print("Total reviews loaded:", len(reviews))

  review_dfs = [pd.read_csv(os.path.join(dataset_dir, file)) for file in review_files] # Loading and combine all review files into one DataFrame
  review_dfs = [pd.read_csv(os.path.join(dataset_dir, file)) for file in review_files] # Loading and combine all review files into one DataFrame
  review_dfs = [pd.read_csv(os.path.join(dataset_dir, file)) for file in review_files] # Loading and combine all review files into one DataFrame


Total reviews loaded: 1094411


In [4]:
#data = reviews
#data = data.drop(columns=['Unnamed: 0'])

In [8]:
# Filtering Reviews After 2023
reviews['submission_time'] = pd.to_datetime(reviews['submission_time'])

# Filter reviews for the year 2023
filtered_reviews_2023 = reviews[(reviews['submission_time'].dt.year == 2023)]

print("Filtered reviews (2023):", len(filtered_reviews_2023))
print(filtered_reviews_2023.head()) 

# Save filtered reviews to a CSV file
filtered_reviews_2023.to_csv("Sephora_skincare_Reviews_2023.csv", index=False)
print("Filtered reviews saved to 'Sephora_skincare_Reviews_2023.csv'")

Filtered reviews (2023): 49531
   Unnamed: 0    author_id  rating  is_recommended  helpfulness  \
0           0   1741593524       5             1.0          1.0   
1           1  31423088263       1             0.0          NaN   
2           2   5061282401       5             1.0          NaN   
3           3   6083038851       5             1.0          NaN   
4           4  47056667835       5             1.0          NaN   

   total_feedback_count  total_neg_feedback_count  total_pos_feedback_count  \
0                     2                         0                         2   
1                     0                         0                         0   
2                     0                         0                         0   
3                     0                         0                         0   
4                     0                         0                         0   

  submission_time                                        review_text  \
0      2023-02-01  

In [9]:
data = pd.read_csv('Sephora_skincare_Reviews_2023.csv')
data = data.drop(columns=['Unnamed: 0'])

In [10]:
filtered_reviews_2023.shape

(49531, 19)

In [11]:
# Display column names
print("Columns in the dataset:", data.columns)

Columns in the dataset: Index(['author_id', 'rating', 'is_recommended', 'helpfulness',
       'total_feedback_count', 'total_neg_feedback_count',
       'total_pos_feedback_count', 'submission_time', 'review_text',
       'review_title', 'skin_tone', 'eye_color', 'skin_type', 'hair_color',
       'product_id', 'product_name', 'brand_name', 'price_usd'],
      dtype='object')


In [12]:
print(data[['review_text', 'rating']].head())

                                         review_text  rating
0  I use this with the Nudestix “Citrus Clean Bal...       5
1  I bought this lip mask after reading the revie...       1
2  My review title says it all! I get so excited ...       5
3  I’ve always loved this formula for a long time...       5
4  If you have dry cracked lips, this is a must h...       5


In [13]:
# Keep only review text and rating
data = data[['review_text', 'rating']]

# Drop missing values
data.dropna(inplace=True)

print("Cleaned dataset size:", len(data))

Cleaned dataset size: 49507


In [14]:
# Select GPU (adjust as per your cluster)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [15]:
# Define a function to map ratings to sentiment labels
def map_rating_to_sentiment(rating):
    if rating >= 4:
        return "Positive"
    elif rating == 3:
        return "Neutral"
    else:
        return "Negative"

# Apply function to create a new sentiment column
data['sentiment'] = data['rating'].apply(map_rating_to_sentiment)

# Drop rating column
data = data.drop(columns=['rating'])

# Show sentiment distribution
print(data['sentiment'].value_counts())

sentiment
Positive    40474
Negative     5492
Neutral      3541
Name: count, dtype: int64


### Converting Data into Question-Answer Format

In [16]:
def create_qa_pair(row):
    question = f"What is the sentiment of this review? '{row['review_text']}'"
    answer = row['sentiment']
    return {'question': question, 'answer': answer}

qa_data = data.apply(create_qa_pair, axis=1).tolist()


In [17]:
# Save the full dataset
with open("sentiment_dataset.json", "w") as f:
    json.dump(qa_data, f, indent=4)

## Test the base model on the test questions
- Selecting a small language model (≤1.7B parameters)
- flan-t5-small is a general-purpose instruction-tuned model and has not been specifically trained on Sephora product reviews or this particular type of sentiment task.
- Neutral sentiment is often the hardest class to detect in sentiment analysis, especially for general-purpose LLMs.

In [15]:
#!pip install torch transformers datasets


In [18]:
# Select first 15 questions from the dataset
test_questions = qa_data[:15]

# Save the test questions
with open("test_questions.json", "w") as f:
    json.dump(test_questions, f, indent=4)


In [19]:
# Load test questions
with open("test_questions.json", "r") as f:
    test_questions = json.load(f)

# Load base model and tokenizer
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Build the pipeline
qa_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

# Evaluate the base model on the test set
for sample in test_questions:
    question = sample["question"]
    true_answer = sample["answer"]

    response = qa_pipeline(question, max_length=10)[0]['generated_text']

    print(f"Question: {question}")
    print(f"True Sentiment: {true_answer}")
    print(f"Base Model Answer: {response}\n")




Question: What is the sentiment of this review? 'I use this with the Nudestix “Citrus Clean Balm & Make-Up Melt“ to double cleanse and it has completely changed my skin (for the better). The make-up melt is oil based and removes all of your makeup super easily. I follow-up with this water based cleanser, and I also use this just by itself when I’m not wearing make-up. It leaves the skin gently cleansed, but without stripping the skin. 10/10 recommend combining with the make-up melt. It’s perfection!'
True Sentiment: Positive
Base Model Answer: this review.! this review

Question: What is the sentiment of this review? 'I bought this lip mask after reading the reviews and the hype. Unfortunately, it did not meet my expectations as vaseline petroleum jelly works way better for me.'
True Sentiment: Negative
Base Model Answer: this review??? What

Question: What is the sentiment of this review? 'My review title says it all! I get so excited to get into bed and apply this lip mask. I do see 

### Conclusion from the above model :
- My base t5-small model (without any fine-tuning) is not able to perform the sentiment classification task effectively.
- The base t5-small model is not trained on the sentiment classification task.

## LoRA Fine-tuning with PEFT

In [9]:
#import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"
#device = "cuda"
#import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "4"


In [20]:
with open("sentiment_dataset.json", "r") as f:
    train_data = json.load(f)

### Tokenize the dataset

In [21]:
from datasets import Dataset
from transformers import AutoTokenizer

# Load dataset
train_dataset = Dataset.from_list(train_data)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained("t5-small")

# Tokenization function
def tokenize_function(example):
    model_inputs = tokenizer(
        example["question"],
        max_length=256,
        padding="max_length",
        truncation=True,
    )
    labels = tokenizer(
        example["answer"],
        max_length=5,
        padding="max_length",
        truncation=True,
    )
    labels["input_ids"] = [
        (token if token != tokenizer.pad_token_id else -100)
        for token in labels["input_ids"]
    ]
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply tokenization
tokenized_dataset = train_dataset.map(tokenize_function, batched=False)


Map:   0%|          | 0/49507 [00:00<?, ? examples/s]

In [11]:
import os

# Force uninstall PEFT and Transformers
os.system("pip uninstall -y peft transformers")

# Clean cache
os.system("pip cache purge")

# Remove lingering folders manually
os.system('rm -rf ~/.local/lib/python3.9/site-packages/peft*')
os.system('rm -rf ~/.local/lib/python3.9/site-packages/transformers*')


Found existing installation: peft 0.8.2
Uninstalling peft-0.8.2:
  Successfully uninstalled peft-0.8.2
Found existing installation: transformers 4.36.2
Uninstalling transformers-4.36.2:
  Successfully uninstalled transformers-4.36.2
Files removed: 12 (8.6 MB)


0

In [12]:
os.system('ls ~/.local/lib/python3.9/site-packages/ | grep peft')
os.system('ls ~/.local/lib/python3.9/site-packages/ | grep transformers')


256

In [28]:
# os.system("pip install transformers==4.36.2 peft==0.8.2")

In [1]:
import transformers
import peft

print(transformers.__version__)  # should be 4.36.2
print(peft.__version__)          # should be 0.8.2


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


4.36.2
0.8.2


### Apply LoRA with PEFT

In [22]:
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q", "v", "k", "o"],
    lora_dropout=0.1,
    bias="none",
    task_type="SEQ_2_SEQ_LM",
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 589,824 || all params: 61,096,448 || trainable%: 0.9653981848502878


### Train the model

In [23]:
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq

training_args = Seq2SeqTrainingArguments(
    output_dir="./lora-t5-small-sentiment",
    per_device_train_batch_size=8,
    num_train_epochs=3,
    learning_rate=5e-4,
    logging_steps=50,
    save_strategy="epoch",
    fp16=True,
    report_to="none",
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer),
)

trainer.train()


You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
50,2.454
100,0.2814
150,0.2181
200,0.1871
250,0.1648
300,0.1504
350,0.1436
400,0.1551
450,0.1471
500,0.1474




TrainOutput(global_step=3714, training_loss=0.1506210140599059, metrics={'train_runtime': 3147.1893, 'train_samples_per_second': 47.192, 'train_steps_per_second': 1.18, 'total_flos': 1.01851053686784e+16, 'train_loss': 0.1506210140599059, 'epoch': 3.0})

### Save your fine-tuned model:

In [24]:
model.save_pretrained("./lora-t5-small-sentiment")
tokenizer.save_pretrained("./lora-t5-small-sentiment")



('./lora-t5-small-sentiment/tokenizer_config.json',
 './lora-t5-small-sentiment/special_tokens_map.json',
 './lora-t5-small-sentiment/tokenizer.json')

### Evaluate the fine-tuned model:

In [25]:
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

model = AutoModelForSeq2SeqLM.from_pretrained("./lora-t5-small-sentiment")
tokenizer = AutoTokenizer.from_pretrained("./lora-t5-small-sentiment")
qa_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

for sample in test_questions:
    question = sample["question"]
    true_answer = sample["answer"]

    response = qa_pipeline(question, max_length=10)[0]['generated_text']

    print(f"Question: {question}")
    print(f"True Sentiment: {true_answer}")
    print(f"Fine-tuned Model Answer: {response}\n")


Question: What is the sentiment of this review? 'I use this with the Nudestix “Citrus Clean Balm & Make-Up Melt“ to double cleanse and it has completely changed my skin (for the better). The make-up melt is oil based and removes all of your makeup super easily. I follow-up with this water based cleanser, and I also use this just by itself when I’m not wearing make-up. It leaves the skin gently cleansed, but without stripping the skin. 10/10 recommend combining with the make-up melt. It’s perfection!'
True Sentiment: Positive
Fine-tuned Model Answer: Positive

Question: What is the sentiment of this review? 'I bought this lip mask after reading the reviews and the hype. Unfortunately, it did not meet my expectations as vaseline petroleum jelly works way better for me.'
True Sentiment: Negative
Fine-tuned Model Answer: Negative

Question: What is the sentiment of this review? 'My review title says it all! I get so excited to get into bed and apply this lip mask. I do see a difference bec

## Conclusion

- In this assignment, I worked on sentiment classification using Sephora reviews, focusing on predicting Positive, Negative, and Neutral sentiments.
- I first evaluated the base T5-small model, which struggled to provide meaningful predictions. To improve performance, I applied LoRA fine-tuning with PEFT, which gave noticeably better results, especially for Positive and Negative sentiments.
- However, the model still struggled with Neutral sentiment prediction due to the class imbalance in the dataset, where Neutral examples are underrepresented.
- Since LoRA fine-tunes only a small subset of the model's parameters, it is more sensitive to the quality and balance of the training data. Without enough Neutral examples, the model couldn't learn to predict them well.

## Future Work
- Balance the dataset to include sufficient Neutral examples and re-run LoRA fine-tuning to check if Neutral predictions improve.
- Try other techniques like class-weighted loss functions or data augmentation to handle the imbalance.
- Explore more advanced fine-tuning strategies to further boost performance across all sentiment classes.



## UnitTest

In [27]:
import json
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
from peft import LoraConfig, get_peft_model

# Tiny dataset
test_data = [
    {"question": "What is the sentiment of this review? 'It works well.'", "answer": "Positive"},
    {"question": "What is the sentiment of this review? 'It is okay.'", "answer": "Neutral"},
]

dataset = Dataset.from_list(test_data)
tokenizer = AutoTokenizer.from_pretrained("t5-small")

def tokenize(example):
    inputs = tokenizer(example["question"], max_length=128, padding="max_length", truncation=True)
    labels = tokenizer(example["answer"], max_length=5, padding="max_length", truncation=True)
    labels["input_ids"] = [(id if id != tokenizer.pad_token_id else -100) for id in labels["input_ids"]]
    inputs["labels"] = labels["input_ids"]
    return inputs

tokenized_dataset = dataset.map(tokenize)

model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
lora_config = LoraConfig(
    r=4, lora_alpha=16, target_modules=["q", "v", "k", "o"],
    lora_dropout=0.1, bias="none", task_type="SEQ_2_SEQ_LM",
)
model = get_peft_model(model, lora_config)

training_args = Seq2SeqTrainingArguments(
    output_dir="./unit_test_output",
    per_device_train_batch_size=2,
    num_train_epochs=1,
    learning_rate=1e-4,
    logging_dir="./unit_test_logs",
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

trainer.train()
print("Unit test complete!")


Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Step,Training Loss


Unit test complete!


## Compare Base vs. Fine-Tuned Model

In [29]:
import json
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# Load test questions
with open("test_questions.json", "r") as f:
    test_questions = json.load(f)

# Load base model and tokenizer
base_model_name = "t5-small"
base_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)
base_pipeline = pipeline("text2text-generation", model=base_model, tokenizer=base_tokenizer)

# Load fine-tuned model and tokenizer
finetuned_model_path = "./lora-t5-small-sentiment"
finetuned_tokenizer = AutoTokenizer.from_pretrained(finetuned_model_path)
finetuned_model = AutoModelForSeq2SeqLM.from_pretrained(finetuned_model_path)
finetuned_pipeline = pipeline("text2text-generation", model=finetuned_model, tokenizer=finetuned_tokenizer)

# Compare outputs
for sample in test_questions:
    question = sample["question"]
    true_answer = sample["answer"]

    base_response = base_pipeline(question, max_length=10)[0]['generated_text']
    finetuned_response = finetuned_pipeline(question, max_length=10)[0]['generated_text']

    print(f"Question: {question}")
    print(f"True Sentiment: {true_answer}")
    print(f"Base Model Answer: {base_response}")
    print(f"Fine-tuned Model Answer: {finetuned_response}\n")


Question: What is the sentiment of this review? 'I use this with the Nudestix “Citrus Clean Balm & Make-Up Melt“ to double cleanse and it has completely changed my skin (for the better). The make-up melt is oil based and removes all of your makeup super easily. I follow-up with this water based cleanser, and I also use this just by itself when I’m not wearing make-up. It leaves the skin gently cleansed, but without stripping the skin. 10/10 recommend combining with the make-up melt. It’s perfection!'
True Sentiment: Positive
Base Model Answer: this review.! this review
Fine-tuned Model Answer: Positive

Question: What is the sentiment of this review? 'I bought this lip mask after reading the reviews and the hype. Unfortunately, it did not meet my expectations as vaseline petroleum jelly works way better for me.'
True Sentiment: Negative
Base Model Answer: this review??? What
Fine-tuned Model Answer: Negative

Question: What is the sentiment of this review? 'My review title says it all!

 ## Testing generalization by using new, unseen reviews that are not part of your training or test sets. 

In [30]:
# Example of new, unseen reviews for generalization testing
new_reviews = [
    "The product is okay, not bad but nothing special. I wouldn't repurchase.",
    "Terrible experience! It caused a rash after just one use.",
    "Wow! This exceeded my expectations. Smooth application and amazing scent.",
    "It's decent, but I found it a bit overpriced for the results.",
    "I expected more. It's fine, but didn't really do what it promised."
]

print("\n--- Generalization Test on New, Unseen Reviews ---\n")
for review in new_reviews:
    question = f"What is the sentiment of this review? '{review}'"
    response = qa_pipeline(question, max_length=10)[0]['generated_text']
    print(f"Review: {review}")
    print(f"Predicted Sentiment: {response}\n")



--- Generalization Test on New, Unseen Reviews ---

Review: The product is okay, not bad but nothing special. I wouldn't repurchase.
Predicted Sentiment: Negative

Review: Terrible experience! It caused a rash after just one use.
Predicted Sentiment: Negative

Review: Wow! This exceeded my expectations. Smooth application and amazing scent.
Predicted Sentiment: Positive

Review: It's decent, but I found it a bit overpriced for the results.
Predicted Sentiment: Neutral

Review: I expected more. It's fine, but didn't really do what it promised.
Predicted Sentiment: Negative



In [31]:
# New, unseen reviews with their true (expected) sentiments
new_reviews = [
    {"review": "The product is okay, not bad but nothing special. I wouldn't repurchase.", "true_sentiment": "Neutral"},
    {"review": "Terrible experience! It caused a rash after just one use.", "true_sentiment": "Negative"},
    {"review": "Wow! This exceeded my expectations. Smooth application and amazing scent.", "true_sentiment": "Positive"},
    {"review": "It's decent, but I found it a bit overpriced for the results.", "true_sentiment": "Neutral"},
    {"review": "I expected more. It's fine, but didn't really do what it promised.", "true_sentiment": "Neutral"}
]

print("\n--- Generalization Test on New, Unseen Reviews ---\n")
for sample in new_reviews:
    review = sample["review"]
    true_sentiment = sample["true_sentiment"]

    question = f"What is the sentiment of this review? '{review}'"
    response = qa_pipeline(question, max_length=10)[0]['generated_text']

    print(f"Review: {review}")
    print(f"True Sentiment: {true_sentiment}")
    print(f"Predicted Sentiment: {response}\n")



--- Generalization Test on New, Unseen Reviews ---

Review: The product is okay, not bad but nothing special. I wouldn't repurchase.
True Sentiment: Neutral
Predicted Sentiment: Negative

Review: Terrible experience! It caused a rash after just one use.
True Sentiment: Negative
Predicted Sentiment: Negative

Review: Wow! This exceeded my expectations. Smooth application and amazing scent.
True Sentiment: Positive
Predicted Sentiment: Positive

Review: It's decent, but I found it a bit overpriced for the results.
True Sentiment: Neutral
Predicted Sentiment: Neutral

Review: I expected more. It's fine, but didn't really do what it promised.
True Sentiment: Neutral
Predicted Sentiment: Negative



In [32]:
required_packages = [
    "torch==2.2.0",
    "transformers==4.36.2",
    "peft==0.8.2",
    "datasets==2.16.1",
    "accelerate==0.26.1",
    "safetensors==0.5.3",
    "numpy==1.24.3",
    "pandas==2.2.1",
    "tqdm==4.67.1",
    "scikit-learn==1.4.1.post1",
    "kagglehub==0.1.5"  # Remove if not used
]

with open("requirements.txt", "w") as f:
    f.write("\n".join(required_packages))

print("requirements.txt generated!")


requirements.txt generated!
