**Import Necessary Libraries**


In [117]:
import os
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score
from langchain_community.llms import HuggingFaceEndpoint

**Hugging Face Model Configuration**

In [None]:
# Hugging Face credentials and endpoint configuration.
# The API token must never be committed to the notebook: it is read from the
# HUGGINGFACEHUB_API_TOKEN environment variable, with an interactive prompt as
# a fallback for local runs.
# NOTE(review): a token was previously hard-coded in this cell; treat it as
# leaked and revoke it in the Hugging Face account settings.
hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if not hf_token:
    from getpass import getpass

    hf_token = getpass("Hugging Face API token: ").strip()
if not hf_token:
    raise RuntimeError(
        "No Hugging Face API token provided; set HUGGINGFACEHUB_API_TOKEN."
    )

model_name = "mistralai/Mistral-7B-Instruct-v0.3"
# Export the token so the endpoint client below can pick it up from the environment.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Near-zero temperature and a 20-token cap: the classifiers only need a short label back.
model = HuggingFaceEndpoint(repo_id=model_name, temperature=0.01, max_new_tokens=20)

**Load Data**

In [119]:
# Read both CSV files in one pass: first the training reviews (sampled later
# as in-context examples), then the test reviews that get classified and scored.
train_data, test_data = (
    pd.read_csv(csv_name)
    for csv_name in ("hw4-train-new.csv", "hw4-llm-test-part2-new.csv")
)


In [120]:

def evaluate(true_labels, predictions):
    """Print macro-averaged precision, recall and F1 as percentages.

    Args:
        true_labels: Ground-truth labels.
        predictions: Predicted labels, aligned with ``true_labels``.

    Returns:
        None. The three scores are printed with two decimals.
    """
    # Shared scorer settings: macro average, and score 0 (rather than warn)
    # when a class has no predicted/true samples.
    macro_kwargs = {"average": "macro", "zero_division": 0}
    report_rows = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("Macro F1", f1_score),
    )

    # Compute every score first, then print the report.
    scores = [
        (title, scorer(true_labels, predictions, **macro_kwargs))
        for title, scorer in report_rows
    ]
    for title, score in scores:
        print(f"{title}: {score * 100:.2f}")
    
def rating(response):
    """Map a raw LLM completion to a binary star label.

    Args:
        response: Text returned by the model. ``None`` is treated as an empty
            completion; any other non-string value is converted with ``str()``.

    Returns:
        int: 1 when the completion answers 'high-star'; 0 when it answers
        'low-star' or mentions neither label (unknown answers default to
        low-star).
    """
    text = "" if response is None else str(response).strip().lower()

    high_at = text.find("high-star")
    low_at = text.find("low-star")

    if high_at == -1:
        # Either only 'low-star' is present or no label at all: both map to 0.
        return 0
    if low_at == -1:
        return 1

    # Both labels appear (e.g. "low-star, not high-star"). The label mentioned
    # first is taken as the answer instead of always preferring 'high-star'.
    return 1 if high_at < low_at else 0
    

In [None]:

# zero-shot classifier function
def zero_shot_classifier(review_text, model):
    """Classify one review with a zero-shot prompt.

    Args:
        review_text: The review to classify.
        model: LLM used to complete the prompt. Its ``invoke`` method is used
            when present; otherwise the object is called directly.

    Returns:
        int: The label produced by ``rating`` for the model's completion
        (1 for 'high-star', 0 otherwise).
    """
    # The whitespace inside this literal is part of the prompt that is sent to
    # the model, so it is kept exactly as it was.
    prompt = f""" Task: Classify review as either 'high-star' or 'low-star'.
                Review to classify: "{review_text}"
            """

    # Calling a LangChain LLM object directly is deprecated in favour of
    # `invoke`; fall back to a plain call for simple callables.
    generate = getattr(model, "invoke", model)
    response = generate(prompt)
    return rating(response)


# Classify every test review, in order, with the zero-shot prompt.
pred_zero_shot = [
    zero_shot_classifier(review_text, model)
    for review_text in test_data['reviewText']
]

# Score the zero-shot predictions against the test labels.
print("Zero-Shot Evaluation Results:")
evaluate(test_data['label'].tolist(), pred_zero_shot)


In [None]:
def one_shot_classifier(review_text, model, train_data, random_state=None):
    """Classify one review with a one-shot prompt.

    A single labelled review is drawn at random from ``train_data`` and shown
    to the model as a worked example before the review to classify.

    Args:
        review_text: The review to classify.
        model: LLM used to complete the prompt. Its ``invoke`` method is used
            when present; otherwise the object is called directly.
        train_data: DataFrame with ``reviewText`` and ``label`` columns; a
            label equal to 1 is rendered as 'high-star', anything else as
            'low-star'.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            chosen example is reproducible. The default (``None``) keeps the
            draw unseeded.

    Returns:
        int: The label produced by ``rating`` for the model's completion
        (1 for 'high-star', 0 otherwise).
    """
    example = train_data.sample(n=1, random_state=random_state)
    example_review = example['reviewText'].iloc[0]
    example_label = example['label'].iloc[0]
    example_classification = 'high-star' if example_label == 1 else 'low-star'

    prompt = (
        f"Task: Classify the following review as 'high-star' or 'low-star':\n"
        f"Example review: {example_review}\n"
        f"Example classification: {example_classification}\n\n"
        f"Now classify the following review: {review_text}"
    )

    # Calling a LangChain LLM object directly is deprecated in favour of
    # `invoke`; fall back to a plain call for simple callables.
    generate = getattr(model, "invoke", model)
    response = generate(prompt)

    return rating(response)


# Classify every test review, in order, with the one-shot prompt.
pred_one_shot = [
    one_shot_classifier(review_text, model, train_data)
    for review_text in test_data['reviewText']
]

# Score the one-shot predictions against the test labels.
print("One-Shot Evaluation Results:")
evaluate(test_data['label'].tolist(), pred_one_shot)


In [None]:
def few_shot_classifier(review_text, model, train_data, n_examples=7, random_state=None):
    """Classify one review with a few-shot prompt.

    Labelled reviews are drawn at random from ``train_data`` and listed as
    examples before the review to classify.

    Args:
        review_text: The review to classify.
        model: LLM used to complete the prompt. Its ``invoke`` method is used
            when present; otherwise the object is called directly.
        train_data: DataFrame with ``reviewText`` and ``label`` columns; a
            label equal to 1 is rendered as 'high-star', anything else as
            'low-star'.
        n_examples: Number of examples to include (default 7). Capped at the
            number of rows in ``train_data`` so a small training frame does
            not make the sampling fail.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            chosen examples are reproducible. The default (``None``) keeps the
            draw unseeded.

    Returns:
        int: The label produced by ``rating`` for the model's completion
        (1 for 'high-star', 0 otherwise).
    """
    # Never ask for more examples than the training frame holds.
    shot_count = min(n_examples, len(train_data))
    examples = train_data.sample(n=shot_count, random_state=random_state)

    prompt_parts = [
        "Classify the following review as 'high-star' or 'low-star':\n",
        "Here are a few examples (does not include the review to classify):\n\n",
    ]

    for review, label in zip(examples['reviewText'], examples['label']):
        classification = 'high-star' if label == 1 else 'low-star'
        prompt_parts.append(
            f"Example Review: {review} -> Classification: {classification}\n"
        )

    # Add the new review for classification
    prompt_parts.append(
        f"Now classify this review: \n Review: {review_text} -> Classification: ?"
    )
    prompt = "".join(prompt_parts)

    # Calling a LangChain LLM object directly is deprecated in favour of
    # `invoke`; fall back to a plain call for simple callables.
    generate = getattr(model, "invoke", model)
    response = generate(prompt)
    return rating(response)

# Classify every test review, in order, with the few-shot prompt.
pred_few_shot = [
    few_shot_classifier(review_text, model, train_data)
    for review_text in test_data['reviewText']
]

# Score the few-shot predictions against the test labels.
print("Few-Shot Evaluation Results:")
evaluate(test_data['label'].tolist(), pred_few_shot)