In [None]:
!pip install datasets

In [None]:
!pip install --upgrade transformers

In [None]:
!pip install sentencepiece

In [None]:
!pip install protobuf

In [1]:
import os
import torch
import pandas as pd
from transformers import LlamaForCausalLM, LlamaTokenizer, AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import classification_report
from tqdm import tqdm

In [2]:

model_name = 'meta-llama/Llama-3.1-8B'
token = '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'
single_precision = True
gpu_id = 0
classes = ['negative', 'positive']
class_labels = {0: "negative", 1: "positive"}

# Updated prompt templates
prompts = [
    "Given the following text, does the sentiment lean more towards being positive or negative? Analyze the text carefully before answering. \nExample: \nText: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' \nSentiment: Negative \nNow analyze the following text: \nText: {} \nSentiment:",
    "What is the emotional sentiment conveyed by the following text? Indicate if it reflects a positive or negative sentiment. \nExample: \nText: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' \nSentiment: Negative \nNow analyze the following text: \nText: {}\nSentiment:",
    "Is the sentiment in this text generally favorable or unfavorable? Please provide your answer based on the tone of the text. \nExample: \nText: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' \nSentiment: Negative \nNow analyze the following text: \nText: {}\nSentiment:",
    "Does the following sentence express positive or negative opinion? \nExample: \nText: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' \nSentiment: Negative \nNow analyze the following text: \nText: {}\nSentiment:",
    "Classify the sentiment of the following sentence as either positive or negative. \nExample: \nText: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' \nSentiment: Negative \nNow analyze the following text: \nText: {}\nSentiment:"
]

In [3]:
# Set device and seed
torch.cuda.manual_seed(42)
torch.cuda.manual_seed_all(42)
torch.cuda.set_device(gpu_id)
device = torch.device(f'cuda:{gpu_id}' if torch.cuda.is_available() else 'cpu')
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

# Load model and tokenizer
def load_model_tokenizer(model_name, single_precision, token):
    model = LlamaForCausalLM.from_pretrained(model_name,
                                             cache_dir="cache/",
                                             torch_dtype=torch.float16 if single_precision else torch.float32,
                                             use_auth_token=token)
    tokenizer = AutoTokenizer.from_pretrained(model_name,
                                              cache_dir="cache/",
                                              use_auth_token=token,
                                              padding_side="left")
    tokenizer.add_special_tokens({'pad_token': '<PAD>'})
    model.resize_token_embeddings(len(tokenizer))
    model.config.pad_token_id = tokenizer.pad_token_id
    return model, tokenizer

model, tokenizer = load_model_tokenizer(model_name, single_precision, token)
model.to(device)
model.eval()

# Get the token indices for the class labels "positive" and "negative"
class_idx = {
    0: tokenizer.encode("negative", add_special_tokens=False)[0],
    1: tokenizer.encode("positive", add_special_tokens=False)[0]
}




config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [4]:
test_file_path = "data/test.tsv"

# Read the TSV file with the correct delimiter
test_data = pd.read_csv(test_file_path, sep='\t')

# Check the data structure
print(test_data.head())

   label                                            content
0      0     no movement , no yuks , not much of anything .
1      0  a gob of drivel so sickly sweet , even the eag...
2      0  gangs of new york is an unapologetic mess , wh...
3      0  we never really feel involved with the story ,...
4      1            this is one of polanski 's best films .


In [5]:
dev_file_path = "data/dev.tsv"

# Read the TSV file with the correct delimiter
dev_data = pd.read_csv(dev_file_path, sep='\t')

# Check the data structure
print(dev_data.head())

   label                                            content
0      0                       one long string of cliches .
1      0  if you 've ever entertained the notion of doin...
2      0  k-19 exploits our substantial collective fear ...
3      0  it 's played in the most straight-faced fashio...
4      1  there is a fabric of complex ideas here , and ...


In [6]:
train_file_path = "data/train.tsv"

# Read the TSV file with the correct delimiter
train_data = pd.read_csv(train_file_path, sep='\t')

# Check the data structure
print(train_data.head())

   label                                           sentence
0      0        hide new secretions from the parental units
1      0                contains no wit , only labored gags
2      1  that loves its characters and communicates som...
3      0  remains utterly satisfied to remain the same t...
4      0  on the worst revenge-of-the-nerds clichés the ...


In [7]:
def classify_Mexample(sentence,label, prompt_template, maps, curr_prompt, curr_sentence):
    # Format the prompt with the review text

    prompt_text = prompt_template.format(sentence)

    # Encode the prompt and truncate to fit model's max length
    inputs = tokenizer(prompt_text, return_tensors="pt", padding='longest', truncation=True).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Extract the logits for the last token and apply softmax for binary classification
    last_token_logits = logits[:, -1, [class_idx[0], class_idx[1]]]
    probs = torch.softmax(last_token_logits, dim=-1)

    # Get predicted class (0 = negative, 1 = positive)
    predicted_class = torch.argmax(probs, dim=-1).item()
    if predicted_class == label:
        if maps[curr_sentence]['confidence'] < abs(probs[0][predicted_class].item() - 0.5):
            maps[curr_sentence]['confidence'] = abs(probs[0][predicted_class].item() - 0.5)
            maps[curr_sentence]['prompt'] = curr_prompt
    else :
        if maps[curr_sentence]['prompt'] == -1:
            maps[curr_sentence]['prompt'] = curr_prompt
            maps[curr_sentence]['confidence'] = 0

In [8]:
maps = {key: {"confidence": 0, "prompt": -1} for key in range(len(train_data["sentence"]))}
curr_prompt = 0
for prompt_template in prompts:
    print(f"Evaluating using prompt: {prompt_template}")

    # all_preds = []
    # all_labels = train_data["label"]  # Ground truth labels
    curr_sentence = 0
    
    for sentence in tqdm(train_data["sentence"]):
        classify_Mexample(sentence,train_data["label"][curr_sentence], prompt_template, maps, curr_prompt, curr_sentence) 
        curr_sentence += 1 
        # all_preds.append(pred)
    curr_prompt += 1 
    # print("Evaluation Metrics for the current prompt:")
    # print(classification_report(all_labels, all_preds, target_names=["negative", "positive"]))
    # print("\n" + "="*50 + "\n")


Evaluating using prompt: Given the following text, does the sentiment lean more towards being positive or negative? Analyze the text carefully before answering. 
Example: 
Text: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' 
Sentiment: Negative 
Now analyze the following text: 
Text: {} 
Sentiment:


100%|██████████| 67349/67349 [53:45<00:00, 20.88it/s]


Evaluating using prompt: What is the emotional sentiment conveyed by the following text? Indicate if it reflects a positive or negative sentiment. 
Example: 
Text: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' 
Sentiment: Negative 
Now analyze the following text: 
Text: {}
Sentiment:


100%|██████████| 67349/67349 [54:07<00:00, 20.74it/s]


Evaluating using prompt: Is the sentiment in this text generally favorable or unfavorable? Please provide your answer based on the tone of the text. 
Example: 
Text: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' 
Sentiment: Negative 
Now analyze the following text: 
Text: {}
Sentiment:


100%|██████████| 67349/67349 [54:14<00:00, 20.69it/s]


Evaluating using prompt: Does the following sentence express positive or negative opinion? 
Example: 
Text: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' 
Sentiment: Negative 
Now analyze the following text: 
Text: {}
Sentiment:


100%|██████████| 67349/67349 [53:06<00:00, 21.14it/s]


Evaluating using prompt: Classify the sentiment of the following sentence as either positive or negative. 
Example: 
Text: 'if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever.' 
Sentiment: Negative 
Now analyze the following text: 
Text: {}
Sentiment:


100%|██████████| 67349/67349 [53:24<00:00, 21.02it/s]


In [9]:
def confidenceMethod(sentence, prompt_template, curr_sentence, maps):
    prompt_temp = prompt_template[maps[curr_sentence]['prompt']]
    prompt_text = prompt_temp.format(sentence)
    
    # Encode the prompt and truncate to fit model's max length
    inputs = tokenizer(prompt_text, return_tensors="pt", padding='longest', truncation=True).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    
    # Extract the logits for the last token and apply softmax for binary classification
    last_token_logits = logits[:, -1, [class_idx[0], class_idx[1]]]
    probs = torch.softmax(last_token_logits, dim=-1)
    
    # Get predicted class (0 = negative, 1 = positive)
    predicted_class = torch.argmax(probs, dim=-1).item()
    

    return predicted_class

In [10]:
all_preds = []
all_labels = train_data["label"]  # Ground truth labels
curr_sentence = 0
data = {
    "statement": [],
    "prompt": []
}

for sentence in tqdm(train_data["sentence"]):
    data["statement"].append(sentence)
    data["prompt"].append(maps[curr_sentence]['prompt'])
    pred = confidenceMethod(sentence, prompts, curr_sentence, maps)
    curr_sentence += 1
    all_preds.append(pred)
print("Evaluation Metrics for Confidence method:")
print(classification_report(all_labels, all_preds, target_names=["negative", "positive"], digits=4))
print("\n" + "="*50 + "\n")

100%|██████████| 67349/67349 [54:09<00:00, 20.73it/s]

Evaluation Metrics for Confidence method:
              precision    recall  f1-score   support

    negative     0.9980    0.3702    0.5401     29780
    positive     0.6669    0.9994    0.8000     37569

    accuracy                         0.7212     67349
   macro avg     0.8325    0.6848    0.6700     67349
weighted avg     0.8133    0.7212    0.6851     67349








In [11]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [12]:

df = pd.DataFrame(data)

# 1. Data Preprocessing
X = df["statement"]
y = df["prompt"].values

# Convert text into numerical features using TF-IDF
vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# 2. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# 3. Train Logistic Regression Model
Lmodel = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
Lmodel.fit(X_train, y_train)

# 4. Evaluate Model
y_pred = Lmodel.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# 5. Prediction for New Statements
new_statements = ["I think the actor could have done a better job, overall the stroy was good.", "The screenplay was done right and it has perfect climax."]
new_statements_vectorized = vectorizer.transform(new_statements)
predictions = Lmodel.predict(new_statements_vectorized)

for statement, pred in zip(new_statements, predictions):
    print(f"Statement: '{statement}' => Predicted Prompt: {pred}")

Accuracy: 0.6555308092056422

Classification Report:
               precision    recall  f1-score   support

           0       0.63      0.77      0.69      5767
           1       0.30      0.00      0.01       697
           2       0.60      0.15      0.23      1094
           3       0.50      0.00      0.01       297
           4       0.69      0.75      0.72      5615

    accuracy                           0.66     13470
   macro avg       0.54      0.34      0.33     13470
weighted avg       0.63      0.66      0.62     13470

Statement: 'I think the actor could have done a better job, overall the stroy was good.' => Predicted Prompt: 4
Statement: 'The screenplay was done right and it has perfect climax.' => Predicted Prompt: 4


In [13]:
def FinalPrediction(sentence, Lmodel):
    new_statements_vectorized = vectorizer.transform(sentence)
    predictions = Lmodel.predict(new_statements_vectorized)
    prompt_template = prompts[predictions[0]]
    prompt_text = prompt_template.format(sentence[0])
    
    # Encode the prompt and truncate to fit model's max length
    inputs = tokenizer(prompt_text, return_tensors="pt", padding='longest', truncation=True).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    
    # Extract the logits for the last token and apply softmax for binary classification
    last_token_logits = logits[:, -1, [class_idx[0], class_idx[1]]]
    probs = torch.softmax(last_token_logits, dim=-1)
    
    # Get predicted class (0 = negative, 1 = positive)
    predicted_class = torch.argmax(probs, dim=-1).item()
    return predicted_class
    

In [14]:
all_preds = []
all_labels = test_data["label"]  # Ground truth labels

for sentence in tqdm(test_data["content"]):
    pred = FinalPrediction([sentence], Lmodel) 
    all_preds.append(pred)

print("Evaluation Metrics for the current prompt:")
print(classification_report(all_labels, all_preds, target_names=["negative", "positive"], digits=4))
print("\n" + "="*50 + "\n")


100%|██████████| 1821/1821 [01:30<00:00, 20.04it/s]

Evaluation Metrics for the current prompt:
              precision    recall  f1-score   support

    negative     0.9938    0.5285    0.6901       912
    positive     0.6781    0.9967    0.8071       909

    accuracy                         0.7622      1821
   macro avg     0.8360    0.7626    0.7486      1821
weighted avg     0.8362    0.7622    0.7485      1821






