In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [None]:
! pip install newspaper3k
! pip install lxml[html_clean]

Collecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl.metadata (11 kB)
Collecting cssselect>=0.9.2 (from newspaper3k)
  Downloading cssselect-1.2.0-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting feedparser>=5.2.1 (from newspaper3k)
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting tldextract>=2.0.1 (from newspaper3k)
  Downloading tldextract-5.1.3-py3-none-any.whl.metadata (11 kB)
Collecting feedfinder2>=0.0.4 (from newspaper3k)
  Downloading feedfinder2-0.0.4.tar.gz (3.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jieba3k>=0.35.1 (from newspaper3k)
  Downloading jieba3k-0.35.1.zip (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tinysegmenter==0.3 (from newspaper3k)
  Downloading tinysegmenter-0.3.tar.gz (16 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Co

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp /content/drive/MyDrive/Ind_Proj/large_language_models/contextual_custom_google_scrapper.py .

In [None]:
import logging
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from sklearn.metrics import accuracy_score, f1_score
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from contextual_custom_google_scrapper import enrich_dataset_with_contextual_text, scrape_google_context

# Only let CRITICAL messages through.
# NOTE: `logging` here is the `logging` object imported from `transformers` in the
# `from transformers import (...)` statement above; that import rebinds the name and
# shadows the standard-library `logging` imported at the top of this cell.
logging.set_verbosity(logging.CRITICAL)

In [None]:
import kagglehub

# Download the Kaggle dataset and report the directory kagglehub returned for it
path = kagglehub.dataset_download("deepakjoshi2k/yahoo-stock-prediction-by-news")
print(f"Path to dataset files: {path}")

Downloading from https://www.kaggle.com/api/v1/datasets/download/deepakjoshi2k/yahoo-stock-prediction-by-news?dataset_version_number=1...


100%|██████████| 39.2M/39.2M [00:00<00:00, 109MB/s] 

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/deepakjoshi2k/yahoo-stock-prediction-by-news/versions/1


In [None]:
! ls /root/.cache/kagglehub/datasets/deepakjoshi2k/yahoo-stock-prediction-by-news/versions/1/

News_Yahoo_stock.csv  NEWS_YAHOO_stock_prediction.csv


In [None]:
# Read the labelled news CSV from the kagglehub download directory
dataset = load_dataset(
    "csv",
    data_files="/root/.cache/kagglehub/datasets/deepakjoshi2k/yahoo-stock-prediction-by-news/versions/1/NEWS_YAHOO_stock_prediction.csv",
)

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'ticker', 'Date', 'category', 'title', 'content', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'label'],
        num_rows: 15975
    })
})


In [None]:
from datetime import datetime

def extract_columns(example):
    """Derive the four fields used downstream from one raw CSV row.

    Args:
        example: Mapping for a single row. Reads 'title', 'content' and 'label',
            plus 'Date' (a "%Y-%m-%d" string) when it is present and non-empty.

    Returns:
        dict with keys 'text' (the title), 'actualContent' (the content),
        'publishedTime' (the date rendered as "%Y-%m-%dT%H:%M:%SZ", or None when
        'Date' is missing/empty) and 'stockIndicator' (the label).
    """
    raw_date = example["Date"] if "Date" in example else None

    if raw_date:
        # Only a calendar date is available, so the time part renders as 00:00:00
        published = datetime.strptime(raw_date, "%Y-%m-%d").strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        published = None

    fields = {}
    fields["text"] = example["title"]
    fields["actualContent"] = example["content"]
    fields["publishedTime"] = published
    fields["stockIndicator"] = example["label"]
    return fields

In [None]:
# Apply the mapping function to the dataset
processed_dataset = dataset.map(extract_columns)

Map:   0%|          | 0/15975 [00:00<?, ? examples/s]

In [None]:
# Drop every column of the 'train' split except the four derived ones
# ('text', 'actualContent', 'publishedTime', 'stockIndicator')
processed_dataset = processed_dataset.remove_columns(
    list(
        filter(
            lambda name: name not in {"text", "actualContent", "publishedTime", "stockIndicator"},
            processed_dataset.column_names["train"],
        )
    )
)

In [None]:
# Work with the 'train' split of the processed DatasetDict from here on
yahoo_finance_dataset = processed_dataset["train"]

# Sanity check: printing a Dataset shows its summary (features / num_rows) for the
# 5-row selection, not the row contents themselves
print(yahoo_finance_dataset.select(range(5)))

Dataset({
    features: ['text', 'actualContent', 'publishedTime', 'stockIndicator'],
    num_rows: 5
})


In [None]:
# Convert the dataset to a Pandas DataFrame
df = yahoo_finance_dataset.to_pandas()

df.count()

Unnamed: 0,0
text,15975
actualContent,15975
publishedTime,15975
stockIndicator,15975


In [None]:
# Convert the dataset to a Pandas DataFrame
df = yahoo_finance_dataset.to_pandas()

# Print the first 5 rows of the DataFrame
print(df.head())

                                                text  \
0  Apple Set To Beat Q1 Earnings Estimates  Tech ...   
1  Tech Daily  Intel Results  Netflix Surge  Appl...   
2  7 Monster Stock Market Predictions For The Wee...   
3  Apple Earnings Preview  5G Launch  Expanding S...   
4  Buy Surging Apple   Microsoft Stock Before Qua...   

                                       actualContent         publishedTime  \
0  Technology giant Apple   NASDAQ AAPL   is set ...  2020-01-27T00:00:00Z   
1  The top stories in this digest are Intel s   N...  2020-01-27T00:00:00Z   
2  S P 500  SPY \nThis week will be packed with e...  2020-01-27T00:00:00Z   
3    Reports Q1 2020 results on Tuesday  Jan  28 ...  2020-01-27T00:00:00Z   
4  On today s episode of Full Court Finance here ...  2020-01-27T00:00:00Z   

   stockIndicator  
0               0  
1               0  
2               0  
3               0  
4               0  


In [None]:
# Take a reproducible 1000-row sample: shuffle with a fixed seed, keep the first 1000
sliced_dataset = (
    yahoo_finance_dataset
    .shuffle(seed=42)
    .select(range(1000))
)

In [None]:
# Check the new dataset
print(sliced_dataset)

Dataset({
    features: ['text', 'actualContent', 'publishedTime', 'stockIndicator'],
    num_rows: 1000
})


In [None]:
enriched_sliced_dataset = enrich_dataset_with_contextual_text(sliced_dataset)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

0.25
Not relevant
0.3333333333333333
Not relevant
0.25
Not relevant
0.3333333333333333
Not relevant
0.25
Not relevant
0.3333333333333333
Not relevant
1.0
1.0
1.0
1.0
0.0
Not relevant
0.0
Not relevant
0.0
Not relevant
1.2
0.7
Not relevant
0.7142857142857143
Not relevant
0.9
Date parsing error: no such group
0.7777777777777778
Not relevant
Date parsing error: no such group
0.7777777777777778
Not relevant
Date parsing error: no such group
0.7777777777777778
Not relevant
Date parsing error: no such group
0.3333333333333333
Not relevant
0.5
Not relevant
Date parsing error: no such group
0.3333333333333333
Not relevant
0.5
Not relevant
Date parsing error: no such group
0.3333333333333333
Not relevant
0.5
Not relevant
1.0
0.2222222222222222
Not relevant
0.2222222222222222
Not relevant
0.2222222222222222
Not relevant
1.0
0.9
1.0
Date parsing error: no such group
1.0
Date parsing error: no such group
Date parsing error: no such group
Date parsing error: no such group
0.5
Not relevant
0.5
Not re

In [None]:
# Keep only the rows flagged with context_fetched == True
filtered_dataset = enriched_sliced_dataset.filter(
    lambda row: row['context_fetched'] == True
)

# Show the summary of what is left
print(filtered_dataset)

Filter:   0%|          | 0/1000 [00:00<?, ? examples/s]

Dataset({
    features: ['text', 'actualContent', 'publishedTime', 'stockIndicator', 'contextual_text', 'context_fetched'],
    num_rows: 270
})


In [None]:
# Convert the dataset to a Pandas DataFrame
df = filtered_dataset.to_pandas()

# Print the first 5 rows of the DataFrame
print(df.head())
print(df.count())

                                                text  \
0  Freight   Tariffs Hurt Tyson Foods  Savings Pl...   
1  5 Stocks To Fall Back On Amid Lurking Governme...   
2  PVH Corp Grapples With Multiple Headwinds  Is ...   
3  LG Display to replace its chief executive amid...   
4                Who Are Uber s Biggest Competitors    

                                       actualContent         publishedTime  \
0  Plagued by headwinds such as commodity cost vo...  2019-01-15T00:00:00Z   
1  The unprecedented reversal in the stock market...  2018-01-16T00:00:00Z   
2  PVH Corp   NYSE PVH   looks troubled  thanks t...  2019-09-10T00:00:00Z   
3  SEOUL  Reuters    South Korea s  LG Display  C...  2019-09-16T00:00:00Z   
4  Uber Technologies Inc   the massive  omniprese...  2016-08-29T00:00:00Z   

   stockIndicator                                    contextual_text  \
0               1  Freight   Tariffs Hurt Tyson Foods  Savings Pl...   
1               0  5 Stocks To Fall Back On Amid L

In [None]:
!ls /content/drive/MyDrive/Ind_Proj/large_language_models

context_enriched_yahoo_stock_news.csv  contextual_scraper_yahoo.py
contextual_custom_google_scrapper.py   LLama_2_7b_chat_multidataset_finetuned
contextual_scraper.py		       Sentences_AllAgree.txt


In [None]:
!cp -r /content/drive/MyDrive/Ind_Proj/large_language_models/LLama_2_7b_chat_multidataset_finetuned /content/Llama-2-7b-chat-finetuned

In [None]:
!ls /content/Llama-2-7b-chat-finetuned

adapter_config.json  adapter_model.bin	README.md


In [None]:
model_name = "NousResearch/Llama-2-7b-chat-hf"

In [None]:
new_model = "Llama-2-7b-chat-finetuned"

In [None]:
# Load the entire model on the GPU 0
device_map = {"": 0}

In [None]:
# Load the base checkpoint in half precision, placed according to device_map
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map=device_map,
    low_cpu_mem_usage=True,
    return_dict=True,
)

# Attach the LoRA adapter found under `new_model` and merge it into the base model
model = PeftModel.from_pretrained(base_model, new_model).merge_and_unload()

# Tokenizer of the base checkpoint; the EOS token doubles as padding, padded on the right
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  adapters_weights = torch.load(


tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [None]:
# Define the text-generation pipeline.
# Cap the number of *generated* tokens instead of prompt + generation: `max_length=200`
# also counts the prompt, so a prompt that is already ~200 tokens long (prompts built
# from full article bodies can easily be) leaves no room for the answer. Depending on
# the transformers version that is either an error or just a warning, and warnings are
# hidden in this notebook because verbosity is set to CRITICAL.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)

In [None]:
def extract_after_inst(generated_text):
    """Return the text between the first and second '[/INST]' markers, stripped.

    If the marker does not occur, the input is returned unchanged (not stripped).
    """
    # maxsplit=2 is enough: only the piece right after the first marker is used
    segments = generated_text.split('[/INST]', 2)
    if len(segments) <= 1:
        return generated_text
    return segments[1].strip()

In [None]:
def generate_sentiment(text):
    """Ask the pipeline for the sentiment of one piece of financial news.

    Wraps `text` in the [INST] ... [/INST] instruction prompt, runs `pipe` on it and
    returns whatever `extract_after_inst` extracts from the first generated candidate.
    """
    wrapped_prompt = (
        f"<s>[INST] Determine the sentiment of the financial news as negative, neutral or positive: {text} [/INST]"
    )
    first_candidate = pipe(wrapped_prompt)[0]
    return extract_after_inst(first_candidate['generated_text'])

In [None]:
def check_sentiment(output):
    """Map free-form model output to 'negative', 'neutral' or 'positive'.

    The match is a case-insensitive substring search. When several keywords occur,
    precedence is negative > neutral > positive; when none occurs the result is
    'neutral'.
    """
    lowered = output.lower()
    # Tuple order encodes the precedence between keywords
    for keyword in ("negative", "neutral", "positive"):
        if keyword in lowered:
            return keyword
    return "neutral"  # nothing recognised

In [None]:
def generate_actual_label_batch(batch):
    """Label every article body in a batch with the pipeline's sentiment.

    Args:
        batch: Mapping of column name to list of values (this function is used with
            `Dataset.map(..., batched=True)`); only 'actualContent' is read.

    Returns:
        {'y_true': [...]} with one 'negative' / 'neutral' / 'positive' string per
        generated candidate.
    """
    prompts = []
    for article in batch['actualContent']:
        prompts.append(
            f"<s>[INST] Determine the sentiment of the financial news as negative, neutral or positive: {article} [/INST]"
        )

    outputs = pipe(prompts)

    # The pipeline may return one list of candidates per prompt; flatten in that case
    if isinstance(outputs[0], list):
        flattened = []
        for candidates in outputs:
            flattened.extend(candidates)
        outputs = flattened

    labels = []
    for candidate in outputs:
        answer = extract_after_inst(candidate['generated_text'])
        labels.append(check_sentiment(answer))

    print("Batch completed.")

    return {'y_true': labels}

# Label the filtered dataset through Dataset.map, `batch_size` rows per call
batch_size = 64
result = filtered_dataset.map(generate_actual_label_batch, batched=True, batch_size=batch_size)

# Reference labels used by the later comparisons. These are the pipeline's own
# predictions on the full article text ('actualContent'), not human-annotated labels.
y_true = result['y_true']

Map:   0%|          | 0/270 [00:00<?, ? examples/s]

Batch completed.
Batch completed.
Batch completed.
Batch completed.
Batch completed.


In [None]:
def generate_title_label_batch(batch):
    """Label every headline in a batch with the pipeline's sentiment.

    Args:
        batch: Mapping of column name to list of values (this function is used with
            `Dataset.map(..., batched=True)`); only 'text' is read.

    Returns:
        {'y_title': [...]} with one 'negative' / 'neutral' / 'positive' string per
        generated candidate.
    """
    prompts = [
        f"<s>[INST] Determine the sentiment of the financial news as negative, neutral or positive: {headline} [/INST]"
        for headline in batch['text']
    ]
    raw = pipe(prompts)

    # Results may come back nested (a list per prompt); unroll them into one flat list
    nested = isinstance(raw[0], list)
    candidates = [entry for group in raw for entry in group] if nested else raw

    labels = list(
        map(lambda entry: check_sentiment(extract_after_inst(entry['generated_text'])), candidates)
    )

    print("Batch completed.")

    return {'y_title': labels}

# Label the filtered dataset through Dataset.map, `batch_size` rows per call
batch_size = 64
result_title = filtered_dataset.map(generate_title_label_batch, batched=True, batch_size=batch_size)

# Predicted labels obtained from the headline ('text') alone
y_title = result_title['y_title']

Map:   0%|          | 0/270 [00:00<?, ? examples/s]



Batch completed.
Batch completed.
Batch completed.
Batch completed.
Batch completed.


In [None]:
# Compare the headline-only labels against y_true (accuracy and weighted F1).
# y_true was produced by the same pipeline on 'actualContent', so the scores measure
# agreement with the model's full-article labels rather than with human annotations.
accuracy = accuracy_score(y_true, y_title)
f1 = f1_score(y_true, y_title, average='weighted')

# Evaluation metrics for the instruction-tuned LLM
print("Accuracy: {:.4f}".format(accuracy))
print("F1 Score: {:.4f}".format(f1))

Accuracy: 0.7444
F1 Score: 0.7378


In [None]:
def generate_context_label_batch(batch):
    """Label every context-enriched text in a batch with the pipeline's sentiment.

    Args:
        batch: Mapping of column name to list of values (this function is used with
            `Dataset.map(..., batched=True)`); only 'contextual_text' is read.

    Returns:
        {'y_context': [...]} with one 'negative' / 'neutral' / 'positive' string per
        generated candidate.
    """
    def to_label(candidate):
        # generated text -> part after [/INST] -> sentiment keyword
        return check_sentiment(extract_after_inst(candidate['generated_text']))

    responses = pipe([
        f"<s>[INST] Determine the sentiment of the financial news as negative, neutral or positive: {passage} [/INST]"
        for passage in batch['contextual_text']
    ])

    # One list of candidates per prompt is possible; concatenate them if so
    if isinstance(responses[0], list):
        unrolled = []
        for group in responses:
            unrolled += group
        responses = unrolled

    labels = [to_label(candidate) for candidate in responses]

    print("Batch completed.")

    return {'y_context': labels}

# Label the filtered dataset through Dataset.map, `batch_size` rows per call
batch_size = 64
result_context = filtered_dataset.map(generate_context_label_batch, batched=True, batch_size=batch_size)

# Predicted labels obtained from the 'contextual_text' column
y_context = result_context['y_context']

Map:   0%|          | 0/270 [00:00<?, ? examples/s]



Batch completed.
Batch completed.
Batch completed.
Batch completed.
Batch completed.


In [None]:
# Compare the labels predicted from 'contextual_text' against y_true
# (accuracy and weighted F1)
accuracy = accuracy_score(y_true, y_context)
f1 = f1_score(y_true, y_context, average='weighted')

# Evaluation metrics for the enhanced recency-aware RAG framework
print(f"Accuracy: {accuracy:.4f}\nF1 Score: {f1:.4f}")

Accuracy: 0.8519
F1 Score: 0.8534


In [None]:
print(filtered_dataset)

Dataset({
    features: ['text', 'actualContent', 'publishedTime', 'stockIndicator', 'contextual_text', 'context_fetched'],
    num_rows: 270
})


In [None]:
# Persist the context-enriched rows as CSV in the working directory
filtered_dataset.to_csv("context_enriched_yahoo_stock_news.csv")

print("Dataset saved to context_enriched_yahoo_stock_news.csv")

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Dataset saved to context_enriched_yahoo_stock_news.csv


In [None]:
!cp /content/drive/MyDrive/Ind_Proj/large_language_models/context_enriched_yahoo_stock_news.csv .

In [None]:
csv_dataset = load_dataset("csv", data_files="context_enriched_yahoo_stock_news.csv")

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
csv_dataset = csv_dataset['train']
print(csv_dataset)

Dataset({
    features: ['text', 'actualContent', 'publishedTime', 'stockIndicator', 'contextual_text', 'context_fetched'],
    num_rows: 270
})


In [None]:
enriched_csv_dataset = scrape_google_context(csv_dataset)

Map:   0%|          | 0/270 [00:00<?, ? examples/s]

1.0
1.0
1.0
1.2
0.9
1.0
1.0
0.9
0.7777777777777778
Not relevant
0.6666666666666666
Not relevant
0.75
Not relevant
0.5555555555555556
Not relevant
0.4444444444444444
Not relevant
1.0
1.0
1.0
1.25
1.0
1.0
1.0909090909090908
1.0909090909090908
1.0909090909090908
1.0
1.0
1.0
0.875
1.0
1.0
1.0
0.3333333333333333
Not relevant
0.5
Not relevant
0.2727272727272727
Not relevant
0.0
Not relevant
0.8888888888888888
1.0
1.125
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.125
1.0
0.5
Not relevant
0.8333333333333334
1.0
1.0
0.7
Not relevant
1.0
1.1111111111111112
1.0
1.0
1.1111111111111112
1.0
1.0
1.0
0.2857142857142857
Not relevant
Exception in scrape_yahoo_finance_article_page: Article `download()` failed with 503 Server Error: Service Unavailable for url: https://finance.yahoo.com/quote/%5EGSPC/history/?period1=1428390000&period2=1491548400&interval=1d&filter=history&frequency=1d on URL https://finance.yahoo.com/quote/%5EGSPC/history?period1=1428390000&period2=1491548400&interval=1d&filter=history&frequen

In [None]:
print(enriched_csv_dataset)

Dataset({
    features: ['text', 'actualContent', 'publishedTime', 'stockIndicator', 'contextual_text', 'context_fetched', 'google_context', 'google_context_fetched'],
    num_rows: 270
})


In [None]:
enriched_csv_dataset.to_pandas()['google_context_fetched'].sum()

260

In [None]:
def generate_actual_label_batch(batch):
    """Label every article body in a batch with the pipeline's sentiment.

    Args:
        batch: Mapping of column name to list of values (this function is used with
            `Dataset.map(..., batched=True)`); only 'actualContent' is read.

    Returns:
        {'y_true': [...]} with one 'negative' / 'neutral' / 'positive' string per
        generated candidate.
    """
    bodies = batch['actualContent']
    raw = pipe([
        f"<s>[INST] Determine the sentiment of the financial news as negative, neutral or positive: {body} [/INST]"
        for body in bodies
    ])

    # Unroll per-prompt candidate lists into a single flat list when they are nested
    if isinstance(raw[0], list):
        raw = [entry for group in raw for entry in group]

    labels = []
    for entry in raw:
        labels.append(check_sentiment(extract_after_inst(entry['generated_text'])))

    print("Batch completed.")

    return {'y_true': labels}

# Label the reloaded CSV dataset through Dataset.map, `batch_size` rows per call
batch_size = 64
result = csv_dataset.map(generate_actual_label_batch, batched=True, batch_size=batch_size)

# Reference labels used by the later comparisons. These are the pipeline's own
# predictions on the full article text ('actualContent'), not human-annotated labels.
y_true = result['y_true']

Map:   0%|          | 0/270 [00:00<?, ? examples/s]



Batch completed.
Batch completed.
Batch completed.
Batch completed.
Batch completed.


In [None]:
def generate_google_context_label_batch(batch):
    """Label every 'google_context' text in a batch with the pipeline's sentiment.

    Args:
        batch: Mapping of column name to list of values (this function is used with
            `Dataset.map(..., batched=True)`); only 'google_context' is read.

    Returns:
        {'y_context': [...]} with one 'negative' / 'neutral' / 'positive' string per
        generated candidate.
    """
    prompts = []
    for snippet in batch['google_context']:
        prompts.append(
            f"<s>[INST] Determine the sentiment of the financial news as negative, neutral or positive: {snippet} [/INST]"
        )

    outputs = pipe(prompts)

    # The pipeline may return one list of candidates per prompt; flatten in that case
    if isinstance(outputs[0], list):
        flattened = []
        for candidates in outputs:
            for candidate in candidates:
                flattened.append(candidate)
        outputs = flattened

    labels = [
        check_sentiment(extract_after_inst(candidate['generated_text']))
        for candidate in outputs
    ]

    print("Batch completed.")

    return {'y_context': labels}

# Label the Google-context-enriched dataset through Dataset.map, `batch_size` rows per call
batch_size = 64
result_google_context = enriched_csv_dataset.map(generate_google_context_label_batch, batched=True, batch_size=batch_size)

# Predicted labels obtained from the 'google_context' column (stored under 'y_context')
y_google_context = result_google_context['y_context']

Map:   0%|          | 0/270 [00:00<?, ? examples/s]

Batch completed.
Batch completed.
Batch completed.
Batch completed.
Batch completed.


In [None]:
# Compare the labels predicted from 'google_context' against y_true
# (accuracy and weighted F1)
accuracy = accuracy_score(y_true, y_google_context)
f1 = f1_score(y_true, y_google_context, average='weighted')

# Evaluation metrics for the RAG framework (without recency awareness) with the
# instruction-tuned LLM
print("Accuracy: {:.4f}".format(accuracy))
print("F1 Score: {:.4f}".format(f1))

Accuracy: 0.8333
F1 Score: 0.8346


In [None]:
model_name = "NousResearch/Llama-2-7b-chat-hf"

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [None]:
# Resolve the configured dtype name (bnb_4bit_compute_dtype, a string) to the
# torch attribute of the same name; used below for the 4-bit quantization config
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

In [None]:
# bitsandbytes quantization settings, assembled from the flags defined in the cells above
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# When running 4-bit with fp16 compute on a GPU of compute capability >= 8,
# print a hint that bf16 could be used
if use_4bit and compute_dtype == torch.float16:
    major = torch.cuda.get_device_capability()[0]
    if major >= 8:
        print(
            "=" * 80,
            "Your GPU supports bfloat16: accelerate training with bf16=True",
            "=" * 80,
            sep="\n",
        )

# Load the base model (no adapter) with the quantization config applied
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config,
)
model.config.pretraining_tp = 1
model.config.use_cache = False

# LLaMA tokenizer; the EOS token doubles as padding
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training
tokenizer.pad_token = tokenizer.eos_token

Your GPU supports bfloat16: accelerate training with bf16=True




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Define the text-generation pipeline for the base (non-fine-tuned) model.
# Cap the number of *generated* tokens instead of prompt + generation: `max_length=200`
# also counts the prompt, so a prompt that is already ~200 tokens long leaves no room
# for the answer. Depending on the transformers version that is either an error or just
# a warning, and warnings are hidden in this notebook because verbosity is set to CRITICAL.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)

In [None]:
def generate_sentiment(text):
    """Return the pipeline's answer about the sentiment of one news text.

    The text is embedded in the [INST] ... [/INST] instruction prompt, `pipe` is run
    on it, and the first candidate's generated text is passed through
    `extract_after_inst`.
    """
    instruction = f"Determine the sentiment of the financial news as negative, neutral or positive: {text}"
    candidates = pipe(f"<s>[INST] {instruction} [/INST]")
    return extract_after_inst(candidates[0]['generated_text'])

In [None]:
def extract_after_inst(generated_text):
    """Return the stripped text that follows the first '[/INST]' marker.

    Only the piece up to the next '[/INST]' (if any) is returned. Text without the
    marker is returned unchanged, without stripping.
    """
    marker = '[/INST]'
    if marker not in generated_text:
        return generated_text
    return generated_text.split(marker)[1].strip()

In [None]:
def check_sentiment(output):
    """Reduce free-form model output to 'negative', 'neutral' or 'positive'.

    Keywords are searched case-insensitively as substrings; when more than one is
    present the earliest in the order negative, neutral, positive wins. Output without
    any keyword is treated as 'neutral'.
    """
    haystack = output.lower()
    hits = [label for label in ("negative", "neutral", "positive") if label in haystack]
    return hits[0] if hits else "neutral"

In [None]:
def generate_llama_label_batch(batch):
    """Label every headline in a batch with the current pipeline's sentiment.

    Args:
        batch: Mapping of column name to list of values (this function is used with
            `Dataset.map(..., batched=True)`); only 'text' is read.

    Returns:
        {'y_llama_context': [...]} with one 'negative' / 'neutral' / 'positive' string
        per generated candidate.
    """
    def to_label(candidate):
        # generated text -> part after [/INST] -> sentiment keyword
        reply = extract_after_inst(candidate['generated_text'])
        return check_sentiment(reply)

    headlines = batch['text']
    responses = pipe([
        f"<s>[INST] Determine the sentiment of the financial news as negative, neutral or positive: {headline} [/INST]"
        for headline in headlines
    ])

    # One list of candidates per prompt is possible; concatenate them if so
    if isinstance(responses[0], list):
        merged = []
        for group in responses:
            merged.extend(group)
        responses = merged

    labels = [to_label(candidate) for candidate in responses]

    print("Batch completed.")

    return {'y_llama_context': labels}

# Label the reloaded CSV dataset through Dataset.map, `batch_size` rows per call
batch_size = 64
result_llama_context = csv_dataset.map(generate_llama_label_batch, batched=True, batch_size=batch_size)

# Predicted labels obtained from the headline ('text') column
y_llama_context = result_llama_context['y_llama_context']

Map:   0%|          | 0/270 [00:00<?, ? examples/s]

Batch completed.
Batch completed.
Batch completed.
Batch completed.
Batch completed.


In [None]:
# Compare the headline labels from generate_llama_label_batch against y_true
# (accuracy and weighted F1)
accuracy = accuracy_score(y_true, y_llama_context)
f1 = f1_score(y_true, y_llama_context, average='weighted')

# Evaluation metrics for the base LLM
print(f"Accuracy: {accuracy:.4f}\nF1 Score: {f1:.4f}")

Accuracy: 0.3519
F1 Score: 0.3659
