In [1]:
%%capture
# Install Unsloth plus the supporting packages into the *kernel's* environment.
# `%pip` (rather than `!pip`) guarantees the packages land in the interpreter
# this notebook is running on, not whichever `pip` is first on the shell PATH.
# NOTE(review): none of these are version-pinned; pin them once a known-good
# combination has been identified so the notebook stays reproducible.
%pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
%pip install --no-deps xformers trl peft accelerate bitsandbytes

In [2]:
import random
import re

import pandas as pd
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
# Define the prompt template
# The template is assembled line by line so every newline in the final prompt
# is explicit. Placeholders filled later via str.format:
#   {opening_line} - suggested first words of the answer
#   {user_input}   - the news sentence being explained
#   {sentiment}    - the sentiment label attached to that sentence
prompt_template = "\n".join(
    (
        "<s>[INST] <<SYS>>",
        "You are an insightful financial analyst with a knack for interpreting news. Respond concisely in one sentence starting with a dynamic phrase like:",
        "{opening_line}",
        "<</SYS>>",
        "{user_input} (Sentiment - {sentiment}) [/INST]",
        "",
        "###Response:==>",
        "",  # trailing newline after the response marker
    )
)

In [4]:
# Pool of 15 candidate opening lines for the system prompt.
# generate_dynamic_prompt picks one at random per call and fills in the
# {sentiment} placeholder with str.format before inserting the result into
# prompt_template as {opening_line}.
# Every entry ends in an ellipsis ("..."), i.e. it is an unfinished sentence
# the model is asked to continue.
opening_phrases = [
    "The sentiment around this news is {sentiment} because...",
    "This headline conveys a {sentiment} sentiment because...",
    "From a financial perspective, this reflects a {sentiment} sentiment because...",
    "The tone of this news is {sentiment} because...",
    "Market analysts interpret this as a {sentiment} sentiment because...",
    "This news indicates a {sentiment} sentiment because...",
    "The mood around this news is {sentiment} because...",
    "This update reflects a {sentiment} sentiment due to...",
    "The implications of this news are {sentiment} because...",
    "Financially, this signals a {sentiment} sentiment because...",
    "The perception of this news is {sentiment} because...",
    "This event highlights a {sentiment} sentiment because...",
    "From an analytical viewpoint, this is a {sentiment} sentiment because...",
    "The narrative around this news points to a {sentiment} sentiment because...",
    "The overall impression of this news is {sentiment} because..."
]


In [5]:
# Function to dynamically generate the prompt
def generate_dynamic_prompt(user_input, sentiment):
    """Build the full model prompt for one news sentence.

    A phrase is drawn at random from ``opening_phrases``, its ``{sentiment}``
    placeholder is filled in, and the result is inserted into
    ``prompt_template`` together with the sentence and its sentiment label.

    Args:
        user_input: The news sentence to be explained.
        sentiment: The sentiment label associated with ``user_input``.

    Returns:
        The formatted prompt string.
    """
    phrase_template = random.choice(opening_phrases)
    template_fields = {
        "opening_line": phrase_template.format(sentiment=sentiment),
        "user_input": user_input,
        "sentiment": sentiment,
    }
    return prompt_template.format(**template_fields)

# Sentence terminator used to trim the model output to its first sentence:
# a single "." that is not part of an ellipsis and is followed by whitespace or
# the end of the text. Periods inside numbers ("3.5%") or inside "..." are
# therefore not treated as sentence boundaries.
_SENTENCE_END = re.compile(r"(?<!\.)\.(?=\s|$)")


# Generate the model's one-sentence explanation for a single dataset row
def generate_response(sentence, sentiment):
    """Ask the model to explain the sentiment of one sentence.

    Builds a prompt with ``generate_dynamic_prompt``, runs generation, decodes
    the output and returns the first sentence that follows the
    ``###Response:==>`` marker (without its terminating period).

    This function reads the notebook-level globals ``tokenizer`` and ``model``.
    NOTE(review): no cell visible here creates them; they must be loaded
    (presumably via ``FastLanguageModel``) before this is called, otherwise a
    ``NameError`` is raised.

    Args:
        sentence: The news sentence to explain.
        sentiment: The sentiment label associated with ``sentence``.

    Returns:
        The first sentence of the model's answer, or ``'No response found'``
        when the decoded text does not contain the response marker.
    """
    # Generate a dynamic prompt
    prompt = generate_dynamic_prompt(sentence, sentiment)
    # Tokenize and generate response
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=100, use_cache=True)
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # The decoded text still contains the prompt, so everything up to the
    # marker is discarded and only the text after it is kept.
    split_response = response.split("###Response:==>")
    if len(split_response) > 1:
        answer = split_response[1].strip()
        # Keep only the first sentence. Splitting on every "." (the previous
        # behaviour) truncated decimals and stopped right after the
        # "because..." ellipsis that the opening phrases end with.
        return _SENTENCE_END.split(answer, maxsplit=1)[0].strip()
    return 'No response found'


In [None]:
# Load and preprocess the dataset
file_path = "sentiment_data.csv"  # Replace with your dataset path
# Read the CSV and normalize every column label (trim surrounding whitespace,
# lower-case) in one chained expression so later cells can rely on the exact
# names "sentence" and "sentiment".
data = pd.read_csv(file_path).rename(columns=lambda label: label.strip().lower())

In [None]:
# Validate dataset columns
# Both columns are required by the generation loop; stop early and show what
# the file actually contains if either one is absent.
required_columns = ("sentence", "sentiment")
has_required_columns = all(name in data.columns for name in required_columns)
if not has_required_columns:
    print("Columns found in dataset:", data.columns)
    raise ValueError("The dataset must contain 'sentence' and 'sentiment' columns.")

In [None]:
# Process each row and generate responses
# Accumulates one {"sentence", "sentiment", "response"} record per dataset row.
results = []
for _, row in data.iterrows():
    sentence, sentiment = row["sentence"], row["sentiment"]
    response = generate_response(sentence, sentiment)
    # Echo each result as it is produced so progress is visible during the run.
    print(f"Sentence: {sentence}\nSentiment: {sentiment}\nResponse: {response}\n")
    record = dict(sentence=sentence, sentiment=sentiment, response=response)
    results.append(record)


In [None]:
# Save results to a CSV file
output_file = "responses.csv"
# Build the frame from the collected records and write it without the
# positional index column.
result_df = pd.DataFrame(data=results)
result_df.to_csv(path_or_buf=output_file, index=False)
print(f"Responses saved to {output_file}")