In [1]:
from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes
from ibm_watsonx_ai.foundation_models import Model
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams

import os
from dotenv import load_dotenv

In [5]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
api_key = os.getenv("API_KEY")
project_id = os.getenv("PROJECT_ID")

# Fail fast with an explicit message instead of handing None to the client,
# where a missing credential would only surface later and less clearly.
missing = [
    name
    for name, value in (("API_KEY", api_key), ("PROJECT_ID", project_id))
    if not value
]
if missing:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing)
        + ". Set them in the environment or in a .env file."
    )

# Service endpoint and API key used to authenticate against watsonx.ai.
creds = {
    "url"    : "https://us-south.ml.cloud.ibm.com",
    "apikey" : api_key
}

# Text-generation parameters forwarded to the model on every call.
params = {
    GenParams.DECODING_METHOD: "sample",
    GenParams.MAX_NEW_TOKENS: 3000,
    GenParams.MIN_NEW_TOKENS: 1,
    GenParams.TEMPERATURE: 0.5,
    GenParams.TOP_K: 50,
    GenParams.TOP_P: 1
}

# Shared model handle used by the cells below.
model = Model("meta-llama/llama-2-70b-chat", creds, params, project_id)

In [6]:
def generate_summary(info):
    """Return the text the shared ``model`` generates for ``info`` as one string.

    Whatever ``model.generate_text(info)`` returns is iterated, and the pieces
    it yields are concatenated in order onto an empty string.
    """
    pieces = model.generate_text(info)
    return "".join(pieces)

In [7]:
import pandas as pd

# Load the evaluation set. The rendered output below shows the columns
# predicted_summary, reference_summary and human_label.
# NOTE(review): the extra "Unnamed: 0" column in that output suggests the file
# was saved together with its index — consider index_col=0 if it is not needed.
# NOTE(review): `csv` is also the name of a standard-library module; a more
# specific name would avoid confusion if that module is ever imported here.
csv = pd.read_csv('evaluation_test.csv')
# Bare last expression so the notebook renders the frame as the cell output.
csv

Unnamed: 0,predicted_summary,reference_summary,human_label
0,"Based on the raw horse data, Ben has a mediu...",Comparing Ben and Adam: Ben : Medium past perf...,True
1,"Based on the raw horse data, Ben has a sligh...",Comparing Ben and Sugar Sugar: Ben : Medium pa...,True
2,"Based on the raw horse data, Ben has a sligh...",Comparing Ben and Lucky Archangel: Ben : Mediu...,True
3,"Based on the raw horse data, Ben has a sligh...",Comparing Ben and Street Conqueror: Ben : Medi...,True
4,"After analyzing the raw horse data, it appea...",Comparing Ben and Togepi: Ben : Medium past pe...,True
5,"Based on the raw horse data, Armour Eagle ho...",Comparing Ben and Armour Eagle: Ben : Medium p...,False
6,"Based on the raw horse data, Lucky Archangel...",Comparing Adam and Lucky Archangel: Adam : Wea...,False
7,"Based on the raw horse data, Togepi holds a ...",Comparing Adam and Togepi: Adam : Weak past pe...,False
8,"Based on the raw horse data, Sugar Sugar has...",Comparing Sugar Sugar and Lucky Archangel: Sug...,False
9,"Based on the raw horse data, Sugar Sugar has...",Comparing Sugar Sugar and Street Conqueror: Su...,False


In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from rouge import Rouge
from nltk import ngrams

def cosine_similarity_score(str1, str2):
    """Return the cosine similarity between the TF-IDF vectors of two strings.

    A fresh ``TfidfVectorizer`` is fitted on just these two inputs, so the
    vocabulary and weights are derived from this pair alone.
    """
    # Row 0 holds the features of str1, row 1 those of str2.
    doc_vectors = TfidfVectorizer().fit_transform([str1, str2])
    first_vec, second_vec = doc_vectors[0], doc_vectors[1]

    # The pairwise result is 2-D; its single entry is the score we want.
    similarity_grid = cosine_similarity(first_vec, second_vec)
    return similarity_grid[0][0]


def jaccard_similarity_score(str1, str2, n=1):
    """Return the Jaccard similarity of the word n-gram sets of two strings.

    Each string is split on whitespace and turned into the set of its
    contiguous n-token windows; the score is ``|A & B| / |A | B|``.

    Args:
        str1: First text.
        str2: Second text.
        n: N-gram size (1 for unigrams, 2 for bigrams, ...). Defaults to 1.

    Returns:
        A float in ``[0.0, 1.0]``. When neither string yields any n-gram
        (both empty/whitespace-only, or both shorter than ``n`` tokens) the
        score is defined as ``0.0`` instead of raising ``ZeroDivisionError``.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    def _ngram_set(text):
        # Sliding windows of n consecutive tokens, as tuples (no padding);
        # a text with fewer than n tokens produces an empty set.
        tokens = text.split()
        return set(zip(*(tokens[i:] for i in range(n))))

    grams1 = _ngram_set(str1)
    grams2 = _ngram_set(str2)

    union = grams1 | grams2
    if not union:
        # Nothing to compare on either side: avoid dividing by zero.
        return 0.0

    return len(grams1 & grams2) / len(union)


def calculate_rouge_scores(generated_summary, reference_summary):
    """Return the ROUGE-1, ROUGE-2 and ROUGE-L ``'f'`` scores for one pair.

    The generated summary is scored against the reference with a fresh
    ``Rouge`` instance, and the ``'f'`` value of each metric is read from the
    first entry of the result.

    Returns:
        A tuple ``(rouge_1, rouge_2, rouge_l)``.
    """
    first_result = Rouge().get_scores(generated_summary, reference_summary)[0]
    return tuple(
        first_result[metric]['f']
        for metric in ('rouge-1', 'rouge-2', 'rouge-l')
    )

# Read the evaluation pairs from disk
csv = pd.read_csv('evaluation_test.csv')

# One record per (predicted, reference) pair, in file order
results = []

for row in csv.to_dict(orient='records'):
    predicted_summary = row['predicted_summary']
    reference_summary = row['reference_summary']

    # Metrics are evaluated in the order cosine -> Jaccard -> ROUGE; the key
    # order here fixes the column order of the resulting table.
    record = {
        'Predicted Summary': predicted_summary,
        'Reference Summary': reference_summary,
        'Cosine Similarity': cosine_similarity_score(predicted_summary, reference_summary),
        'Jaccard Similarity': jaccard_similarity_score(predicted_summary, reference_summary),
    }
    record['ROUGE-1'], record['ROUGE-2'], record['ROUGE-L'] = calculate_rouge_scores(
        predicted_summary, reference_summary
    )
    results.append(record)

# Collect the per-pair scores into a table
df = pd.DataFrame(results)

# Show the table as the cell output
df

Unnamed: 0,Predicted Summary,Reference Summary,Cosine Similarity,Jaccard Similarity,ROUGE-1,ROUGE-2,ROUGE-L
0,"Based on the raw horse data, Ben has a mediu...",Comparing Ben and Adam: Ben : Medium past perf...,0.240706,0.046875,0.090909,0.0,0.090909
1,"Based on the raw horse data, Ben has a sligh...",Comparing Ben and Sugar Sugar: Ben : Medium pa...,0.365738,0.068493,0.12987,0.040816,0.12987
2,"Based on the raw horse data, Ben has a sligh...",Comparing Ben and Lucky Archangel: Ben : Mediu...,0.22827,0.08,0.175,0.020408,0.175
3,"Based on the raw horse data, Ben has a sligh...",Comparing Ben and Street Conqueror: Ben : Medi...,0.19356,0.060606,0.119403,0.022472,0.119403
4,"After analyzing the raw horse data, it appea...",Comparing Ben and Togepi: Ben : Medium past pe...,0.238122,0.069767,0.111111,0.0,0.111111
5,"Based on the raw horse data, Armour Eagle ho...",Comparing Ben and Armour Eagle: Ben : Medium p...,0.295458,0.051282,0.125,0.02,0.125
6,"Based on the raw horse data, Lucky Archangel...",Comparing Adam and Lucky Archangel: Adam : Wea...,0.194057,0.069444,0.131579,0.019608,0.131579
7,"Based on the raw horse data, Togepi holds a ...",Comparing Adam and Togepi: Adam : Weak past pe...,0.174085,0.042857,0.085714,0.0,0.085714
8,"Based on the raw horse data, Sugar Sugar has...",Comparing Sugar Sugar and Lucky Archangel: Sug...,0.3221,0.061538,0.121212,0.044444,0.121212
9,"Based on the raw horse data, Sugar Sugar has...",Comparing Sugar Sugar and Street Conqueror: Su...,0.338089,0.076923,0.149254,0.043956,0.119403
