In [2]:
import requests
import json
import os
from dotenv import load_dotenv

In [3]:
# Load environment variables from the .env file.
load_dotenv()

# Read the provider API keys from the environment; each is None when unset.
together_apikey = os.getenv('TOGETHER_API_KEY')
firework_apikey = os.getenv('FIREWORK_API_KEY')
hf_api_key = os.getenv('HF_API_KEY')

In [5]:
def togetherai(question,model,api_key=together_apikey,timeout=120):
    """Ask a Together AI chat model to regenerate ``question``.

    Args:
        question: Text to be regenerated; it is embedded in the prompt.
        model: Together AI model identifier sent in the request payload.
        api_key: Bearer token for the API. The default is bound to the value
            of ``together_apikey`` at the time this ``def`` is executed.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``content`` of the first choice's message, or ``None`` when the
        request fails, the status is not 200, or the response body does not
        have the expected shape. Failures are reported with ``print``.
    """
    url = "https://api.together.xyz/v1/chat/completions"
    formatted_prompt = f"Regenerate provided text: TEXT = {question}"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": formatted_prompt}],
        "max_tokens": 1024,
        "top_p": 1,
        "top_k": 40,
        "presence_penalty": 0,
        "frequency_penalty": 0,
        "temperature": 0.5,
    }

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(
            url, headers=headers, data=json.dumps(payload), timeout=timeout
        )
    except requests.RequestException as exc:
        print(f"Error: request failed, {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    try:
        # Extract the output message
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # A 200 response with a non-JSON or unexpectedly shaped body.
        print(f"Error: unexpected response format, {exc!r}")
        return None

In [6]:
def fireworks(question,api_key=firework_apikey,model="accounts/yi-01-ai/models/yi-large",timeout=120):
    """Ask a Fireworks AI chat model to regenerate ``question``.

    Args:
        question: Text to be regenerated; it is embedded in the prompt.
        api_key: Bearer token for the API. The default is bound to the value
            of ``firework_apikey`` at the time this ``def`` is executed.
        model: Fireworks model identifier sent in the request payload.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``content`` of the first choice's message, or ``None`` when the
        request fails, the status is not 200, or the response body does not
        have the expected shape. Failures are reported with ``print``.
    """
    url = "https://api.fireworks.ai/inference/v1/chat/completions"
    formatted_prompt = f"Regenerate the text: TEXT={question}\n"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": formatted_prompt}],
        "max_tokens": 1024,
        "top_p": 1,
        "top_k": 40,
        "presence_penalty": 0,
        "frequency_penalty": 0,
        "temperature": 0.5,
    }

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(
            url, headers=headers, data=json.dumps(payload), timeout=timeout
        )
    except requests.RequestException as exc:
        print(f"Error: request failed, {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    try:
        # Extract the output message
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # A 200 response with a non-JSON or unexpectedly shaped body.
        print(f"Error: unexpected response format, {exc!r}")
        return None

In [7]:
# Together AI model identifiers queried by detect_text, in this order.
# ("google/gemma-2-9b-it" is currently disabled.)
together_ai_models = [
    "Qwen/Qwen2-72B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
]

In [8]:
class Score:
    """Plain record holding one model's regeneration result.

    Attributes:
        edit_score: Score stored for this regeneration.
        new_text: The regenerated text.
        model: Label of the model that produced ``new_text``.
    """

    def __init__(self, edit_score, new_text, model) -> None:
        self.edit_score = edit_score
        self.new_text = new_text
        self.model = model

In [9]:
!pip install Levenshtein


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [10]:
from nltk.tokenize import word_tokenize
import Levenshtein
import nltk
# Download the 'punkt_tab' NLTK resource each time this cell runs;
# presumably this is the tokenizer data word_tokenize needs — TODO confirm.
nltk.download('punkt_tab')
def get_edit_distance(text1, text2):
    """Return the Levenshtein distance between two tokenized texts.

    Each text is split with ``word_tokenize`` and its tokens are re-joined
    with single spaces; the distance is then computed between the two
    resulting strings.
    """
    normalized_first, normalized_second = (
        " ".join(word_tokenize(raw)) for raw in (text1, text2)
    )
    return Levenshtein.distance(normalized_first, normalized_second)

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/kapilwanaskar/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [11]:
def detect_text(text):
    """Regenerate ``text`` with every configured model and score each result.

    Each model in ``together_ai_models`` is queried through ``togetherai``,
    followed by one ``fireworks`` call labelled "Yi-Large". Every regenerated
    text is compared with ``text`` via ``get_edit_distance``.

    Args:
        text: The original text to regenerate.

    Returns:
        A list of ``Score`` objects, one per model that returned text. Models
        whose request failed (the helper returned ``None``) are skipped with a
        printed message instead of aborting the whole run.
    """
    edit_distance_score=[]

    def _record(new_text, label):
        # The API helpers return None on failure; tokenizing None would raise.
        if new_text is None:
            print(f"Skipping {label}: no regenerated text was returned")
            return
        edit_score=get_edit_distance(text,new_text)
        edit_distance_score.append(Score(edit_score,new_text,label))

    for model in together_ai_models:
        _record(togetherai(text,model),model)
    _record(fireworks(text),"Yi-Large")
    return edit_distance_score

In [12]:
# Sample input that is passed to detect_text and save_to_csv further down.
# NOTE(review): looks like scraped news-article text; run-together words such
# as "ByAlan" are part of the data and are intentionally left untouched.
text="""
Global Economy Global Economy Supported by World Bank Sees Rosier Growth Outlook But rising trade barriers pose a long-term threat to global output as protectionist policies spread, the bank said. ByAlan Rappeport Reporting from Washington The World Bank on Tuesday raised its outlook for the world economy this year but warned that the rise of new trade barriers and protectionist policies posed a long-term threat to global growth. In its latest Global Economic Prospects report, the World Bank projected global growth to hold steady at 2.6 percent this year, an upgrade from itsJanuary forecastof 2.4 percent, and predicted that output would edge higher to 2.7 percent in 2025. The forecasts showed the global economy stabilizing after being rocked in recent years by the pandemic and the wars in Ukraine and the Middle East. “Four years after the upheavals caused by the pandemic, conflicts, inflation and monetary tightening, it appears that global economic growth is steadying,” Indermit Gill, the World Bank’s chief economist, said in a statement accompanying the report. However, sluggish growth continues to haunt the world’s poorest economies, which are still grappling with inflation and the burdens of high debt. The bank noted that over the next three years, countries that account for more than 80 percent of the world’s population would experience slower growth than in the decade before the pandemic. The slightly brighter forecast was led by the resilience of the U.S. economy, which continues to defy expectations despite higher interest rates. Overall, advanced economies are growing at an annual rate of 1.5 percent, with output remaining sluggish in Europe and Japan. By contrast, emerging market and developing economies are growing at a rate of 4 percent, led by China and Indonesia. Although growth is expected to be a bit stronger than previously forecast, the World Bank said prices were easing more slowly than it projected six months ago. 
It foresees global inflation moderating to 3.5 percent in 2024 and 2.9 percent next year. That gradual decline is likely to lead central banks to delay interest rate cuts, dimming prospects for growth in developing economies."""
     

In [14]:
# Regenerate the sample text with every configured model and keep the scores.
data = detect_text(text)

In [15]:
# Echo the original input text.
print(f"Text: {text}")

Text: 
Global Economy Global Economy Supported by World Bank Sees Rosier Growth Outlook But rising trade barriers pose a long-term threat to global output as protectionist policies spread, the bank said. ByAlan Rappeport Reporting from Washington The World Bank on Tuesday raised its outlook for the world economy this year but warned that the rise of new trade barriers and protectionist policies posed a long-term threat to global growth. In its latest Global Economic Prospects report, the World Bank projected global growth to hold steady at 2.6 percent this year, an upgrade from itsJanuary forecastof 2.4 percent, and predicted that output would edge higher to 2.7 percent in 2025. The forecasts showed the global economy stabilizing after being rocked in recent years by the pandemic and the wars in Ukraine and the Middle East. “Four years after the upheavals caused by the pandemic, conflicts, inflation and monetary tightening, it appears that global economic growth is steadying,” Indermit

In [19]:
# One summary line per model: label, edit score, and the first 10 characters of new_text.
for item in data:
    preview = item.new_text[:10]
    print(f"{item.model} -- {item.edit_score} --{preview}...")

Qwen/Qwen2-72B-Instruct -- 1386 --TEXT = 
Wo...
mistralai/Mistral-7B-Instruct-v0.1 -- 1334 -- The globa...
meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo -- 33 --Global Eco...
Yi-Large -- 1221 --Global Eco...


In [29]:
import pandas as pd
from collections import defaultdict

def save_to_csv(data, original_text, filename='output.csv'):
    """Write per-model edit scores and regenerated texts to a CSV file.

    Results are grouped by ``item.model``. Row ``i`` of the output holds the
    ``i``-th result of every model side by side, plus the name of the model
    with the lowest edit score in that row. Before saving, '/', '_' and '-'
    in every column name are replaced by spaces.

    Args:
        data: Iterable of objects exposing ``model``, ``edit_score`` and
            ``new_text`` attributes.
        original_text: Text stored in the 'Original Text' column of every row.
        filename: Destination CSV path; an existing file is overwritten.

    Raises:
        ValueError: If ``data`` contains no items.
    """
    # Group results by model; dict insertion order keeps first-seen model order.
    models_data = defaultdict(list)
    for item in data:
        models_data[item.model].append(item)

    if not models_data:
        # Previously this surfaced as an opaque "max() arg is an empty sequence".
        raise ValueError("save_to_csv: 'data' is empty; nothing to write")

    max_entries = max(len(entries) for entries in models_data.values())

    # Build one row per entry index, with each model's data in its own columns.
    rows = []
    for i in range(max_entries):
        row = {'Sr. No': i + 1, 'Original Text': original_text}

        # Track the best model (lowest edit score) for this row.
        best_model = None
        lowest_edit_score = float('inf')

        for model_name, items in models_data.items():
            # Models with fewer entries get empty cells under the SAME column
            # names as populated rows, so no stray extra column is created.
            entry = items[i] if i < len(items) else None
            edit_score = entry.edit_score if entry is not None else None
            row[f'{model_name}_Edit_Score'] = edit_score
            row[f'{model_name}_New_Text'] = entry.new_text if entry is not None else None

            # Strict '<' keeps the first model on ties; missing scores never win.
            if edit_score is not None and edit_score < lowest_edit_score:
                lowest_edit_score = edit_score
                best_model = model_name

        row['best_LLM_model'] = best_model
        rows.append(row)

    df = pd.DataFrame(rows)

    # Move 'best_LLM_model' to the third position, right after the original text.
    cols = df.columns.tolist()
    cols.insert(2, cols.pop(cols.index('best_LLM_model')))
    df = df[cols]

    # In each column name, replace '/', '_' and '-' with a space.
    df.columns = [col.replace('/', ' ').replace('_', ' ').replace('-', ' ') for col in df.columns]

    # to_csv opens the target in write mode, so an existing file is replaced
    # without needing to delete it first.
    df.to_csv(filename, index=False)

# Persist the benchmark results for the sample text.
save_to_csv(data, text, filename='baseline_benchmark_3_results.csv')