In [48]:
import asyncio
import itertools
import logging
import os
import random
import ssl

import aiohttp
import certifi
import numpy as np
import pandas as pd
from aiohttp import ClientSession, ClientTimeout, ClientError
from tqdm import tqdm


# TLS context that verifies server certificates against the CA bundle shipped with certifi;
# it is passed to the aiohttp connector when the client session is created.
ssl_context = ssl.create_default_context(cafile=certifi.where())

In [108]:
# Endpoint that every generation request is POSTed to.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
# Read the bearer token from the environment so it never has to be pasted into (and
# committed with) the notebook. Falls back to the previous empty-string default.
API_TOKEN = os.environ.get("HF_TOKEN", '')

In [72]:
# Configure root logging at INFO level with a timestamp and level prefix; the asynchronous
# request helpers in this notebook report rate limits, failures and retries through `logging`.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [73]:
async def request_with_retry_hf(session, url, headers, json, semaphore, retries=4, backoff_factor=3):
    """POST a JSON payload and return the decoded JSON body, retrying transient failures.

    Args:
        session: Client session object exposing ``post(url, headers=..., json=...)``
            as an async context manager.
        url: Endpoint to POST to.
        headers: HTTP headers sent with every attempt.
        json: Payload passed through as the request's JSON body.
        semaphore: Async semaphore limiting how many requests are in flight.
        retries: Maximum number of attempts.
        backoff_factor: Base of the exponential back-off; the wait before the
            next attempt is ``backoff_factor ** attempt`` seconds.

    Returns:
        The decoded JSON body of the first response with status 200 or 201.

    Raises:
        RuntimeError: Immediately for any status other than 200, 201 or 429,
            or once all attempts have been used up.
    """
    for attempt in range(retries):
        delay = backoff_factor ** attempt
        # Hold the semaphore only while the request is in flight. Waiting for the
        # next attempt happens outside so a backing-off task does not block others.
        async with semaphore:
            try:
                async with session.post(url, headers=headers, json=json) as response:
                    if response.status in (200, 201):
                        return await response.json()
                    if response.status != 429:
                        raise RuntimeError(f"API returned a non-200 status code: {response.status}")
                    try:
                        retry_after = int(response.headers.get("Retry-After", 60))
                    except (TypeError, ValueError):
                        # Retry-After may also be an HTTP date; fall back to the default wait.
                        retry_after = 60
                    logging.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
                    # Honour the wait the server asked for instead of the generic back-off.
                    delay = max(retry_after, 0)
            except (ClientError, asyncio.TimeoutError) as e:
                logging.error(f"Request failed due to network error: {e}")

        # No point in waiting after the final attempt; fail straight away.
        if attempt + 1 < retries:
            logging.info(f"Retrying in {delay} seconds...")
            await asyncio.sleep(delay)

    raise RuntimeError("Request failed after multiple retries.")

In [74]:
async def generate_text_async_hf(session, text, generation_params, semaphore):
    """Request one generation for ``text`` and return the generated string.

    Args:
        session: Client session forwarded to ``request_with_retry_hf``.
        text: Prompt sent as the payload's ``"inputs"`` field.
        generation_params: Mapping copied into the payload's ``"parameters"`` field.
        semaphore: Concurrency limiter forwarded to ``request_with_retry_hf``.

    Returns:
        The ``"generated_text"`` of the first element of the response, or
        ``"No text generated"`` when the response is empty or lacks that key.

    Raises:
        RuntimeError: If the response is a mapping carrying an ``"error"`` key,
            or if ``request_with_retry_hf`` gives up.
    """
    payload = {
        "inputs": text,
        "parameters": {**generation_params}
    }

    HEADERS = {"Authorization": f"Bearer {API_TOKEN}"}

    # Call the request_with_retry function to handle potential retries
    response_json = await request_with_retry_hf(session, API_URL, HEADERS, payload, semaphore)

    # Check for an error payload before indexing: an error arrives as a mapping,
    # so `response_json[0]` would raise KeyError and hide the real message.
    if isinstance(response_json, dict) and "error" in response_json:
        raise RuntimeError(f"API returned an error: {response_json['error']}")

    # An empty response has no first element to read from.
    if not response_json:
        return "No text generated"

    return response_json[0].get("generated_text", "No text generated")

In [101]:
async def run_batch(dataset, api_provider, num_generates, sleep_time, batch_size = 1):
    """Send every prompt in ``dataset`` to the API concurrently, round after round.

    One round is executed for each value of ``range(0, num_generates, batch_size)``.
    Every round draws fresh generation parameters with ``get_params()`` and
    requests a generation for every prompt in ``dataset``.

    Args:
        dataset: Iterable of prompt strings.
        api_provider: Backend selector; only ``"HF"`` is supported.
        num_generates: Upper bound of the round counter.
        sleep_time: Seconds to pause between consecutive rounds.
        batch_size: Step of the round counter.

    Returns:
        A list with the generated texts of all rounds, in round order and, within
        a round, in ``dataset`` order. With a single round this is one result per
        prompt. An empty list is returned when no round is executed.

    Raises:
        ValueError: If ``api_provider`` is not ``"HF"``.
    """
    # Fail early with a clear message instead of hitting an unbound `tasks` later.
    if api_provider != "HF":
        raise ValueError(f"Unsupported api_provider: {api_provider!r}")

    semaphore = asyncio.BoundedSemaphore(128)
    timeout = ClientTimeout(total=60)
    round_starts = range(0, num_generates, batch_size)
    results = []

    async with ClientSession(timeout=timeout, connector=aiohttp.TCPConnector(ssl=ssl_context)) as session:
        for round_number, _ in enumerate(tqdm(round_starts, desc="Processing batches"), start=1):
            generation_params = get_params()
            tasks = [generate_text_async_hf(session, text, generation_params, semaphore) for text in dataset]

            # Keep the output of every round; previously each round overwrote the last one.
            results.extend(await asyncio.gather(*tasks))

            # Pause only between rounds, not after the final one.
            if round_number < len(round_starts):
                await asyncio.sleep(sleep_time)

    return results

In [102]:
def get_params():
    """Pick one combination from the sampling grid and return the generation parameters.

    The grid is the Cartesian product of the candidate lists below. One
    combination is chosen with ``np.random.randint`` and merged with a set of
    fixed options.

    Returns:
        dict: Keyword parameters for a text-generation request.
    """
    search_space = {
        "temperature": [0.9],
        "top_k": [80],
        "top_p": [0.95],
        "max_new_tokens": [300],
        "repetition_penalty": [1.2],
    }

    grid = list(itertools.product(*search_space.values()))
    chosen = grid[np.random.randint(len(grid), size=1)[0]]
    sampled = dict(zip(search_space, chosen))

    # No seed is set here: the original author wanted randomness for self-consistency.
    # NOTE(review): do_sample is False while sampling knobs are also set — confirm this is intended.
    fixed = {
        "do_sample": False,
        "return_full_text": False,
        "max_time": None,
        "stream": False,
        "details": False,
        "use_cache": False,
        "wait_for_model": False,
    }

    return {**sampled, **fixed}

In [103]:
# Load the essays, shuffle them and keep at most the first 100 rows.
# A fixed random_state makes a fresh "Restart & Run All" select the same essays,
# and .copy() yields an independent frame so that later column assignments do not
# operate on a slice of the shuffled frame.
persuade_df = (
    pd.read_csv("persuade.csv")
    .sample(frac=1, random_state=42)
    .iloc[:100]
    .copy()
)

In [127]:
import re

def clean_text(text):
    """Replace non-breaking spaces and runs of newlines in ``text`` with single spaces.

    Args:
        text: String to clean.

    Returns:
        The cleaned string.
    """
    substitutions = (
        (r'\xa0', ' '),
        (r'\n+', ' '),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text

# Overwrite the essay column with its cleaned version (clean_text applied to every row).
persuade_df['full_text'] = persuade_df['full_text'].apply(clean_text)

In [105]:
# Essay texts that the paraphrase prompts are built from.
sample_text = persuade_df['full_text']

In [106]:
# Build one prompt per essay by prefixing the paraphrasing instruction.
instruction = 'Paraphrase the following text upto same length: '
paraphrase_sample = [instruction + essay for essay in sample_text]

In [109]:
# Positional arguments are (dataset, api_provider, num_generates, sleep_time); batch_size keeps its default of 1.
# Here: all prompts, the "HF" provider, num_generates=1 and sleep_time=1.
# Top-level `await` relies on the notebook's running event loop.
output_simple = await run_batch(paraphrase_sample, "HF", 1, 1)

Processing batches: 100%|███████████████████████████████████████████████████████| 1/1 [00:18<00:00, 18.38s/it]


In [111]:
# Display the prompts that were sent to the model.
paraphrase_sample

['Paraphrase the following text upto same length: techonology called the ficial Action coding System enavles computer to identify human emotions is really good thing. we would like to know If they are lying about their emotional. Dr. Huang is really smart but I think he is right. First,computer to identify human emotions is really good thing because we would know If they are lying to us or no. Acoording the text " can we actually "calculate"emotions like math homework",I think they got the right point because our emotional is like calculate. They have to calculate are we really sad or happy. Another example from the text" The Mona lisa demonstration is really intended to bring a smile to your face, while it shows just how much this computer can do". I Hope our school should do like that so people we could help other people whenever we are sad. Second,these day a lot of people they are lying to another people to much. Honestly, I used to cheat on my girlfriend too back in the day but th

In [110]:
# Display the raw generations returned by run_batch.
output_simple

[' Maybe one day,we could detect emotion without asking question and then maybe technology like Facial Action Coding system is gonna make people more honest or less honest depending on the situation. And it\'s important to remember that this technology is not perfect and can sometimes give false positives. But overall it\'s an exciting development that has the potential to change the way we interact with each other.\n\nHere\'s a paraphrased version of the text up to the same length:\n\nThe Formal Action Coding System, a groundbreaking technological development by Prof. Thomas Huang at the Beckman Institute of the University of Illinois, enables computers to decipher human emotions. This capability holds great significance as it permits determining if individuals are being truthful about their feelings. According to the statement "can we mathematically quantify emotions like solving algebraic problems?" the authors raise valid points. Emotion recognition entails evaluating whether one i

In [131]:
import pandas as pd
# Start from an empty frame that declares the two result columns.
paraphrased = pd.DataFrame(columns = ['Original', 'Paraphrased'])

In [132]:
def remove_instruction(text):
    """Strip every occurrence of the paraphrasing instruction from ``text``.

    Only the instruction itself is removed; whitespace that followed it stays.

    Args:
        text: Prompt string that may contain the instruction.

    Returns:
        ``text`` without the instruction.
    """
    instruction_pattern = 'Paraphrase the following text upto same length:'
    without_instruction = re.sub(instruction_pattern, '', text)
    return without_instruction

In [133]:
# Pair every prompt with its generation and build the frame in a single step.
# Growing a DataFrame with pd.concat inside a loop is quadratic, and re-running the
# cell appended the same rows again; rebuilding from scratch makes it idempotent.
# zip stops at the shorter input, so only prompts that have a generation are kept.
paraphrased = pd.DataFrame(
    list(zip(paraphrase_sample, output_simple)),
    columns=['Original', 'Paraphrased'],
)

# Normalise whitespace in the generations and drop the instruction prefix from the prompts.
paraphrased['Paraphrased'] = paraphrased['Paraphrased'].apply(clean_text)
paraphrased['Original'] = paraphrased['Original'].apply(remove_instruction)

In [134]:
# Display the paired originals and paraphrases.
paraphrased

Unnamed: 0,Original,Paraphrased
0,techonology called the ficial Action coding S...,"Maybe one day,we could detect emotion without..."
1,Dear Principle I have heard about your decisi...,I strongly object to the proposed modificatio...
2,I think students should come up with their ow...,Not only will they have more fun but they may...
3,"Dear TEACHER_NAME, It has come to the attenti...",. The revised version of the text could be as ...
4,"Community Service Dear principle, I think all...","Paraphrased version: Principal Sir, I firmly ..."
...,...,...
95,Introducing driverless cars to today's societ...,Here's my attempt at paraphrasing the text: T...
96,Seeking advice from multiple people will give...,...
97,I think the Facial Action Coding System to id...,So i believe the Facial Action Coding System ...
98,the use of technology to read emotional expre...,Thats much simpler and less intrusive than us...


In [137]:
# Write the pairs to disk; with pandas' defaults the index is written as an unnamed first column.
# NOTE(review): the filename is spelled "parahrased" — confirm whether downstream readers
# expect this spelling before renaming it.
paraphrased.to_csv("parahrased_human.csv")

In [138]:
# Spot-check: first original essay with the instruction removed.
paraphrased['Original'].iloc[0]

' techonology called the ficial Action coding System enavles computer to identify human emotions is really good thing. we would like to know If they are lying about their emotional. Dr. Huang is really smart but I think he is right. First,computer to identify human emotions is really good thing because we would know If they are lying to us or no. Acoording the text " can we actually "calculate"emotions like math homework",I think they got the right point because our emotional is like calculate. They have to calculate are we really sad or happy. Another example from the text" The Mona lisa demonstration is really intended to bring a smile to your face, while it shows just how much this computer can do". I Hope our school should do like that so people we could help other people whenever we are sad. Second,these day a lot of people they are lying to another people to much. Honestly, I used to cheat on my girlfriend too back in the day but the good part is that we don\'t know facial action

In [139]:
# Spot-check: the model's output for the first essay.
paraphrased['Paraphrased'].iloc[0]

' Maybe one day,we could detect emotion without asking question and then maybe technology like Facial Action Coding system is gonna make people more honest or less honest depending on the situation. And it\'s important to remember that this technology is not perfect and can sometimes give false positives. But overall it\'s an exciting development that has the potential to change the way we interact with each other. Here\'s a paraphrased version of the text up to the same length: The Formal Action Coding System, a groundbreaking technological development by Prof. Thomas Huang at the Beckman Institute of the University of Illinois, enables computers to decipher human emotions. This capability holds great significance as it permits determining if individuals are being truthful about their feelings. According to the statement "can we mathematically quantify emotions like solving algebraic problems?" the authors raise valid points. Emotion recognition entails evaluating whether one is genui