In [None]:
from langchain.llms import OpenAIChat
from langchain import LLMChain
from langchain.text_splitter import CharacterTextSplitter,TokenTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts.few_shot import FewShotPromptTemplate


import os
from dotenv import load_dotenv
load_dotenv()

import pandas as pd
pd.set_option('display.max_colwidth', None)
from tqdm.notebook import tqdm
tqdm.pandas()
from ast import literal_eval
import random
import re
from langchain.chat_models import ChatOpenAI


In [None]:
# Optional one-time environment setup; uncomment the lines that are needed:
# !source ../../../elbaff_iesta_venv/bin/activate
# %pip install langchain
# %pip install python-dotenv
# %pip install openai

In [None]:
"""

liberal_chat_prompt = ChatPromptTemplate.from_messages(create_prompt_template(prompt_dict["all"].format(ideology ="liberal")))
llm_chain = LLMChain(llm=chat, prompt=liberal_chat_prompt)
result = llm_chain.run(ineffective_argument="If there was no Kryptonite, can Superman defeat the Silver Surfer?")
print(result)
cons_chat_prompt = ChatPromptTemplate.from_messages(create_prompt_template(prompt_dict["all"].format(ideology ="conservative")))
llm_chain = LLMChain(llm=chat, prompt=cons_chat_prompt)
result = llm_chain.run(ineffective_argument="If there was no Kryptonite, can Superman defeat the Silver Surfer?")
print(result)
"""

In [None]:
# Chat model instance (gpt-3.5-turbo, temperature 0) that get_generations
# hands to every LLMChain it builds.
chat = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

In [None]:

# Reusable fragments of the system prompt. `ideology_str` deliberately keeps a
# literal "{ideology}" placeholder that is filled in later via str.format().
basic_str = "Transform the following argument to an effective argument by maintaining the original length"
ideology_str = "for readers with a {ideology} political ideology"
content_str = "by preserving the content of the argument"
style_str = "by only changing the style of the text"

# For every prompt variant: the fragments appended (space-separated) after
# `basic_str`. Insertion order is the order in which variants are generated.
_prompt_parts = {
    "basic": (),
    "ideology": (ideology_str,),
    "content": (content_str,),
    "style": (style_str,),
    "ideology-content": (ideology_str, content_str),
    "ideology-style": (ideology_str, style_str),
    "all": (ideology_str, content_str, "and " + style_str),
}

# Variant name -> full system prompt, always terminated by a colon.
prompt_dict = {
    variant: " ".join((basic_str, *parts)) + ":"
    for variant, parts in _prompt_parts.items()
}



In [None]:

def create_prompt_template(prompt):
    """Build the message templates for one chat prompt.

    Args:
        prompt: Text used as the template of the system message.

    Returns:
        A two-element list: the system message template built from `prompt`,
        followed by a human message template whose only content is the
        `{ineffective_argument}` placeholder.
    """
    return [
        SystemMessagePromptTemplate.from_template(prompt),
        HumanMessagePromptTemplate.from_template("{ineffective_argument}"),
    ]

In [None]:

from datasets import load_dataset, Dataset

from iesta.machine_learning.huggingface_loader import IESTAHuggingFace

def get_data(ideology, effect='ineffective', limit=500):
    """Load the test split for one ideology, restricted to a single effect label.

    Args:
        ideology: Suffix of the dataset name
            (`notaphoenix/debateorg_w_effect_for_{ideology}`).
        effect: Key into `IESTAHuggingFace._LABEL2ID_`; only rows whose
            `label` equals the mapped id are kept.
        limit: Maximum number of rows to keep after shuffling.

    Returns:
        The filtered dataset, shuffled with a fixed seed, cut to at most
        `limit` rows, with an `id` field set to each row's position in that
        final ordering (a row's own `id` field, if any, takes precedence).
    """
    def _has_requested_label(row):
        return row["label"] == IESTAHuggingFace._LABEL2ID_[effect]

    def _with_position_id(example, idx):
        # Keys already present in `example` override the positional "id".
        return {"id": idx, **example}

    name: str = f'notaphoenix/debateorg_w_effect_for_{ideology}'
    dataset: Dataset = load_dataset(name, split="test")

    matching = dataset.filter(_has_requested_label)
    dataset = matching.shuffle(seed=2062021)

    # Truncate only when there are more rows than requested.
    row_count = len(dataset)
    if row_count > limit:
        dataset = dataset.select(range(limit))

    print(f"Return dataset {name} with {len(dataset)} ")
    return dataset.map(_with_position_id, with_indices=True)

    



In [None]:

def get_generations(ineffective_argument:str, ideology:str):
    """Run every prompt variant in `prompt_dict` on a single argument.

    Args:
        ineffective_argument: Text sent as the human message.
        ideology: Value substituted for `{ideology}` in each prompt template.

    Returns:
        Dict mapping each `prompt_dict` key to the text returned by the
        chain for that variant, in `prompt_dict` order.
    """
    def _generate(template):
        # Fill the ideology placeholder first; the result becomes the system message.
        system_prompt = template.format(ideology=ideology)
        messages = create_prompt_template(system_prompt)
        chain = LLMChain(llm=chat, prompt=ChatPromptTemplate.from_messages(messages))
        return chain.run(ineffective_argument=ineffective_argument)

    return {
        variant: _generate(template)
        for variant, template in prompt_dict.items()
    }

    


In [21]:
import json
import pandas as pd
from tqdm import tqdm
from os.path import exists

def generate_args(ideology:str, out_file :str = "../data/llms_out/") -> None:
    """Generate rewrites for one ideology and append them to a JSONL file.

    The output path is `{out_file}{ideology}_gpt3.5turbo.jsonl`. If that file
    already holds records, rows whose `id` is present there are skipped, so an
    interrupted run can be resumed by calling the function again.

    Each written record is the dict returned by `get_generations`, updated with
    the fields of the source row (source fields win on key clashes). Records
    are separated by a newline; no trailing newline is written.

    Args:
        ideology: Ideology passed to `get_data` and `get_generations`.
        out_file: Path prefix (normally a directory ending in "/") that the
            file name is appended to.

    Returns:
        None. Results are only written to disk.
    """
    out_file = f"{out_file}{ideology}_gpt3.5turbo.jsonl"

    # Make sure the target directory exists so opening the file cannot fail on it.
    out_dir = os.path.dirname(out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    existing_indices = []
    add_new_l = False

    # A zero-byte file can be left behind when every request of an earlier run
    # failed; there is nothing to parse in that case.
    if exists(out_file) and os.path.getsize(out_file) > 0:
        _df = pd.read_json(path_or_buf=out_file, lines=True)
        if 'id' in _df.columns:
            existing_indices = _df['id'].values.tolist()

        # Only prepend a separator when the file does not already end with one,
        # otherwise two records would be glued together or a blank line added.
        with open(out_file, 'rb') as existing_file:
            existing_file.seek(-1, os.SEEK_END)
            add_new_l = existing_file.read(1) != b"\n"

    filtered_dataset = get_data(ideology, effect="ineffective", limit=500)
    if len(existing_indices) > 0:
        print(f"filtering out existing indices ({len(existing_indices)})")
        done_ids = set(existing_indices)  # set lookup instead of scanning a list per row
        filtered_dataset = filtered_dataset.filter(lambda example: example['id'] not in done_ids)
        print(f"{filtered_dataset.num_rows} to go...")

    with open(out_file, 'a') as file:
        for datapoint in tqdm(filtered_dataset):
            try:
                promt_generated_dict = get_generations(datapoint["text"], ideology)
                promt_generated_dict.update(datapoint)

                nline = "\n" if add_new_l else ""
                file.write(f"{nline}{json.dumps(promt_generated_dict)}")
                # Flush per record so finished work survives a killed kernel.
                file.flush()
                add_new_l = True
            except Exception as e:
                # Best effort: report the failure and continue with the next row;
                # the skipped id is retried on the next call.
                print(e)
                print(f"Failed to get a response for ID: {datapoint['id']}")

    

        

In [22]:
# Generate (or resume) the rewrites for the liberal dataset; records are
# appended to the liberal JSONL file and ids already stored there are skipped.
generate_args(ideology="liberal")

Using custom data configuration notaphoenix--debateorg_w_effect_for_liberal-1efe322430b6ce3a
Found cached dataset parquet (/home/elba_ro/.cache/huggingface/datasets/notaphoenix___parquet/notaphoenix--debateorg_w_effect_for_liberal-1efe322430b6ce3a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Loading cached processed dataset at /home/elba_ro/.cache/huggingface/datasets/notaphoenix___parquet/notaphoenix--debateorg_w_effect_for_liberal-1efe322430b6ce3a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-046e1b0d5f29e6aa.arrow
Loading cached shuffled indices for dataset at /home/elba_ro/.cache/huggingface/datasets/notaphoenix___parquet/notaphoenix--debateorg_w_effect_for_liberal-1efe322430b6ce3a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-8558916e1ff496ae.arrow
Loading cached processed dataset at /home/elba_ro/.cache/huggingface/datasets/notaphoenix___parquet/notaphoenix--debateorg_w_effect_for_liberal-1ef

Return dataset notaphoenix/debateorg_w_effect_for_liberal with 500 
filtering out existing indices (500)


  0%|          | 0/1 [00:00<?, ?ba/s]

0 to go...


0it [00:00, ?it/s]


In [23]:
# Generate (or resume) the rewrites for the conservative dataset; records are
# appended to the conservative JSONL file and ids already stored there are skipped.
generate_args(ideology="conservative")

Using custom data configuration notaphoenix--debateorg_w_effect_for_conservative-8855d3de38b65ed6
Found cached dataset parquet (/home/elba_ro/.cache/huggingface/datasets/notaphoenix___parquet/notaphoenix--debateorg_w_effect_for_conservative-8855d3de38b65ed6/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/6 [00:00<?, ?ba/s]

Return dataset notaphoenix/debateorg_w_effect_for_conservative with 500 


  0%|          | 0/500 [00:00<?, ?ex/s]

  1%|          | 3/500 [02:56<7:17:47, 52.85s/it] 

In [19]:
# Display the first raw record of the liberal test split to inspect its fields.
load_dataset("notaphoenix/debateorg_w_effect_for_liberal", split="test")[0]

Using custom data configuration notaphoenix--debateorg_w_effect_for_liberal-1efe322430b6ce3a
Found cached dataset parquet (/home/elba_ro/.cache/huggingface/datasets/notaphoenix___parquet/notaphoenix--debateorg_w_effect_for_liberal-1efe322430b6ce3a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


{'text': "Con Now I know full good and well that most of you reading this debate actually believe that .999... is equal to 1. I've seen a lot of Debates on the issue and frankly Con has generally done a horrible job supporting the position,not to say Pro is correct. There is no question in my mind that 1 and .999... recurring forever is not equal but entirely two different numbers and I will help you understand why. .999... = 1 is a false statement. Definitions .999... refers to .9 with recurring nines 1 refers to the real number, 1 = means is exactly equal to .999... is actually equal to .999... not one. Most people that contend 1 is = to .999... usually provide the below mathematical proof. Step 1) Let x = .999... Step 2) 10x = 9.999... (multiplying RHS and LHS by 10) Step 3) 10x - x = 9.999... - x (subtracting x from both sides) Step 4) 9x = 9 Step 5) x = 1 Conclusion .999... = 1 HOWEVER if you will notice step 4 is incorrect. It refers to 9x=9. This is wrong it actually equals 9x=8

In [None]:
# For every prompt variant, print its name and prompt text, then render the
# Markdown output of Redlines(original argument, rewrite).
# NOTE(review): result_dict, ineffective_argument, Redlines and Markdown are not
# defined or imported in any cell visible here — confirm where they come from
# before running this on a fresh kernel.
for k, v in result_dict.items():
    print(f"\n{k} - {prompt_dict[k]}")
    diff = Redlines(ineffective_argument,v)
    display(Markdown(diff.output_markdown))

In [None]:
import itertools

# Compare the rewrites of every unordered pair of distinct prompt variants.
# combinations() yields exactly the pairs the earlier
# combinations_with_replacement() + "skip equal pairs" produced, in the same order.
# NOTE(review): result_dict, Redlines and Markdown are not defined or imported in
# any cell visible here — confirm they exist before running on a fresh kernel.
for ptype1, ptype2 in itertools.combinations(prompt_dict.keys(), 2):
    print(f"\n{ptype1} VS. {ptype2}")
    print(len(f"{ptype1} VS. {ptype2}")*"-")  # underline as wide as the heading

    diff = Redlines(result_dict[ptype1],result_dict[ptype2])
    display(Markdown(diff.output_markdown))

In [None]:
# For every prompt variant, print its name and prompt text, then render the
# Markdown output of Redlines(original argument, rewrite).
# A stray duplicated `for` header with no body made this cell a syntax error;
# it has been removed.
# NOTE(review): result_dict, ineffective_argument, Redlines and Markdown are not
# defined or imported in any cell visible here — confirm they exist before
# running on a fresh kernel.
for k, v in result_dict.items():
    print(f"\n{k} - {prompt_dict[k]}")
    diff = Redlines(ineffective_argument,v)
    display(Markdown(diff.output_markdown))