In [30]:
import openai
from tqdm import tqdm
import os
import pandas as pd
import snowflake.connector
from llmengine import Completion
from pyjarowinkler import distance

# Location of the text file whose first line holds the OpenAI API key.
# Set the OPENAI_API_KEY_FILE environment variable to point somewhere else;
# without it the original per-user path is used, so existing setups keep working.
api_key_file = os.environ.get('OPENAI_API_KEY_FILE', '/Users/vishalkumar/Documents/apikey.txt')
if os.path.isfile(api_key_file):
    with open(api_key_file) as f:
        # readline() keeps the line terminator; strip it (and stray spaces)
        # so it does not become part of the key.
        openai.api_key = f.readline().strip()
else:
    print(f"Error: {api_key_file} not found.")

# Mirror of whatever key ended up on the openai module (unchanged if the file was missing).
OPENAI_API_KEY = openai.api_key

# Log in to the Snowflake database and open a cursor for the queries below.
# Connection settings are gathered in one mapping and handed to the connector
# as keyword arguments.
_snowflake_login = {
    'user': 'vishal.kumar@scale.com',
    'account': 'pxa65918',
    'authenticator': 'externalbrowser',
    'warehouse': 'COMPUTE_WH',
    'database': 'SCALE_CRAWLER',
    'role': 'GENERAL_RO',
}
con = snowflake.connector.connect(**_snowflake_login)

cs = con.cursor()


def evaluator_gpt(prompt, model="gpt-4-0125-preview", max_tokens=512):
    """Send `prompt` as a single user message to an OpenAI chat model.

    Args:
        prompt: Full text of the user message.
        model: Chat model name; defaults to the model this notebook has always used.
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        The response object returned by ``openai.chat.completions.create``,
        unmodified. Callers read the reply text from
        ``.choices[0].message.content``.
    """
    # Uses the module-level `openai` import; the API key is set on that module
    # when the notebook's setup cell runs.
    return openai.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )


def llama_fix(prompt):
    """Run `prompt` through llama-3-70b-instruct via llmengine's Completion API.

    Returns the generated text (``output.text`` of the completion response).
    """
    generation_args = {
        "model": "llama-3-70b-instruct",
        "prompt": prompt,
        "max_new_tokens": 1024,
        "temperature": 0.1,
    }
    completion = Completion.create(**generation_args)
    generated_text = completion.output.text
    return generated_text


Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


In [32]:
# Get data from Snowflake: one row per (task, turn) holding the prompt, the
# selected model response and the edited response.
#
# Query outline (CTEs, in order):
#   task_list - numbers the attempts of each task in the project, newest
#               `attempted_at` first; `base` keeps only rn = 1.
#   base      - joins tasks, that attempt, the batch and the human-review node,
#               then flattens `ta.response:responses` into one row per element,
#               exposing its type as `step`, its `index`, and the output as
#               prompt / response / edited_response depending on the type.
#   prompts   - PromptInput / MultiTurnContinue rows with a non-empty prompt,
#               numbered per task by `index` as `turn`.
#   responses - ModelResponseSelector rows with a non-empty response, numbered
#               per task by `index` as `rn`.
#   edited    - ModelResponseEditor rows with a non-null edited_response,
#               numbered per task by `index` as `rn`.
# The final SELECT inner-joins the three on task_id and turn = rn, so a turn
# appears only when it has a prompt, a response and an edited response.
#
# NOTE(review): the string is an f-string but contains no placeholders; any
# literal `{` added to the SQL later would be treated as one.
# NOTE(review): the WHERE clause filters on v3.review_level / v3.status, which
# discards rows where the LEFT JOIN to PIPELINEV3HUMANNODES found no match -
# confirm that this is intended.
sql = f'''
with task_list as (
     select --ta.task,
     ta._id attempt
     ,row_number() over (partition by ta.task order by ta.attempted_at desc) as rn
     from scale_prod.public.taskattempts ta
     where ta.project = '66352fe26aa48684b7379b96'
     --and ta.task = '663e4b7dee7efc37538de585'
    )

,base as (
 select
    --> Attempt
    t._id AS task_id,
    ta._id attempt,
    tb.name batch,
    t.params:turn[0]:params:chat_models[0]:url::string as model,
    t.params :templateVariables :Subcategory :: string as Subcategory,
    t.params :templateVariables :Category :: string as Category,
    t.params :templateVariables :AB_ratio :: string as AB_ratio,
    t.params :templateVariables :taskID :: string as Prompt_task_id,
    t.params :templateVariables :PROMPTTYPE :: string as prompt_type,
    t.TASK_TEMPLATE_VERSION,
    v3.review_level
    
    
    ,r.value:type::string as step
    ,r.value:index as index
    
    ,CASE WHEN r.value:type = 'PromptInput' THEN r.value:output::string  
    WHEN r.value:type = 'MultiTurnContinue' THEN r.value:output::string ELSE NULL END AS prompt
    
    ,case when r.value:type = 'ModelResponseSelector' then r.value:output::string else null end as response
    ,case when r.value:type = 'ModelResponseEditor' then r.value:output::string else null end as edited_response
    
    ,r.value:context:selectedId::string as selected
    
    
FROM
    scale_prod.public.tasks t
    LEFT JOIN scale_prod.public.taskattempts ta ON ta.task = t._id 
    INNER JOIN task_list tl on ta._id = tl.attempt and rn = 1
    LEFT JOIN scale_prod.public.taskbatches tb on t.batch = tb._id
    LEFT JOIN scale_prod.public.PIPELINEV3HUMANNODES v3 on tl.attempt = v3.attempt_to_review and v3.status = 'pending'
   
    ,LATERAL FLATTEN (input => ta.response:responses, mode => 'array') r
   
WHERE
    t.project = '66352fe26aa48684b7379b96'
    AND v3.review_level in (0,1,8)
   -- AND v3.status = 'completed'
    AND t.status = 'pending'
    AND step not in ('Instruction')
    --AND t._id = '663e4b7fe78ea72f97aa7ca8'
    And v3.status != 'paused'
    
)

,prompts as (
        select Prompt_task_id, review_level, task_id, attempt,TASK_TEMPLATE_VERSION, batch, Category, Subcategory, AB_ratio,
        row_number() over (partition by task_id order by index asc) as turn,
        prompt,
        prompt_type
        from base
        where step in ('PromptInput', 'MultiTurnContinue')
        and prompt != ''
        order by task_id desc, turn asc
    )

,responses as (
        select task_id, response, selected,
        row_number() over (partition by task_id order by index asc) as rn
        from base
        where step in ('ModelResponseSelector')
        and response != ''
    )
    
,edited as (
        select task_id, edited_response, row_number() over (partition by task_id order by index asc) as rn
        from base
        where step in ('ModelResponseEditor')
        and edited_response is not null
    )
  
select 
p.Prompt_task_id,
p.task_id,
p.review_level,
p.attempt,
--p.TASK_TEMPLATE_VERSION,
p.batch,
p.category,
p.subcategory,
p.ab_ratio,
p.turn,
p.prompt_type,
p.prompt,
r.response,
e.edited_response,
r.selected

--,div0(EDITDISTANCE(r.response, e.edited_response), GREATEST(LENGTH(r.response), LENGTH(e.edited_response))) *100 as edits

from prompts p
inner join responses r on p.task_id = r.task_id and p.turn = r.rn
inner join edited e on p.task_id = e.task_id and p.turn = e.rn
where 1=1 
--and p.TASK_TEMPLATE_VERSION >= 37
order by p.task_id desc, turn asc 
'''
# Run the query on the shared cursor and pull the whole result set into the
# pandas DataFrame `idf`, which the QC cell below iterates over.
cs.execute(sql)
idf = cs.fetch_pandas_all()
# Show the column names of the result as a quick sanity check.
print(idf.columns)

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
Index(['PROMPT_TASK_ID', 'TASK_ID', 'REVIEW_LEVEL', 'ATTEMPT', 'BATCH',
       'CATEGORY', 'SUBCATEGORY', 'AB_RATIO', 'TURN', 'PROMPT_TYPE', 'PROMPT',
       'RESPONSE', 'EDITED_RESPONSE', 'SELECTED'],
      dtype='object')


In [34]:
# Prompt prefixes for the LLM-graded checks; the text under review is appended
# to each one. The wording is kept exactly as it was so results stay comparable.
prompt_concise_qc = "Analyze the text given for conciseness. If you find repetitive or unnecessary information which not critical in the meaning then say FAIL, otherwise if the text is non-repetitive say PASS. Here is the text:\n\n"
prompt_refusal_qc = "Analyse the text and tell if it is primarily a direct refusal to answer then respond with text 'REFUSAL' otherwise respond with 'NOT REFUSAL'. Here is the text:\n\n"
prompt_judge_qc = "Analyse the text given and tell if it has a very judgemental tone. If you find the text to be very judgemental then respond with 'JUDGEMENTAL' otherwise respond with 'NOT JUDGEMENTAL'. Here is the text:\n\n"

# Output column -> prompt prefix. Insertion order fixes the order in which the
# new columns are created.
llm_qc_prompts = {
    'CONCISE_QC': prompt_concise_qc,
    'REFUSAL_QC': prompt_refusal_qc,
    'JUDGEMENTAL_QC': prompt_judge_qc,
}

# Phrases that should not appear in an edited response. Each phrase is listed
# once so that a single occurrence is never counted twice.
restricted_phrases = [
    'I’m deeply concerned', 'Whoa there', 'I cannot satisfy your request', 'I am sorry',
    'let’s keep it PG', 'Can I help you', 'It’s inappropriate to', 'It is inappropriate to',
    'I cannot', 'I’m sorry', 'As a helpful assistant', 'As an AI', 'I am programmed to',
    'As a model', 'As a AI', 'I apologize', 'I would like to emphasize', 'In summary',
    'Illegal or unethical', 'Illegal and unethical', 'I am unable to', 'It’s essential to',
    'I understand',
]


def _straighten_apostrophes(text):
    """Return `text` with typographic apostrophes (U+2019) replaced by ASCII "'"."""
    return text.replace('\u2019', "'")


def _count_restricted_phrases(text, phrases=restricted_phrases):
    """Count how many distinct entries of `phrases` occur in `text`.

    Matching is a case-sensitive substring test. Both sides are compared with
    straight apostrophes so that "I'm sorry" and "I’m sorry" are treated alike,
    and phrases that become identical after that step are counted once.
    """
    haystack = _straighten_apostrophes(text)
    needles = dict.fromkeys(_straighten_apostrophes(phrase) for phrase in phrases)
    return sum(1 for needle in needles if needle in haystack)


def _ask_evaluator(prompt_prefix, text):
    """Return the reply text of `evaluator_gpt` for `prompt_prefix + text`."""
    return evaluator_gpt(prompt_prefix + text).choices[0].message.content


for index, row in tqdm(idf.iterrows(), total=idf.shape[0]):
    original = row['RESPONSE']
    edited = row['EDITED_RESPONSE']

    # Number of restricted phrases in THIS row. The counter is computed afresh
    # for every row instead of accumulating over the whole frame.
    rest_count = _count_restricted_phrases(edited)
    idf.loc[index, 'RESTRICTED_PHRASES'] = rest_count

    # LLM-graded checks; the raw model reply is stored as-is. Results are
    # written row by row so an interrupted run keeps what was already graded.
    for column, prompt_prefix in llm_qc_prompts.items():
        idf.loc[index, column] = _ask_evaluator(prompt_prefix, edited)

    # Does the response end in a question mark? Trailing whitespace is ignored
    # so that "...?\n" still counts.
    idf.loc[index, 'ENDS_WITH_QUESTION'] = "YES" if edited.rstrip().endswith('?') else "NO"

    # Jaro-Winkler similarity between the model response and its edited form.
    # pyjarowinkler raises on empty input, so an empty side yields NaN instead
    # of aborting the run.
    if original and edited:
        jw = distance.get_jaro_distance(original, edited, winkler=True, scaling=0.1)
    else:
        jw = float('nan')
    idf.loc[index, 'JAROWINKLER'] = jw


  0%|          | 14/8890 [00:39<6:59:09,  2.83s/it]

In [None]:
# Persist the annotated frame as CSV in the working directory, leaving out the
# DataFrame index.
idf.to_csv(path_or_buf='Flamingo_Safety_Auto_QA.csv', index=False)