In [1]:
import torch
from transformers import BertTokenizer, BertModel, pipeline

def text_embedding(data):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)

    def get_bert_embeddings(data):
        tokens = tokenizer(data.tolist(), padding=True, truncation=True, return_tensors='pt').to(device)
        with torch.no_grad():
            embeddings = bert_model(**tokens).last_hidden_state.mean(dim=1)
        return embeddings

    batch_size = 128
    num_samples = len(data)
    num_batches = (num_samples + batch_size - 1) // batch_size

    embeddings_list = []

    for i in range(num_batches):
        start_idx = i * batch_size
        end_idx = (i + 1) * batch_size
        batch_data = data.iloc[start_idx:end_idx]
        batch_embeddings = get_bert_embeddings(batch_data)
        embeddings_list.append(batch_embeddings)

    embeddings = torch.cat(embeddings_list, dim=0).cpu().numpy()
    return embeddings

2024-02-23 00:06:03.197749: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-pro",google_api_key="AIzaSyClyO_P1azrly9sScfVL3dJnKy8q7HtayU")

The political bias score of this claim is {political_bias} ranging from 0 to 1, with 0 being not biased and 1 being biased.
    The credibility score of the author of this claim is {credibility} ranging from 0 to 1, with 0 being non-credible and 1 being credible.
    The text manipulation score of this claim is {style} ranging from 0 to 1, with 0 being not manipulated and 1 being manipulated.


In [4]:
def evaluate_claim(claim, evidence, factuality_score):
    prompt = f"""please rate the veracity of the following claim on a scale from 0 to 5,
    with 0 being completely true and 5 being entirely false.
    Please ensure that the first character in your response is a single integer between 0 and 5,
    and explain your reasoning with the evidence we provided below the claim:
    {claim} \n
    Please consider the evidence most significantly.
    Here are the evidence of this claim: {evidence}
    
    For your further reference, we built models to predict some factuality factor scores of 
    political bias (with 0 being not biased and 1 being biased),
    credibility (with 0 being non-credible and 1 being credible),
    and text manipulation (with 0 being not manipulated and 1 being manipulated).
        
    The overall score of all factuality scores is {factuality_score} from 0 to 1
        
    Please consider the evidence over the factuality score.
    """

    response = llm.invoke(prompt).content

    rating = response[0]

    return response, int(rating)

In [5]:
import pandas as pd
df = pd.read_csv('test2_score.tsv', delimiter='\t')

# df = df.drop(columns = [0])
# df.rename({1: 'id', 2: 'label', 3: 'statement', 4: 'subject', 5: 'speaker', 6: 'job-title',
#            7: 'state_info', 8: 'party_affiliation', 9: 'barely_true_counts', 10: 'false_counts',
#            11: 'half_true_counts', 12: 'mostly_true_counts', 13: 'pants_on_fire_counts', 14: 'context',
#            15: 'justification'
#           }, axis = 1, inplace = True)
df

Unnamed: 0,id,label,statement,subject,speaker,job-title,state_info,party_affiliation,barely_true_counts,false_counts,half_true_counts,mostly_true_counts,pants_on_fire_counts,context,justification,Political_Bias,Sentiment,Credibility,Style
0,11972.json,0,Building a wall on the U.S.-Mexico border will...,immigration,rick-perry,Governor,Texas,republican,30,30,42,23,18,Radio interview,"Meantime, engineering experts agree the wall w...",1.0,0.0,5.0855,1.0
1,11685.json,4,Wisconsin is on pace to double the number of l...,jobs,katrina-shankland,State representative,Wisconsin,democrat,2,1,0,0,0,a news conference,She cited layoff notices received by the state...,1.0,0.0,5.2396,1.0
2,11096.json,4,Says John McCain has done nothing to help the ...,"military,veterans,voting-record",donald-trump,President-Elect,New York,republican,63,114,51,37,61,comments on ABC's This Week.,"Trump said that McCain ""has done nothing to he...",1.0,0.0,5.1900,1.0
3,5209.json,3,Suzanne Bonamici supports a plan that will cut...,"medicare,message-machine-2012,campaign-adverti...",rob-cornilles,consultant,Oregon,republican,1,1,3,1,1,a radio show,"But spending still goes up. In addition, many ...",1.0,0.0,5.3352,1.0
4,9524.json,5,When asked by a reporter whether hes at the ce...,"campaign-finance,legal-issues,campaign-adverti...",state-democratic-party-wisconsin,,Wisconsin,democrat,5,7,2,2,7,a web video,Our rating A Democratic Party web video making...,2.0,0.0,4.8916,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1262,7334.json,3,Says his budget provides the highest state fun...,education,rick-scott,Governor,Florida,republican,28,23,38,34,7,a news conference,LeMieux didn't compare Rubio and Obama on an i...,1.0,0.0,5.6260,1.0
1263,9788.json,2,Ive been here almost every day.,"civil-rights,crime,criminal-justice",jay-nixon,Governor,Missouri,democrat,2,0,0,1,0,"on ABC's ""This Week""","After making his pledge, Obama said the budget...",1.0,0.0,5.4934,1.0
1264,10710.json,2,"In the early 1980s, Sen. Edward Kennedy secret...","bipartisanship,congress,foreign-policy,history",mackubin-thomas-owens,"senior fellow, Foreign Policy Research Institute",Rhode Island,columnist,1,0,0,0,0,a commentary in The Providence Journal,Former President Clinton said government got t...,0.0,0.0,5.2986,0.0
1265,3186.json,2,Says an EPA permit languished under Strickland...,"environment,government-efficiency",john-kasich,"Governor of Ohio as of Jan. 10, 2011",Ohio,republican,9,8,10,18,3,a news conference,Points of Light has a unique mission carved ou...,1.0,0.0,5.3564,1.0


In [6]:
min_max_dict = {
    'Political_Bias': (min(df['Political_Bias']), max(df['Political_Bias'])),
    'Sentiment': (min(df['Sentiment']), max(df['Sentiment'])),
    'Credibility': (min(df['Credibility']), max(df['Credibility'])),
    'Style': (min(df['Style']), max(df['Style']))
}

# Define the poly_score function
def poly_score(row):
    for factor in ['Political_Bias', 'Credibility', 'Style']:
        min_val, max_val = min_max_dict[factor]
        if max_val - min_val == 0:  # Check for zero division
            row[factor] = 0  # or 1, depending on your preference
        else:
            row[factor] = (row[factor] - min_val) / (max_val - min_val)
    return row

# Apply the function to each row
normalized_df = df.apply(poly_score, axis=1)

In [7]:
# label_map = {'pants-fire': 5, 'false': 4, 'half-true': 3,
#              'barely-true': 2, 'mostly-true': 1, 'true': 0}

# df['label'] = df['label'].replace(label_map)

In [8]:
def get_ann(content, client):
    evidence = []
    if type(content) == str:
        content = [content]
    for text_query in content:
        query_vector = {"vector" : text_embedding(pd.Series(text_query)).tolist()[0],
                    "distance" : 1.0
        }
        results = client.query.get("test_dataset_1", ["context"]).with_additional("distance"
                    ).with_near_vector(query_vector).do()
        evidence.append([result["context"] for result in results['data']['Get']['Test_dataset_1'][:10]])
    return evidence

In [9]:
import weaviate
from IPython.display import clear_output
client = weaviate.Client(
        url = "https://testing-cluster-2qgcoz4q.weaviate.network",  # Replace with your endpoint
        auth_client_secret=weaviate.auth.AuthApiKey(api_key="qRarwGLC0CwrpQsSpK64E1V0c3HajFoAy893"),  # Replace w/ your Weaviate instance API key
    )

preds = []
labels = []
for index, row in normalized_df.iterrows():
    if index % 50 == 0:
        clear_output(wait=True)
        print(f"Running at iteration {index}")
#     try:
    evidence = get_ann(row['statement'], client)
#         political_bias, credibility, style = row['Political_Bias'], row['Credibility'], row['Style']
    factuality_score = np.mean([row['Political_Bias'], row['Credibility'], row['Style']])
    result = evaluate_claim(row['statement'], evidence, factuality_score)[1]
    preds.append(result)
    labels.append(row['label'])
#     except:
#         continue

Running at iteration 1250


In [10]:
preds

[]

In [11]:
import numpy as np
sum(np.abs(np.array(preds) - np.array(labels)) <= 1)

0

In [12]:
results_df = pd.DataFrame({'predicted': preds, 'label': labels})

results_df['predicted'] = results_df['predicted'].astype(int)
(results_df['predicted'] == results_df['label']).mean()

nan