In [None]:
# Work from the sibling `src` directory; the relative `../data/...` paths used
# later in this notebook are resolved from there.
# NOTE(review): presumably this is also what makes `import credibility` resolvable - confirm.
%cd ../src
# Load the autoreload extension and reload modules before each cell execution,
# so edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import requests
requests.packages.urllib3.disable_warnings() 

import pandas as pd
import numpy as np

import credibility

# Qualitative evaluation

### Brad Pitt marrying Britney Spears

In [None]:
# Affirmative phrasing of the claim: investigate it with model_type='zero-shot'
# and display the resulting credibility score as the cell output.
claim = "brad pitt is to marry with britney spears"
evidence = credibility.investigate_claim(claim, model_type='zero-shot')
evidence['credibility_score']

In [None]:
# Negated phrasing about the same two people, scored the same way so the two
# credibility scores can be compared.
claim = "brad pitt is not married to britney spears"
evidence = credibility.investigate_claim(claim, model_type='zero-shot')
evidence['credibility_score']

### Joe Biden taking home classified documents

In [None]:
# Affirmative phrasing of the claim: investigate it with model_type='zero-shot'
# and display the resulting credibility score as the cell output.
claim = "Joe Biden took home classified documents after leaving the vice-presidency"
evidence = credibility.investigate_claim(claim, model_type='zero-shot')
evidence['credibility_score']

In [None]:
# Negated phrasing of the same claim, scored the same way so the two
# credibility scores can be compared.
claim = "Joe Biden never took home any classified documents after leaving the vice-presidency"
evidence = credibility.investigate_claim(claim, model_type='zero-shot')
evidence['credibility_score']

### COVID-19 vaccine and (in)fertility

In [None]:
# Affirmative phrasing of the claim: investigate it with model_type='zero-shot'
# and display the resulting credibility score as the cell output.
claim = "COVID-19 vaccine causes infertility"
evidence = credibility.investigate_claim(claim, model_type='zero-shot')
evidence['credibility_score']

In [None]:
# Negated phrasing of the same claim, scored the same way so the two
# credibility scores can be compared.
claim = "COVID-19 vaccine does not cause infertility"
evidence = credibility.investigate_claim(claim, model_type='zero-shot')
evidence['credibility_score']

### Jan Smit meeting the US president (fake, but no relevant evidence)

In [None]:
# Claim described in the section header as fake with no relevant evidence
# available; shows the score the zero-shot setting produces in that situation.
claim = "jan smit has met with US president in 2012"
evidence = credibility.investigate_claim(claim, model_type='zero-shot')
evidence['credibility_score']

# Apply model on real tweets

### Manual tweet example #1

In [None]:
# Tweet text is passed as-is, including the leading @-mention.
# NOTE(review): presumably investigate_claim cleans the input itself - confirm.
tweet = "@ianbremmer Russia also has troops on Moldova's border with Ukraine."
evidence = credibility.investigate_claim(tweet, model_type='zero-shot')
evidence['credibility_score']

In [None]:
# Experiment: how does rephrasing as a question impact results?
# Same subject as the previous tweet (Russian troops on the Moldova border),
# rewritten as a lowercase question and scored with the same settings.
tweet = "does russia have troops on moldova border?"
evidence = credibility.investigate_claim(tweet, model_type='zero-shot')
evidence['credibility_score']

### Filter opinions

In [None]:
# Opinionated tweet for the "Filter opinions" section. The cell ends with an
# assignment, so no credibility score is displayed; only output produced by
# investigate_claim itself (if any) is shown.
# NOTE(review): presumably this relies on investigate_claim's default opinion
# filtering - confirm.
tweet = "Ukraine already is a puppet government. The US started a puppet regime, now the Russians might install their own to secure their border. I feel bad for the civilians caught between them"
evidence = credibility.investigate_claim(tweet, model_type='zero-shot')

In [None]:
# Second opinionated tweet; as in the previous cell, nothing is displayed
# because the cell ends with an assignment.
# NOTE(review): the tweet text appears twice in this string, joined by a comma
# right after the t.co link - this looks like two CSV columns pasted together.
# Confirm whether the duplication is intended before relying on the result.
tweet = "Ukraine in NATO is how we get to WWIII. I can't see how that ever works when Russia controls Crimea. I think Ukraine would have to agree to give up Crimea and possibly other areas and redraw its eastern border before it could happen. https://t.co/sptiD5yxbi,Ukraine in NATO is how we get to WWIII. I can't see how that ever works when Russia controls Crimea. I think Ukraine would have to agree to give up Crimea and possibly other areas and redraw its eastern border before it could happen."
evidence = credibility.investigate_claim(tweet, model_type='zero-shot')

### Questionable news from Russian sources

Example of questionable news reported by a Russian press service and Russian media channels. Ukraine denied this report. Note: Russian news sources might be filtered from search results in Poland.

In [None]:
# Statement attributed in its own text to the Southern Military District;
# scored with model_type='zero-shot' and the credibility score is displayed.
tweet = "The Russian military destroyed two infantry fighting vehicles of the Ukrainian army, which crossed the Russian border from Ukraine for the emergency evacuation of the Ukrainian sabotage group, reports the Southern Military District."
evidence = credibility.investigate_claim(tweet, model_type='zero-shot')
evidence['credibility_score']

### Breitbart news article

In [None]:
# A news headline (not a tweet) is scored here; the headline text is passed
# unchanged, including its typographic quotes.
news = 'WWIII Watch: E.U. Warns China Arming Russia Would Cross ‘Red Line’'
evidence = credibility.investigate_claim(news, model_type='zero-shot')
evidence['credibility_score']

# Quantitative evaluation

In [None]:
# https://www.euronews.com/my-europe/2022/08/24/ukraine-war-five-of-the-most-viral-misinformation-posts-and-false-claims-since-the-conflic

In [None]:
# Keywords to look for fake tweets: 'special military operation', 'nazi'

In [None]:
# Location of the labelled test set that this notebook writes (tab-separated)
# and later reloads for evaluation.
test_set_path = '../data/tide_test_data.csv'
# Source CSV of tweets from which the test rows are selected.
border_path = '../data/tide/Russian_border_Ukraine.csv'

In [None]:
# Load the source tweets with the default integer index; test rows are later
# selected from the `renderedContent` column by index label.
border_data = pd.read_csv(border_path)

In [None]:
# Row labels in `border_data` (the Russian-border/Ukraine tweet file), grouped
# by category. `border_fake` rows get is_true_label = 0 and `border_real` rows
# get is_true_label = 1 when the test set is built.
# NOTE(review): the categories were presumably assigned by hand - confirm provenance.
border_fake = [25598, 8574, 43431, 7007, 18274, 20443, 12586]
border_real = [20131, 44508, 41529, 36136, 7478, 9307, 19483]
# Not referenced by any other cell visible in this notebook section.
border_opinion = [1611, 43289]

In [None]:
# Pick the hand-labelled tweets by row label and tag each subset with its
# ground-truth label (0 = fake, 1 = real).
fake_df = border_data.loc[border_fake, ['renderedContent']].assign(is_true_label=0)
real_df = border_data.loc[border_real, ['renderedContent']].assign(is_true_label=1)

# Stack both subsets, expose the original row label as an `id` column and
# rename the tweet column to `text`.
test_df = (
    pd.concat([fake_df, real_df])
    .reset_index(names='id')
    .rename(columns={'renderedContent': 'text'})
)

# Persist as a tab-separated file, without the positional index.
test_df.to_csv(test_set_path, sep='\t', index=False)

In [None]:
# Preview the test set in random order, indexed by id. Display only: both
# calls return new frames, so `test_df` itself is left unchanged.
test_df.set_index('id').sample(frac=1)

In [None]:
# Evaluate the credibility pipeline on the labelled test set, resumably.
#
# Results are written to `results_path` after every processed row, so an
# interrupted run can be restarted and will skip ids that were already scored.

# Reload
test_df = pd.read_csv(test_set_path, sep='\t')

results_path = '../data/tide_test_results.csv'

# Only a missing or empty results file means "no previous run". Any other read
# error (e.g. a corrupt file) must surface instead of silently restarting from
# scratch and overwriting earlier results.
try:
    results_df = pd.read_csv(results_path, sep='\t')
except (FileNotFoundError, pd.errors.EmptyDataError):
    results_df = pd.DataFrame()

# Ids scored in a previous run; a set gives cheap membership tests.
prev_ids = set(results_df['id'].tolist()) if len(results_df) > 0 else set()

# Shuffled test
for i, (_, row) in enumerate(test_df.sample(frac=1).iterrows()):
    if row.id in prev_ids:
        print('Skipping id ', row.id)
        continue

    print(f'Row {i+1} out of {len(test_df)}')

    txt = row.text
    # The cleaned text is only printed; the raw text is what gets investigated.
    cleaned_txt = credibility.clean_input(txt)
    print(f'Input: {cleaned_txt}')

    result = credibility.investigate_claim(txt, filter_opinions=False, datasource='google')
    num_evidence = len(result['credibility_evidences'])
    score = result['credibility_score']
    print('Num evidence: ', num_evidence)

    # Build the result from a plain dict so column dtypes are inferred
    # (transposing the row Series would make every column object-typed).
    record = row.to_dict()
    record['num_evidence'] = num_evidence
    record['credibility_score'] = score
    record['is_true_pred'] = int(score > 0.5)  # scores above 0.5 count as "true"
    print('True label: ', record['is_true_label'])
    print('Credibility score: ', score)

    # DataFrame.append was removed in pandas 2.0; use concat. Concatenating
    # with an empty frame is avoided because it can alter the inferred dtypes.
    new_row = pd.DataFrame([record])
    if results_df.empty:
        results_df = new_row
    else:
        results_df = pd.concat([results_df, new_row], ignore_index=True)
    results_df = results_df.drop_duplicates(subset='id', keep='last')

    # Rewrite the whole file after each row so progress survives interruptions.
    results_df.to_csv(results_path, mode='w', index=False, sep='\t', header=True)

    # Running accuracy over every stored result, including rows loaded from a
    # previous run. Slicing by the loop counter would pick an arbitrary subset
    # whenever the run was resumed.
    acc = (results_df['is_true_label'] == results_df['is_true_pred']).mean()
    print(f'Computed accuracy over {len(results_df)} evaluated rows: {acc}\n')