In [1]:
%load_ext autoreload
%autoreload 2
import os
from functools import partial
from dataclasses import dataclass
from typing import Optional, Callable
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix
from pmf import PoissonMF

# Load & Process Data

In [2]:
data_path = '/scratch/sm2537/data/03_13_24'
notes = pd.read_csv(os.path.join(data_path, 'notes-00000.tsv'), sep='\t')
# Convert NaN to empty string
notes['summary'] = notes['summary'].astype(str).fillna('').str.strip()

# read in ratings from 'ratings-00000.tsv' to 'ratings-00007.tsv'
# and concatenate them into a single DataFrame
for i in range(8):
    print(i)
    filepath = os.path.join(data_path, f'ratings-0000{i}.tsv')
    if i == 0:
        ratings = pd.read_csv(filepath, sep='\t')
    else:
        ratings = pd.concat([ratings, pd.read_csv(filepath, sep='\t')])
# ratings = pd.concat(
#     pd.read_csv(os.path.join(data_path, f'ratings-0000{i}.tsv'), sep='\t')
#     for i in range(8)
# )
#ratings = pd.read_csv(os.path.join(data_path, 'ratings-00000.tsv'), sep='\t')

# Drop rows with NaN in helpfulnessLevel column
ratings = ratings.dropna(subset=['helpfulnessLevel'])

  notes = pd.read_csv(os.path.join(data_path, 'notes-00000.tsv'), sep='\t')


0
1
2
3
4
5
6
7


In [3]:
# Print total number of ratings
print('Total number of ratings: {}'.format(len(ratings)))

# Print number of unique notes and raters
print('Number of unique notes: {}'.format(ratings['noteId'].nunique()))
print('Number of unique raters: {}'.format(ratings['raterParticipantId'].nunique()))

# Get list of notes with more than 5 ratings
note_rating_counts = ratings['noteId'].value_counts()
filtered_note_ids = note_rating_counts[note_rating_counts > 5].index.tolist()
print('Number of notes with more than 5 ratings: {}'.format(len(notes)))

# Get list of raters with more than 10 ratings
rater_counts = ratings['raterParticipantId'].value_counts()
filtered_rater_ids = rater_counts[rater_counts > 10].index.tolist()
print('Number of raters with more than 10 ratings: {}'.format(len(filtered_rater_ids)))

# Filter ratings to only include ratings rated by raters with more than 10 ratings and for notes with more than 5 ratings
ratings = ratings[ratings['raterParticipantId'].isin(filtered_rater_ids) & ratings['noteId'].isin(filtered_note_ids)]
print('Number of ratings after filtering: {}'.format(len(ratings)))

Total number of ratings: 39890629
Number of unique notes: 645763
Number of unique raters: 475794
Number of notes with more than 5 ratings: 659138
Number of raters with more than 10 ratings: 324535
Number of ratings after filtering: 38897884


In [4]:
# Convert the ratings matrix to three lists:
# - rating_labels, which is the 'helpfulnessLevel' column mapped to -1 for 'NOT_HELPFUL',
#   0 for 'SOMEWHAT_HELPFUL', and 1 for 'HELPFUL'
# - user_idxs, which is the 'raterParticipantId' column mapped to a unique integer
# - note_idxs, which is the 'noteId' column mapped to a unique integer
rating_labels = ratings['helpfulnessLevel'].map({'NOT_HELPFUL': -1, 'SOMEWHAT_HELPFUL': 0, 'HELPFUL': 1})
# Use a label encoder to map the user and note ids to unique integers
user_encoder = LabelEncoder()
note_encoder = LabelEncoder()
user_idxs = user_encoder.fit_transform(ratings['raterParticipantId'])
note_idxs = note_encoder.fit_transform(ratings['noteId'])

n_users = len(user_encoder.classes_)
n_notes = len(note_encoder.classes_)

# Sparse exposure matrix (did the user rate the note?)
exp_matrix = csr_matrix((np.ones_like(rating_labels), (user_idxs, note_idxs)), shape=(n_users, n_notes))

# Step 1a: Causal Inference, Exposure Model
Fit Poisson matrix factorization to the exposures/assignments (who rated what). We will then use the reconstructed exposures as substitute confounders.

In [5]:
pf = PoissonMF(n_components=4, random_state=1, verbose=True, a=0.3, b=0.3, c=0.3, d=0.3)
pf.fit(exp_matrix, user_idxs, note_idxs)

In [6]:
# Latent representations learned by Poisson MF
exp_user_factors, exp_item_factors = pf.Eb, pf.Et.T

# Step 1b: Causal Inference, Outcome Model
Now estimate the outcome model, i.e., matrix factorization on the observed ratings while controlling for the substitute confounders estimated from Step 1a.

In [7]:
%load_ext autoreload
%autoreload 2
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch import nn
from mf import MatrixFactorizationModel, ModelData

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# Our full model that deconfounds with the substitute confounder from step 1a
deconf_mf_model = MatrixFactorizationModel(
    n_users, n_notes, 
    exp_user_factors=exp_user_factors, exp_item_factors=exp_item_factors,
    n_components=4)

# Regular matrix factorization without deconfounding
mf_model = MatrixFactorizationModel(n_users, n_notes, n_components=20)

rating_tensor = torch.FloatTensor(rating_labels.values).to(deconf_mf_model.device)
user_idxs_tensor = torch.LongTensor(user_idxs).to(deconf_mf_model.device)
note_idxs_tensor = torch.LongTensor(note_idxs).to(deconf_mf_model.device)
exp_tensor = torch.ones_like(rating_tensor).to(deconf_mf_model.device)

data = ModelData(rating_tensor, user_idxs_tensor, note_idxs_tensor, exp_tensor)

In [21]:
train_loss, val_loss = deconf_mf_model.fit(data, epochs=150, lr=0.1, print_interval=20, validate_fraction=0.1, print_loss=True)

Epoch 0: train L2-reg loss = 0.478, val L2-reg loss = 0.921
Epoch 0: train MSE = 0.920, val MSE = 0.920
Epoch 20: train L2-reg loss = 0.218, val L2-reg loss = 0.440
Epoch 20: train MSE = 0.356, val MSE = 0.404
Epoch 40: train L2-reg loss = 0.189, val L2-reg loss = 0.388
Epoch 40: train MSE = 0.294, val MSE = 0.347
Epoch 60: train L2-reg loss = 0.184, val L2-reg loss = 0.379
Epoch 60: train MSE = 0.287, val MSE = 0.339
Epoch 80: train L2-reg loss = 0.183, val L2-reg loss = 0.377
Epoch 80: train MSE = 0.285, val MSE = 0.336
Epoch 100: train L2-reg loss = 0.183, val L2-reg loss = 0.376
Epoch 100: train MSE = 0.285, val MSE = 0.336
Epoch 120: train L2-reg loss = 0.183, val L2-reg loss = 0.376
Epoch 120: train MSE = 0.285, val MSE = 0.335
Epoch 140: train L2-reg loss = 0.183, val L2-reg loss = 0.376
Epoch 140: train MSE = 0.285, val MSE = 0.335


In [22]:
train_loss, val_loss = mf_model.fit(data, epochs=150, lr=0.1, print_interval=20, validate_fraction=0.1, print_loss=True)

Epoch 0: train L2-reg loss = 0.478, val L2-reg loss = 0.922
Epoch 0: train MSE = 0.920, val MSE = 0.921
Epoch 20: train L2-reg loss = 0.176, val L2-reg loss = 0.406
Epoch 20: train MSE = 0.249, val MSE = 0.395
Epoch 40: train L2-reg loss = 0.153, val L2-reg loss = 0.382
Epoch 40: train MSE = 0.212, val MSE = 0.370
Epoch 60: train L2-reg loss = 0.147, val L2-reg loss = 0.378
Epoch 60: train MSE = 0.204, val MSE = 0.367
Epoch 80: train L2-reg loss = 0.146, val L2-reg loss = 0.376
Epoch 80: train MSE = 0.202, val MSE = 0.365
Epoch 100: train L2-reg loss = 0.145, val L2-reg loss = 0.376
Epoch 100: train MSE = 0.200, val MSE = 0.364
Epoch 120: train L2-reg loss = 0.145, val L2-reg loss = 0.375
Epoch 120: train MSE = 0.200, val MSE = 0.364
Epoch 140: train L2-reg loss = 0.145, val L2-reg loss = 0.375
Epoch 140: train MSE = 0.199, val MSE = 0.364


# Step 2: Voting Aggregation
Calculate results for different voting aggregation rules.

In [23]:
pd.set_option('display.max_colwidth', 1000)

In [24]:
# Define aggregations
def approval(x, dim, threshold=0.7):
    return (x > threshold).float().mean(dim=dim)
quantile = partial(torch.quantile, q=0.25)

# Collect aggregations into dict
filtered_notes = notes[notes['noteId'].isin(filtered_note_ids)]
note_ids = note_encoder.inverse_transform(np.arange(n_notes))
aggs = {'noteId': note_ids}

# Aggregations with deconfounder model
aggs['mean'] = mf_model.get_vote_scores(torch.mean)
aggs['approval'] = mf_model.get_vote_scores(approval)
aggs['quantile'] = mf_model.get_vote_scores(quantile)
#aggs['var'] = mf_model.get_vote_scores(torch.var)

# Aggregations with deconfounder mf model
aggs['decon_mean'] = deconf_mf_model.get_vote_scores(torch.mean)
aggs['decon_approval'] = deconf_mf_model.get_vote_scores(approval)
aggs['decon_quantile'] = deconf_mf_model.get_vote_scores(quantile)
#aggs['var'] = deconf_mf_model.get_vote_scores(torch.var)

In [25]:
aggs = {k: v.cpu().numpy() for k, v in aggs.items() if k != 'noteId'}


In [26]:
aggs['noteId'] = note_ids

In [27]:
note_results = pd.DataFrame(aggs)
scored_notes = filtered_notes.merge(note_results, on='noteId')

In [28]:
scored_notes

Unnamed: 0,noteId,noteAuthorParticipantId,createdAtMillis,tweetId,classification,believable,harmful,validationDifficulty,misleadingOther,misleadingFactualError,...,notMisleadingPersonalOpinion,trustworthySources,summary,isMediaNote,mean,approval,quantile,decon_mean,decon_approval,decon_quantile
0,1537145358521839617,5684B38EB58FD8BE75ABA37F0BE040EC70380B002ADF9DBDE2415CB32782EAEE,1655318986910,1536848327979016193,NOT_MISLEADING,,,,0,0,...,1,0,They are expressing a personal opinion in a straightforward manner. This should not need a note.,0,-0.083510,0.014843,-0.285247,0.126846,0.032816,-0.094989
1,1537147343715282945,5684B38EB58FD8BE75ABA37F0BE040EC70380B002ADF9DBDE2415CB32782EAEE,1655319460217,1537080831751102467,MISINFORMED_OR_POTENTIALLY_MISLEADING,BELIEVABLE_BY_MANY,LITTLE_HARM,EASY,0,0,...,0,1,Teslas purchased after 12/31/19 are not eligible for US Federal tax credits because they exceeded the initial 200K eligible cars allowed a credit. States like CA have their own programs that issue rebates or credits but many have ended as well due to higher Tesla prices. https://cleanvehiclerebate.org/en/faqs/can-i-apply-rebate-my-tesla-model-3-or-tesla-model-y,0,0.054070,0.005941,-0.051300,0.290401,0.038363,0.139926
2,1540422295029551104,5684B38EB58FD8BE75ABA37F0BE040EC70380B002ADF9DBDE2415CB32782EAEE,1656100269455,1540087463099736065,MISINFORMED_OR_POTENTIALLY_MISLEADING,BELIEVABLE_BY_MANY,CONSIDERABLE_HARM,EASY,0,1,...,0,1,"The Committee has been found by numerous courts to be constitutional &amp; is not losing the interest of the American people. CNN reports &lt; 20M viewers tuned in to Thursday's presentation about the Jan 6 attack, and it reached a far larger number through social and others. https://www.cnn.com/2022/06/10/media/ratings-january-6-hearings/index.html",0,0.019984,0.000308,-0.062386,0.231902,0.000089,0.118905
3,1586769867381669889,5684B38EB58FD8BE75ABA37F0BE040EC70380B002ADF9DBDE2415CB32782EAEE,1667150391800,1586411168880807936,NOT_MISLEADING,,,,0,0,...,0,1,"Clinton is not alone in this claim, the F.B.I. has repeatedly said that extremist violence from right-wing actors is one of the biggest threats confronting the bureau. https://www.nytimes.com/2022/08/13/nyregion/right-wing-rhetoric-threats-violence.html Paul Pelosi’s attacker has written many racist, antisemitic and pro-Trump blog entries. https://www.washingtonpost.com/politics/2022/10/29/paul-pelosi-attack-republicans-target/",0,0.005089,0.017000,-0.130523,0.071395,0.062850,-0.210595
4,1599066819402162177,5684B38EB58FD8BE75ABA37F0BE040EC70380B002ADF9DBDE2415CB32782EAEE,1670082213627,1598827733072560129,NOT_MISLEADING,,,,0,0,...,1,0,"This tweet is a personal opinion, presumably about reporter Matt Taibbi’s release of previously confidential Twitter internal company communications as provided by Elon Musk.",0,0.056983,0.023523,-0.109443,0.393760,0.124310,0.223009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486072,1760936161583726863,BAAA2E8419D100B1ECC54C23789AC50686286EEEA716961E0025D2673A37DA08,1708674872191,1760643652605403262,MISINFORMED_OR_POTENTIALLY_MISLEADING,,,,1,0,...,0,1,"This is a WEB 3.0 (Crypto, nft) ad. Be warned that this could be a scam. Also this might be not licensed by Lego themselves. https://consumer.ftc.gov/articles/what-know-about-cryptocurrency-and-scams",0,0.239728,0.032486,0.081998,0.594134,0.408902,0.442170
486073,1763371957658038393,404E58567F61AE2C8B6174296F6A37E4B63A5284701B330176BCA1B274E215CB,1709255611229,1763147168221675865,MISINFORMED_OR_POTENTIALLY_MISLEADING,,,,0,1,...,0,1,Muneeb farooq is big tout of undemocratic forces and always praising wrongdoing of illegal PDM government and his masters. https://twitter.com/AlyMohsin/status/1763282619846799416?t=vC7UzcxD0R4W8VAf-pBlkw&amp;s=19,0,-0.043264,0.016541,-0.223953,0.001515,0.056761,-0.276330
486074,1762555312374989009,5F8ED34FAA6F658BF7BE6179524AB2AC3165C3DBB8ACAC7B66A827D7023208F8,1709060907821,1762311625980211640,NOT_MISLEADING,,,,0,0,...,0,1,"NNN. The suggested note refers to GPT-4. However, the image indicates that as of one hour ago, GPT-3.5 produced an identical response when subjected to the same test.",0,0.107130,0.013428,-0.024510,0.357544,0.158088,0.137478
486075,1767186539766002115,C77F7D961A69AECEEA7BE945C8B860A9C99532CBD58E20E4FFF4EEDCBC09A2A7,1710165078474,1766896413659533588,MISINFORMED_OR_POTENTIALLY_MISLEADING,,,,0,0,...,0,1,NNN- Go farm your writing points somewhere else https://simple.m.wikipedia.org/wiki/Farming,0,-0.045980,0.001159,-0.167217,-0.122422,0.002644,-0.294485


# Evaluation
As a first-pass for evaluation, compare the models (with and without causal inference) in how well they agree with the current Community Notes algorithm. Obviously, eventually we would like to show that we do better than the existing algorithm in some way, so we will need different evaluations down the line, but this is just a quick first pass.

The deconfounded model (that uses causal inference in stage 1) does significantly better in matching the existing algorithm's outputs than a baseline of matrix factorization + voting.

In [29]:
note_status_df = pd.read_csv(os.path.join(data_path, 'noteStatusHistory-00000.tsv'), sep='\t')

  note_status_df = pd.read_csv(os.path.join(data_path, 'noteStatusHistory-00000.tsv'), sep='\t')


In [40]:
merged_notes = scored_notes.merge(note_status_df, on='noteId')
misleading_notes = merged_notes[merged_notes['classification'] == 'MISINFORMED_OR_POTENTIALLY_MISLEADING']
misleading_notes

Unnamed: 0,noteId,noteAuthorParticipantId_x,createdAtMillis_x,tweetId,classification,believable,harmful,validationDifficulty,misleadingOther,misleadingFactualError,...,timestampMillisOfLatestNonNMRStatus,mostRecentNonNMRStatus,timestampMillisOfStatusLock,lockedStatus,timestampMillisOfRetroLock,currentCoreStatus,currentExpansionStatus,currentGroupStatus,currentDecidedBy,currentModelingGroup
1,1537147343715282945,5684B38EB58FD8BE75ABA37F0BE040EC70380B002ADF9DBDE2415CB32782EAEE,1655319460217,1537080831751102467,MISINFORMED_OR_POTENTIALLY_MISLEADING,BELIEVABLE_BY_MANY,LITTLE_HARM,EASY,0,0,...,1.655518e+12,CURRENTLY_RATED_HELPFUL,1.674003e+12,CURRENTLY_RATED_HELPFUL,1.660435e+12,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,CURRENTLY_RATED_HELPFUL,GroupModel13 (v1.1),13.0
2,1540422295029551104,5684B38EB58FD8BE75ABA37F0BE040EC70380B002ADF9DBDE2415CB32782EAEE,1656100269455,1540087463099736065,MISINFORMED_OR_POTENTIALLY_MISLEADING,BELIEVABLE_BY_MANY,CONSIDERABLE_HARM,EASY,0,1,...,,,1.674003e+12,NEEDS_MORE_RATINGS,,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,CoreModel (v1.1),13.0
6,1592778879885709312,F6D9C6C3398C95AFEDAC6D58133DBA15A17A9BF191FD599F83C9D348F28557C5,1668583052022,1592776673891581952,MISINFORMED_OR_POTENTIALLY_MISLEADING,,,,0,1,...,1.668649e+12,CURRENTLY_RATED_HELPFUL,1.674003e+12,NEEDS_MORE_RATINGS,,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,CoreModel (v1.1),13.0
7,1535128588818653184,EBE1D39152E086F5D771B669B9FCFC7709B404D15529CFD74A637CEC019EDFC0,1654838151541,1535062308426510337,MISINFORMED_OR_POTENTIALLY_MISLEADING,BELIEVABLE_BY_MANY,CONSIDERABLE_HARM,EASY,0,0,...,1.664299e+12,CURRENTLY_RATED_HELPFUL,1.674003e+12,NEEDS_MORE_RATINGS,,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,CoreModel (v1.1),13.0
8,1564988898605862912,EBE1D39152E086F5D771B669B9FCFC7709B404D15529CFD74A637CEC019EDFC0,1661957404173,1564818029388734465,MISINFORMED_OR_POTENTIALLY_MISLEADING,BELIEVABLE_BY_MANY,CONSIDERABLE_HARM,EASY,0,0,...,,,1.674003e+12,NEEDS_MORE_RATINGS,,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,CoreModel (v1.1),13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486030,1762830057058283976,029C2B4DC521BBEEA59FA45D5A49DC31C2CEBC5000883D31FD31A0124586A442,1709126412059,1762311625980211640,MISINFORMED_OR_POTENTIALLY_MISLEADING,,,,0,0,...,,,1.710337e+12,NEEDS_MORE_RATINGS,,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,,CoreModel (v1.1),
486035,1765860361159299264,029C2B4DC521BBEEA59FA45D5A49DC31C2CEBC5000883D31FD31A0124586A442,1709848892857,1765765398476337452,MISINFORMED_OR_POTENTIALLY_MISLEADING,,,,0,0,...,,,,,,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,,CoreModel (v1.1),
486040,1760936161583726863,BAAA2E8419D100B1ECC54C23789AC50686286EEEA716961E0025D2673A37DA08,1708674872191,1760643652605403262,MISINFORMED_OR_POTENTIALLY_MISLEADING,,,,1,0,...,1.708749e+12,CURRENTLY_RATED_HELPFUL,1.709885e+12,CURRENTLY_RATED_HELPFUL,,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,,CoreModel (v1.1),
486041,1763371957658038393,404E58567F61AE2C8B6174296F6A37E4B63A5284701B330176BCA1B274E215CB,1709255611229,1763147168221675865,MISINFORMED_OR_POTENTIALLY_MISLEADING,,,,0,1,...,1.709409e+12,CURRENTLY_RATED_HELPFUL,,,,,,,ExpansionPlusModel (v1.1),


In [73]:
note_status_key = 'currentStatus'
num_rated_helpful = misleading_notes[note_status_key].value_counts()['CURRENTLY_RATED_HELPFUL']
print(f'Number of notes rated helpful under existing algorithm: {num_rated_helpful}')
agg_keys = list(aggs.keys())
agg_keys.remove('noteId')
for key in agg_keys:
    helpful_notes = misleading_notes.sort_values(key, ascending=False).head(num_rated_helpful)
    num_helpful = helpful_notes[note_status_key].value_counts()['CURRENTLY_RATED_HELPFUL']
    pct_helpful = num_helpful / num_rated_helpful
    print(f'Percentage of CURRENTLY_RATED_HELPFUL notes in top {num_rated_helpful} notes using {key} aggregation: {pct_helpful:.2%}')

Number of notes rated helpful under existing algorithm: 57264
Percentage of CURRENTLY_RATED_HELPFUL notes in top 57264 notes using mean aggregation: 63.36%
Percentage of CURRENTLY_RATED_HELPFUL notes in top 57264 notes using approval aggregation: 48.72%
Percentage of CURRENTLY_RATED_HELPFUL notes in top 57264 notes using quantile aggregation: 51.41%
Percentage of CURRENTLY_RATED_HELPFUL notes in top 57264 notes using decon_mean aggregation: 76.04%
Percentage of CURRENTLY_RATED_HELPFUL notes in top 57264 notes using decon_approval aggregation: 68.01%
Percentage of CURRENTLY_RATED_HELPFUL notes in top 57264 notes using decon_quantile aggregation: 73.60%


In conclusion, the deconfounded model (that uses causal inference in stage 1) seems to do significantly better in matching the existing algorithm's outputs than a baseline of matrix factorization + voting.

## Differences with existing model

### Notes rated helpful under deconfounder model (ours) but not under existing model

In [74]:
helpful_notes = misleading_notes.sort_values('decon_mean', ascending=False).head(num_rated_helpful)
diff_notes = helpful_notes[helpful_notes[note_status_key] != 'CURRENTLY_RATED_HELPFUL']
diff_notes[['noteId', 'tweetId', 'summary', 'currentStatus', 'currentCoreStatus', 'mostRecentNonNMRStatus', 'lockedStatus', 'decon_mean']]

Unnamed: 0,noteId,tweetId,summary,currentStatus,currentCoreStatus,mostRecentNonNMRStatus,lockedStatus,decon_mean
296778,1672597699558883334,1668945951824781316,"No, las bicicletas eléctricas no se “regalan”. La imagen se corresponde con el almacén de la empresa Divvy, un servicio público de la ciudad de Chicago. El enlace de la página web lleva a un portal de estafas y robo de identidad. https://quigley.house.gov/media-center/press-releases/quigleys-mike-move-tour-highlights-divvy-bicycle-ride-sharing-system",NEEDS_MORE_RATINGS,CURRENTLY_RATED_HELPFUL,,NEEDS_MORE_RATINGS,0.746510
388181,1686480529351176192,1686462233151184896,"Cet épisode du journal « Le Parisien » était une « fausse alerte », et date de octobre 2019 XDDL n’a jamais été retrouvé à l’heure d’aujourd’hui. https://www.leparisien.fr/faits-divers/xavier-dupont-de-ligonnes-a-ete-retrouve-a-glasgow-11-10-2019-8171406.php",NEEDS_MORE_RATINGS,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,0.725028
24411,1743036484842631517,1742926149532205058,"El hilo menciona personas que no aparecen en los documentos que se hicieron públicos en la demanda de V. Giuffre, víctima del caso, sino que se basa en capturas alteradas y una lista no confirmada que se ha viralizado con anterioridad a la revelación de dichos documentos. https://www.courtlistener.com/docket/4355835/giuffre-v-maxwell/ https://www.usatoday.com/story/news/factcheck/2024/01/03/jeffrey-epstein-fact-check/72086418007/",NEEDS_MORE_RATINGS,,CURRENTLY_RATED_HELPFUL,,0.711302
467206,1676161872989413376,1676079422489890816,The cameras were in bunkers 5 miles away from ground zero with telescopic lenses allowing for the camera's to record the nuclear explosion. https://ahf.nuclearmuseum.org/ranger/tour-stop/preparing-test/ Hiroshima and Nagasaki have no fallout radiation today as the atomic bombs were detonated at an altitude of about 600m. https://k1project.columbia.edu/news/hiroshima-and-nagasaki,NEEDS_MORE_RATINGS,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,0.710889
155428,1684141375728541696,1684129692851339265,"The PM hasn't sacked her. The council &amp; taskforce is made up of business leaders. Given she has quit as CEO of NatWest, she can no longer fulfill that role. https://www.telegraph.co.uk/news/2023/07/25/alison-rose-natwest-leak-bbc-nigel-farage-coutts/ https://www.gov.uk/government/groups/energy-efficiency-taskforce#membership https://www.gov.uk/government/news/major-business-leaders-join-pms-new-business-council-to-turbocharge-economic-growth",NEEDS_MORE_RATINGS,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,0.710652
...,...,...,...,...,...,...,...,...
59712,1749898931721441369,1749784694630064489,"Скорее всего, это фейковая повестка: 1. Военный комиссар Первомайского района Владивостока — С. П. Добрев. https://primorsky.ru/events/voennye-komissariaty-primorskogo-kraya/ 2. Код 812, указанный в телефонном номере на «повестке», — код Санкт-Петербурга. https://ru.m.wikipedia.org/wiki/Телефонный_план_нумерации_России",NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,,NEEDS_MORE_RATINGS,0.426957
97485,1752036364021874958,1752014395650048471,This story was fake. President Zelensky has officially denied these internet rumors. https://www.pravda.com.ua/rus/news/2024/01/29/7439428/ https://x.com/revishvilig/status/1752032219898458395?s=20,NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,,NEEDS_MORE_RATINGS,0.426956
319832,1741593643205468164,1741549032277786628,"Commercial aviation defined as: aircraft operation involving the transport of passengers, cargo or mail for remuneration 2022 Mutiny Bay DHC-3 Otter crash 2019 PenAir Flight 3296 2019 Atlas Air Flight 3591 2013 UPS Airlines Flight 1354 2013 Rediske Air DHC-3 Otter crash https://en.wikipedia.org/wiki/List_of_fatal_accidents_and_incidents_involving_commercial_aircraft_in_the_United_States",NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,,NEEDS_MORE_RATINGS,0.426954
241806,1735386588132102178,1735340494878539849,"The individual pictured is not Hunter Biden. The photo in question, alleged to be among media leaked from Biden's iCloud account in 2020, was found to have been disseminated online since October 2018 and originally attributed to another person. https://www.snopes.com/fact-check/hunter-biden-girls-couch/?collection=464959/",NEEDS_MORE_RATINGS,NEEDS_MORE_RATINGS,,NEEDS_MORE_RATINGS,0.426920


In [77]:
diff_notes.columns

Index(['noteId', 'noteAuthorParticipantId_x', 'createdAtMillis_x', 'tweetId',
       'classification', 'believable', 'harmful', 'validationDifficulty',
       'misleadingOther', 'misleadingFactualError',
       'misleadingManipulatedMedia', 'misleadingOutdatedInformation',
       'misleadingMissingImportantContext', 'misleadingUnverifiedClaimAsFact',
       'misleadingSatire', 'notMisleadingOther',
       'notMisleadingFactuallyCorrect',
       'notMisleadingOutdatedButNotWhenWritten', 'notMisleadingClearlySatire',
       'notMisleadingPersonalOpinion', 'trustworthySources', 'summary',
       'isMediaNote', 'mean', 'approval', 'quantile', 'decon_mean',
       'decon_approval', 'decon_quantile', 'noteAuthorParticipantId_y',
       'createdAtMillis_y', 'timestampMillisOfFirstNonNMRStatus',
       'firstNonNMRStatus', 'timestampMillisOfCurrentStatus', 'currentStatus',
       'timestampMillisOfLatestNonNMRStatus', 'mostRecentNonNMRStatus',
       'timestampMillisOfStatusLock', 'lockedStatu

### Notes rated helpful under existing model but not under deconfounder model (ours)

In [76]:
notes_rated_helpful_by_deconf = misleading_notes.sort_values('decon_mean', ascending=False).head(num_rated_helpful)
helpful_note_ids_deconf = notes_rated_helpful_by_deconf['noteId'].values
notes_rated_helpful_by_existing_algo = misleading_notes[misleading_notes[note_status_key] == 'CURRENTLY_RATED_HELPFUL']
diff_notes = notes_rated_helpful_by_existing_algo[~notes_rated_helpful_by_existing_algo['noteId'].isin(helpful_note_ids_deconf)]
diff_notes = diff_notes.sort_values('decon_mean', ascending=False)
diff_notes[['noteId', 'tweetId', 'summary', 'currentStatus', 'currentCoreStatus', 'mostRecentNonNMRStatus', 'lockedStatus', 'decon_mean']].tail(500)

Unnamed: 0,noteId,tweetId,summary,currentStatus,currentCoreStatus,mostRecentNonNMRStatus,lockedStatus,decon_mean
228410,1675555310700830720,1675494663355084800,"A equipe de Zema disse que a citação foi compartilhada como um “alerta” sobre os riscos de um “Estado inchado”, não significando concordância com Mussolini. A publicação foi realizada após o ex-presidente Jair Bolsonaro ser declarado inelegível pelo TSE. https://www.poder360.com.br/brasil/zema-publica-frase-atribuida-a-mussolini/",CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,0.194604
254414,1579491050111913984,1578206898737844228,"The photograph was shot in Tehran in 1971, not 1973. Sima, the woman in the picture, had recently been hired as a flight attendant for Iran Air and was cutting the cake for a colleague’s birthday party. https://www.snopes.com/fact-check/woman-cake-iran-photo/",CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,0.194499
24091,1489312911063400456,1489277244765843459,"&quot;...women and people of color [...] have been dramatically underrepresented in the federal judiciary’s 232-year history.&quot; https://www.buzzfeednews.com/article/zoetillman/black-women-biden-supreme-court-federal-courts Thurgood Marshall, the Supreme Court's first African-American justice, was appointed in 1967 and served until 1991. https://en.wikipedia.org/wiki/Thurgood_Marshall#U.S._Supreme_Court",CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,,CURRENTLY_RATED_HELPFUL,0.194435
394327,1424756385875742727,1424735785689894914,"Per Tampa Bay Times, the alleged &quot;code&quot; was written to alter the programming of digital buttons on touch-screen voting machines. Touch screens were not contemplated before Nov 2000. It was punch-card machines that contributed to Bush/Gore election uncertainty. https://www.tampabay.com/archive/2005/04/09/blogs-spin-theories-of-computers-conspiracies/",CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,,CURRENTLY_RATED_HELPFUL,0.194311
158781,1711606242773287051,1711352775546659283,Both Israel and Ukrainian intelligence sources denied that this happened and called the assertions „lies“ https://www.newsweek.com/russia-false-flag-western-weapons-gaza-ukraine-1833115?amp=1 https://twitter.com/revishvilig/status/1711407888806064383,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,0.194287
...,...,...,...,...,...,...,...,...
125775,1357145516786798602,1357025133219700737,There is no law enforcement investigation charging “Trump supporters” with planting bombs all around congressional offices. The FBI hasn’t even identified a suspect yet. https://www.latimes.com/world-nation/story/2021-01-29/fbi-pipe-bombs-at-rnc-dnc-were-planted-night-before-riot,CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,,CURRENTLY_RATED_HELPFUL,-0.073959
132739,1402062098469949446,1401633793698676742,The Nazi and the flag of Israel is in no way the same. It is an abomination and ignorance of world history to equate the 2. https://www.britannica.com/topic/flag-of-Israel https://www.adl.org/education/references/hate-symbols/nazi-party-flag,CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,,CURRENTLY_RATED_HELPFUL,-0.109333
373609,1582881353741340673,1582743639381274625,"As the article states, these photos include everything, including the embryo, which is not discernible with the naked eye before nine weeks.",CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,-0.200706
34806,1688191666505523239,1687991382240002049,This message is fake. Actual account suspensions from OpenAI use different wording. Their community forums have multiple examples of suspension messages and they are all identical. The actual message is &quot;You do not have an account because it has been deleted or deactivated.&quot; https://community.openai.com/t/i-got-banned-you-do-not-have-an-account-because-it-has-been-deleted-or-deactivated/258921/2,CURRENTLY_RATED_HELPFUL,NEEDS_MORE_RATINGS,CURRENTLY_RATED_HELPFUL,CURRENTLY_RATED_HELPFUL,-0.247761
