In [1]:
# in a terminal run
# > make run-redis NS=train
# > make run-redis NS=test
# to allow access to the train and test namespaces

In [2]:
import os
import sys
import pandas as pd
import numpy as np

In [3]:
sys.path.append("..")
os.environ["USER_PATH"] = "../userdata"

In [4]:
from misc.redis import set_redis_slow_mode
from misc.util import highest_number
from model.datagenerator import create_train_test
from system.namespace.store import get_namespace

In [5]:
import torch

is_cuda = torch.cuda.is_available()
is_cuda

True

In [6]:
set_redis_slow_mode("never")
ns_test = get_namespace("test")
ns_train = get_namespace("train")
now = pd.Timestamp("2022-12-17", tz="UTC")
ttgen = create_train_test(
    train_ns=ns_train,
    train_validation_ns=ns_train,
    test_ns=ns_test,
    test_validation_ns=ns_test,
    batch_size=4 if is_cuda else 8,
    epoch_batches=5000 if is_cuda else 500,
    train_val_size=10000 if is_cuda else 1000,
    test_size=10000 if is_cuda else 1000,
    test_val_size=10000 if is_cuda else 1000,
    compute_batch_size=100 if is_cuda else 100,
    conversation_based=False,
    now=now)

In [7]:
import torch.nn as nn
from transformers import DistilBertTokenizer, DistilBertModel

In [8]:
device = torch.device("cuda") if is_cuda else torch.device("cpu")
device

device(type='cuda')

In [9]:
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

def tokens(texts):
    res = tokenizer(texts.tolist(), return_tensors="pt", padding=True, truncation=True)
    return {k: v.to(device) for k, v in res.items()}

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self._bert_parent = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self._bert_child = DistilBertModel.from_pretrained("distilbert-base-uncased")
        
    def get_parent_embed(self, input_ids, attention_mask):
        outputs_parent = self._bert_parent(input_ids=input_ids, attention_mask=attention_mask)
        return outputs_parent.last_hidden_state[:, 0]
    
    def get_child_embed(self, input_ids, attention_mask):
        outputs_child = self._bert_child(input_ids=input_ids, attention_mask=attention_mask)
        return outputs_child.last_hidden_state[:, 0]
        
    def forward(self, x):
        parent_cls = self.get_parent_embed(input_ids=x["parent"]["input_ids"], attention_mask=x["parent"]["attention_mask"])
        child_cls = self.get_child_embed(input_ids=x["child"]["input_ids"], attention_mask=x["child"]["attention_mask"])
        batch_size = parent_cls.shape[0]
        return torch.bmm(parent_cls.reshape([batch_size, 1, -1]), child_cls.reshape([batch_size, -1, 1])).reshape([-1, 1])
    
class TrainingHarness(nn.Module):
    def __init__(self, model):
        super().__init__()
        self._model = model
        self._softmax = nn.Softmax(dim=1)
        self._loss = nn.BCELoss()
        
    def forward(self, left, right, labels):
        out_left = self._model(left)
        out_right = self._model(right)
        preds = self._softmax(torch.hstack((out_left, out_right)))
        return preds, self._loss(preds, labels)

In [10]:
from torch.optim import AdamW

model = Model()
model.to(device)
harness = TrainingHarness(model)
harness.to(device)

folder = "checkpoints"
postfix = "_lg" if is_cuda else ""
mprev = highest_number(os.listdir(folder), prefix=f"harness{postfix}_", postfix=".pkl")
if mprev is not None:
    prev_fname, prev_epoch = mprev
    harness.load_state_dict(torch.load(os.path.join(folder, prev_fname), map_location=device))
    epoch_offset = prev_epoch + 1
else:
    epoch_offset = 0

optimizer = AdamW(harness.parameters(), lr=5e-5)
mprev, epoch_offset

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_transform.bias']
- T

(('harness_lg_18.pkl', 18), 19)

In [11]:
from transformers import get_scheduler
# from tqdm.notebook import tqdm
from tqdm.auto import tqdm
import evaluate

def compute(df):
    plefts = tokens(df["parent_left"])
    clefts = tokens(df["child_left"])
    prights = tokens(df["parent_right"])
    crights = tokens(df["child_right"])
    labels = torch.tensor([~df["correct_is_right"], df["correct_is_right"]], dtype=torch.float32).T.to(device)
    return harness({"parent": plefts, "child": clefts}, {"parent": prights, "child": crights}, labels)

num_epochs = max((50 if is_cuda else 10) - epoch_offset, 3)
num_training_steps = num_epochs * ttgen.get_epoch_train_size()
lr_scheduler = get_scheduler(
    name="linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps)
ttgen.set_epoch(epoch_offset)

for _ in range(num_epochs):
    epoch = ttgen.get_epoch()
    print(f"epoch {epoch}")
    
    model.train()
    harness.train()
    metric_train = evaluate.load("accuracy")
    train_loss = []
    with tqdm(desc="train", total=ttgen.get_epoch_train_size()) as progress_bar:
        for train_df in ttgen.train_dfs():
            preds, loss = compute(train_df)
            train_loss.append(loss.item())
            loss.backward()

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(train_df.shape[0])
            
            predictions = torch.argmax(preds, dim=-1)
            metric_train.add_batch(predictions=predictions, references=train_df["correct_is_right"].astype(int))

    folder = "checkpoints"
    postfix = "_lg" if is_cuda else ""
    torch.save(harness.state_dict(), os.path.join(folder, f"harness{postfix}_{epoch}.pkl"))
            
    model.eval()
    harness.eval()
    with torch.no_grad():
        metric_val_train = evaluate.load("accuracy")
        train_val_loss = []
        with tqdm(desc="train val", total=ttgen.get_epoch_train_validation_size()) as progress_bar:
            for train_validation_df in ttgen.train_validation_dfs():
                preds, loss = compute(train_validation_df)
                train_val_loss.append(loss.item())
                predictions = torch.argmax(preds, dim=-1)
                metric_val_train.add_batch(predictions=predictions, references=train_validation_df["correct_is_right"].astype(int))
                progress_bar.update(train_validation_df.shape[0])
        
        metric_test = evaluate.load("accuracy")
        test_loss = []
        with tqdm(desc="test", total=ttgen.get_epoch_test_size()) as progress_bar:
            for test_df in ttgen.test_dfs():
                preds, loss = compute(test_df)
                test_loss.append(loss.item())
                predictions = torch.argmax(preds, dim=-1)
                metric_test.add_batch(predictions=predictions, references=test_df["correct_is_right"].astype(int))
                progress_bar.update(test_df.shape[0])
        
        print(f"train: {metric_train.compute()} loss: {np.mean(train_loss)}")
        print(f"train val: {metric_val_train.compute()} loss: {np.mean(train_val_loss)}")
        print(f"test: {metric_test.compute()} loss: {np.mean(test_loss)}")
    ttgen.advance_epoch()

epoch 19


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.70515} loss: 0.49691728585479406
train val: {'accuracy': 0.7237} loss: 0.47613537827972324
test: {'accuracy': 0.6918} loss: 0.6079995319128036
epoch 20


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.70565} loss: 0.4951589522098191
train val: {'accuracy': 0.7223} loss: 0.48198627748908474
test: {'accuracy': 0.688} loss: 0.6217374253869057
epoch 21


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.71745} loss: 0.4865110945084016
train val: {'accuracy': 0.7271} loss: 0.4755089227847755
test: {'accuracy': 0.7077} loss: 0.6042299745857715
epoch 22


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.7119} loss: 0.48844690099074506
train val: {'accuracy': 0.7303} loss: 0.4892575128066819
test: {'accuracy': 0.6991} loss: 0.5828479455530643
epoch 23


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.7051} loss: 0.7950029267116423
train val: {'accuracy': 0.6548} loss: 1.5053374391078949
test: {'accuracy': 0.5461} loss: 1.27700903288126
epoch 24


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.6366} loss: 0.7552663428756939
train val: {'accuracy': 0.7139} loss: 0.5675663810410966
test: {'accuracy': 0.6493} loss: 0.6897878877162933
epoch 25


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.67225} loss: 0.573998237722479
train val: {'accuracy': 0.7208} loss: 0.50722761331177
test: {'accuracy': 0.7068} loss: 0.595474673384428
epoch 26


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.69795} loss: 0.625270451786368
train val: {'accuracy': 0.4407} loss: 4.548929134178161
test: {'accuracy': 0.6877} loss: 0.9877796171724796
epoch 27


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.5218} loss: 0.9062383192792535
train val: {'accuracy': 0.7119} loss: 0.5515488513678313
test: {'accuracy': 0.7013} loss: 0.6072057502031326
epoch 28


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.68615} loss: 0.5760485067058355
train val: {'accuracy': 0.7196} loss: 0.5018820135757327
test: {'accuracy': 0.7119} loss: 0.5958671214520931
epoch 29


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.69925} loss: 0.5339062033007853
train val: {'accuracy': 0.7211} loss: 0.4955943701907992
test: {'accuracy': 0.7121} loss: 0.5794672238260508
epoch 30


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.70755} loss: 0.5122326153045055
train val: {'accuracy': 0.7236} loss: 0.4876102487992495
test: {'accuracy': 0.6998} loss: 0.6017676570177078
epoch 31


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.7116} loss: 0.49799328824256084
train val: {'accuracy': 0.7254} loss: 0.4822487237000838
test: {'accuracy': 0.692} loss: 0.6035611449062824
epoch 32


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.7144} loss: 0.49191769065647967
train val: {'accuracy': 0.7295} loss: 0.4997046485569328
test: {'accuracy': 0.6889} loss: 0.6321652658462524
epoch 33


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.7198} loss: 0.4870216885836562
train val: {'accuracy': 0.7325} loss: 0.4746491165464744
test: {'accuracy': 0.701} loss: 0.588238082832098
epoch 34


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.7212} loss: 0.48192490099049173
train val: {'accuracy': 0.7354} loss: 0.4745048145078123
test: {'accuracy': 0.6998} loss: 0.6065252883702517
epoch 35


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.71875} loss: 0.48421624472145924
train val: {'accuracy': 0.7347} loss: 0.46808774839555845
test: {'accuracy': 0.7135} loss: 0.5990454430341721
epoch 36


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.71945} loss: 0.4785644005102862
train val: {'accuracy': 0.7331} loss: 0.4828217269738205
test: {'accuracy': 0.686} loss: 0.608249582991004
epoch 37


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.7227} loss: 0.5021944485220609
train val: {'accuracy': 0.7219} loss: 0.5847745942809154
test: {'accuracy': 0.6669} loss: 0.6252411477059125
epoch 38


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.54265} loss: 1.5835967270457652
train val: {'accuracy': 0.5992} loss: 0.5198477613091469
test: {'accuracy': 0.4664} loss: 0.689698390185833
epoch 39


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.63105} loss: 1.2409868440274243
train val: {'accuracy': 0.6352} loss: 0.5535975641765399
test: {'accuracy': 0.4764} loss: 0.7145741174936294
epoch 40


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.60295} loss: 1.596321157275048
train val: {'accuracy': 0.5631} loss: 0.5211043565632776
test: {'accuracy': 0.41} loss: 0.6908902928352356
epoch 41


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.56955} loss: 1.6620785044210447
train val: {'accuracy': 0.7168} loss: 0.5262771712680638
test: {'accuracy': 0.6024} loss: 0.6763550567865372
epoch 42


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.51785} loss: 1.769661141531673
train val: {'accuracy': 0.5559} loss: 0.5930624656796455
test: {'accuracy': 0.4736} loss: 0.6856531033754348
epoch 43


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.54945} loss: 1.1847470459933847
train val: {'accuracy': 0.6159} loss: 2.0083774125993252
test: {'accuracy': 0.5785} loss: 1.8174085436165333
epoch 44


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.60845} loss: 1.431938061015724
train val: {'accuracy': 0.5503} loss: 0.5547171267044964
test: {'accuracy': 0.385} loss: 0.6953386434078217
epoch 45


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.62765} loss: 0.9681104407848208
train val: {'accuracy': 0.7047} loss: 0.6209893222388422
test: {'accuracy': 0.6441} loss: 0.720460255330801
epoch 46


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.63135} loss: 0.7679147211750271
train val: {'accuracy': 0.6749} loss: 0.5348613887228537
test: {'accuracy': 0.4728} loss: 0.7636929397821426
epoch 47


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.65935} loss: 0.5868986843761057
train val: {'accuracy': 0.6852} loss: 0.5154829158730805
test: {'accuracy': 0.4906} loss: 0.7007791452288628
epoch 48


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.66855} loss: 0.5479690822898178
train val: {'accuracy': 0.7079} loss: 0.5002410043415846
test: {'accuracy': 0.5444} loss: 0.6942198883533478
epoch 49


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.68915} loss: 0.5289541858037468
train val: {'accuracy': 0.7221} loss: 0.48607153421379623
test: {'accuracy': 0.622} loss: 0.6535257505774498


In [12]:
folder = "."
postfix = "_lg" if is_cuda else ""
torch.save(model.state_dict(), os.path.join(folder, f"model{postfix}.pkl"))
torch.save(harness.state_dict(), os.path.join(folder, f"harness{postfix}.pkl"))
torch.save(optimizer.state_dict(), os.path.join(folder, f"optimizer{postfix}.pkl"))

In [15]:
ttgen.reset()
model.eval()
harness.eval()
dfs = []
with torch.no_grad():
    metric_val_test = evaluate.load("accuracy")
    test_val_loss = []
    with tqdm(desc="test val", total=ttgen.get_epoch_test_validation_size()) as progress_bar:
        for test_val_df in ttgen.test_validation_dfs():
            preds, loss = compute(test_val_df)
            test_val_loss.append(loss.item())
            predictions = torch.argmax(preds, dim=-1)
            metric_val_test.add_batch(predictions=predictions, references=test_val_df["correct_is_right"].astype(int))
            cur_df = test_val_df.copy()
            cur_df["logit_left"] = preds[:, 0].cpu()
            cur_df["logit_right"] = preds[:, 1].cpu()
            cur_df["preds"] = predictions.cpu()
            cur_df["truth"] = test_val_df["correct_is_right"].astype(int)
            dfs.append(cur_df)
            progress_bar.update(test_val_df.shape[0])
print(f"test val: {metric_val_test.compute()} loss: {np.mean(test_val_loss)}")
validation_df = pd.concat(dfs)

test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.6221} loss: 0.6533476588845253


In [16]:
validation_df.to_csv(os.path.join(folder, "validation.csv"))

In [17]:
validation_df[validation_df["preds"] == validation_df["truth"]].head()

Unnamed: 0,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,"community, but calling it a sit-com is only 1/...","Its lifestyle is alternative, its influence is...",Gonna ride the baloney pony all the way to Was...,I almost killed a classmate with Xanax. Freshm...,0.880797,0.119203,False,0.513282,0.486718,0,0
1,"This seems like a big deal to us, because we h...",This was my first thought. He didnt want to st...,"Men are selective, as in, they'll select from ...","The way I see it, the moment terrorism is used...",0.880797,0.119203,False,0.585005,0.414995,0,0
2,When sitting wrong on your bike seat and cutti...,This is correct. 99% of them start learning eq...,Is it the 45 degree swastika pointing clockwis...,Nazis used both orientations.,0.006693,0.993307,True,0.246855,0.753145,1,1
3,I was very confused when I watched that movie....,Go Cocks!,> Iraq is a manufactured country created by fu...,> The people of Afghanistan voted for the corr...,0.377541,0.622459,True,0.446341,0.553659,1,1
0,"""Nice tits, shame about the face""",i feel terribly jealous when i see my best-fri...,"Really, are there men who can't find the clito...",I’ve found that in the dark it’s not so easily...,0.268941,0.731059,True,0.420848,0.579152,1,1


In [18]:
validation_df[validation_df["preds"] != validation_df["truth"]].head()

Unnamed: 0,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
1,User profile checks out. Bet you're on a list ...,Dungeons and Dragons,SENSETIVE People,I’m offended,0.2689414,0.7310586,True,0.50235,0.49765,0,1
1,"In the whole time coved has been a thing, I've...",Spore,You guys are officially common law married. St...,[Not so much. ](https://www.npr.org/2016/09/04...,0.04742587,0.9525741,True,0.527454,0.472546,0,1
2,"In Spain/Portugal we also cook them ""a feira"" ...",TLDR of this thread: Your best bet to avoid CO...,RIP Slurms MacKenzie,Wimmy wam wam wozzle,4.662937e-15,1.0,True,0.502139,0.497861,0,1
1,Seinfeld,& it's not even close,covid vaccines can protect from covid,"Marie-Antoinette, Queen of France under Louis ...",1.0,5.602796e-09,False,0.347522,0.652478,1,0
2,I don't trust anyone who wants to be around kids.,So that's where it came from albeit it's still...,Buffy,She saved the world a lot.,3.059022e-07,0.9999997,True,0.614598,0.385402,0,1
