In [1]:
import pandas as pd
import numpy as np
from glob import glob

# LIAR DATASET

In [98]:
def load_csv(part):
    columns = ["id", "label", "text", "subject", "speaker", "job", "state", "party", "int0", "int1", "int2", "int3", "int4", "context"]
    return pd.read_csv("./liar_dataset/" + part + ".tsv", sep="\t", names=columns)

In [44]:
test = load_csv("test")

In [45]:
def sort_label(label):
    if label in ["pants-fire", "false", "barely-true"]:
        return "false"
    else:
        return "true"
    
def clean_df(df):
    # 6 labels pants-fire, false, barely-true, half-true, mostly-true, true
    df["label"] = df["label"].apply(lambda label: sort_label(label))
    
    # per https://arxiv.org/pdf/1905.04749.pdf they only use text and label
    filtered_df=df.loc[:, ["id", "label", "text"]]
    
    return filtered_df

In [46]:
clean_df(test)

Unnamed: 0,id,label,text
0,11972.json,true,Building a wall on the U.S.-Mexico border will...
1,11685.json,false,Wisconsin is on pace to double the number of l...
2,11096.json,false,Says John McCain has done nothing to help the ...
3,5209.json,true,Suzanne Bonamici supports a plan that will cut...
4,9524.json,false,When asked by a reporter whether hes at the ce...
...,...,...,...
1262,7334.json,true,Says his budget provides the highest state fun...
1263,9788.json,false,Ive been here almost every day.
1264,10710.json,false,"In the early 1980s, Sen. Edward Kennedy secret..."
1265,3186.json,false,Says an EPA permit languished under Strickland...


In [40]:
for part in ["train", "test", "valid"]:
    df = load_csv(part)
    df = clean_df(df)
    df.to_csv("./liar_dataset/clean_" + part + ".csv")

## Post processing
Clean output from triple extractor

In [8]:
def bug_clean(df):
    condition = np.logical_and(df.triple.notna(), df.text.isna())
    indxs = df[condition]["Unnamed: 0"].values
    triples = df[condition]["triple"].values
    df.loc[indxs, "triple"] = triples
    df.drop(df.index[condition], inplace=True)
    return df

In [332]:
paths = glob("./liar_dataset_triples/*")

In [333]:
main_df = pd.DataFrame()
for path in paths:
    if not path.__contains__("train"):
        continue
    curr_df = pd.read_csv(path)
    curr_df = bug_clean(curr_df)
    main_df = pd.concat([main_df, curr_df])

In [334]:
main_df = main_df.drop(columns=list(set(main_df.columns.values) ^ set(["id", "label", "text", "triple"]))).drop_duplicates()

In [335]:
main_df.to_csv("./liar_dataset_triples/main_train_clean_triples.csv")

In [4]:
def percent_triples(df):
    return 1 - sum(df.triple.isna()) / df.shape[0]

def avg_word_count(df):
    tot = 0
    for sent in df.text.values:
        tot += len(sent.split(" "))
    return tot / df.shape[0]

In [5]:
paths = glob("./liar_dataset_triples/*")

In [6]:
drops = []
for indx, path in enumerate(paths):
    if path.__contains__("clean_train_triple"):
        drops.append(indx)
paths = np.delete(paths, drops)

In [9]:
for path in paths:
    name = "train"
    if path.__contains__("test"):
        name = "test"
    if path.__contains__("valid"):
        name = "valid"
    df = pd.read_csv(path)    
    df = bug_clean(df)
    p_triples = percent_triples(df)
    sent_size = avg_word_count(df)
    print(f"dataset: {name}. avg sent length: {sent_size:.2f} words. percent triples: {p_triples*100:.2f}%.")

dataset: test. avg sent length: 18.24 words. percent triples: 11.68%.
dataset: valid. avg sent length: 17.93 words. percent triples: 11.06%.
dataset: train. avg sent length: 17.97 words. percent triples: 3.39%.


In [10]:
df

Unnamed: 0.1,Unnamed: 0,id,label,text,triple
0,0,2635.json,False,Says the Annies List political group supports ...,
1,1,10540.json,True,When did the decline of coal start? It started...,
2,2,324.json,True,Hillary Clinton agrees with John McCain by vot...,
3,3,1123.json,False,Health care reform legislation is likely to ma...,
4,4,9028.json,True,The economic turnaround started at the end of ...,
...,...,...,...,...,...
10234,2434,1592.json,True,"Under the ruling of the Supreme Court, any lob...",
10235,2435,5473.json,True,There are a larger number of shark attacks in ...,
10236,2436,3408.json,True,Democrats have now become the party of the Atl...,
10237,2437,3959.json,True,Says an alternative to Social Security that op...,"[['County', 'location/hud_county_place/county'..."


In [6]:
#!pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

In [57]:
import sys
from SPARQLWrapper import SPARQLWrapper, JSON

def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def fb_to_common(freebase_id):
    endpoint_url = "https://query.wikidata.org/sparql"

    query = \
    '''SELECT ?sLabel WHERE { 
        ?s wdt:P646 "''' + freebase_id + '''".
        SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    LIMIT 1'''
    res = get_results(endpoint_url, query)
    try:
        res = res['results']['bindings'][0]['sLabel']['value']
        return res
    except:
        return "No result"

def common_to_fb(common_name):
    endpoint_url = "https://query.wikidata.org/sparql"
    
    if common_name.islower():
        common_name = common_name.capitalize() + "s"

    query = '''
    SELECT ?fbid WHERE { 
        ?s wdt:P373 "''' + common_name + '''".
    OPTIONAL {
        ?s wdt:P646 ?fbid .
        }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    LIMIT 1'''
    res = get_results(endpoint_url, query)
    try:
        res = res['results']['bindings'][0]['fbid']['value']
        return res
    except:
        return common_name + "-1"

In [235]:
train = pd.read_csv("./liar_dataset_triples/train.csv", index_col=0)
valid = pd.read_csv("./liar_dataset_triples/valid.csv", index_col=0)
test = pd.read_csv("./liar_dataset_triples/test.csv", index_col=0)

In [217]:
def clean_df(df):
    df = df.dropna()
    df.drop_duplicates(inplace=True)
    df.triple = df.triple.apply(lambda x: eval(x))
    df = df.reset_index()
    df = df[["id", "label", "triple"]]
    return df

In [248]:
def expand_df(df):
    expanded_df = pd.DataFrame(columns=["text_id", "head", "relation", "tail", "label"])
    for text_id, label, triples in df.values:
        for h, r, t in triples:
            expanded_df = expanded_df.append({
                "text_id": text_id, 
                "head": h, 
                "relation": r, 
                "tail": t, 
                "label": label
            }, ignore_index=True)
    return expanded_df

In [95]:
import requests 
import urllib
from bs4 import BeautifulSoup

# desktop user-agent
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"
# mobile user-agent
MOBILE_USER_AGENT = "Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36"

def google(query):
    query = query.replace(' ', '+')
    URL = f"https://google.com/search?q={query}&hl=en"
    
    headers = {"user-agent" : USER_AGENT}
    resp = requests.get(URL, headers=headers)
    
    if resp.status_code == 200:
        soup = BeautifulSoup(resp.content, "html.parser")
        for g in soup.find_all("div", {"class": "g"}):
            titles = g.find_all("h3")
            if titles:
                text = titles[0].text
                if "Wikipedia" in text:
                    return text[:-12]
                if "Ballotpedia" in text:
                    return text[:-14]
    if resp.status_code == 429:
        print("Blocked!")
        return False
    return query

In [99]:
from tqdm import tqdm
import time

def google_triples(df):

    # iterate list of list 
    for i, pair in enumerate(tqdm(df[["head", "tail"]].values)):
        h = google(pair[0])
        t = google(pair[-1])
        if not bool(h) and not bool(t):
            return df
        time.sleep(2.1)
        df.at[i, 'head'] = h
        df.at[i, 'tail'] = t
    return df

In [262]:
t = "Scott Walker (Singer)"

In [270]:
remove_par(t)

'Scott Walker'

In [271]:
re.sub(r'.\(.*\)', "", t)

'Scott Walker'

In [272]:
import re
def remove_par(t):
    return re.sub(r'.\(.*\)', "", t)

In [274]:
def transform(df, name):
    df = clean_df(df)
    df = expand_df(df)
    df = google_triples(df)
    df['head'] = df["head"].apply(lambda x: remove_par(x))
    df['tail'] = df["tail"].apply(lambda x: remove_par(x))
    df.to_csv("./liar_dataset_triples/" + name + ".csv")

In [275]:
train = pd.read_csv("./liar_dataset_triples/train.csv", index_col=0)
valid = pd.read_csv("./liar_dataset_triples/valid.csv", index_col=0)
test = pd.read_csv("./liar_dataset_triples/test.csv", index_col=0)

In [276]:
transform(train, "train_google")
transform(valid, "valid_google")
transform(test, "test_google")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=453.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=178.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=201.0), HTML(value='')))




In [277]:
google_train = pd.read_csv("./liar_dataset_triples/train_google.csv", index_col=0)
google_valid = pd.read_csv("./liar_dataset_triples/valid_google.csv", index_col=0)
google_test = pd.read_csv("./liar_dataset_triples/test_google.csv", index_col=0)

In [286]:
google_train

Unnamed: 0,text_id,head,relation,tail,label
0,6873.json,Barack Obama,people/person/employment_history./business/emp...,President of the United States,False
1,7976.json,Barack Obama,people/person/employment_history./business/emp...,President of the United States,True
2,2354.json,Kristi Thibaut,organization/role/leaders./organization/leader...,Acorn,True
3,2354.json,Kristi Thibaut,people/person/employment_history./business/emp...,State+Rep.,True
4,980.json,Timothy Geithner,people/person/employment_history./business/emp...,Secretary,False
...,...,...,...,...,...
448,1550.json,Perry,people/person/employment_history./business/emp...,.gov,True
449,6032.json,Stan Wise,organization/role/leaders./organization/leader...,Georgia Public Service Commission,True
450,3744.json,Barack Obama,people/person/employment_history./business/emp...,President of the United States,True
451,4388.json,Fung,people/person/employment_history./business/emp...,Mayor,False


In [292]:
o_train = clean_df(train)
o_train = expand_df(o_train)
o_valid = clean_df(valid)
o_valid = expand_df(o_valid)
o_test = clean_df(test)
o_test = expand_df(o_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [293]:
google_train["o_head"] = o_train["head"]
google_train["o_tail"] = o_train["tail"]
google_valid["o_head"] = o_valid["head"]
google_valid["o_tail"] = o_valid["tail"]
google_test["o_head"] = o_test["head"]
google_test["o_tail"] = o_test["tail"]

In [303]:
google_train.to_csv("./liar_dataset_triples/train_cleaning.csv")
google_valid.to_csv("./liar_dataset_triples/valid_cleaning.csv")
google_test.to_csv("./liar_dataset_triples/test_cleaning.csv")

TypeError: to_csv() got an unexpected keyword argument 'index_col'

In [361]:
test_fix = pd.read_csv("./liar_dataset_triples/test_fix_1.csv", sep=";", header=0,  error_bad_lines=False)

In [367]:
def add_hash(df):
    fb_hash = []
    for h in df['head'].values:
        fb = common_to_fb(h)
        if not "/m/" in fb:
            fb_hash.append(h)
        else:
            fb_hash.append(fb)
    df["fb_head"] = fb_hash
    fb_hash = []
    for t in df['tail'].values:
        fb = common_to_fb(t)
        if not "/m/" in fb:
            fb_hash.append(t)
        else:
            fb_hash.append(fb)
    df["fb_tail"] = fb_hash
    return df

In [363]:
test_fix = add_hash(test_fix)

In [368]:
valid_fix = add_hash(google_valid)

In [369]:
train_fix = add_hash(google_train)

In [370]:
train_fix.to_csv("./liar_dataset_triples/train_processed.csv")
valid_fix.to_csv("./liar_dataset_triples/valid_processed.csv")
test_fix.to_csv("./liar_dataset_triples/test_processed.csv")

In [433]:
valid_fix = pd.read_csv("./liar_dataset_triples/valid_processed.csv", index_col=0, header=0,  error_bad_lines=False)

In [380]:
test_fix = pd.read_csv("./liar_dataset_triples/test_hashed.csv", sep=";", header=0,  error_bad_lines=False)

In [435]:
valid_fix = valid_fix.replace("Gov.", "governor")
valid_fix = valid_fix.replace(".gov", "governor")
valid_fix = valid_fix.replace("Governor", "governor")

In [437]:
for i in range(valid_fix.shape[0]-1):
    if sum(test_fix['head'] == valid_fix.at[i, 'head']) > 0:
        valid_fix.at[i, 'fb_head'] = test_fix[test_fix['head'] == valid_fix.at[i, 'head']]['fb_head'].values[0]
        
    if sum(test_fix['tail'] == valid_fix.at[i, 'tail']) > 0:
        valid_fix.at[i, 'fb_tail'] = test_fix[test_fix['tail'] == valid_fix.at[i, 'tail']]['fb_tail'].values[0]

In [444]:
valid_fix.to_csv("./liar_dataset_triples/valid_hashed.csv")

In [455]:
train_fix = train_fix.replace("Gov.", "governor")
train_fix = train_fix.replace(".gov", "governor")
train_fix = train_fix.replace("Governor", "governor")

In [471]:
fb_to_common("/m/0f8t6k")

'Chris Christie'

In [456]:
for i in range(train_fix.shape[0]-1):
    if sum(valid_fix['head'] == train_fix.at[i, 'head']) > 0:
        train_fix.at[i, 'fb_head'] = valid_fix[valid_fix['head'] == train_fix.at[i, 'head']]['fb_head'].values[0]
        
    if sum(valid_fix['tail'] == train_fix.at[i, 'tail']) > 0:
        train_fix.at[i, 'fb_tail'] = valid_fix[valid_fix['tail'] == train_fix.at[i, 'tail']]['fb_tail'].values[0]

In [457]:
train_fix.to_csv("./liar_dataset_triples/train_hashed.csv")

In [474]:
train[train.id == "5401.json"].text.values

array(['As Virginias governor, Allen cut spending and waste with bipartisan support.'],
      dtype=object)

# FakeNewsNet 

In [10]:
paths = glob("./fakenewsnet/*/*/*")

In [11]:
df = pd.DataFrame(columns=["text", "source", "label"])

In [12]:
def get_content(data):
    text = data['text']
    label = "false" if (path.split("\\")[-2] == "FakeNewsContent") else "true"
    source = path.split("\\")[1]
    return {"text":text, "source": source, "label": label}

In [14]:
from tqdm import tqdm
import json
for path in tqdm(paths):
    with open(path) as f:
        data = json.load(f)
    content = get_content(data)
    df = df.append(content, ignore_index=True)

100%|███████████████████████████████████████████████████████████████████████████████| 422/422 [00:02<00:00, 150.19it/s]


In [15]:
buzzfeed = df[df.source == "BuzzFeed"]
politifact = df[df.source == "PolitiFact"]
print(f"""
    BuzzFeed:
    num articles: {buzzfeed.shape[0]},
        false: {sum(buzzfeed.label == "false")},
        true: {sum(buzzfeed.label == "true")}
    
    PolitiFact:
    num articles: {politifact.shape[0]},
        false: {sum(politifact.label == "false")},
        true: {sum(politifact.label == "true")}
""")


    BuzzFeed:
    num articles: 182,
        false: 91,
        true: 91
    
    PolitiFact:
    num articles: 240,
        false: 120,
        true: 120



In [461]:
df.to_csv("./fakenewsnet/clean_fakenewsnet.csv")

In [19]:
df.iloc[219]

text                
source    PolitiFact
label          false
Name: 219, dtype: object

## Post processing
Clean output from triple extractor

In [None]:
def load_df(path):
    df = pd.read_csv(paths[0])
    return df[["text", "source", "label", "triple"]]

In [31]:
paths = glob("./fakenewsnet_triples/*")

In [32]:
df = pd.read_csv(paths[0], index_col=0)

In [33]:
df

Unnamed: 0,Unnamed: 0.1,text,source,label,triple
0,0.0,I woke up this morning to find a variation of ...,BuzzFeed,False,"[['Clinton', 'organization/role/leaders./organ..."
1,1.0,Former President Bill Clinton and his Clinton ...,BuzzFeed,False,"[['Bill Clinton', 'people/person/employment_hi..."
2,2.0,After collapsing just before trying to step in...,BuzzFeed,False,
3,3.0,"Donald Trump is, well, deplorable. Hes suggest...",BuzzFeed,False,"[['Obama', 'people/person/employment_history./..."
4,4.0,Website is Down For Maintenance,BuzzFeed,False,
...,...,...,...,...,...
419,419.0,"As my 25th wedding anniversary approached, I t...",PolitiFact,True,
420,420.0,Story highlights Trump was sitting in a chair ...,PolitiFact,True,
421,421.0,"Donald Trump Jr., a son of the Republican pres...",PolitiFact,True,
219,,,,,"[['Trump', 'people/person/employment_history./..."


In [49]:
df.iloc[219]

Unnamed: 0.1                                                220.0
text            Terror suspect, Rasheed Muhammad, was arrested...
source                                                 PolitiFact
label                                                       False
triple          [['Trump', 'people/person/employment_history./...
Name: 220, dtype: object

In [75]:
clean = pd.DataFrame(columns=["id_text","text", "source", "label", "triple"])
for i in range(df.shape[0]):
    if type(df.iloc[i].text) != np.float:
        t = df.iloc[i].values
        clean = clean.append({
                "id_text": i,
                "text": t[1],
                "source": t[2],
                "label": t[3],
                "triple": t[4]
            }, ignore_index=True)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  This is separate from the ipykernel package so we can avoid doing imports until


In [77]:
paths

['./fakenewsnet_triples\\clean_fakenewsnet_triple.csv']

In [78]:
clean.to_csv("./fakenewsnet_triples/fakenewsnet_triples_trimmed.csv")

In [89]:
trimmed = pd.read_csv("./fakenewsnet_triples/fakenewsnet_triples_trimmed.csv", index_col=0)

In [81]:
trimmed

Unnamed: 0,id_text,text,source,label,triple
0,0,I woke up this morning to find a variation of ...,BuzzFeed,False,"[['Clinton', 'organization/role/leaders./organ..."
1,1,Former President Bill Clinton and his Clinton ...,BuzzFeed,False,"[['Bill Clinton', 'people/person/employment_hi..."
2,2,After collapsing just before trying to step in...,BuzzFeed,False,
3,3,"Donald Trump is, well, deplorable. Hes suggest...",BuzzFeed,False,"[['Obama', 'people/person/employment_history./..."
4,4,Website is Down For Maintenance,BuzzFeed,False,
...,...,...,...,...,...
415,415,"KALLSTADT, Germany Few places in Germany are ...",PolitiFact,True,"[['Bank', 'organization/organization/headquart..."
416,416,Hollywood loses yet another one of their deare...,PolitiFact,True,"[['Donald Trump', 'people/person/employment_hi..."
417,417,"As my 25th wedding anniversary approached, I t...",PolitiFact,True,
418,418,Story highlights Trump was sitting in a chair ...,PolitiFact,True,


In [84]:
def clean_df(df):
    df = df.dropna()
    df.drop_duplicates(inplace=True)
    df.triple = df.triple.apply(lambda x: eval(x))
    df = df.reset_index()
    df = df[["id_text", "label", "triple"]]
    return df

In [82]:
def expand_df(df):
    expanded_df = pd.DataFrame(columns=["id_text", "head", "relation", "tail", "label"])
    for text_id, label, triples in df.values:
        for h, r, t in triples:
            expanded_df = expanded_df.append({
                "text_id": text_id, 
                "head": h, 
                "relation": r, 
                "tail": t, 
                "label": label
            }, ignore_index=True)
    return expanded_df

In [85]:
trimmed = clean_df(trimmed)
trimmed = expand_df(trimmed)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [86]:
trimmed

Unnamed: 0,text_id,head,relation,tail,label
0,0,Clinton,organization/role/leaders./organization/leader...,NPR,False
1,1,Bill Clinton,people/person/employment_history./business/emp...,President,False
2,1,Marsha Blackburn,people/person/employment_history./business/emp...,Rep.,False
3,1,Roger Bate,organization/role/leaders./organization/leader...,American Enterprise Institute,False
4,1,Hillary Clintons campaign.Blackburn,people/person/employment_history./business/emp...,candidate,False
...,...,...,...,...,...
995,415,William Seward,organization/role/leaders./organization/leader...,Republican Party,True
996,415,John Tyler,people/person/employment_history./business/emp...,President,True
997,415,Donald Trump,people/person/employment_history./business/emp...,President,True
998,415,Hillary Clinton,people/person/employment_history./business/emp...,President,True


In [100]:
import re
from tqdm import tqdm
import time

def remove_par(t):
    return re.sub(r'.\(.*\)', "", t)

def clean_df(df):
    df = df.dropna()
    df.drop_duplicates(inplace=True)
    df.triple = df.triple.apply(lambda x: eval(x))
    df = df.reset_index()
    df = df[["id_text", "label", "triple"]]
    return df

def expand_df(df):
    expanded_df = pd.DataFrame(columns=["id_text", "head", "relation", "tail", "label"])
    for text_id, label, triples in df.values:
        for h, r, t in triples:
            expanded_df = expanded_df.append({
                "text_id": text_id, 
                "head": h, 
                "relation": r, 
                "tail": t, 
                "label": label
            }, ignore_index=True)
    return expanded_df

def google_triples(df):

    # iterate list of list 
    for i, pair in enumerate(tqdm(df[["head", "tail"]].values)):
        h = google(pair[0])
        t = google(pair[-1])
        if not bool(h) and not bool(t):
            return df
        time.sleep(2.1)
        df.at[i, 'head'] = h
        df.at[i, 'tail'] = t
    return df

def transform(df, name):
    df = clean_df(df)
    df = expand_df(df)
    df = google_triples(df)
    df['head'] = df["head"].apply(lambda x: remove_par(x))
    df['tail'] = df["tail"].apply(lambda x: remove_par(x))
    df.to_csv("./fakenewsnet_triples/" + name + ".csv")

In [101]:
transform(trimmed, "googled")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value

  0%|                                                                                         | 0/1000 [00:00<?, ?it/s][A
  0%|                                                                               | 1/1000 [00:04<1:21:19,  4.88s/it][A
  0%|▏                                                                              | 2/1000 [00:09<1:16:12,  4.58s/it][A
  0%|▏                                                    

  6%|████▋                                                                         | 60/1000 [04:40<1:17:09,  4.93s/it][A
  6%|████▊                                                                         | 61/1000 [04:45<1:17:29,  4.95s/it][A
  6%|████▊                                                                         | 62/1000 [04:49<1:13:59,  4.73s/it][A
  6%|████▉                                                                         | 63/1000 [04:53<1:10:45,  4.53s/it][A
  6%|████▉                                                                         | 64/1000 [04:58<1:08:56,  4.42s/it][A
  6%|█████                                                                         | 65/1000 [05:03<1:12:52,  4.68s/it][A
  7%|█████▏                                                                        | 66/1000 [05:07<1:10:07,  4.50s/it][A
  7%|█████▏                                                                        | 67/1000 [05:11<1:09:24,  4.46s/it][A
  7%|█████▎     

 13%|█████████▋                                                                   | 126/1000 [09:39<1:02:27,  4.29s/it][A
 13%|█████████▊                                                                   | 127/1000 [09:43<1:03:05,  4.34s/it][A
 13%|█████████▊                                                                   | 128/1000 [09:48<1:04:01,  4.41s/it][A
 13%|█████████▉                                                                   | 129/1000 [09:52<1:02:36,  4.31s/it][A
 13%|██████████                                                                   | 130/1000 [09:56<1:01:58,  4.27s/it][A
 13%|██████████                                                                   | 131/1000 [10:01<1:04:51,  4.48s/it][A
 13%|██████████▏                                                                  | 132/1000 [10:05<1:03:47,  4.41s/it][A
 13%|██████████▏                                                                  | 133/1000 [10:10<1:05:44,  4.55s/it][A
 13%|██████████▎

 19%|███████████████▏                                                               | 192/1000 [14:36<58:37,  4.35s/it][A
 19%|███████████████▏                                                               | 193/1000 [14:40<59:12,  4.40s/it][A
 19%|███████████████▎                                                               | 194/1000 [14:45<59:02,  4.39s/it][A
 20%|███████████████▍                                                               | 195/1000 [14:49<58:02,  4.33s/it][A
 20%|███████████████▍                                                               | 196/1000 [14:53<58:14,  4.35s/it][A
 20%|███████████████▌                                                               | 197/1000 [14:58<58:37,  4.38s/it][A
 20%|███████████████▋                                                               | 198/1000 [15:02<59:12,  4.43s/it][A
 20%|███████████████▋                                                               | 199/1000 [15:07<59:52,  4.49s/it][A
 20%|███████████

 26%|███████████████████▊                                                         | 258/1000 [19:53<1:01:00,  4.93s/it][A
 26%|███████████████████▉                                                         | 259/1000 [19:59<1:02:43,  5.08s/it][A
 26%|████████████████████                                                         | 260/1000 [20:03<1:00:12,  4.88s/it][A
 26%|████████████████████▌                                                          | 261/1000 [20:07<57:46,  4.69s/it][A
 26%|████████████████████▏                                                        | 262/1000 [20:13<1:01:25,  4.99s/it][A
 26%|████████████████████▎                                                        | 263/1000 [20:18<1:01:14,  4.99s/it][A
 26%|████████████████████▎                                                        | 264/1000 [20:23<1:00:54,  4.97s/it][A
 26%|████████████████████▉                                                          | 265/1000 [20:27<58:36,  4.78s/it][A
 27%|███████████

 32%|█████████████████████████▌                                                     | 324/1000 [24:51<47:38,  4.23s/it][A
 32%|█████████████████████████▋                                                     | 325/1000 [24:56<50:06,  4.45s/it][A
 33%|█████████████████████████▊                                                     | 326/1000 [25:01<50:41,  4.51s/it][A
 33%|█████████████████████████▊                                                     | 327/1000 [25:06<51:02,  4.55s/it][A
 33%|█████████████████████████▉                                                     | 328/1000 [25:10<51:22,  4.59s/it][A
 33%|█████████████████████████▉                                                     | 329/1000 [25:15<51:13,  4.58s/it][A
 33%|██████████████████████████                                                     | 330/1000 [25:19<50:50,  4.55s/it][A
 33%|██████████████████████████▏                                                    | 331/1000 [25:24<50:55,  4.57s/it][A
 33%|███████████

 39%|██████████████████████████████▊                                                | 390/1000 [29:44<44:21,  4.36s/it][A
 39%|██████████████████████████████▉                                                | 391/1000 [29:49<44:20,  4.37s/it][A
 39%|██████████████████████████████▉                                                | 392/1000 [29:53<45:49,  4.52s/it][A
 39%|███████████████████████████████                                                | 393/1000 [29:58<45:05,  4.46s/it][A
 39%|███████████████████████████████▏                                               | 394/1000 [30:02<44:33,  4.41s/it][A
 40%|███████████████████████████████▏                                               | 395/1000 [30:06<43:42,  4.33s/it][A
 40%|███████████████████████████████▎                                               | 396/1000 [30:10<43:17,  4.30s/it][A
 40%|███████████████████████████████▎                                               | 397/1000 [30:15<42:50,  4.26s/it][A
 40%|███████████

 46%|████████████████████████████████████                                           | 456/1000 [34:36<41:49,  4.61s/it][A
 46%|████████████████████████████████████                                           | 457/1000 [34:40<40:22,  4.46s/it][A
 46%|████████████████████████████████████▏                                          | 458/1000 [34:44<40:20,  4.47s/it][A
 46%|████████████████████████████████████▎                                          | 459/1000 [34:49<40:08,  4.45s/it][A
 46%|████████████████████████████████████▎                                          | 460/1000 [34:53<40:37,  4.51s/it][A
 46%|████████████████████████████████████▍                                          | 461/1000 [34:58<39:58,  4.45s/it][A
 46%|████████████████████████████████████▍                                          | 462/1000 [35:02<40:01,  4.46s/it][A
 46%|████████████████████████████████████▌                                          | 463/1000 [35:06<39:26,  4.41s/it][A
 46%|███████████

 52%|█████████████████████████████████████████▏                                     | 522/1000 [39:29<35:44,  4.49s/it][A
 52%|█████████████████████████████████████████▎                                     | 523/1000 [39:33<34:46,  4.37s/it][A
 52%|█████████████████████████████████████████▍                                     | 524/1000 [39:38<35:44,  4.51s/it][A
 52%|█████████████████████████████████████████▍                                     | 525/1000 [39:42<34:30,  4.36s/it][A
 53%|█████████████████████████████████████████▌                                     | 526/1000 [39:46<34:54,  4.42s/it][A
 53%|█████████████████████████████████████████▋                                     | 527/1000 [39:51<36:42,  4.66s/it][A
 53%|█████████████████████████████████████████▋                                     | 528/1000 [39:55<35:06,  4.46s/it][A
 53%|█████████████████████████████████████████▊                                     | 529/1000 [40:00<35:16,  4.49s/it][A
 53%|███████████

 59%|██████████████████████████████████████████████▍                                | 588/1000 [44:33<29:52,  4.35s/it][A
 59%|██████████████████████████████████████████████▌                                | 589/1000 [44:37<29:00,  4.23s/it][A
 59%|██████████████████████████████████████████████▌                                | 590/1000 [44:41<28:36,  4.19s/it][A
 59%|██████████████████████████████████████████████▋                                | 591/1000 [44:45<27:58,  4.10s/it][A
 59%|██████████████████████████████████████████████▊                                | 592/1000 [44:49<27:22,  4.03s/it][A
 59%|██████████████████████████████████████████████▊                                | 593/1000 [44:53<26:52,  3.96s/it][A
 59%|██████████████████████████████████████████████▉                                | 594/1000 [44:57<26:36,  3.93s/it][A
 60%|███████████████████████████████████████████████                                | 595/1000 [45:01<27:13,  4.03s/it][A
 60%|███████████

 65%|███████████████████████████████████████████████████▋                           | 654/1000 [50:05<31:21,  5.44s/it][A
 66%|███████████████████████████████████████████████████▋                           | 655/1000 [50:11<31:56,  5.55s/it][A
 66%|███████████████████████████████████████████████████▊                           | 656/1000 [50:16<30:42,  5.35s/it][A
 66%|███████████████████████████████████████████████████▉                           | 657/1000 [50:21<31:11,  5.46s/it][A
 66%|███████████████████████████████████████████████████▉                           | 658/1000 [50:26<29:52,  5.24s/it][A
 66%|████████████████████████████████████████████████████                           | 659/1000 [50:31<30:09,  5.31s/it][A
 66%|████████████████████████████████████████████████████▏                          | 660/1000 [50:37<30:15,  5.34s/it][A
 66%|████████████████████████████████████████████████████▏                          | 661/1000 [50:42<29:47,  5.27s/it][A
 66%|███████████

 72%|████████████████████████████████████████████████████████▉                      | 720/1000 [55:12<20:46,  4.45s/it][A
 72%|████████████████████████████████████████████████████████▉                      | 721/1000 [55:17<21:32,  4.63s/it][A
 72%|█████████████████████████████████████████████████████████                      | 722/1000 [55:22<21:16,  4.59s/it][A
 72%|█████████████████████████████████████████████████████████                      | 723/1000 [55:26<21:13,  4.60s/it][A
 72%|█████████████████████████████████████████████████████████▏                     | 724/1000 [55:31<20:50,  4.53s/it][A
 72%|█████████████████████████████████████████████████████████▎                     | 725/1000 [55:35<20:15,  4.42s/it][A
 73%|█████████████████████████████████████████████████████████▎                     | 726/1000 [55:40<21:34,  4.72s/it][A
 73%|█████████████████████████████████████████████████████████▍                     | 727/1000 [55:44<20:50,  4.58s/it][A
 73%|███████████

 79%|████████████████████████████████████████████████████████████▌                | 786/1000 [1:00:26<16:27,  4.62s/it][A
 79%|████████████████████████████████████████████████████████████▌                | 787/1000 [1:00:31<16:20,  4.60s/it][A
 79%|████████████████████████████████████████████████████████████▋                | 788/1000 [1:00:35<15:52,  4.49s/it][A
 79%|████████████████████████████████████████████████████████████▊                | 789/1000 [1:00:41<16:49,  4.79s/it][A
 79%|████████████████████████████████████████████████████████████▊                | 790/1000 [1:00:46<17:46,  5.08s/it][A
 79%|████████████████████████████████████████████████████████████▉                | 791/1000 [1:00:51<17:09,  4.93s/it][A
 79%|████████████████████████████████████████████████████████████▉                | 792/1000 [1:00:55<16:46,  4.84s/it][A
 79%|█████████████████████████████████████████████████████████████                | 793/1000 [1:01:02<18:19,  5.31s/it][A
 79%|███████████

 85%|█████████████████████████████████████████████████████████████████▌           | 852/1000 [1:06:14<11:55,  4.84s/it][A
 85%|█████████████████████████████████████████████████████████████████▋           | 853/1000 [1:06:20<12:28,  5.09s/it][A
 85%|█████████████████████████████████████████████████████████████████▊           | 854/1000 [1:06:26<12:50,  5.28s/it][A
 86%|█████████████████████████████████████████████████████████████████▊           | 855/1000 [1:06:31<12:42,  5.26s/it][A
 86%|█████████████████████████████████████████████████████████████████▉           | 856/1000 [1:06:35<11:53,  4.96s/it][A
 86%|█████████████████████████████████████████████████████████████████▉           | 857/1000 [1:06:41<12:22,  5.19s/it][A
 86%|██████████████████████████████████████████████████████████████████           | 858/1000 [1:06:45<11:49,  5.00s/it][A
 86%|██████████████████████████████████████████████████████████████████▏          | 859/1000 [1:06:50<11:42,  4.98s/it][A
 86%|███████████

 92%|██████████████████████████████████████████████████████████████████████▋      | 918/1000 [1:11:47<06:29,  4.75s/it][A
 92%|██████████████████████████████████████████████████████████████████████▊      | 919/1000 [1:11:51<06:10,  4.57s/it][A
 92%|██████████████████████████████████████████████████████████████████████▊      | 920/1000 [1:11:55<05:58,  4.48s/it][A
 92%|██████████████████████████████████████████████████████████████████████▉      | 921/1000 [1:11:59<05:43,  4.34s/it][A
 92%|██████████████████████████████████████████████████████████████████████▉      | 922/1000 [1:12:04<05:35,  4.30s/it][A
 92%|███████████████████████████████████████████████████████████████████████      | 923/1000 [1:12:08<05:28,  4.27s/it][A
 92%|███████████████████████████████████████████████████████████████████████▏     | 924/1000 [1:12:12<05:32,  4.37s/it][A
 92%|███████████████████████████████████████████████████████████████████████▏     | 925/1000 [1:12:17<05:25,  4.34s/it][A
 93%|███████████

 98%|███████████████████████████████████████████████████████████████████████████▊ | 984/1000 [1:16:44<01:16,  4.79s/it][A
 98%|███████████████████████████████████████████████████████████████████████████▊ | 985/1000 [1:16:49<01:11,  4.78s/it][A
 99%|███████████████████████████████████████████████████████████████████████████▉ | 986/1000 [1:16:54<01:05,  4.66s/it][A
 99%|███████████████████████████████████████████████████████████████████████████▉ | 987/1000 [1:16:58<01:00,  4.62s/it][A
 99%|████████████████████████████████████████████████████████████████████████████ | 988/1000 [1:17:03<00:55,  4.60s/it][A
 99%|████████████████████████████████████████████████████████████████████████████▏| 989/1000 [1:17:07<00:50,  4.57s/it][A
 99%|████████████████████████████████████████████████████████████████████████████▏| 990/1000 [1:17:11<00:44,  4.44s/it][A
 99%|████████████████████████████████████████████████████████████████████████████▎| 991/1000 [1:17:16<00:40,  4.54s/it][A
 99%|███████████

NameError: name 'remove_par' is not defined

# Fake or Real

In [97]:
paths = glob("./fake-or-real/*")

In [98]:
pd.read_csv(paths[0])