# Data For Hate Methodology
This notebook contains the stats and tables used in the article.

Each header in this notebook corresponds to a header in the article.

In [1]:
import pandas as pd

In [2]:
# All keyword-status CSVs live in one output directory; keep that directory in
# a single place so relocating the data means editing one line.
KEYWORD_STATUS_DIR = '../data/output/placements_api_keyword_status'

fn_policy = f'{KEYWORD_STATUS_DIR}/policy.csv'
fn_hate = f'{KEYWORD_STATUS_DIR}/hate.csv'
fn_adhoc = f'{KEYWORD_STATUS_DIR}/adhoc.csv'
fn_basewords = f'{KEYWORD_STATUS_DIR}/basewords.csv'

In [3]:
# Read the four keyword-status CSVs into DataFrames.
# The files are read in the order policy, hate, basewords, adhoc.
df_policy, df_hate, df_basewords, df_adhoc = (
    pd.read_csv(csv_path)
    for csv_path in (fn_policy, fn_hate, fn_basewords, fn_adhoc)
)

In [4]:
# Column subset selected when the combined keyword lookups are displayed below.
display_cols = ["search_term", "status"]

## Google Ads API for Ad Placements

In [5]:
# Fraction of policy search terms falling under each status value.
df_policy["status"].value_counts(normalize=True)

Blocked          0.753333
Full             0.226667
Partial Block    0.020000
Name: status, dtype: float64

## What's on the blocklist?

In [6]:
# Example hate-list keywords whose status is 'Full'.
# random_state is fixed so the same five terms are drawn on every run.
(
    df_hate
    .loc[df_hate.status == 'Full', 'search_term']
    .sample(n=5, random_state=303)
    .tolist()
)

['14 words',
 'great replacement',
 'swastika',
 'you will not replace us',
 'color of crime']

In [7]:
# Fraction of hate-list search terms falling under each status value.
df_hate["status"].value_counts(normalize=True)

Full       0.678161
Blocked    0.321839
Name: status, dtype: float64

In [8]:
# Absolute number of hate-list search terms under each status value.
df_hate["status"].value_counts()

Full       59
Blocked    28
Name: status, dtype: int64

In [9]:
# For terms made of more than one space-separated word, count the values in
# the `status_no_spaces` column (presumably the status of the term with its
# spaces removed -- confirm against the data-collection code).
(
    df_hate
    .loc[
        lambda terms: terms.search_term.str.split(' ').str.len() > 1,
        'status_no_spaces',
    ]
    .value_counts()
)

Full             10
Partial Block     3
Blocked           3
Empty             1
Name: status_no_spaces, dtype: int64

In [10]:
# Multi-word search terms whose `status_no_spaces` value is "Blocked".
still_blocked = df_hate.loc[
    lambda terms: (
        (terms.search_term.str.split(' ').str.len() > 1)
        & (terms.status_no_spaces == "Blocked")
    ),
    'search_term',
]

In [11]:
# Display the multi-word terms whose `status_no_spaces` value is "Blocked".
still_blocked

59    white pride worldwide
60         holocaust denial
61      american nazi party
Name: search_term, dtype: object

In [12]:
# Set of the distinct space-separated words appearing in the still-blocked phrases.
unique_basewords = set().union(
    *(phrase.split(' ') for phrase in still_blocked.tolist())
)

In [13]:
# Status of each individual word from the still-blocked phrases, followed by
# the 'white pride' row from the hate list.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; row order and index labels are preserved.
pd.concat([
    df_basewords[df_basewords.search_term.isin(unique_basewords)],
    df_hate[df_hate.search_term == 'white pride'],
])[display_cols]

Unnamed: 0,search_term,status
14,american,Full
37,white,Full
40,party,Full
75,worldwide,Full
100,pride,Full
129,denial,Full
204,nazi,Blocked
228,holocaust,Blocked
62,white pride,Blocked


In [14]:
# Rows from the hate list, then the adhoc list, whose search term contains
# 'white national'.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; row order and index labels are preserved.
pd.concat([
    df_hate[df_hate.search_term.str.contains('white national')],
    df_adhoc[df_adhoc.search_term.str.contains('white national')],
])[display_cols]

Unnamed: 0,search_term,status
26,white nationalism,Full
64,white nationalist,Blocked
43,white nationalists,Full


In [15]:
# Rows from the basewords, policy, and adhoc lists (in that order) whose
# search term contains 'terrorist'.
# A single pd.concat replaces the chained DataFrame.append calls; append was
# deprecated in pandas 1.4 and removed in pandas 2.0. Row order and index
# labels are preserved.
pd.concat([
    df_basewords[df_basewords.search_term.str.contains('terrorist')],
    df_policy[df_policy.search_term.str.contains('terrorist')],
    df_adhoc[df_adhoc.search_term.str.contains('terrorist')],
])[display_cols]

Unnamed: 0,search_term,status
169,terrorist,Blocked
35,recruit terrorists,Full
36,praise terrorists,Full
66,video game terrorist mod,Blocked
69,terrorist recruitment,Blocked
70,terrorist ideology,Blocked
71,terrorist hostages,Blocked
72,terrorist attack,Blocked
73,terrorist acts,Blocked
11,terrorists,Full


## Hate phrases vs hate content

In [16]:
# `n_youtube_videos` value recorded for the exact search term "white power".
df_hate.loc[df_hate.search_term == "white power", 'n_youtube_videos']

1    169420977.0
Name: n_youtube_videos, dtype: float64

In [17]:
# Both suggestion CSVs live in one output directory; keep that directory in a
# single place so relocating the data means editing one line.
SUGGESTIONS_DIR = '../data/output/placements_api_suggestions'

fn_youtube_videos = f'{SUGGESTIONS_DIR}/videos_for_hate_terms.csv'
fn_youtube_channels = f'{SUGGESTIONS_DIR}/channels_for_hate_terms.csv'

In [18]:
# Read the video and channel suggestion CSVs into DataFrames (videos first).
df_hate_video, df_hate_channels = (
    pd.read_csv(csv_path)
    for csv_path in (fn_youtube_videos, fn_youtube_channels)
)

In [19]:
# Number of rows per `youtube_video_channel` value in the video table,
# then only the channels that occur at least 8 times.
channels_from_video = df_hate_video['youtube_video_channel'].value_counts()
channels_from_video.loc[channels_from_video.ge(8)]

CNN                    31
Ruptly                 29
AP Archive             23
The Young Turks        13
Dystopia Now           12
The F/S Effect         12
Newsy                  12
Destiny                12
CharlesFockaert        11
Global News            11
VICE News              10
Journeyman Pictures     9
Associated Press        9
Soap - Sim Racer        9
PBS NewsHour            8
NowThis News            8
Name: youtube_video_channel, dtype: int64

In [20]:
# Number of rows per `youtube_channel_name` value in the channel table,
# then only the channels that occur at least 3 times.
channels_from_channels = df_hate_channels['youtube_channel_name'].value_counts()
channels_from_channels.loc[channels_from_channels.ge(3)]

CinemaSins                  4
PragerU                     4
VICE                        4
act.tv                      4
Beau of the Fifth Column    4
Democracy Now!              4
Pixel_Hipster               3
ContraPoints                3
Trae Crowder                3
Shaun                       3
StevenCrowder               3
The Officer Tatum           3
Dia Beltran                 3
Name: youtube_channel_name, dtype: int64