# Data For Social Justice Methodology
Each header in this notebook corresponds to a headers in the methodology.

In [1]:
import os
import pandas as pd
from utils import value_counts

In [2]:
# inputs
fn_policy = '../data/output/placements_api_keyword_status/policy.csv'
fn_hate = '../data/output/placements_api_keyword_status/hate.csv'
fn_adhoc = '../data/output/placements_api_keyword_status/adhoc.csv'
fn_basewords = '../data/output/placements_api_keyword_status/basewords.csv'
fn_social_justice = '../data/output/placements_api_keyword_status/social_justice.csv'
fn_youtube_videos = '../data/output/placements_api_suggestions/videos_for_social_justice_terms.csv'
fn_youtube_channels = '../data/output/placements_api_suggestions/channels_for_social_justice_terms.csv'

# outputs
table_dir = '../data/output/tables/socialjustice'
fn_table1 = '../data/output/tables/socialjustice/table1.csv'
fn_table2 = '../data/output/tables/socialjustice/table2.csv'
fn_table3 = '../data/output/tables/socialjustice/table3.csv'
os.makedirs(table_dir, exist_ok=True)

In [3]:
# columns we'll display in the notebook
display_cols = ['search_term', 'status']
display_cols_bandaids = ['search_term', 'status', 'status_no_spaces']

# how we're going to sort results
sort_cols = ['status', 'search_term']
sort_cols_bandaids = ['status', 'status_no_spaces', 'search_term']

In [4]:
df_soc_just = pd.read_csv(fn_social_justice)
df_adhoc = pd.read_csv(fn_adhoc)
df_policy = pd.read_csv(fn_policy)
df_basewords = pd.read_csv(fn_basewords)
df_hate = pd.read_csv(fn_hate)

## What is Blocked?

In [5]:
# create order of statuses for sorting
status_order = ["Blocked", "Partial Block", "Empty", "Full"]
for col in ['status', 'status_no_spaces']:
    df_soc_just[col] = pd.Categorical(df_soc_just[col], status_order)

In [6]:
col2social_justice_table_col = {
    'search_term': "Search Term", 
    'status': "Statusfor Ad Placement",
    'status_no_spaces': "Status with Spaces Removed",
    'n_youtube_videos': "N YouTube Videos",
    'n_youtube_channels': "N Channels Videos",
}

In [7]:
social_justice_table = df_soc_just[[c for c in col2social_justice_table_col.keys()]]
social_justice_table = social_justice_table.sort_values(
    by=['status', 'status_no_spaces', 'search_term'], 
    ascending=(True, False, True)
)
social_justice_table.columns = col2social_justice_table_col.values()
social_justice_table.to_csv(fn_table1, index=False)
social_justice_table.head(10).reset_index(drop=True)

Unnamed: 0,Search Term,Statusfor Ad Placement,Status with Spaces Removed,N YouTube Videos,N Channels Videos
0,american muslim,Blocked,Full,,
1,black lives matter,Blocked,Full,,
2,black power,Blocked,Full,,
3,electoral justice,Blocked,Full,,
4,i stand with ilhan,Blocked,Full,,
5,i stand with kaepernick,Blocked,Full,,
6,muslim american,Blocked,Full,,
7,muslim fashion,Blocked,Full,,
8,muslim parenting,Blocked,Full,,
9,muslim solidarity,Blocked,Full,,


In [29]:
social_justice_table

Unnamed: 0,Search Term,Statusfor Ad Placement,Status with Spaces Removed,N YouTube Videos,N Channels Videos
45,american muslim,Blocked,Full,,
46,black lives matter,Blocked,Full,,
47,black power,Blocked,Full,,
48,electoral justice,Blocked,Full,,
49,i stand with ilhan,Blocked,Full,,
...,...,...,...,...,...
37,say her name,Full,,153173549.0,441555.0
38,say their names,Full,,156323370.0,476101.0
39,tell black stories,Full,,56566840.0,128222.0
40,white fragility,Full,,165212.0,38.0


In [8]:
value_counts(df_soc_just, col='status')

Unnamed: 0,count,percentage
Full,42,0.677419
Blocked,17,0.274194
Partial Block,3,0.048387
Empty,0,0.0


In [32]:
# what terms have a partial block?
df_soc_just[df_soc_just.status == 'Partial Block'].search_term.tolist()

['antifascist', 'colonialism', 'reparations']

## White power vs Black power

In [9]:
whose_power = df_soc_just[df_soc_just.search_term == 'black power'].append(
    df_hate[df_hate.search_term == 'white power']
)[display_cols]

whose_power.set_index('search_term')

Unnamed: 0_level_0,status
search_term,Unnamed: 1_level_1
black power,Blocked
white power,Full


In [10]:
whose_lives_matter_to_yt = (df_soc_just[df_soc_just.search_term.str.contains('matter')]
    .append(df_hate[df_hate.search_term == 'white lives matter'])
    .append(df_adhoc[df_adhoc.search_term.str.contains('matter')]
)[display_cols].sort_values(by=sort_cols))

whose_lives_matter_to_yt.set_index('search_term')

Unnamed: 0_level_0,status
search_term,Unnamed: 1_level_1
black lives matter,Blocked
all lives matter,Full
black girls matter,Full
black trans lives matter,Full
blue lives matter,Full
white lives matter,Full


Create a table for the methodology

In [11]:
blocked = whose_power[whose_power.status == 'Blocked'].search_term.tolist() + \
    whose_lives_matter_to_yt[whose_lives_matter_to_yt.status == 'Blocked'].search_term.tolist()
    
full = whose_power[whose_power.status == 'Full'].search_term.tolist() + \
    [', '.join(whose_lives_matter_to_yt[whose_lives_matter_to_yt.status == 'Full'].search_term.tolist())]

table2 = pd.DataFrame({
    "Blocked response": blocked,
    "Full response": full
})
table2

Unnamed: 0,Blocked response,Full response
0,black power,white power
1,black lives matter,"all lives matter, black girls matter, black tr..."


In [12]:
# save the table
table2.to_csv(fn_table2, index=False,  escapechar='"')

The next three cells look at terms containing "muslim":

In [13]:
# How are religions treated as basewords...
religions = ['buddhist', 'jewish', 'christian', 'muslim']
relgion = df_basewords[df_basewords.search_term.isin(religions)][display_cols]
relgion.set_index('search_term')

Unnamed: 0_level_0,status
search_term,Unnamed: 1_level_1
buddhist,Blocked
christian,Blocked
jewish,Blocked
muslim,Blocked


In [14]:
# How are terms that contain "muslim" treated?
df_soc_just[
    df_soc_just.search_term.str.contains('muslim')
][display_cols].set_index('search_term')

Unnamed: 0_level_0,status
search_term,Unnamed: 1_level_1
american muslim,Blocked
muslim american,Blocked
muslim fashion,Blocked
muslim parenting,Blocked
muslim solidarity,Blocked
no muslim ban ever,Blocked
black muslim,Blocked


In [15]:
# Are there religions treated the same way?
fashion = df_soc_just[df_soc_just.search_term.str.contains('muslim fashion')].append(
    df_adhoc[df_adhoc.search_term.str.contains('fashion')]
)[display_cols]

fashion.set_index('search_term')

Unnamed: 0_level_0,status
search_term,Unnamed: 1_level_1
muslim fashion,Blocked
buddhist fashion,Full
christian fashion,Full
jewish fashion,Full


In [16]:
df_soc_just[df_soc_just.search_term.str.contains('muslim parenting')].append(
    df_adhoc[df_adhoc.search_term.str.contains('parenting')]
)[display_cols].sort_values(by='status').set_index('search_term')

Unnamed: 0_level_0,status
search_term,Unnamed: 1_level_1
muslim parenting,Blocked
buddhist parenting,Blocked
jewish parenting,Blocked
christian parenting,Full


let's format this into a table for the methodology

In [17]:
# this is to combine "muslim" and "muslim fashion" as one entry
muslim_fashion = {
   "muslim" : "muslim, muslim fashion"   
}
blocked_religion = relgion[relgion.status == 'Blocked'].search_term.tolist()
blocked_religion = [muslim_fashion.get(term, term) for term in blocked_religion]

full_religion = fashion[fashion.status == 'Full'].search_term.tolist()
full_religion.append(None)

In [18]:
table3 = pd.DataFrame({
    "Blocked response": blocked_religion,
    "Full response": full_religion
})
table3

Unnamed: 0,Blocked response,Full response
0,buddhist,buddhist fashion
1,christian,christian fashion
2,jewish,jewish fashion
3,"muslim, muslim fashion",


In [19]:
table3.to_csv(fn_table3, index=False, escapechar='"')

In [20]:
# What's the status of blocked terms with spaces removed?
value_counts(df_soc_just[df_soc_just.status == 'Blocked'], 
             col='status_no_spaces')

Unnamed: 0,count,percentage
Full,13,0.764706
Blocked,3,0.176471
Partial Block,1,0.058824
Empty,0,0.0


In [21]:
# What terms are still blocked with spaces removed?
(df_soc_just[df_soc_just.status == 'Blocked'][display_cols_bandaids ]
    .sort_values(by=sort_cols, ascending=False)
    .set_index('search_term'))

Unnamed: 0_level_0,status,status_no_spaces
search_term,Unnamed: 1_level_1,Unnamed: 2_level_1
white supremacy,Blocked,Full
stand with ilhan,Blocked,Full
sexual liberation,Blocked,Blocked
sex work,Blocked,Blocked
sex ed,Blocked,Blocked
no muslim ban ever,Blocked,Full
muslim solidarity,Blocked,Full
muslim parenting,Blocked,Full
muslim fashion,Blocked,Full
muslim american,Blocked,Full


In [22]:
# anti-muslim terms from the hate list:
anti_muslim = ['civilization jihad', 'white sharia', 'radical islamic terror']
(df_hate[df_hate.search_term.isin(anti_muslim)][display_cols]
    .set_index('search_term'))

Unnamed: 0_level_0,status
search_term,Unnamed: 1_level_1
civilization jihad,Full
white sharia,Full
radical islamic terror,Blocked


## Band-Aids
"covid" and "sex" are blocked as words and substrings.

In [23]:
(df_policy[df_policy.search_term.str.contains('covid-19')]
    .append(df_basewords[df_basewords.search_term.str.contains('covid-19')])
    .sort_values(sort_cols_bandaids)[display_cols_bandaids]
    .set_index('search_term'))

Unnamed: 0_level_0,status,status_no_spaces
search_term,Unnamed: 1_level_1,Unnamed: 2_level_1
covid-19 asian food,Blocked,Blocked
covid-19 doesn't exist,Blocked,Blocked
covid-19 from 5g,Blocked,Blocked
covid-19 guaranteed vaccine,Blocked,Blocked
covid-19 home remedy,Blocked,Blocked
covid-19 people don't die,Blocked,Blocked
covid-19 prayer,Blocked,Blocked
covid-19 ritual,Blocked,Blocked
covid-19 vaccine kills people,Blocked,Blocked
covid-19,Blocked,


In [24]:
(df_policy[df_policy.search_term.str.contains('sex')]
    .append(df_basewords[df_basewords.search_term.str.contains('sex')])
    .sort_values(sort_cols_bandaids)[display_cols_bandaids]
    .set_index('search_term'))

Unnamed: 0_level_0,status,status_no_spaces
search_term,Unnamed: 1_level_1,Unnamed: 2_level_1
sex industry,Blocked,Blocked
sex services,Blocked,Blocked
sex toys,Blocked,Blocked
sexual abuse,Blocked,Blocked
sexual acts,Blocked,Blocked
sexual device,Blocked,Blocked
sexual fetishes,Blocked,Blocked
sexual fluids,Blocked,Blocked
sexual gratification,Blocked,Blocked
sexual harassment,Blocked,Blocked


What channels are suggested?

In [25]:
df_video = pd.read_csv(fn_youtube_videos)
df_channel = pd.read_csv(fn_youtube_channels)

In [26]:
channels_from_videos = df_video.youtube_video_channel.value_counts()
channels_from_channels = df_channel.youtube_channel_name.value_counts()

In [27]:
channels_from_videos.head(20)

NowThis News                  12
TEDx Talks                    11
Global News                   10
Black Excellence Excellist    10
FOR THE CULTURE PODCAST        9
NBC News                       7
The Root                       7
CNN                            6
euronews (in English)          6
Democracy Now!                 6
PBS NewsHour                   5
Guardian News                  5
Ruptly                         5
Congressmember Karen Bass      5
TED                            5
The Guardian                   4
Dax                            4
them                           4
CBS New York                   4
reelblack                      4
Name: youtube_video_channel, dtype: int64

In [28]:
channels_from_channels.head(20)

RE-EDUCATION                           3
StevenCrowder                          3
NBC News                               3
For Harriet                            3
Donut Operator                         3
Democracy Now!                         3
PragerU                                3
Logo                                   3
QueerAsCat                             2
Liberty Hangout                        2
Knowing Better                         2
SPIRITUAL So                           2
Roland S. Martin                       2
Vladimir Jaffe                         2
RoyBling                               2
VergeNetwork                           2
Cyberdemon531                          2
ContraPoints                           2
Diamond and Silk - The Viewers View    2
JennaMarbles                           2
Name: youtube_channel_name, dtype: int64