# Data For Social Justice Methodology
Each header in this notebook corresponds to a header in the methodology.

In [1]:
import os
import pandas as pd
from utils import value_counts

In [2]:
# inputs
# Keyword-status CSVs share one directory; YouTube suggestions share another.
keyword_status_dir = '../data/output/placements_api_keyword_status'
suggestions_dir = '../data/output/placements_api_suggestions'

fn_policy = keyword_status_dir + '/policy.csv'
fn_hate = keyword_status_dir + '/hate.csv'
fn_adhoc = keyword_status_dir + '/adhoc.csv'
fn_basewords = keyword_status_dir + '/basewords.csv'
fn_social_justice = keyword_status_dir + '/social_justice.csv'
fn_youtube_videos = suggestions_dir + '/videos_for_social_justice_terms.csv'
fn_youtube_channels = suggestions_dir + '/channels_for_social_justice_terms.csv'

# outputs
table_dir = '../data/output/tables/socialjustice'
fn_table1 = table_dir + '/table1.csv'
# Create the table directory up front so later writes cannot fail on a missing folder.
os.makedirs(table_dir, exist_ok=True)

In [3]:
# Load one DataFrame per keyword list, in the same order the input
# paths are declared in the configuration cell.
df_policy = pd.read_csv(fn_policy)
df_hate = pd.read_csv(fn_hate)
df_adhoc = pd.read_csv(fn_adhoc)
df_basewords = pd.read_csv(fn_basewords)
df_soc_just = pd.read_csv(fn_social_justice)

In [4]:
# Columns shown when a cell displays the result of a search-term lookup.
display_cols = [
    'search_term',
    'status',
]

## What is Blocked?

In [5]:
# Give both status columns an explicit category order so that sorting
# follows this list rather than alphabetical order.
status_order = ["Blocked", "Partial Block", "Empty", "Full"]
for status_col in ('status', 'status_no_spaces'):
    df_soc_just[status_col] = pd.Categorical(
        df_soc_just[status_col], categories=status_order
    )

In [6]:
# Maps raw DataFrame column names to the human-readable headers used in
# the exported table. Dict order determines the column order of the table.
col2social_justice_table_col = {
    'search_term': "Search Term",
    'status': "Status for Ad Placement",  # fixed typo: was "Statusfor"
    'status_no_spaces': "Status with Spaces Removed",
    'n_youtube_videos': "N YouTube Videos",
    'n_youtube_channels': "N YouTube Channels",  # fixed label: was "N Channels Videos"
}

In [7]:
# Build Table 1: keep only the mapped columns, order the rows, then swap
# in the human-readable headers before writing the CSV.
social_justice_table = (
    df_soc_just[list(col2social_justice_table_col)]
    .sort_values(
        by=['status', 'status_no_spaces', 'search_term'],
        ascending=[True, False, True],
    )
    .rename(columns=col2social_justice_table_col)
)
social_justice_table.to_csv(fn_table1, index=False)
social_justice_table.head(10)

Unnamed: 0,Search Term,Statusfor Ad Placement,Status with Spaces Removed,N YouTube Videos,N Channels Videos
45,american muslim,Blocked,Full,,
46,black lives matter,Blocked,Full,,
48,black power,Blocked,Full,,
49,electoral justice,Blocked,Full,,
50,i stand with ilhan,Blocked,Full,,
51,i stand with kaepernick,Blocked,Full,,
52,muslim american,Blocked,Full,,
53,muslim fashion,Blocked,Full,,
54,muslim parenting,Blocked,Full,,
55,muslim solidarity,Blocked,Full,,


In [8]:
# Tabulate how many social justice terms fall under each status.
# value_counts is the project helper imported from utils; the rendered
# output has `count` and `percentage` columns.
value_counts(df_soc_just, col='status')

Unnamed: 0,count,percentage
Full,42,0.677419
Blocked,17,0.274194
Partial Block,3,0.048387
Empty,0,0.0


## White power vs Black power

In [11]:
# Put "black power" (social justice list) next to "white power" (hate list)
# to compare their statuses.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
pd.concat([
    df_soc_just[df_soc_just.search_term == 'black power'],
    df_hate[df_hate.search_term == 'white power'],
])[display_cols]

Unnamed: 0,search_term,status
48,black power,Blocked
54,white power,Full


In [30]:
# Gather every "... matter" term from the social justice and ad hoc lists,
# plus "white lives matter" from the hate list, ordered by status and term.
# pd.concat replaces the chained DataFrame.append calls, which were removed
# in pandas 2.0.
pd.concat([
    df_soc_just[df_soc_just.search_term.str.contains('matter')],
    df_hate[df_hate.search_term == 'white lives matter'],
    df_adhoc[df_adhoc.search_term.str.contains('matter')],
])[display_cols].sort_values(by=['status', 'search_term'])

Unnamed: 0,search_term,status
46,black lives matter,Blocked
0,all lives matter,Full
10,black girls matter,Full
17,black trans lives matter,Full
7,blue lives matter,Full
52,white lives matter,Full


The next three cells look at terms containing "muslim":

In [13]:
# Social justice terms whose text contains "muslim".
df_soc_just.loc[
    df_soc_just.search_term.str.contains('muslim'),
    display_cols,
]

Unnamed: 0,search_term,status
45,american muslim,Blocked
47,black muslim,Blocked
52,muslim american,Blocked
53,muslim fashion,Blocked
54,muslim parenting,Blocked
55,muslim solidarity,Blocked
56,no muslim ban ever,Blocked


In [14]:
# Compare "muslim fashion" with the ad hoc terms that contain "fashion".
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
pd.concat([
    df_soc_just[df_soc_just.search_term.str.contains('muslim fashion')],
    df_adhoc[df_adhoc.search_term.str.contains('fashion')],
])[display_cols]

Unnamed: 0,search_term,status
53,muslim fashion,Blocked
8,buddhist fashion,Full
9,christian fashion,Full
14,jewish fashion,Full


In [29]:
# Compare "muslim parenting" with the ad hoc terms that contain "parenting",
# ordered by status.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
pd.concat([
    df_soc_just[df_soc_just.search_term.str.contains('muslim parenting')],
    df_adhoc[df_adhoc.search_term.str.contains('parenting')],
])[display_cols].sort_values(by='status')

Unnamed: 0,search_term,status
54,muslim parenting,Blocked
35,buddhist parenting,Blocked
40,jewish parenting,Blocked
10,christian parenting,Full


In [16]:
# What's the status of blocked terms with spaces removed?
blocked_terms = df_soc_just[df_soc_just.status == 'Blocked']
value_counts(blocked_terms, col='status_no_spaces')

Unnamed: 0,count,percentage
Full,13,0.764706
Blocked,3,0.176471
Partial Block,1,0.058824
Empty,0,0.0


In [17]:
# What terms are still blocked with spaces removed?
# Rows are sorted in descending order on both keys, so with the categorical
# status order set earlier the "Full" rows are listed first.
blocked_listing_cols = display_cols + ['status_no_spaces']
(
    df_soc_just
    .loc[df_soc_just.status == 'Blocked', blocked_listing_cols]
    .sort_values(by=['status_no_spaces', 'search_term'], ascending=False)
)

Unnamed: 0,search_term,status,status_no_spaces
61,white supremacy,Blocked,Full
60,stand with ilhan,Blocked,Full
56,no muslim ban ever,Blocked,Full
55,muslim solidarity,Blocked,Full
54,muslim parenting,Blocked,Full
53,muslim fashion,Blocked,Full
52,muslim american,Blocked,Full
51,i stand with kaepernick,Blocked,Full
50,i stand with ilhan,Blocked,Full
49,electoral justice,Blocked,Full


In [18]:
# anti-muslim terms from the hate list:
# the pattern is a regular expression; "|" separates the alternatives.
anti_muslim_pattern = 'sharia|jihad|radical islamic terror'
df_hate.loc[
    df_hate.search_term.str.contains(anti_muslim_pattern),
    display_cols,
]

Unnamed: 0,search_term,status
12,civilization jihad,Full
56,white sharia,Full
77,radical islamic terror,Blocked


## Band-Aids
"covid" and "sex" are blocked as words and substrings.

In [35]:
# Columns shown in this section: the term, its status, and its status
# once spaces are removed.
display_cols_bandaids = [
    'search_term',
    'status',
    'status_no_spaces',
]

In [36]:
# Policy and base-word terms that contain "covid-19".
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
pd.concat([
    df_policy[df_policy.search_term.str.contains('covid-19')],
    df_basewords[df_basewords.search_term.str.contains('covid-19')],
])[display_cols_bandaids]

Unnamed: 0,search_term,status,status_no_spaces
61,covid-19,Blocked,
62,covid-19 asian food,Blocked,Blocked
63,covid-19 doesn't exist,Blocked,Blocked
64,covid-19 from 5g,Blocked,Blocked
65,covid-19 guaranteed vaccine,Blocked,Blocked
66,covid-19 home remedy,Blocked,Blocked
67,covid-19 people don't die,Blocked,Blocked
68,covid-19 prayer,Blocked,Blocked
69,covid-19 ritual,Blocked,Blocked
70,covid-19 vaccine kills people,Blocked,Blocked


In [37]:
# Policy and base-word terms that contain "sex" (as a word or a substring).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
pd.concat([
    df_policy[df_policy.search_term.str.contains('sex')],
    df_basewords[df_basewords.search_term.str.contains('sex')],
])[display_cols_bandaids]

Unnamed: 0,search_term,status,status_no_spaces
120,sex,Blocked,
121,sex industry,Blocked,Blocked
122,sex services,Blocked,Blocked
123,sex toys,Blocked,Blocked
124,sexual abuse,Blocked,Blocked
125,sexual acts,Blocked,Blocked
126,sexual device,Blocked,Blocked
127,sexual fetishes,Blocked,Blocked
128,sexual fluids,Blocked,Blocked
129,sexual gratification,Blocked,Blocked


What channels are suggested?

In [21]:
# Load the YouTube channel and video suggestions for the social justice terms.
df_channel = pd.read_csv(fn_youtube_channels)
df_video = pd.read_csv(fn_youtube_videos)

In [22]:
# Count how often each channel name occurs in the video suggestions and
# in the channel suggestions.
channels_from_videos = df_video['youtube_video_channel'].value_counts()
channels_from_channels = df_channel['youtube_channel_name'].value_counts()

In [24]:
# Show the first 20 entries of the per-channel counts taken from the
# suggested videos.
channels_from_videos.head(20)

NowThis News                  12
TEDx Talks                    11
Global News                   10
Black Excellence Excellist    10
FOR THE CULTURE PODCAST        9
NBC News                       7
The Root                       7
Democracy Now!                 6
CNN                            6
euronews (in English)          6
PBS NewsHour                   5
Guardian News                  5
Ruptly                         5
Congressmember Karen Bass      5
TED                            5
VICE News                      4
them                           4
reelblack                      4
MSNBC                          4
The Guardian                   4
Name: youtube_video_channel, dtype: int64

In [25]:
# Show the first 20 entries of the per-channel counts taken from the
# suggested channels.
channels_from_channels.head(20)

Democracy Now!                         3
For Harriet                            3
RE-EDUCATION                           3
PragerU                                3
Donut Operator                         3
StevenCrowder                          3
NBC News                               3
Logo                                   3
Teaching Tolerance                     2
Yvette Carnell                         2
Diamond and Silk - The Viewers View    2
PBS NewsHour                           2
Dr. T. Hasan Johnson                   2
John Pavlovitz Official                2
Age Of Truth TV                        2
MTV Impact                             2
Cyberdemon531                          2
Timcast                                2
Roland S. Martin                       2
VergeNetwork                           2
Name: youtube_channel_name, dtype: int64