# Data For Social Justice Methodology
Each header in this notebook corresponds to a headers in the methodology.

In [1]:
import os
import pandas as pd
from utils import value_counts

In [2]:
# inputs
fn_policy = '../data/output/placements_api_keyword_status/policy.csv'
fn_hate = '../data/output/placements_api_keyword_status/hate.csv'
fn_adhoc = '../data/output/placements_api_keyword_status/adhoc.csv'
fn_basewords = '../data/output/placements_api_keyword_status/basewords.csv'
fn_social_justice = '../data/output/placements_api_keyword_status/social_justice.csv'

# outputs
table_dir = '../data/output/tables/socialjustice'
fn_table1 = '../data/output/tables/socialjustice/table1.csv'
os.makedirs(table_dir, exist_ok=True)

In [3]:
df_soc_just = pd.read_csv(fn_social_justice)
df_adhoc = pd.read_csv(fn_adhoc)
df_policy = pd.read_csv(fn_policy)
df_basewords = pd.read_csv(fn_basewords)
df_hate = pd.read_csv(fn_hate)

In [4]:
display_cols = ['search_term', 'status']

## What is Blocked?

In [5]:
# create order of statuses for sorting
status_order = ["Blocked", "Partial Block", "Empty", "Full"]
for col in ['status', 'status_no_spaces']:
    df_soc_just[col] = pd.Categorical(df_soc_just[col], status_order)

In [6]:
col2social_justice_table_col = {
    'search_term': "Search Term", 
    'status': "Statusfor Ad Placement",
    'status_no_spaces': "Status with Spaces Removed",
    'n_youtube_videos': "N YouTube Videos",
    'n_youtube_channels': "N YouTube Videos",
}

In [7]:
social_justice_table = df_soc_just[[c for c in col2social_justice_table_col.keys()]]
social_justice_table.sort_values(by=['status', 'status_no_spaces', 'search_term'], 
                                 ascending=(True, False, True), inplace=True)
social_justice_table.columns = col2social_justice_table_col.values()
social_justice_table.to_csv(fn_table1)
social_justice_table.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  social_justice_table.sort_values(by=['status', 'status_no_spaces', 'search_term'],


Unnamed: 0,Search Term,Statusfor Ad Placement,Status with Spaces Removed,N YouTube Videos,N YouTube Videos.1
61,american muslim,Blocked,Full,,
60,black lives matter,Blocked,Full,,
58,black power,Blocked,Full,,
57,electoral justice,Blocked,Full,,
56,i stand with ilhan,Blocked,Full,,
55,i stand with kaepernick,Blocked,Full,,
54,muslim american,Blocked,Full,,
53,muslim fashion,Blocked,Full,,
52,muslim parenting,Blocked,Full,,
51,muslim solidarity,Blocked,Full,,


In [8]:
value_counts(df_soc_just, col='status')

Unnamed: 0,count,percentage
Full,42.0,0.677419
Blocked,17.0,0.274194
Partial Block,3.0,0.048387
Empty,0.0,0.0


## White power vs Black power

In [9]:
df_soc_just[df_soc_just.search_term == 'black power'].append(
    df_hate[df_hate.search_term == 'white power']
)[display_cols]

Unnamed: 0,search_term,status
58,black power,Blocked
1,white power,Full


In [10]:
df_soc_just[df_soc_just.search_term.str.contains('matter')].append(
    df_hate[df_hate.search_term == 'white lives matter']
).append(
    df_adhoc[df_adhoc.search_term.str.contains('matter')]
)[display_cols]

Unnamed: 0,search_term,status
10,black girls matter,Full
30,black trans lives matter,Full
60,black lives matter,Blocked
10,white lives matter,Full
1,all lives matter,Full
5,blue lives matter,Full


In [11]:
df_soc_just[df_soc_just.search_term.str.contains('muslim')][display_cols]

Unnamed: 0,search_term,status
50,no muslim ban ever,Blocked
51,muslim solidarity,Blocked
52,muslim parenting,Blocked
53,muslim fashion,Blocked
54,muslim american,Blocked
59,black muslim,Blocked
61,american muslim,Blocked


In [12]:
df_soc_just[df_soc_just.search_term.str.contains('muslim fashion')].append(
    df_adhoc[df_adhoc.search_term.str.contains('fashion')]
)[display_cols]

Unnamed: 0,search_term,status
53,muslim fashion,Blocked
6,christian fashion,Full
11,jewish fashion,Full
17,buddhist fashion,Full


In [13]:
df_soc_just[df_soc_just.search_term.str.contains('muslim parenting')].append(
    df_adhoc[df_adhoc.search_term.str.contains('parenting')]
)[display_cols]

Unnamed: 0,search_term,status
52,muslim parenting,Blocked
9,christian parenting,Full
45,jewish parenting,Blocked
49,buddhist parenting,Blocked


In [14]:
value_counts(df_soc_just[df_soc_just.status == 'Blocked'], 
             col='status_no_spaces')

Unnamed: 0,count,percentage
Full,13.0,0.764706
Blocked,3.0,0.176471
Partial Block,1.0,0.058824
Empty,0.0,0.0


In [15]:
df_soc_just[df_soc_just.status == 'Blocked'][
    display_cols + ['status_no_spaces']
].sort_values(
    by=['status_no_spaces', 'search_term'], 
    ascending=False
)

Unnamed: 0,search_term,status,status_no_spaces
48,white supremacy,Blocked,Full
49,stand with ilhan,Blocked,Full
50,no muslim ban ever,Blocked,Full
51,muslim solidarity,Blocked,Full
52,muslim parenting,Blocked,Full
53,muslim fashion,Blocked,Full
54,muslim american,Blocked,Full
55,i stand with kaepernick,Blocked,Full
56,i stand with ilhan,Blocked,Full
57,electoral justice,Blocked,Full


In [16]:
df_hate[
    df_hate.search_term.str.contains('sharia|jihad|radical islamic terror')
][display_cols]

Unnamed: 0,search_term,status
33,white sharia,Full
40,civilization jihad,Full
68,radical islamic terror,Blocked


## Band-Aids

In [17]:
df_policy[df_policy.search_term.str.contains('covid-19')].append(
    df_basewords[df_basewords.search_term.str.contains('covid-19')]
)[display_cols]

Unnamed: 0,search_term,status
57,covid-19 vaccine kills people,Blocked
58,covid-19 ritual,Blocked
59,covid-19 prayer,Blocked
60,covid-19 people don't die,Blocked
61,covid-19 home remedy,Blocked
62,covid-19 guaranteed vaccine,Blocked
63,covid-19 from 5g,Blocked
64,covid-19 doesn't exist,Blocked
65,covid-19 asian food,Blocked
138,covid-19,Blocked


In [18]:
df_policy[df_policy.search_term.str.contains('sex')].append(
    df_basewords[df_basewords.search_term.str.contains('sex')]
)[display_cols]

Unnamed: 0,search_term,status
39,sexually unwanted acts,Blocked
40,sexually explict content,Blocked
41,sexual harassment,Blocked
42,sexual gratification,Blocked
43,sexual fluids,Blocked
44,sexual fetishes,Blocked
45,sexual device,Blocked
46,sexual acts,Blocked
47,sexual abuse,Blocked
48,sex toys,Blocked
