# Analyzing Suggested Videos and Channels

This section follows the "Hate phrases vs hate content" section of the methodology.

In [1]:
import os
import pandas as pd
from tqdm import tqdm

In [2]:
# inputs
fn_adl = '../data/input/adl_extremist_alternative_channels_overlap.csv'
fn_topline = '../data/input/video_metadata/topline_hate_videos.csv'
fn_deeper_catalog = '../data/input/video_metadata/deep_catalog_wwk_wg_we.csv'
fn_youtube_videos = '../data/output/placements_api_suggestions/videos_for_hate_terms.csv'
fn_youtube_channels = '../data/output/placements_api_suggestions/channels_for_hate_terms.csv'

# outputs
fn_table = '../data/output/adl_extremist_alternative_channel_overlap.csv'

We cross-reference suggestions from the "PlacementSuggestionService" API against channels that a [recent report](https://www.adl.org/resources/reports/exposure-to-alternative-extremist-content-on-youtube) published by the ADL identified as "extremist" and "alternative".

We use a subset of channels that overlap with those in our study (`fn_adl`).

In [3]:
df_adl = pd.read_csv(fn_adl)
df_adl.iloc[0]

channel_name               StevenCrowder
channel_id      UCIveFvW-ARp_B_RckhweNJw
type                         alternative
sources            Ledwich|Ribeiro|Lewis
Name: 0, dtype: object

In [4]:
extremist_channels = df_adl[df_adl.type == 'extremist'].channel_id.tolist()
len(extremist_channels)

28

In [5]:
alt_channels = df_adl[df_adl.type == 'alternative'].channel_id.tolist()
len(alt_channels)

64

## Getting YouTube Metadata
We used `channel_id` to match videos we were suggested with channels from the ADL report. This is not available in the suggested videos, so we collect the video metadata using the YouTube Data API (v3) Python client.

This code is just an example of what we did for all `video_id`'s:

```python
from youtube_api import YouTubeDataAPI

def custom_parser(resp : dict) -> dict:
    """
    Parses the JSON response from the YouTube Data API.
    Mostly descriptive fields from the snippet.
    """
    return {
        "video_id" : resp['id'],
        "video_title": resp['snippet']['title'],
        "video_description": resp['snippet']['description'],
        "video_tags": "|".join(resp['snippet'].get('tags', [])),
        "video_published_at": resp['snippet']['publishedAt'],
        "channel_title": resp['snippet']['channelTitle'],
        "channel_id": resp['snippet']['channelId'],
    }

# you need an API key for the YouTube Data API v3
YT_KEY = "AKAI- ..."
yt = YouTubeDataAPI(YT_KEY)

video_id = "5qap5aO4i9A"
video_metadata = yt.get_video_metadata(video_id, parser=custom_parser)
```

## How many Extremist and Alternative channels were we suggested for Topline suggestions?

In [6]:
df_topline_channels = pd.read_csv(fn_youtube_channels)
df_topline_channels = df_topline_channels[~df_topline_channels.youtube_channel_id.isnull()]

In [7]:
df_topline = pd.read_csv(fn_topline)
print(f"N unique videos: {len(df_topline)}")
df_topline.iloc[0]

N unique videos: 1311


video_id                                                    04axDDRVy_o
video_title           The “ethnic cleansing” of Myanmar’s Rohingya M...
video_description     The Rohingya have been systematically driven o...
video_tags            vox.com|vox|explain|Rohingya|Myanmer|Bangledes...
video_published_at                             2017-09-25T12:00:01.000Z
channel_title                                                       Vox
channel_id                                     UCLXo7UDZvByw2ixzpQCufnA
Name: 0, dtype: object

In [8]:
# video suggestions
topline_extreme = df_topline[df_topline.channel_id.isin(extremist_channels)]
topline_alt = df_topline[df_topline.channel_id.isin(alt_channels)]

In [9]:
# channel suggestions
topline_extreme_channel_suggestions = df_topline_channels[
    df_topline_channels.youtube_channel_id.isin(extremist_channels)]

topline_alt_channel_suggestions = df_topline_channels[
    df_topline_channels.youtube_channel_id.isin(alt_channels)]

In [10]:
# combine unique channels from videos and channels
topline_extreme_channels= list(set(
    topline_extreme_channel_suggestions.youtube_channel_name.tolist() + 
    topline_extreme.channel_title.tolist()
))

topline_alt_channels= list(set(
    topline_alt_channel_suggestions.youtube_channel_name.tolist() + 
    topline_alt.channel_title.tolist()
))

## How many Extremist and Alternative channels were we suggested for the deeper catalog of three terms?

In [11]:
df_deeper_catalog = pd.read_csv(fn_deeper_catalog)
print(f"N unique videos: {len(df_deeper_catalog)}")
df_deeper_catalog.iloc[0]

N unique videos: 1635


video_id                                                    puJ-arJgkZU
video_title           Gary Younge interviews Richard Spencer: 'Afric...
video_description     In a dramatic interview, the Guardian's Gary Y...
video_tags            Richard Spencer|Gary Younge|white supremacy|ra...
video_published_at                             2017-11-07T09:28:00.000Z
channel_title                                              The Guardian
channel_id                                     UCHpw8xwDNhU9gdohEcJu4aA
Name: 0, dtype: object

In [12]:
deep_extreme = df_deeper_catalog[df_deeper_catalog.channel_id.isin(extremist_channels)]
deep_alt = df_deeper_catalog[df_deeper_catalog.channel_id.isin(alt_channels)]

In [13]:
result = pd.DataFrame([
    {
        "data source": "topline",
        "category": "extremist",
        "n_videos": topline_extreme.video_id.nunique(),
        "perc_videos" : topline_extreme.video_id.nunique() / df_topline.video_id.nunique(),
        "n_channels": len(topline_extreme_channels),
        "channels" : " | ".join(set(topline_extreme_channels))
    },
    
    {
        "data source": "topline",
        "category": "alternative",
        "n_videos": topline_alt.video_id.nunique(),
        "perc_videos" : topline_alt.video_id.nunique() / df_topline.video_id.nunique(),
        "n_channels": len(topline_alt_channels),
        "channels" : " | ".join(set(topline_alt_channels))
    },
    
    {
        "data source": "deep3",
        "category": "extremist",
        "n_videos": deep_extreme.video_id.nunique(),
        "perc_videos" : deep_extreme.video_id.nunique() / df_deeper_catalog.video_id.nunique(),
        "n_channels": deep_extreme.channel_title.nunique(),
        "channels" : " | ".join(set(deep_extreme.channel_title.unique()))
    },
    
    {
        "data source": "deep3",
        "category": "alternative",
        "n_videos": deep_alt.video_id.nunique(),
        "perc_videos" : deep_alt.video_id.nunique() / df_deeper_catalog.video_id.nunique(),
        "n_channels": deep_alt.channel_title.nunique(),
        "channels" : " | ".join(set(deep_alt.channel_title.unique()))
    }
])

result

Unnamed: 0,data source,category,n_videos,perc_videos,n_channels,channels
0,topline,extremist,32,0.024409,23,MILO | CoachDave.TV | Eli Harman | Arktos | St...
1,topline,alternative,53,0.040427,41,RockingMrE | Jesse Lee Peterson | BlazeTV | Kr...
2,deep3,extremist,23,0.021375,14,Nightmare Fuel | Sargon of Akkad | Styxhexenha...
3,deep3,alternative,219,0.203532,38,Coach Red Pill | RockingMrE | Dame Pesos | Jes...


In [14]:
result.to_csv(fn_table, index=False)

## Do suggested videos contain metadata that matches the query?

In [15]:
# these are suggested videos, we'll merge the video metadata to make this search.
df_hate_video = pd.read_csv(fn_youtube_videos)

In [16]:
def search_metadata(row : dict) -> bool:
    """
    Check for metadata that corresponds with the `search_term`
    """
    term = row['search_term'].replace(' ', '')
    if term in row['youtube_video_title'].lower().replace(' ', ''):
        return True
    elif term in str(row['video_tags']).replace(' ', ''):
        return True
    elif term in str(row['video_description']).lower().replace(' ', ''):
        return True
    return False

In [17]:
df_hate_video.merge(df_topline, on='video_id').apply(search_metadata, axis=1).value_counts(normalize=True)

True     0.586107
False    0.413893
dtype: float64