## Initial Set Up

In [1]:
import configparser
import csv
import datetime
import json
import math
import os
import random
import re
import shutil
import string
import zipfile
from collections import Counter
# NOTE: this rebinds the name `datetime` from the module to the class, so it must
# stay after `import datetime` above to keep the binding this notebook ends up with.
from datetime import datetime

import GetOldTweets3 as got
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import tweepy
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize

In [2]:
# Execute functions.ipynb so that the names it defines become available in this notebook.
# NOTE(review): download_query_tweets, download_user_tweets and jsonconverter are called
# below but not defined in this notebook — presumably they come from functions.ipynb; confirm.
%run functions.ipynb

## Notes

Within my 'data' folder:
- 1 refers to tweets from January
- 2 refers to tweets from February
- 3 refers to tweets from March
- 4 refers to tweets from April
- all_time refers to tweets from the start of January to the end of April (the main query results, the slur-query results, and the Donald Trump and Mike Pompeo tweets)
- text1 and corpus_index1.json refer to news articles from January
- text2 and corpus_index2.json refer to news articles from February
- text3 and corpus_index3.json refer to news articles from March
- text4 and corpus_index4.json refer to news articles from April
- text_all and corpus_index_all.json refer to news articles from the start of January to the end of April

## Twitter Data

In [3]:
# Load the [TWITTER] section of config.ini into a plain dict of credentials.
# Keeping the secrets in config.ini keeps them out of the notebook itself.
config = configparser.ConfigParser()
config.read('config.ini')
twitter_cred = dict(config['TWITTER'].items())

In [4]:
# Build an authenticated tweepy client from the credentials in `twitter_cred`.
auth = tweepy.OAuthHandler(
    consumer_key=twitter_cred['consumer_key'],
    consumer_secret=twitter_cred['consumer_secret'],
)
auth.set_access_token(
    twitter_cred['access_key'],
    twitter_cred['access_secret'],
)
api = tweepy.API(auth)

In [5]:
!pip install --user GetOldTweets3



In [6]:
# Search strings handed one by one to download_query_tweets in the monthly and
# all-time download cells below (currently a single query).
# The backslash-newline pairs sit inside the string literal, so the lines are joined
# and the leading indentation of each continuation line becomes part of the query
# text (visible as runs of spaces when the query is echoed in the cell outputs).
# NOTE(review): AND/OR are not grouped with parentheses; how they are combined is
# decided by the search backend — confirm the query matches the intended
# (asian american terms) AND (racism terms) AND (coronavirus terms).
queries = ['"asian american" OR "asian-american" OR #asianamerican AND \
            racism OR racist OR xenophobia OR #racism OR #racist OR #xenophobia AND \
            coronavirus OR "corona virus" OR covid19 OR covid-19 OR pandemic OR virus OR "chinese virus" OR "china virus" OR \
            #coronavirus OR #covid19 OR #pandemic OR #chinavirus OR #chinesevirus']

**January**

In [7]:
# Download the tweets matching `queries` for January 2020 and store them as
# JSON Lines (one JSON document per line) under data/1.
DATA_DIR = 'data/1'

# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(DATA_DIR, exist_ok=True)

since = "2020-01-01"
until = "2020-02-01"  # NOTE(review): presumably an exclusive upper bound — confirm in download_query_tweets

# The output path does not depend on the query, so the results of all queries are
# gathered first and the file is written once. Opening it in 'w' mode once per
# query would keep only the last query's tweets.
outfilename = "{}/{}_{}_to_{}.json".format(DATA_DIR, 'tweets', since, until)

tweet_list = []
for query in queries:
    query_tweets = download_query_tweets(query, since, until)
    print('Downloaded {} tweets...\n'.format(len(query_tweets)))
    tweet_list.extend(query_tweets)

with open(outfilename,'w') as out:
    for tweet in tweet_list:
        # `default=jsonconverter` is called by json for any value it cannot
        # serialise on its own.
        out.write(json.dumps(tweet, default=jsonconverter) + '\n')

Downloading tweets for query: '"asian american" OR "asian-american" OR #asianamerican AND             racism OR racist OR xenophobia OR #racism OR #racist OR #xenophobia AND             coronavirus OR "corona virus" OR covid19 OR covid-19 OR pandemic OR virus OR "chinese virus" OR "china virus" OR             #coronavirus OR #covid19 OR #pandemic OR #chinavirus OR #chinesevirus' from 2020-01-01 to 2020-02-01
Downloaded 7 tweets...



**February**

In [8]:
# Download the tweets matching `queries` for February 2020 and store them as
# JSON Lines (one JSON document per line) under data/2.
DATA_DIR = 'data/2'

# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(DATA_DIR, exist_ok=True)

since = "2020-02-01"
until = "2020-03-01"  # NOTE(review): presumably an exclusive upper bound — confirm in download_query_tweets

# The output path does not depend on the query, so the results of all queries are
# gathered first and the file is written once. Opening it in 'w' mode once per
# query would keep only the last query's tweets.
outfilename = "{}/{}_{}_to_{}.json".format(DATA_DIR, 'tweets', since, until)

tweet_list = []
for query in queries:
    query_tweets = download_query_tweets(query, since, until)
    print('Downloaded {} tweets...\n'.format(len(query_tweets)))
    tweet_list.extend(query_tweets)

with open(outfilename,'w') as out:
    for tweet in tweet_list:
        # `default=jsonconverter` is called by json for any value it cannot
        # serialise on its own.
        out.write(json.dumps(tweet, default=jsonconverter) + '\n')

Downloading tweets for query: '"asian american" OR "asian-american" OR #asianamerican AND             racism OR racist OR xenophobia OR #racism OR #racist OR #xenophobia AND             coronavirus OR "corona virus" OR covid19 OR covid-19 OR pandemic OR virus OR "chinese virus" OR "china virus" OR             #coronavirus OR #covid19 OR #pandemic OR #chinavirus OR #chinesevirus' from 2020-02-01 to 2020-03-01
Downloaded 131 tweets...



**March**

In [9]:
# Download the tweets matching `queries` for March 2020 and store them as
# JSON Lines (one JSON document per line) under data/3.
DATA_DIR = 'data/3'

# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(DATA_DIR, exist_ok=True)

since = "2020-03-01"
until = "2020-04-01"  # NOTE(review): presumably an exclusive upper bound — confirm in download_query_tweets

# The output path does not depend on the query, so the results of all queries are
# gathered first and the file is written once. Opening it in 'w' mode once per
# query would keep only the last query's tweets.
outfilename = "{}/{}_{}_to_{}.json".format(DATA_DIR, 'tweets', since, until)

tweet_list = []
for query in queries:
    query_tweets = download_query_tweets(query, since, until)
    print('Downloaded {} tweets...\n'.format(len(query_tweets)))
    tweet_list.extend(query_tweets)

with open(outfilename,'w') as out:
    for tweet in tweet_list:
        # `default=jsonconverter` is called by json for any value it cannot
        # serialise on its own.
        out.write(json.dumps(tweet, default=jsonconverter) + '\n')

Downloading tweets for query: '"asian american" OR "asian-american" OR #asianamerican AND             racism OR racist OR xenophobia OR #racism OR #racist OR #xenophobia AND             coronavirus OR "corona virus" OR covid19 OR covid-19 OR pandemic OR virus OR "chinese virus" OR "china virus" OR             #coronavirus OR #covid19 OR #pandemic OR #chinavirus OR #chinesevirus' from 2020-03-01 to 2020-04-01
Downloaded 1702 tweets...



**April**

In [10]:
# Download the tweets matching `queries` for April 2020 and store them as
# JSON Lines (one JSON document per line) under data/4.
DATA_DIR = 'data/4'

# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(DATA_DIR, exist_ok=True)

since = "2020-04-01"
until = "2020-05-01"  # NOTE(review): presumably an exclusive upper bound — confirm in download_query_tweets

# The output path does not depend on the query, so the results of all queries are
# gathered first and the file is written once. Opening it in 'w' mode once per
# query would keep only the last query's tweets.
outfilename = "{}/{}_{}_to_{}.json".format(DATA_DIR, 'tweets', since, until)

tweet_list = []
for query in queries:
    query_tweets = download_query_tweets(query, since, until)
    print('Downloaded {} tweets...\n'.format(len(query_tweets)))
    tweet_list.extend(query_tweets)

with open(outfilename,'w') as out:
    for tweet in tweet_list:
        # `default=jsonconverter` is called by json for any value it cannot
        # serialise on its own.
        out.write(json.dumps(tweet, default=jsonconverter) + '\n')

Downloading tweets for query: '"asian american" OR "asian-american" OR #asianamerican AND             racism OR racist OR xenophobia OR #racism OR #racist OR #xenophobia AND             coronavirus OR "corona virus" OR covid19 OR covid-19 OR pandemic OR virus OR "chinese virus" OR "china virus" OR             #coronavirus OR #covid19 OR #pandemic OR #chinavirus OR #chinesevirus' from 2020-04-01 to 2020-05-01
Downloaded 1260 tweets...



**All-time**

In [11]:
# Download the tweets matching `queries` for the whole study period
# (2020-01-01 to 2020-05-01) and store them as JSON Lines under data/all_time.
DATA_DIR = 'data/all_time'

# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(DATA_DIR, exist_ok=True)

since = "2020-01-01"
until = "2020-05-01"  # NOTE(review): presumably an exclusive upper bound — confirm in download_query_tweets

# The output path does not depend on the query, so the results of all queries are
# gathered first and the file is written once. Opening it in 'w' mode once per
# query would keep only the last query's tweets.
outfilename = "{}/{}_{}_to_{}.json".format(DATA_DIR, 'tweets', since, until)

tweet_list = []
for query in queries:
    query_tweets = download_query_tweets(query, since, until)
    print('Downloaded {} tweets...\n'.format(len(query_tweets)))
    tweet_list.extend(query_tweets)

with open(outfilename,'w') as out:
    for tweet in tweet_list:
        # `default=jsonconverter` is called by json for any value it cannot
        # serialise on its own.
        out.write(json.dumps(tweet, default=jsonconverter) + '\n')

Downloading tweets for query: '"asian american" OR "asian-american" OR #asianamerican AND             racism OR racist OR xenophobia OR #racism OR #racist OR #xenophobia AND             coronavirus OR "corona virus" OR covid19 OR covid-19 OR pandemic OR virus OR "chinese virus" OR "china virus" OR             #coronavirus OR #covid19 OR #pandemic OR #chinavirus OR #chinesevirus' from 2020-01-01 to 2020-05-01
Downloaded 3100 tweets...



In [7]:
# Search strings for tweets that contain anti-Asian slurs together with derogatory
# names for the virus; consumed by the download cell that follows (single query).
# The backslash-newline sits inside the string literal, so the two lines are joined
# and the indentation of the second line becomes part of the query text.
# NOTE(review): AND/OR are not grouped with parentheses; how they are combined is
# decided by the search backend — confirm the query matches the intent.
racist_queries = ['"ching chong" OR chink OR chingchong OR #chingchong OR #chink\
                    AND "kung flu" OR "kung fu flu" OR "ching chong virus" OR #kungflu OR #kungfuflu OR #chingchongvirus']

In [8]:
# Download the tweets matching `racist_queries` for the whole study period
# (2020-01-01 to 2020-05-01) and store them as JSON Lines under data/all_time.

# Rebinds the name `datetime` to the module; the import cell at the top leaves it
# bound to the `datetime.datetime` class.
# NOTE(review): presumably the helpers loaded from functions.ipynb need the module
# form — confirm, and if so fix the import cell instead of re-importing here.
import datetime

DATA_DIR = 'data/all_time'

# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(DATA_DIR, exist_ok=True)

since = "2020-01-01"
until = "2020-05-01"  # NOTE(review): presumably an exclusive upper bound — confirm in download_query_tweets

# The output path does not depend on the query, so the results of all queries are
# gathered first and the file is written once. Opening it in 'w' mode once per
# query would keep only the last query's tweets.
outfilename = "{}/{}_{}_to_{}.json".format(DATA_DIR, 'racist_tweets', since, until)

tweet_list = []
for query in racist_queries:
    query_tweets = download_query_tweets(query, since, until)
    print('Downloaded {} tweets...\n'.format(len(query_tweets)))
    tweet_list.extend(query_tweets)

with open(outfilename,'w') as out:
    for tweet in tweet_list:
        # `default=jsonconverter` is called by json for any value it cannot
        # serialise on its own.
        out.write(json.dumps(tweet, default=jsonconverter) + '\n')

Downloading tweets for query: '"ching chong" OR chink OR chingchong OR #chingchong OR #chink                    AND "kung flu" OR "kung fu flu" OR "ching chong virus" OR #kungflu OR #kungfuflu OR #chingchongvirus' from 2020-01-01 to 2020-05-01
Downloaded 1840 tweets...



**Donald Trump's tweets**

In [12]:
# Fetch the tweets download_user_tweets returns for this account and search text
# over the study period, then save them as JSON Lines named after the account.
username, query = 'realdonaldtrump', 'Chinese Virus'
since, until = "2020-01-01", "2020-05-01"

DATA_DIR='data/all_time'
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

tweet_list = download_user_tweets(username, query, since, until)
print('Downloaded {} tweets...\n'.format(len(tweet_list)))

outfilename = "{}/{}_{}_to_{}.json".format(DATA_DIR, username, since, until)
with open(outfilename,'w') as out:
    for tweet in tweet_list:
        # print() appends the newline that separates the JSON documents.
        print(json.dumps(tweet, default=jsonconverter), file=out)

Downloading for realdonaldtrump and Chinese Virus
Downloaded 8 tweets...



**Mike Pompeo's Tweets**

In [6]:
# Fetch the tweets download_user_tweets returns for this account and search text
# over the study period, then save them as JSON Lines named after the account.
username, query = 'secpompeo', 'Wuhan Virus OR #WuhanVirus'
since, until = "2020-01-01", "2020-05-01"

DATA_DIR='data/all_time'
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

tweet_list = download_user_tweets(username, query, since, until)
print('Downloaded {} tweets...\n'.format(len(tweet_list)))

outfilename = "{}/{}_{}_to_{}.json".format(DATA_DIR, username, since, until)
with open(outfilename,'w') as out:
    for tweet in tweet_list:
        # print() appends the newline that separates the JSON documents.
        print(json.dumps(tweet, default=jsonconverter), file=out)

Downloading for secpompeo and Wuhan Virus
Downloaded 8 tweets...



## News Articles

<b>January</b>

In [15]:
# Unpack the January news archive: every member whose path contains 'plaintext' is
# copied to data/text1, and every '.csv' member is read with pandas and merged into
# data/corpus_index1.json.
ZIPFILE_NAME1='christyq-b862-s2020-04-20.zip'
text_dir = os.path.join('data', 'text1')

# Create the target folder itself. Guarding on 'data' alone would skip this step
# whenever 'data' already exists (the tweet cells create it), and the open() below
# would then fail because data/text1 is missing.
os.makedirs(text_dir, exist_ok=True)

manifest_data=[]   # one DataFrame per CSV member found in the archive
text_file_cnt=0    # number of text files written

with zipfile.ZipFile(ZIPFILE_NAME1) as zf:
    for f in zf.infolist():
        if f.is_dir():
            # Directory entries have no content and an empty basename.
            continue

        if 'plaintext' in f.filename:
            # Drop the archive's folder structure; keep only the file name.
            fn=os.path.basename(f.filename)
            print('Extracting', fn)
            # Close both the archive member and the output file deterministically.
            with zf.open(f) as src, open(os.path.join(text_dir, fn),'wb') as out:
                shutil.copyfileobj(src, out)
            text_file_cnt+=1

        if f.filename.endswith('.csv'):
            with zf.open(f) as src:
                mdf = pd.read_csv(src)
            manifest_data.append(mdf)

# Raises ValueError if the archive contained no CSV member.
mdf=pd.concat(manifest_data)
mdf.to_json('data/corpus_index1.json', orient='records')

Extracting cbs-news-uc-berkeley-deletes-770ecfdc-8629-11ea-8212-0242ac160002.txt
Extracting miami-herald-uc-berkeley-apologizes-77182bea-8629-11ea-8212-0242ac160002.txt
Extracting greenville-news-coronavirus-spreads-racism_-7713f034-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-nation-blogs-this-lunar-new-772bdfa0-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-nation-blogs-this-lunar-new-77261a02-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-nation-blogs-a-fire-may-77204be0-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-jeff-lewis-apologizes-7736b4f2-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-university-of-california-773310fe-8629-11ea-8212-0242ac160002.txt


In [23]:
# Debugging peek at `fn`, the basename most recently assigned inside an extraction loop.
# NOTE(review): the value depends on which extraction cell ran last, so this cell is
# not reproducible under Restart & Run All — consider removing it.
fn

'thai-news-service-china_-chinese-americans-d85d59f2-9105-11ea-8d39-0242ac160002.txt'

<b>February</b>

In [16]:
# Unpack the February news archive: every member whose path contains 'plaintext' is
# copied to data/text2, and every '.csv' member is read with pandas and merged into
# data/corpus_index2.json.
ZIPFILE_NAME2='christyq-b863-s2020-04-20.zip'
text_dir = os.path.join('data', 'text2')

# Create the target folder itself. Guarding on 'data' alone would skip this step
# whenever 'data' already exists (the tweet cells create it), and the open() below
# would then fail because data/text2 is missing.
os.makedirs(text_dir, exist_ok=True)

manifest_data=[]   # one DataFrame per CSV member found in the archive
text_file_cnt=0    # number of text files written

with zipfile.ZipFile(ZIPFILE_NAME2) as zf:
    for f in zf.infolist():
        if f.is_dir():
            # Directory entries have no content and an empty basename.
            continue

        if 'plaintext' in f.filename:
            # Drop the archive's folder structure; keep only the file name.
            fn=os.path.basename(f.filename)
            print('Extracting', fn)
            # Close both the archive member and the output file deterministically.
            with zf.open(f) as src, open(os.path.join(text_dir, fn),'wb') as out:
                shutil.copyfileobj(src, out)
            text_file_cnt+=1

        if f.filename.endswith('.csv'):
            with zf.open(f) as src:
                mdf = pd.read_csv(src)
            manifest_data.append(mdf)

# Raises ValueError if the archive contained no CSV member.
mdf=pd.concat(manifest_data)
mdf.to_json('data/corpus_index2.json', orient='records')

Extracting ktvu-coronavirus-scare-affecting-7762fc06-8629-11ea-8212-0242ac160002.txt
Extracting newsy-coronavirus-outbreak-reportedly-775c0838-8629-11ea-8212-0242ac160002.txt
Extracting forbes.com-how-covid-19-coronavirus-77e7ba04-8629-11ea-8212-0242ac160002.txt
Extracting laist-rage-tweets_-jokes-and-77c79e0e-8629-11ea-8212-0242ac160002.txt
Extracting china.org.cn-xinhua-headlines_-xenophobia-77d16a1a-8629-11ea-8212-0242ac160002.txt
Extracting msn-south-africa-fear-of-coronavirus-777100b2-8629-11ea-8212-0242ac160002.txt
Extracting abc7-la-leaders-speak-77bc0210-8629-11ea-8212-0242ac160002.txt
Extracting nbc-los-angeles-restaurants-in-la’s-774cc364-8629-11ea-8212-0242ac160002.txt
Extracting abc7-coronavirus_-local-boy-77c4708a-8629-11ea-8212-0242ac160002.txt
Extracting sina-xenophobia-against-asians-776c5620-8629-11ea-8212-0242ac160002.txt
Extracting business-insider-india-the-wuhan-coronavirus-778412c4-8629-11ea-8212-0242ac160002.txt
Extracting southwest-times-record-commentary_-i-did

Extracting the-easterner_-eastern-washington-university-racism-isn_t-a-7978149a-8629-11ea-8212-0242ac160002.txt
Extracting daily-trojan_-university-of-southern-california-as-coronavirus-grows-79893bf8-8629-11ea-8212-0242ac160002.txt
Extracting daily-princetonian_-princeton-university-racism-and-the-79738e02-8629-11ea-8212-0242ac160002.txt
Extracting washington-post-blogs-the-coronavirus-and-79939bfc-8629-11ea-8212-0242ac160002.txt
Extracting washington-post-blogs-on-america_s-college-7995d82c-8629-11ea-8212-0242ac160002.txt
Extracting washington-post-blogs-the-2003-sars-798fe9bc-8629-11ea-8212-0242ac160002.txt
Extracting ce-noticias-financieras-english-racism-and-the-7999e5e8-8629-11ea-8212-0242ac160002.txt
Extracting china-daily---us-edition-racial-prejudice-rears-79a5de8e-8629-11ea-8212-0242ac160002.txt
Extracting china-daily-european-edition-asians-battle-outbreak-79b281a2-8629-11ea-8212-0242ac160002.txt
Extracting financial-times-(london_-england)-coronavirus-makes-_sniffling-79bad

<b>March</b>

In [17]:
# Unpack the March news archive: every member whose path contains 'plaintext' is
# copied to data/text3, and every '.csv' member is read with pandas and merged into
# data/corpus_index3.json.
ZIPFILE_NAME3='christyq-b864-s2020-04-20.zip'
text_dir = os.path.join('data', 'text3')

# Create the target folder itself. Guarding on 'data' alone would skip this step
# whenever 'data' already exists (the tweet cells create it), and the open() below
# would then fail because data/text3 is missing.
os.makedirs(text_dir, exist_ok=True)

manifest_data=[]   # one DataFrame per CSV member found in the archive
text_file_cnt=0    # number of text files written

with zipfile.ZipFile(ZIPFILE_NAME3) as zf:
    for f in zf.infolist():
        if f.is_dir():
            # Directory entries have no content and an empty basename.
            continue

        if 'plaintext' in f.filename:
            # Drop the archive's folder structure; keep only the file name.
            fn=os.path.basename(f.filename)
            print('Extracting', fn)
            # Close both the archive member and the output file deterministically.
            with zf.open(f) as src, open(os.path.join(text_dir, fn),'wb') as out:
                shutil.copyfileobj(src, out)
            text_file_cnt+=1

        if f.filename.endswith('.csv'):
            with zf.open(f) as src:
                mdf = pd.read_csv(src)
            manifest_data.append(mdf)

# Raises ValueError if the archive contained no CSV member.
mdf=pd.concat(manifest_data)
mdf.to_json('data/corpus_index3.json', orient='records')

Extracting east-bay-times-coronavirus-cartoons_-trump-4003aba2-8629-11ea-8212-0242ac160002.txt
Extracting ecns.cn-washington-under-fire-3fe900f4-8629-11ea-8212-0242ac160002.txt
Extracting xinhua-news-agency-spotlight_-u.s.-politicians_-40ac6e54-8629-11ea-8212-0242ac160002.txt
Extracting msn-south-africa-i’m-chinese.-that-3eeb605c-8629-11ea-8212-0242ac160002.txt
Extracting xinhua-news-agency-los-angeles-mayor-409f1b82-8629-11ea-8212-0242ac160002.txt
Extracting hr-magazine-coronavirus-and-racism_-41ceba08-8629-11ea-8212-0242ac160002.txt
Extracting click2houston.com-as-coronavirus-spreads_-4173f83e-8629-11ea-8212-0242ac160002.txt
Extracting alternet.org-xenophobia-and-racism-3eb3ec8a-8629-11ea-8212-0242ac160002.txt
Extracting latino-rebels-nahj-stands-with-40b051fe-8629-11ea-8212-0242ac160002.txt
Extracting salt-lake-tribune-3-news-live-4047e24a-8629-11ea-8212-0242ac160002.txt
Extracting newsr.in-coronavirus-increases-racist-41833eac-8629-11ea-8212-0242ac160002.txt
Extracting newsbusters-

Extracting the-moderate-voice-trump_-tropes_-and-3f24d15c-8629-11ea-8212-0242ac160002.txt
Extracting greenville-news-calling-coronavirus-wuhan-3f8cea80-8629-11ea-8212-0242ac160002.txt
Extracting firstpost.com-‘not-a-racist-4026f990-8629-11ea-8212-0242ac160002.txt
Extracting alternet.org-cnn-reporter-subjected-419adeb8-8629-11ea-8212-0242ac160002.txt
Extracting npr-new-site-collects-4258d4ea-8629-11ea-8212-0242ac160002.txt
Extracting greenville-news-yang-and-adl-3ee83c10-8629-11ea-8212-0242ac160002.txt
Extracting south-africa-news-shafaqna-donald-trump-says-413cdd18-8629-11ea-8212-0242ac160002.txt
Extracting idaho-state-journal-editorial-roundup_-us-3f1fb2a8-8629-11ea-8212-0242ac160002.txt
Extracting yahoo_-singapore-photographer-captures-trump_s-4038bb6c-8629-11ea-8212-0242ac160002.txt
Extracting newsbusters-shocker_-hollywood-reporter-41ef38d2-8629-11ea-8212-0242ac160002.txt
Extracting cnn-trump-and-beijing-42d64b64-8629-11ea-8212-0242ac160002.txt
Extracting china.org.cn-us-politician

Extracting cnn-women-trailblazers-who-4139810e-8629-11ea-8212-0242ac160002.txt
Extracting yahoo_-singapore-10-of-trump_s-3fde7b70-8629-11ea-8212-0242ac160002.txt
Extracting gates-of-vienna-gates-of-vienna-40417d1a-8629-11ea-8212-0242ac160002.txt
Extracting huffington-post-canada-for-asian-americans_-42244fd6-8629-11ea-8212-0242ac160002.txt
Extracting laist-coronavirus-fears-drive-3f3e049c-8629-11ea-8212-0242ac160002.txt
Extracting the-hill-strengthening-immunity-against-3f9b8bd0-8629-11ea-8212-0242ac160002.txt
Extracting cbs-news-trump-criticized-for-420b9202-8629-11ea-8212-0242ac160002.txt
Extracting mediaite.com-cnn-correspondent_-man-3ff15b5a-8629-11ea-8212-0242ac160002.txt
Extracting abc-online-chinese-official-suggests-3fa059d0-8629-11ea-8212-0242ac160002.txt
Extracting emirates-business.ae-trump-eases-blame-41558e8a-8629-11ea-8212-0242ac160002.txt
Extracting kron4-xenophobia-against-chinese-410b32cc-8629-11ea-8212-0242ac160002.txt
Extracting blaze.com-asian-americans-facing-3e8e9

Extracting newstex-blogs-gothamist-asian-man-says-4493e100-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-huffington-post-gop-senator-scorched-4380a17c-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-daily-orange-professors-host-online-451779d4-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-newsbusters.org-new-york-times_-44aa8ce8-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-newsbusters.org-nbc-&-cbs-43f34362-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-international-business-times-news-trump-to-_protect_-4440b598-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-thenextweb.com-twitter-sees-900_-43a9ee92-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-volokh-conspiracy-it_s-time-for-442e1bea-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-blue-virginia-sen.-mark-warner-44e4d998-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-international-business-times-news-jeremy-lin-blasts-43fea964-862

Extracting targeted-news-service-national-organization-of-45afd7a6-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-people-for-the-4574f5aa-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-rep.-moulton_-statement-45d28e2c-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-n.y.-a.g.-james-4645386e-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-council-on-american-islamic-4633fee6-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-council-on-american-islamic-458eafa4-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-grand-valley-state-4579bd10-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-new-york-immigration-4584c1ec-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-southern-poverty-law-45ec6428-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-chairman-jeffries-on-45e776e8-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-japanese-

Extracting states-news-service-meng-introduces-resolution-47be1030-8629-11ea-8212-0242ac160002.txt
Extracting states-news-service-while-democrats-spin-4723b648-8629-11ea-8212-0242ac160002.txt
Extracting states-news-service-coronavirus-and-stigma_-47316824-8629-11ea-8212-0242ac160002.txt
Extracting states-news-service-from-cat_-staying-47c208ac-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-msnbc-relieved-as-481c001e-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-video_-media-suffer-48252716-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-new-york-times_-482b6b76-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-nightline-blames-trump-48110d9e-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-ironic_-ny-times-47d659c4-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-blue-virginia-sen.-mark-warner-47e7227c

Extracting the-independent-(united-kingdom)-coronavirus_-hundreds-of-4abd1092-8629-11ea-8212-0242ac160002.txt
Extracting the-independent-(united-kingdom)-coronavirus_-lost-star-4ab24932-8629-11ea-8212-0242ac160002.txt
Extracting the-independent-(united-kingdom)-coronavirus_-karen-o-4aa4dc0c-8629-11ea-8212-0242ac160002.txt
Extracting the-independent-(united-kingdom)-to-all-the-4ab906d2-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-jeannie-mai-speaks-4af482c0-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-meghan-mccain-praises-4aec13ce-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-california-gun-store-4ae22bac-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-daniel-dae-kim_-4ad182ac-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-_why-not-call-4af0e156-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-fox-news-host-4ad8db2e-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-michael-bane_s-coronavirus-4add92fe-8629-11ea-8212-0242ac160002.txt
Extrac

<b>April</b>

In [18]:
# Unpack the April news archive: every member whose path contains 'plaintext' is
# copied to data/text4, and every '.csv' member is read with pandas and merged into
# data/corpus_index4.json.
ZIPFILE_NAME4='christyq-b902-s2020-05-08.zip'
text_dir = os.path.join('data', 'text4')

# Create the target folder itself. Guarding on 'data' alone would skip this step
# whenever 'data' already exists (the tweet cells create it), and the open() below
# would then fail because data/text4 is missing.
os.makedirs(text_dir, exist_ok=True)

manifest_data=[]   # one DataFrame per CSV member found in the archive
text_file_cnt=0    # number of text files written

with zipfile.ZipFile(ZIPFILE_NAME4) as zf:
    for f in zf.infolist():
        if f.is_dir():
            # Directory entries have no content and an empty basename.
            continue

        if 'plaintext' in f.filename:
            # Drop the archive's folder structure; keep only the file name.
            fn=os.path.basename(f.filename)
            print('Extracting', fn)
            # Close both the archive member and the output file deterministically.
            with zf.open(f) as src, open(os.path.join(text_dir, fn),'wb') as out:
                shutil.copyfileobj(src, out)
            text_file_cnt+=1

        if f.filename.endswith('.csv'):
            with zf.open(f) as src:
                mdf = pd.read_csv(src)
            manifest_data.append(mdf)

# Raises ValueError if the archive contained no CSV member.
mdf=pd.concat(manifest_data)
mdf.to_json('data/corpus_index4.json', orient='records')

Extracting public-radio-international-xenophobia-‘takes-its-d382778c-9105-11ea-8d39-0242ac160002.txt
Extracting wbez-91.5-chicago-coronavirus-in-illinois_-d29c7a2a-9105-11ea-8d39-0242ac160002.txt
Extracting outlook-india-coronavirus_-former-nba-d1f18142-9105-11ea-8d39-0242ac160002.txt
Extracting southwest-times-record-lululemon-apologizes-after-d2cbaf5c-9105-11ea-8d39-0242ac160002.txt
Extracting abs-cbn-news-jeremy-lin-pledges-d394f754-9105-11ea-8d39-0242ac160002.txt
Extracting scpr-_a-perfect-storm__-d2b0667a-9105-11ea-8d39-0242ac160002.txt
Extracting business-standard-india-jeremy-lin-pledges-d28f7442-9105-11ea-8d39-0242ac160002.txt
Extracting npr-q&a-with-leonardo-d202f8aa-9105-11ea-8d39-0242ac160002.txt
Extracting colorado-springs-gazette-reports_-pandemic-giving-d228a3a2-9105-11ea-8d39-0242ac160002.txt
Extracting miami-herald-tzi-ma-is-d366c3ac-9105-11ea-8d39-0242ac160002.txt
Extracting jakarta-post-young-people-share-d3aec99a-9105-11ea-8d39-0242ac160002.txt
Extracting wbez-91.5-c

Extracting business-insider-india-people-are-accusing-d25a8f0c-9105-11ea-8d39-0242ac160002.txt
Extracting yahoo_-singapore-i-shouldn_t-have-d255ab0e-9105-11ea-8d39-0242ac160002.txt
Extracting greenville-news-asian-actors-on-d3911170-9105-11ea-8d39-0242ac160002.txt
Extracting salt-lake-tribune-shirley-ann-higuchi_-d3b06674-9105-11ea-8d39-0242ac160002.txt
Extracting theweek-why-comedian-bill-d2788c5a-9105-11ea-8d39-0242ac160002.txt
Extracting cnn-china-won_t-let-d32d1666-9105-11ea-8d39-0242ac160002.txt
Extracting wbez-91.5-chicago-coronavirus-in-illinois_-d24854f4-9105-11ea-8d39-0242ac160002.txt
Extracting wonkette-the-pandemic-will-d21ebb26-9105-11ea-8d39-0242ac160002.txt
Extracting wfaa.com-john-cho-pens-d239c0e2-9105-11ea-8d39-0242ac160002.txt
Extracting naked-capitalism-inequality-and-the-d384d70c-9105-11ea-8d39-0242ac160002.txt
Extracting wbez-91.5-chicago-coronavirus-in-illinois_-d30b7b00-9105-11ea-8d39-0242ac160002.txt
Extracting msn-south-africa-chinese-official_-claims-d2e3a030-

Extracting netindia123.com-john-cho-on-d1edfbe4-9105-11ea-8d39-0242ac160002.txt
Extracting colorado-springs-gazette-racial-discrimination-against-d1ffb6f4-9105-11ea-8d39-0242ac160002.txt
Extracting japan-today-from-guns-to-d3253144-9105-11ea-8d39-0242ac160002.txt
Extracting downwithtyranny_-midnight-meme-of-d2dbe138-9105-11ea-8d39-0242ac160002.txt
Extracting informed-comment-inequality-and-the-d3a9961e-9105-11ea-8d39-0242ac160002.txt
Extracting china.org.cn-lin-pledges-up-d32886dc-9105-11ea-8d39-0242ac160002.txt
Extracting taxi-lululemon-gets-fried-d232198c-9105-11ea-8d39-0242ac160002.txt
Extracting alternet.org-how-the-language-d361cf3c-9105-11ea-8d39-0242ac160002.txt
Extracting cbs-chicago.com-‘worried-about-disproportionate-d31b3766-9105-11ea-8d39-0242ac160002.txt
Extracting business-insider-india-_the-country-is-d392fe36-9105-11ea-8d39-0242ac160002.txt
Extracting blaze.com-clothing-retailer-fires-d3150210-9105-11ea-8d39-0242ac160002.txt
Extracting abc-7-news-tv-check-in-with-d2ca0e

Extracting newstex-blogs-newsbusters.org-pbs-anchor-nawaz-d47184c6-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-futurity.org-these-words-and-d4043b82-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-foreign-policy-after-the-coronavirus_-d43276dc-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-newsbusters.org-washpost-cites-soros-affiliated-d43ac7d8-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-axios-the-coronavirus-is-d421b45a-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-the-huffington-post-in-netflix_s-_the-d47b74d6-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-talking-points-memo-gopers-intro-bill-d4476524-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-phil_s-stock-world-lululemon-fires-art-d44b970c-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-law-at-the-end-of-the-day-register-now-for-d4790f7a-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-the-business-insider-senate-democrats-call-d4636c4

Extracting daily-targum_-rutgers-university-coronavirus-pandemic-should-d4d6df38-9105-11ea-8d39-0242ac160002.txt
Extracting the-johns-hopkins-news-letter_-johns-hopkins-university-oma-hosts-virtual-d4dc9d6a-9105-11ea-8d39-0242ac160002.txt
Extracting the-horizon_-westmont-college-what_s-in-a-d4f35f00-9105-11ea-8d39-0242ac160002.txt
Extracting university-times_-california-state-university---los-angeles-coronavirus-and-racism_-d49c9828-9105-11ea-8d39-0242ac160002.txt
Extracting pipe-dream_-suny-at-binghamton-_what-do-i-d4e8803a-9105-11ea-8d39-0242ac160002.txt
Extracting daily-trojan_-university-of-southern-california-discrimination-against-asian-d512b094-9105-11ea-8d39-0242ac160002.txt
Extracting arches_-mount-mary-college-asian-american-students-d4b91ae8-9105-11ea-8d39-0242ac160002.txt
Extracting targeted-news-service-advancing-justice-_-d556e782-9105-11ea-8d39-0242ac160002.txt
Extracting targeted-news-service-sen.-cantwell_-colleagues-d5808a10-9105-11ea-8d39-0242ac160002.txt
Extracting 

Extracting states-news-service-paul-robeson-cultural-d5e84510-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-advocate-in-place-d5cefccc-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-rosen-statement-on-d5bb7576-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-let_s-stop-the-d5cbcd9a-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-trump-adds-to-d5e6737a-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-aclu-calls-on-d59f7b46-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-in-covid-19-crisis_-d5de02f8-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-rosen-calls-on-d5c45cc2-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-emily_s-list-recognizes-d5d9279c-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-new-cap-report-d5c18ab0-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-coronavirus-and-stigma_-d5965836-9105-11ea-8d39-0242ac160002.txt
Extracti

Extracting us-fed-news-murphy_-senate-dems-d77501d4-9105-11ea-8d39-0242ac160002.txt
Extracting us-fed-news-cantwell_-colleagues-call-d7819b24-9105-11ea-8d39-0242ac160002.txt
Extracting us-fed-news-van-hollen_-senators-d764a686-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinfo-united-states-_-d79fb654-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinfo-united-states-_-d7962f62-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinfo-united-states-_-d78f46b6-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinfo-united-states-_-d79879de-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinfo-united-states-_-d78cfa00-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinfo-united-states-_-d78abfba-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinfo-united-states-_-d79d6db8-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinfo-united-states-_-d79ac37e-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinfo-united-states-_-d7913228-9105-11ea-8d39-0242ac160002.txt
Extracting tendersinf

**All-time**

In [20]:
# Unpack all four monthly news archives into one corpus: every member whose path
# contains 'plaintext' is copied to data/text_all, and every '.csv' member is read
# with pandas and merged into data/corpus_index_all.json.
ZIPFILE_NAMES= ['christyq-b862-s2020-04-20.zip','christyq-b863-s2020-04-20.zip','christyq-b864-s2020-04-20.zip','christyq-b902-s2020-05-08.zip']
text_dir = os.path.join('data', 'text_all')

# Create the target folder itself. Guarding on 'data' alone would skip this step
# whenever 'data' already exists (the tweet cells create it), and the open() below
# would then fail because data/text_all is missing.
os.makedirs(text_dir, exist_ok=True)

manifest_data=[]   # one DataFrame per CSV member, accumulated across all archives
text_file_cnt=0    # number of text files written

for file in ZIPFILE_NAMES:
    with zipfile.ZipFile(file) as zf:
        for f in zf.infolist():
            if f.is_dir():
                # Directory entries have no content and an empty basename.
                continue

            if 'plaintext' in f.filename:
                # Drop the archive's folder structure; keep only the file name.
                # Members that share a basename across archives overwrite each other.
                fn=os.path.basename(f.filename)
                print('Extracting', fn)
                # Close both the archive member and the output file deterministically.
                with zf.open(f) as src, open(os.path.join(text_dir, fn),'wb') as out:
                    shutil.copyfileobj(src, out)
                text_file_cnt+=1

            if f.filename.endswith('.csv'):
                with zf.open(f) as src:
                    mdf = pd.read_csv(src)
                manifest_data.append(mdf)

# Build and write the combined index once, after every archive has been read,
# rather than rewriting the JSON file after each archive.
# Raises ValueError if none of the archives contained a CSV member.
mdf=pd.concat(manifest_data)
mdf.to_json('data/corpus_index_all.json', orient='records')

Extracting cbs-news-uc-berkeley-deletes-770ecfdc-8629-11ea-8212-0242ac160002.txt
Extracting miami-herald-uc-berkeley-apologizes-77182bea-8629-11ea-8212-0242ac160002.txt
Extracting greenville-news-coronavirus-spreads-racism_-7713f034-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-nation-blogs-this-lunar-new-772bdfa0-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-nation-blogs-this-lunar-new-77261a02-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-nation-blogs-a-fire-may-77204be0-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-jeff-lewis-apologizes-7736b4f2-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-university-of-california-773310fe-8629-11ea-8212-0242ac160002.txt
Extracting ktvu-coronavirus-scare-affecting-7762fc06-8629-11ea-8212-0242ac160002.txt
Extracting newsy-coronavirus-outbreak-reportedly-775c0838-8629-11ea-8212-0242ac160002.txt
Extracting forbes.com-how-covid-19-coronavirus-77e7ba04-8629-11ea-8212-0242ac160002.txt
Extractin

Extracting the-easterner_-eastern-washington-university-racism-isn_t-a-7978149a-8629-11ea-8212-0242ac160002.txt
Extracting daily-trojan_-university-of-southern-california-as-coronavirus-grows-79893bf8-8629-11ea-8212-0242ac160002.txt
Extracting daily-princetonian_-princeton-university-racism-and-the-79738e02-8629-11ea-8212-0242ac160002.txt
Extracting washington-post-blogs-the-coronavirus-and-79939bfc-8629-11ea-8212-0242ac160002.txt
Extracting washington-post-blogs-on-america_s-college-7995d82c-8629-11ea-8212-0242ac160002.txt
Extracting washington-post-blogs-the-2003-sars-798fe9bc-8629-11ea-8212-0242ac160002.txt
Extracting ce-noticias-financieras-english-racism-and-the-7999e5e8-8629-11ea-8212-0242ac160002.txt
Extracting china-daily---us-edition-racial-prejudice-rears-79a5de8e-8629-11ea-8212-0242ac160002.txt
Extracting china-daily-european-edition-asians-battle-outbreak-79b281a2-8629-11ea-8212-0242ac160002.txt
Extracting financial-times-(london_-england)-coronavirus-makes-_sniffling-79bad

Extracting greenville-news-trump-distances-himself-3f03ecf8-8629-11ea-8212-0242ac160002.txt
Extracting wamu88.5-new-site-collects-422e6656-8629-11ea-8212-0242ac160002.txt
Extracting salt-lake-tribune-6-news-live-3ec07838-8629-11ea-8212-0242ac160002.txt
Extracting foreign-affairs.co.nz-mil-osi-global_-anti-asian-41d8d6d2-8629-11ea-8212-0242ac160002.txt
Extracting foreign-affairs.co.nz-mil-osi-usa_-meng-4280c798-8629-11ea-8212-0242ac160002.txt
Extracting cbs-boston-‘not-a-time-40955192-8629-11ea-8212-0242ac160002.txt
Extracting daily-beast-oan-reporter-asks-3ed122c8-8629-11ea-8212-0242ac160002.txt
Extracting sina-spotlight_-u.s.-politicians-3f1a6e56-8629-11ea-8212-0242ac160002.txt
Extracting politico-bloomberg-weighs-dropping-3fc556c2-8629-11ea-8212-0242ac160002.txt
Extracting mothership.sg-jeremy-lin-calls-41dd03ce-8629-11ea-8212-0242ac160002.txt
Extracting wcvb.com-asian-americans-call-on-42cb50ba-8629-11ea-8212-0242ac160002.txt
Extracting kdfw-fox-4-news-trump-dubs-covid-19-41b3075e-8

Extracting business-insider-india-a-virus-originating-421f8d02-8629-11ea-8212-0242ac160002.txt
Extracting firstpost.com-coronavirus-outbreak-live-3e4adac4-8629-11ea-8212-0242ac160002.txt
Extracting foreign-affairs.co.nz-mil-osi-global_-anti-asian-430fe0d6-8629-11ea-8212-0242ac160002.txt
Extracting cbs-news-nyc-subway-rider-3e6ff5de-8629-11ea-8212-0242ac160002.txt
Extracting daily-beast-tucker_-trump-at-3f0c12d4-8629-11ea-8212-0242ac160002.txt
Extracting the-hill-asian-american-lawmaker-warns-3e929f12-8629-11ea-8212-0242ac160002.txt
Extracting stltoday.com-letter_-coronavirus-hysteria-42f19216-8629-11ea-8212-0242ac160002.txt
Extracting kunm-new-site-collects-4313f158-8629-11ea-8212-0242ac160002.txt
Extracting alternet.org-sociology-professor_-‘people-4217a510-8629-11ea-8212-0242ac160002.txt
Extracting conversation-au-anti-asian-racism-during-42a8031c-8629-11ea-8212-0242ac160002.txt
Extracting cnn-women-trailblazers-who-4139810e-8629-11ea-8212-0242ac160002.txt
Extracting yahoo_-singapore

Extracting newstex-blogs-international-business-times-news-lana-condor-responds-43d1f8a6-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-mediaite-asian-american-journalist-says-44c9547a-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-multicultclassics-14961_-pride-train-452a492e-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-huffington-post-for-asian-americans_-43c5fbfa-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-economist_-prospero-the-power-of-44a42e66-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-capitol-fax-blog-please_-don_t-be-44571518-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-business-insider-dr.-fauci-said-43be52f6-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-international-business-times-news-racist-acts-surge-447dafb6-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-the-huffington-post-self-care-tips-for-437849be-8629-11ea-8212-0242ac160002.txt
Extracting newstex-blogs-newsbu

Extracting targeted-news-service-n.y.-a.g.-james-4645386e-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-council-on-american-islamic-4633fee6-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-council-on-american-islamic-458eafa4-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-grand-valley-state-4579bd10-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-new-york-immigration-4584c1ec-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-southern-poverty-law-45ec6428-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-chairman-jeffries-on-45e776e8-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-japanese-american-citizens-4600f1cc-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-marymount-manhattan-college_-456b0694-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-oca-and-over-45dc851c-8629-11ea-8212-0242ac160002.txt
Extracting targeted-news-service-new-

Extracting state-capital-newsfeed-newsbusters.org-msnbc-relieved-as-481c001e-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-video_-media-suffer-48252716-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-new-york-times_-482b6b76-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-nightline-blames-trump-48110d9e-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-ironic_-ny-times-47d659c4-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-blue-virginia-sen.-mark-warner-47e7227c-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-florida-politics-donald-trump-dubs-47dcfd4c-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-the-capitol-fax-blog(-illinois)-please_-don_t-be-483bcfe8-8629-11ea-8212-0242ac160002.txt
Extracting state-capital-newsfeed-western-free-press(-arizona)-trump_s-retort-to-480bcabe-8629-11ea-8212-0242ac

Extracting mailonline-california-gun-store-4ae22bac-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-daniel-dae-kim_-4ad182ac-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-_why-not-call-4af0e156-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-fox-news-host-4ad8db2e-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-michael-bane_s-coronavirus-4add92fe-8629-11ea-8212-0242ac160002.txt
Extracting mailonline-daniel-dae-kim-4ae6d396-8629-11ea-8212-0242ac160002.txt
Extracting thestar.com-trump-tweeted-about-4b28178e-8629-11ea-8212-0242ac160002.txt
Extracting thestar.com-trump-tweeted-about-4b15f2e8-8629-11ea-8212-0242ac160002.txt
Extracting thestar.com-trump_s-continued-use-4b230b4a-8629-11ea-8212-0242ac160002.txt
Extracting thestar.com-trump-tweeted-about-4b1f036a-8629-11ea-8212-0242ac160002.txt
Extracting thestar.com-shree-paradkar_-_you-4b19f064-8629-11ea-8212-0242ac160002.txt
Extracting thestar.com-trump-tweeted-about-4b10ec76-8629-11ea-8212-0242ac160002.txt
Extracti

Extracting yahoo_-singapore-‘master-of-none’-d2ea03bc-9105-11ea-8d39-0242ac160002.txt
Extracting tubefilter-warner-bros.’-digital-d2450e0c-9105-11ea-8d39-0242ac160002.txt
Extracting business-standard-india-covid-19-pandemic-reminding-d37a7244-9105-11ea-8d39-0242ac160002.txt
Extracting dcist-graffiti-with-anti-chinese-d24a290a-9105-11ea-8d39-0242ac160002.txt
Extracting elder-of-ziyon---israel-news-04_07-links-pt2_-d222bfe6-9105-11ea-8d39-0242ac160002.txt
Extracting sangai-express-stop-discrimination-and-d3a1bb6a-9105-11ea-8d39-0242ac160002.txt
Extracting outlook-india-john-cho-on-d28dbf1c-9105-11ea-8d39-0242ac160002.txt
Extracting wls-am-unemployment-claims-from-d1da78f8-9105-11ea-8d39-0242ac160002.txt
Extracting yahoo_-singapore-racist-trump-ad-d201c5de-9105-11ea-8d39-0242ac160002.txt
Extracting east-bay-times-‘race-&-coronavirus_-d349605a-9105-11ea-8d39-0242ac160002.txt
Extracting nbc-philadelphia-asian-americans-report-d3639f2e-9105-11ea-8d39-0242ac160002.txt
Extracting yahoo_-singap

Extracting abc-7-news-tv-coronavirus-and-racism_-d3a834ae-9105-11ea-8d39-0242ac160002.txt
Extracting netindia123.com-john-cho-on-d1edfbe4-9105-11ea-8d39-0242ac160002.txt
Extracting colorado-springs-gazette-racial-discrimination-against-d1ffb6f4-9105-11ea-8d39-0242ac160002.txt
Extracting japan-today-from-guns-to-d3253144-9105-11ea-8d39-0242ac160002.txt
Extracting downwithtyranny_-midnight-meme-of-d2dbe138-9105-11ea-8d39-0242ac160002.txt
Extracting informed-comment-inequality-and-the-d3a9961e-9105-11ea-8d39-0242ac160002.txt
Extracting china.org.cn-lin-pledges-up-d32886dc-9105-11ea-8d39-0242ac160002.txt
Extracting taxi-lululemon-gets-fried-d232198c-9105-11ea-8d39-0242ac160002.txt
Extracting alternet.org-how-the-language-d361cf3c-9105-11ea-8d39-0242ac160002.txt
Extracting cbs-chicago.com-‘worried-about-disproportionate-d31b3766-9105-11ea-8d39-0242ac160002.txt
Extracting business-insider-india-_the-country-is-d392fe36-9105-11ea-8d39-0242ac160002.txt
Extracting blaze.com-clothing-retailer-fi

Extracting newstex-blogs-talking-points-memo-gopers-intro-bill-d4476524-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-phil_s-stock-world-lululemon-fires-art-d44b970c-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-law-at-the-end-of-the-day-register-now-for-d4790f7a-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-the-business-insider-senate-democrats-call-d4636c42-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-moonbattery-asian-cultural-marxists-d4883da6-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-saportareport-dealing-with-xenophobia-d46f735c-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-the-business-insider-activists-fighting-coronavirus-driven-d409a072-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-medical-xpress-asian-americans-already-d4134c6c-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-the-huffington-post-teen-girls-attack-d429e1b6-9105-11ea-8d39-0242ac160002.txt
Extracting newstex-blogs-marketbe

Extracting the-daily-campus_-university-of-connecticut-katsouleas-reflects-on-d4cdabac-9105-11ea-8d39-0242ac160002.txt
Extracting the-orion_-california-state-university---chico-dac-and-as-d4a4dd26-9105-11ea-8d39-0242ac160002.txt
Extracting the-miscellany-news_-vassar-college-legacy-of-activism_-d4c9ed14-9105-11ea-8d39-0242ac160002.txt
Extracting michigan-daily_-university-of-michigan-ann-arbor-_avatar__-a-series-d4f1ee22-9105-11ea-8d39-0242ac160002.txt
Extracting daily-targum_-rutgers-university-coronavirus-pandemic-should-d4d6df38-9105-11ea-8d39-0242ac160002.txt
Extracting the-johns-hopkins-news-letter_-johns-hopkins-university-oma-hosts-virtual-d4dc9d6a-9105-11ea-8d39-0242ac160002.txt
Extracting the-horizon_-westmont-college-what_s-in-a-d4f35f00-9105-11ea-8d39-0242ac160002.txt
Extracting university-times_-california-state-university---los-angeles-coronavirus-and-racism_-d49c9828-9105-11ea-8d39-0242ac160002.txt
Extracting pipe-dream_-suny-at-binghamton-_what-do-i-d4e8803a-9105-11ea-8d

Extracting states-news-service-rosen_-lankford-join-d5c5b63a-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-4-1-20_-president_s-update_-d5e140e4-9105-11ea-8d39-0242ac160002.txt
Extracting states-news-service-duckworth_-durbin_-democratic-d5d33bc0-9105-11ea-8d39-0242ac160002.txt
Extracting the-new-york-times-asian-americans-still-have-d661e190-9105-11ea-8d39-0242ac160002.txt
Extracting the-new-york-times-does-andrew-yang-d67e5b22-9105-11ea-8d39-0242ac160002.txt
Extracting the-new-york-times-does-andrew-yang-d66ca9a4-9105-11ea-8d39-0242ac160002.txt
Extracting the-new-york-times-when-asian-americans-have-d6447a74-9105-11ea-8d39-0242ac160002.txt
Extracting the-new-york-times-the-slur-i-d65d5c6a-9105-11ea-8d39-0242ac160002.txt
Extracting the-new-york-times-does-andrew-yang-d684257a-9105-11ea-8d39-0242ac160002.txt
Extracting the-new-york-times-one-story-that-d64d280e-9105-11ea-8d39-0242ac160002.txt
Extracting the-new-york-times-crazy-serious-asian-american-d627a8cc-9105-11ea-

Extracting south-china-morning-post.com-chinese-american-nfl-star-d7ce7ea8-9105-11ea-8d39-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-nbc-blames-trump-d7ec0a90-9105-11ea-8d39-0242ac160002.txt
Extracting state-capital-newsfeed-conwebblog(-virginia)-following-orders_-mrc-d7f28320-9105-11ea-8d39-0242ac160002.txt
Extracting state-capital-newsfeed-blue-virginia-the-covid-19-divide_-d7ee00de-9105-11ea-8d39-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-pbs-anchor-nawaz-d7f001f4-9105-11ea-8d39-0242ac160002.txt
Extracting state-capital-newsfeed-saportareport(-georgia)-dealing-with-xenophobia-d7f878f2-9105-11ea-8d39-0242ac160002.txt
Extracting state-capital-newsfeed-iowa-center-for-public-affairs-reporting-asians-in-iowa-d7f5d610-9105-11ea-8d39-0242ac160002.txt
Extracting state-capital-newsfeed-newsbusters.org-washpost-cites-soros-affiliated-d7e9bd80-9105-11ea-8d39-0242ac160002.txt
Extracting china-daily-young-people-share-d808b712-9105-11ea-8d39-0242a