Load API key from .env

In [60]:
import os
from dotenv import load_dotenv

load_dotenv()

API_KEY = os.getenv("API_KEY")


Request items from API

In [61]:

import requests
import json

URL = "https://drvk.createuky.net/news-articles/api/items"
r = requests.get(URL)


Grab descriptions from each item, clean them

In [62]:
import re

newline_pattern = re.compile(r'\n+')
html_pattern = re.compile(r'<.*?>')
nbsp_pattern = re.compile(r'&nbsp;')

def remove_tags(text):
    no_newlines = newline_pattern.sub('', text)
    no_tags = html_pattern.sub('', no_newlines)
    return nbsp_pattern.sub(' ', no_tags)

items = json.loads(r.text)

cleaned_text = []
for item in items:
    for text in item["element_texts"]:
        if text["element"]["name"] == "Description":
            cleaned = remove_tags(text["text"]).replace("\n", "")
            if len(cleaned): cleaned_text.append(cleaned)


Imports for frequency analysis

In [None]:
import nltk
from nltk import word_tokenize
from nltk.probability import FreqDist
from nltk.corpus import stopwords
nltk.download('punkt_tab')
nltk.download('punkt')
nltk.download("stopwords")

In [78]:
# join every cleaned description into a string 
text = " ".join(cleaned_text)

# tokenize
words = word_tokenize(text)
cleaned = []

# remove punctuation and stop words
stopwords_list = stopwords.words("english")
custom_stopwords = []
stopwords_list.extend(custom_stopwords)

for word in words:
    if word.isalpha() and word.lower() not in stopwords_list:
        cleaned.append(word.lower())


print(f"Total number of words is {len(cleaned)}")

fdist = FreqDist(cleaned)

for word, freq in fdist.most_common():
    print(word, freq)


Total number of words is 47554
mob 713
negro 627
men 441
jail 439
county 382
night 302
one 273
taken 241
morning 233
body 232
man 228
said 226
two 202
last 187
house 184
found 183
shot 178
near 178
lynching 177
clock 168
went 168
made 161
took 157
crime 156
would 151
jailer 149
death 148
hanged 146
time 143
home 140
town 135
lynched 133
sheriff 131
came 130
officers 124
three 123
several 122
white 122
door 120
tree 120
city 119
upon 118
sic 118
people 117
prisoner 116
miles 113
little 111
judge 108
place 106
jury 105
girl 103
west 103
court 101
kentucky 101
hanging 97
killed 95
arrested 95
away 94
years 94
john 93
rope 91
citizens 90
brought 89
left 89
could 88
murder 88
charged 86
ago 86
young 85
colored 85
negroes 84
special 82
fired 81
told 80
known 80
work 79
held 78
woman 78
miss 77
another 77
crowd 76
placed 74
back 74
returned 74
hands 74
called 73
trial 72
keys 71
yesterday 71
dead 71
masked 71
cut 70
james 70
armed 69
coroner 69
way 68
first 68
law 68
day 68
old 68
street 67
t