In [None]:
import codecs
import copy
import csv
import gc
from itertools import chain
import os
import random
import re
from string import punctuation
from typing import Dict, List, Tuple, Union
import warnings

In [None]:
import catboost
import nltk
import numpy as np
from scipy.sparse import hstack
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import VarianceThreshold, SelectKBest, f_regression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils.validation import check_is_fitted
import spacy
from spacy.language import Language
from spacy.tokens import Doc
import torch
from tqdm.notebook import tqdm

In [None]:
# Pre-compiled regular expression matching a single decimal-digit character.
# No flags are passed, so on `str` input `\d` follows Python's default Unicode
# rules (any Unicode decimal digit, not only ASCII 0-9).
# NOTE(review): the call sites are outside this part of the notebook — confirm
# whether ASCII-only matching (re.ASCII) is what the callers actually expect.
NUMBERS = re.compile(r"\d")
WORD_DICT = {
    "0": 2,
    "1": 1,
    "2": 1,
    "3": 1,
    "4": 1,
    "5": 1,
    "6": 1,
    "7": 2,
    "8": 1,
    "9": 1,
    "10": 1,
    "11": 3,
    "12": 1,
    "13": 2,
    "14": 2,
    "15": 2,
    "16": 2,
    "17": 2,
    "18": 2,
    "19": 2,
    "20": 2,
    "30": 2,
    "40": 2,
    "50": 2,
    "60": 2,
    "70": 3,
    "80": 2,
    "90": 2,
    "100": 3,
    "a": 1,
    "aaron": 2,
    "abandon": 3,
    "ability": 4,
    "able": 2,
    "aboard": 2,
    "abortion": 3,
    "about": 2,
    "above": 2,
    "abroad": 2,
    "absence": 2,
    "absolute": 3,
    "absolutely": 4,
    "absorb": 2,
    "abstract": 2,
    "abuse": 2,
    "academic": 4,
    "academy": 4,
    "accelerate": 4,
    "accent": 2,
    "accept": 2,
    "acceptable": 4,
    "acceptance": 3,
    "accepted": 3,
    "access": 2,
    "accessibility": 6,
    "accessible": 4,
    "accessories": 4,
    "accessory": 4,
    "accident": 3,
    "accommodate": 4,
    "accommodation": 5,
    "accompany": 4,
    "accomplish": 3,
    "accomplishment": 4,
    "accordance": 3,
    "according": 3,
    "account": 2,
    "accountability": 6,
    "accounting": 3,
    "accounts": 2,
    "accuracy": 4,
    "accurate": 3,
    "accurately": 3,
    "accusation": 4,
    "accuse": 2,
    "achieve": 2,
    "achieved": 2,
    "achievement": 3,
    "acid": 2,
    "acknowledge": 3,
    "acquire": 2,
    "acquired": 2,
    "acquisition": 4,
    "acres": 2,
    "acrobat": 3,
    "across": 2,
    "act": 1,
    "acting": 2,
    "action": 2,
    "actions": 2,
    "active": 2,
    "actively": 2,
    "activist": 3,
    "activities": 4,
    "activity": 4,
    "actor": 2,
    "actors": 2,
    "actress": 2,
    "acts": 1,
    "actual": 3,
    "actually": 4,
    "acute": 2,
    "ad": 1,
    "adam": 2,
    "adams": 2,
    "adapt": 2,
    "adapter": 3,
    "add": 1,
    "added": 1,
    "adding": 2,
    "addition": 3,
    "additional": 4,
    "additions": 3,
    "address": 2,
    "addressed": 2,
    "addresses": 3,
    "adds": 1,
    "adequate": 3,
    "adjective": 3,
    "adjust": 2,
    "adjustable": 4,
    "adjustment": 3,
    "admin": 2,
    "administer": 4,
    "administration": 5,
    "administrative": 5,
    "administrator": 5,
    "admire": 2,
    "admission": 3,
    "admit": 2,
    "adobe": 3,
    "adolescent": 4,
    "adopt": 2,
    "adopted": 3,
    "adoption": 3,
    "adult": 2,
    "advance": 2,
    "advanced": 2,
    "advantage": 3,
    "advantages": 4,
    "adventure": 3,
    "adverse": 2,
    "advertise": 3,
    "advertisement": 4,
    "advertising": 4,
    "advice": 2,
    "advise": 2,
    "adviser": 3,
    "advisor": 3,
    "advisory": 4,
    "advocate": 3,
    "aesthetic": 3,
    "affair": 2,
    "affect": 2,
    "affected": 3,
    "affiliate": 4,
    "afford": 2,
    "affordable": 4,
    "afghanistan": 4,
    "afraid": 2,
    "africa": 3,
    "african": 3,
    "african-american": 1,
    "after": 2,
    "afternoon": 3,
    "afterward": 3,
    "again": 2,
    "against": 2,
    "age": 1,
    "aged": 1,
    "agencies": 3,
    "agency": 3,
    "agenda": 3,
    "agent": 2,
    "ages": 2,
    "aggression": 3,
    "aggressive": 3,
    "aging": 2,
    "ago": 2,
    "agree": 2,
    "agreed": 2,
    "agreement": 3,
    "agricultural": 4,
    "agriculture": 4,
    "ah": 1,
    "ahead": 2,
    "aid": 1,
    "aide": 1,
    "aids": 1,
    "aim": 1,
    "aims": 1,
    "air": 1,
    "aircraft": 2,
    "airline": 2,
    "airplane": 2,
    "airport": 2,
    "aisle": 1,
    "alabama": 4,
    "alan": 2,
    "alarm": 2,
    "alaska": 3,
    "albany": 3,
    "albert": 2,
    "alberta": 3,
    "album": 2,
    "alcohol": 3,
    "alert": 2,
    "alex": 2,
    "alexander": 4,
    "algorithm": 3,
    "alice": 2,
    "alien": 2,
    "alike": 2,
    "alive": 2,
    "all": 1,
    "allegation": 4,
    "alleged": 2,
    "allegedly": 4,
    "allen": 2,
    "alley": 2,
    "alliance": 3,
    "allocated": 4,
    "allocation": 4,
    "allow": 2,
    "allowed": 2,
    "allowing": 3,
    "allows": 2,
    "ally": 2,
    "almost": 2,
    "alone": 2,
    "along": 2,
    "alongside": 3,
    "aloud": 2,
    "alpha": 2,
    "alphabet": 3,
    "alphabetical": 5,
    "already": 3,
    "also": 2,
    "alter": 2,
    "alternate": 3,
    "alternative": 4,
    "although": 2,
    "altogether": 4,
    "aluminum": 4,
    "alumni": 3,
    "always": 2,
    "am": 1,
    "amateur": 3,
    "amazing": 3,
    "amazon": 3,
    "ambassador": 4,
    "amber": 2,
    "ambition": 3,
    "ambitious": 3,
    "amendment": 3,
    "amendments": 3,
    "america": 4,
    "american": 4,
    "americans": 4,
    "amid": 2,
    "among": 2,
    "amount": 2,
    "amsterdam": 3,
    "an": 1,
    "anal": 2,
    "analog": 3,
    "analysis": 4,
    "analyst": 3,
    "analyze": 3,
    "ancestor": 3,
    "ancient": 2,
    "and": 1,
    "and\/or": 1,
    "anderson": 3,
    "andrew": 2,
    "andy": 2,
    "angel": 2,
    "angeles": 3,
    "anger": 2,
    "angle": 2,
    "angry": 2,
    "animal": 3,
    "animals": 3,
    "animated": 4,
    "animation": 4,
    "anime": 3,
    "ankle": 2,
    "ann": 1,
    "anna": 2,
    "anne": 1,
    "anniversary": 5,
    "annotation": 4,
    "announce": 2,
    "announced": 2,
    "announcement": 3,
    "annual": 3,
    "annually": 3,
    "anonymous": 4,
    "another": 3,
    "answer": 2,
    "antenna": 3,
    "anthony": 3,
    "anti": 2,
    "anticipate": 4,
    "antique": 2,
    "antonio": 4,
    "ants": 1,
    "anxiety": 4,
    "anxious": 2,
    "any": 2,
    "anybody": 4,
    "anymore": 3,
    "anyone": 3,
    "anything": 3,
    "anytime": 3,
    "anyway": 3,
    "anywhere": 3,
    "apache": 3,
    "apart": 2,
    "apartment": 3,
    "apologize": 4,
    "apology": 4,
    "apparel": 3,
    "apparent": 3,
    "apparently": 3,
    "appeal": 2,
    "appear": 2,
    "appearance": 3,
    "appendix": 3,
    "apple": 2,
    "appliances": 4,
    "applicable": 4,
    "applicant": 3,
    "application": 4,
    "applied": 2,
    "apply": 2,
    "applying": 3,
    "appoint": 2,
    "appointed": 3,
    "appointment": 3,
    "appreciate": 4,
    "appreciation": 5,
    "approach": 2,
    "appropriate": 4,
    "approval": 3,
    "approve": 2,
    "approved": 2,
    "approximately": 4,
    "april": 2,
    "arab": 2,
    "arcade": 2,
    "architect": 3,
    "architecture": 4,
    "archive": 2,
    "are": 1,
    "area": 3,
    "arena": 3,
    "argentina": 4,
    "argue": 2,
    "argument": 3,
    "arise": 2,
    "arizona": 4,
    "arkansas": 3,
    "arm": 1,
    "armed": 1,
    "arms": 1,
    "army": 2,
    "around": 2,
    "arrange": 2,
    "arrangement": 3,
    "array": 2,
    "arrest": 2,
    "arrival": 3,
    "arrive": 2,
    "arrived": 2,
    "arrow": 2,
    "art": 1,
    "arthur": 2,
    "article": 3,
    "articles": 3,
    "articulate": 4,
    "artifact": 3,
    "artificial": 4,
    "artist": 2,
    "artistic": 3,
    "arts": 1,
    "as": 1,
    "ash": 1,
    "asia": 2,
    "asian": 2,
    "aside": 2,
    "ask": 1,
    "asked": 1,
    "asking": 2,
    "asleep": 2,
    "aspect": 2,
    "ass": 1,
    "assault": 2,
    "assemble": 3,
    "assembly": 3,
    "assert": 2,
    "assess": 2,
    "assessment": 3,
    "asset": 2,
    "assets": 2,
    "assign": 2,
    "assignment": 3,
    "assist": 2,
    "assistance": 3,
    "assistant": 3,
    "associate": 4,
    "associated": 4,
    "association": 5,
    "assume": 2,
    "assumed": 2,
    "assumption": 3,
    "assurance": 3,
    "assure": 2,
    "astronomer": 4,
    "at": 1,
    "ate": 1,
    "athlete": 2,
    "athletic": 3,
    "athletics": 3,
    "atlanta": 3,
    "atlantic": 3,
    "atlas": 2,
    "atmosphere": 3,
    "atom": 2,
    "atomic": 3,
    "atop": 2,
    "attach": 2,
    "attached": 2,
    "attachment": 3,
    "attachments": 3,
    "attack": 2,
    "attempt": 2,
    "attend": 2,
    "attendance": 3,
    "attention": 3,
    "attitude": 3,
    "attorney": 3,
    "attorneys": 3,
    "attract": 2,
    "attraction": 3,
    "attractive": 3,
    "attribute": 3,
    "auction": 2,
    "auctions": 2,
    "audience": 3,
    "audio": 3,
    "audit": 2,
    "august": 2,
    "aunt": 1,
    "austin": 2,
    "australia": 3,
    "australian": 3,
    "austria": 3,
    "authentication": 5,
    "author": 2,
    "authorities": 4,
    "authority": 4,
    "authorize": 3,
    "authorized": 3,
    "authors": 2,
    "auto": 2,
    "automated": 4,
    "automatic": 4,
    "automatically": 6,
    "automation": 4,
    "automobile": 4,
    "automotive": 4,
    "autonomy": 4,
    "autumn": 2,
    "availability": 4,
    "available": 4,
    "avatar": 3,
    "avenue": 3,
    "average": 3,
    "aviation": 4,
    "avoid": 2,
    "await": 2,
    "awake": 2,
    "award": 2,
    "aware": 2,
    "awareness": 3,
    "away": 2,
    "awesome": 2,
    "awful": 2,
    "axis": 2,
    "babe": 1,
    "babes": 1,
    "babies": 2,
    "baby": 2,
    "bachelor": 3,
    "back": 1,
    "background": 2,
    "backup": 2,
    "backyard": 2,
    "bacteria": 4,
    "bad": 1,
    "badly": 2,
    "bag": 1,
    "bags": 1,
    "bake": 1,
    "baker": 2,
    "balance": 2,
    "balanced": 2,
    "ball": 1,
    "balloon": 2,
    "ballot": 2,
    "balls": 1,
    "baltimore": 3,
    "ban": 1,
    "banana": 3,
    "band": 1,
    "bands": 1,
    "bandwidth": 2,
    "bang": 1,
    "bank": 1,
    "banker": 2,
    "banking": 2,
    "bankruptcy": 3,
    "banks": 1,
    "banner": 2,
    "bar": 1,
    "barbara": 3,
    "bare": 1,
    "barely": 2,
    "bargain": 2,
    "bark": 1,
    "barn": 1,
    "barrel": 2,
    "barrier": 3,
    "barry": 2,
    "bars": 1,
    "base": 1,
    "baseball": 2,
    "based": 1,
    "basement": 2,
    "basic": 2,
    "basically": 4,
    "basis": 2,
    "basket": 2,
    "basketball": 3,
    "bass": 1,
    "bat": 1,
    "bath": 1,
    "bathroom": 2,
    "batteries": 3,
    "battery": 3,
    "battle": 2,
    "bay": 1,
    "be": 1,
    "beach": 1,
    "beam": 1,
    "bean": 1,
    "bear": 1,
    "beard": 1,
    "bears": 1,
    "beast": 1,
    "beat": 1,
    "beautiful": 3,
    "beauty": 2,
    "became": 2,
    "because": 2,
    "become": 2,
    "becoming": 3,
    "bed": 1,
    "bedroom": 2,
    "beds": 1,
    "bee": 1,
    "beef": 1,
    "been": 1,
    "beer": 1,
    "before": 2,
    "beg": 1,
    "began": 2,
    "begin": 2,
    "beginning": 3,
    "begun": 2,
    "behalf": 2,
    "behave": 2,
    "behavior": 3,
    "behavioral": 3,
    "behaviour": 3,
    "behind": 2,
    "being": 2,
    "belgium": 2,
    "belief": 2,
    "believe": 2,
    "believed": 2,
    "believes": 2,
    "bell": 1,
    "belly": 2,
    "belong": 2,
    "below": 2,
    "belt": 1,
    "ben": 1,
    "bench": 1,
    "bend": 1,
    "beneath": 2,
    "benefit": 3,
    "bent": 1,
    "berlin": 2,
    "beside": 2,
    "besides": 2,
    "best": 1,
    "bestiality": 5,
    "bet": 1,
    "beta": 2,
    "betsy": 2,
    "better": 2,
    "betting": 2,
    "between": 2,
    "beyond": 2,
    "bias": 2,
    "bible": 2,
    "bibliography": 5,
    "bicycle": 3,
    "bid": 1,
    "bidder": 2,
    "bidding": 2,
    "bids": 1,
    "big": 1,
    "bigger": 1,
    "biggest": 1,
    "bike": 1,
    "bikini": 3,
    "bill": 1,
    "billing": 2,
    "billion": 2,
    "bills": 1,
    "billy": 2,
    "binary": 3,
    "bind": 1,
    "binding": 2,
    "biography": 4,
    "biological": 5,
    "biology": 4,
    "bird": 1,
    "birds": 1,
    "birmingham": 3,
    "birth": 1,
    "birthday": 2,
    "bishop": 2,
    "bit": 1,
    "bite": 1,
    "bits": 1,
    "bitter": 2,
    "black": 1,
    "blackjack": 2,
    "blade": 1,
    "blame": 1,
    "blank": 1,
    "blanket": 2,
    "blast": 1,
    "blend": 1,
    "bless": 1,
    "blessing": 2,
    "blew": 1,
    "blind": 1,
    "blink": 1,
    "block": 1,
    "blocks": 1,
    "blog": 1,
    "blogger": 2,
    "blogging": 2,
    "blogs": 1,
    "blond": 1,
    "blonde": 1,
    "blood": 1,
    "bloody": 2,
    "blow": 1,
    "blowjob": 2,
    "blue": 1,
    "blues": 1,
    "blvd": 1,
    "board": 1,
    "boards": 1,
    "boast": 1,
    "boat": 1,
    "boats": 1,
    "bob": 1,
    "bodies": 2,
    "body": 2,
    "boil": 1,
    "bold": 1,
    "bolt": 1,
    "bomb": 1,
    "bombing": 1,
    "bond": 1,
    "bondage": 2,
    "bonds": 1,
    "bone": 1,
    "bonus": 2,
    "boobs": 1,
    "book": 1,
    "booking": 2,
    "bookmark": 2,
    "books": 1,
    "bookstore": 2,
    "boom": 1,
    "boost": 1,
    "boot": 1,
    "booth": 1,
    "boots": 1,
    "booty": 2,
    "border": 2,
    "boring": 2,
    "born": 1,
    "borrow": 2,
    "boss": 1,
    "boston": 2,
    "both": 1,
    "bother": 2,
    "bottle": 2,
    "bottom": 2,
    "bought": 1,
    "bounce": 1,
    "bound": 1,
    "boundary": 3,
    "bow": 1,
    "bowl": 1,
    "box": 1,
    "boxes": 2,
    "boy": 1,
    "boyfriend": 2,
    "boys": 1,
    "brain": 1,
    "brake": 1,
    "branch": 1,
    "brand": 1,
    "brands": 1,
    "brass": 1,
    "brave": 1,
    "brazil": 2,
    "bread": 1,
    "break": 1,
    "breakfast": 2,
    "breaking": 2,
    "breaks": 1,
    "breast": 1,
    "breasts": 1,
    "breath": 1,
    "breathe": 1,
    "breathing": 2,
    "breeze": 1,
    "brian": 2,
    "brick": 1,
    "bride": 1,
    "bridge": 1,
    "brief": 1,
    "briefly": 2,
    "bright": 1,
    "brilliant": 2,
    "bring": 1,
    "bringing": 2,
    "brings": 1,
    "bristol": 2,
    "britain": 2,
    "british": 2,
    "broad": 1,
    "broadband": 2,
    "broadcast": 2,
    "broadway": 2,
    "brochure": 2,
    "broke": 1,
    "broken": 2,
    "broker": 2,
    "bronze": 1,
    "brother": 2,
    "brought": 1,
    "brown": 1,
    "browse": 1,
    "browser": 2,
    "bruce": 1,
    "brush": 1,
    "brutal": 2,
    "bubble": 2,
    "buck": 1,
    "bucket": 2,
    "buddy": 2,
    "budget": 2,
    "buffalo": 3,
    "buffer": 2,
    "bug": 1,
    "bugs": 1,
    "build": 1,
    "builder": 2,
    "building": 2,
    "built": 1,
    "bulb": 1,
    "bulgaria": 4,
    "bulk": 1,
    "bull": 1,
    "bullet": 2,
    "bulletin": 3,
    "bunch": 1,
    "burden": 2,
    "bureau": 2,
    "buried": 2,
    "burn": 1,
    "burning": 2,
    "burst": 1,
    "bury": 2,
    "bus": 1,
    "bush": 1,
    "business": 2,
    "businesses": 3,
    "businessman": 3,
    "busty": 2,
    "busy": 2,
    "but": 1,
    "butt": 1,
    "butter": 2,
    "butterfly": 3,
    "button": 2,
    "buttons": 2,
    "buy": 1,
    "buyer": 2,
    "buying": 2,
    "by": 1,
    "bytes": 1,
    "cab": 1,
    "cabin": 2,
    "cabinet": 3,
    "cable": 2,
    "cache": 1,
    "cached": 1,
    "cafe": 2,
    "cage": 1,
    "cake": 1,
    "calculate": 3,
    "calculated": 4,
    "calculation": 4,
    "calculator": 4,
    "calendar": 3,
    "california": 4,
    "call": 1,
    "called": 1,
    "calling": 2,
    "calls": 1,
    "calm": 1,
    "cambridge": 2,
    "camcorder": 3,
    "came": 1,
    "camera": 3,
    "cameras": 3,
    "camp": 1,
    "campaign": 2,
    "campbell": 2,
    "camping": 2,
    "camps": 1,
    "campus": 2,
    "can": 1,
    "canada": 3,
    "canadian": 4,
    "canal": 2,
    "cancel": 2,
    "cancer": 2,
    "candidate": 3,
    "candle": 2,
    "candy": 2,
    "cannot": 2,
    "canon": 2,
    "canvas": 2,
    "canyon": 2,
    "cap": 1,
    "capabilities": 5,
    "capability": 5,
    "capable": 3,
    "capacity": 4,
    "cape": 1,
    "capital": 3,
    "caps": 1,
    "captain": 2,
    "capture": 2,
    "captured": 2,
    "car": 1,
    "carbohydrate": 4,
    "carbon": 2,
    "card": 1,
    "cards": 1,
    "care": 1,
    "career": 2,
    "careful": 2,
    "carefully": 2,
    "cargo": 2,
    "caribbean": 4,
    "carl": 1,
    "carlos": 2,
    "carol": 2,
    "carolina": 4,
    "carpet": 2,
    "carried": 2,
    "carrier": 3,
    "carrot": 2,
    "carry": 2,
    "carrying": 3,
    "cars": 1,
    "cart": 1,
    "carter": 2,
    "cartoon": 2,
    "cartridge": 2,
    "cartridges": 3,
    "carve": 1,
    "case": 1,
    "casey": 2,
    "cash": 1,
    "casino": 3,
    "casinos": 3,
    "cast": 1,
    "castle": 2,
    "casual": 3,
    "casualty": 4,
    "cat": 1,
    "catalog": 3,
    "catalogue": 3,
    "catch": 1,
    "categories": 4,
    "category": 4,
    "catholic": 3,
    "cats": 1,
    "cattle": 2,
    "caught": 1,
    "cause": 1,
    "caused": 1,
    "cave": 1,
    "cease": 1,
    "ceiling": 2,
    "celebrate": 3,
    "celebration": 4,
    "celebrity": 4,
    "cell": 1,
    "cells": 1,
    "cellular": 3,
    "cemetery": 4,
    "census": 2,
    "cent": 1,
    "center": 2,
    "central": 2,
    "centre": 2,
    "centres": 1,
    "century": 3,
    "ceo": 1,
    "ceremony": 4,
    "certain": 2,
    "certainly": 3,
    "certificate": 4,
    "certification": 5,
    "certified": 3,
    "chain": 1,
    "chair": 1,
    "chairman": 2,
    "chairs": 1,
    "challenge": 2,
    "chamber": 2,
    "champion": 3,
    "championship": 4,
    "chance": 1,
    "change": 1,
    "changed": 1,
    "changing": 1,
    "channel": 2,
    "chaos": 2,
    "chapter": 2,
    "char": 1,
    "character": 3,
    "characteristic": 5,
    "characteristics": 5,
    "characterize": 4,
    "characters": 3,
    "charge": 1,
    "charged": 1,
    "charger": 2,
    "charity": 3,
    "charles": 1,
    "charlie": 2,
    "charlotte": 2,
    "charm": 1,
    "chart": 1,
    "charter": 2,
    "charts": 1,
    "chase": 1,
    "chat": 1,
    "cheap": 1,
    "cheat": 1,
    "cheats": 1,
    "check": 1,
    "checked": 1,
    "checkout": 2,
    "checks": 1,
    "cheek": 1,
    "cheer": 1,
    "cheese": 1,
    "chef": 1,
    "chemical": 3,
    "chemistry": 3,
    "cherry": 2,
    "chest": 1,
    "chew": 1,
    "chicago": 3,
    "chicken": 2,
    "chief": 1,
    "child": 1,
    "childhood": 2,
    "children": 2,
    "chile": 2,
    "chill": 1,
    "chin": 1,
    "china": 2,
    "chinese": 2,
    "chip": 1,
    "chips": 1,
    "chocolate": 3,
    "choice": 1,
    "choices": 2,
    "cholesterol": 4,
    "choose": 1,
    "choosing": 2,
    "chop": 1,
    "chose": 1,
    "chosen": 2,
    "chris": 1,
    "christ": 1,
    "christian": 2,
    "christianity": 5,
    "christmas": 2,
    "christopher": 3,
    "chronic": 2,
    "chunk": 1,
    "church": 1,
    "cigarette": 3,
    "cincinnati": 4,
    "cinema": 3,
    "circle": 2,
    "circuit": 2,
    "circumstance": 3,
    "circumstances": 4,
    "circus": 2,
    "cisco": 2,
    "cite": 1,
    "cited": 2,
    "cities": 2,
    "citizen": 3,
    "citizenship": 4,
    "city": 2,
    "civic": 2,
    "civil": 2,
    "civilian": 3,
    "civilization": 5,
    "claim": 1,
    "claims": 1,
    "clark": 1,
    "class": 1,
    "classes": 2,
    "classic": 2,
    "classical": 3,
    "classification": 5,
    "classified": 3,
    "classify": 3,
    "classroom": 2,
    "clause": 1,
    "claws": 1,
    "clay": 1,
    "clean": 1,
    "cleaner": 2,
    "cleaning": 2,
    "clear": 1,
    "clearance": 2,
    "clearly": 2,
    "clerk": 1,
    "cleveland": 2,
    "click": 1,
    "clicking": 2,
    "client": 2,
    "cliff": 1,
    "climate": 2,
    "climb": 1,
    "climbing": 2,
    "cling": 1,
    "clinic": 2,
    "clinical": 3,
    "clinton": 2,
    "clip": 1,
    "clips": 1,
    "clock": 1,
    "close": 1,
    "closed": 1,
    "closely": 1,
    "closer": 2,
    "closest": 1,
    "closet": 2,
    "closing": 2,
    "cloth": 1,
    "clothes": 1,
    "clothing": 2,
    "cloud": 1,
    "cloudy": 2,
    "club": 1,
    "clubs": 1,
    "clue": 1,
    "cluster": 2,
    "coach": 1,
    "coal": 1,
    "coalition": 4,
    "coast": 1,
    "coastal": 2,
    "coat": 1,
    "cocaine": 2,
    "cock": 1,
    "cocks": 1,
    "code": 1,
    "codes": 1,
    "coding": 2,
    "coffee": 2,
    "cognitive": 3,
    "coin": 1,
    "coins": 1,
    "cold": 1,
    "collaboration": 5,
    "collapse": 2,
    "collar": 2,
    "colleague": 2,
    "collect": 2,
    "collected": 3,
    "collection": 3,
    "collective": 3,
    "collector": 3,
    "college": 2,
    "colleges": 3,
    "collins": 2,
    "colombia": 4,
    "colonial": 4,
    "colony": 3,
    "color": 2,
    "colorado": 4,
    "colorful": 3,
    "colors": 2,
    "colour": 2,
    "columbia": 4,
    "columbus": 3,
    "column": 2,
    "columnist": 3,
    "combat": 2,
    "combination": 4,
    "combine": 2,
    "combined": 2,
    "come": 1,
    "comedy": 3,
    "comes": 1,
    "comfort": 2,
    "comfortable": 4,
    "comic": 2,
    "coming": 2,
    "command": 2,
    "commander": 3,
    "comment": 2,
    "commentary": 4,
    "commerce": 2,
    "commercial": 3,
    "commission": 3,
    "commissioner": 4,
    "commit": 2,
    "commitment": 3,
    "committed": 3,
    "committee": 3,
    "commodity": 4,
    "common": 2,
    "commonly": 3,
    "commons": 2,
    "commonwealth": 3,
    "communicate": 4,
    "communication": 5,
    "communities": 4,
    "community": 4,
    "compact": 2,
    "companies": 3,
    "companion": 3,
    "company": 3,
    "comparable": 4,
    "compare": 2,
    "compared": 2,
    "comparison": 4,
    "compass": 2,
    "compatibility": 6,
    "compatible": 4,
    "compel": 2,
    "compelling": 3,
    "compensation": 4,
    "compete": 2,
    "competition": 4,
    "competitive": 4,
    "competitor": 4,
    "compilation": 4,
    "compiled": 2,
    "complain": 2,
    "complaint": 2,
    "complete": 2,
    "completed": 3,
    "completely": 2,
    "completion": 3,
    "complex": 2,
    "complexity": 4,
    "compliance": 3,
    "complicated": 4,
    "comply": 2,
    "component": 3,
    "compose": 2,
    "composed": 2,
    "composition": 4,
    "compound": 2,
    "comprehensive": 4,
    "comprise": 2,
    "compromise": 3,
    "computer": 3,
    "computing": 3,
    "concede": 2,
    "conceive": 2,
    "concentrate": 3,
    "concentration": 4,
    "concept": 2,
    "conception": 3,
    "concern": 2,
    "concerned": 2,
    "concerning": 3,
    "concert": 2,
    "conclude": 2,
    "conclusion": 3,
    "concrete": 2,
    "condemn": 2,
    "condition": 3,
    "conditioning": 4,
    "conduct": 2,
    "conference": 3,
    "confess": 2,
    "confession": 3,
    "confidence": 3,
    "confident": 3,
    "configuration": 5,
    "configure": 3,
    "confirm": 2,
    "confirmation": 4,
    "confirmed": 2,
    "conflict": 2,
    "confront": 2,
    "confrontation": 4,
    "confuse": 2,
    "confusion": 3,
    "congress": 2,
    "congressional": 4,
    "connect": 2,
    "connected": 3,
    "connecticut": 4,
    "connection": 3,
    "conscience": 2,
    "conscious": 2,
    "consciousness": 3,
    "consecutive": 4,
    "consensus": 3,
    "consent": 2,
    "consequence": 3,
    "consequences": 4,
    "consequently": 4,
    "conservation": 4,
    "conservative": 4,
    "consider": 3,
    "considerable": 5,
    "considerably": 5,
    "consideration": 5,
    "considered": 3,
    "considering": 4,
    "consist": 2,
    "consistent": 3,
    "consistently": 3,
    "console": 2,
    "consolidation": 5,
    "consonant": 3,
    "conspiracy": 4,
    "const": 1,
    "constant": 2,
    "constantly": 2,
    "constitute": 3,
    "constitution": 4,
    "constitutional": 5,
    "constraint": 2,
    "construct": 2,
    "construction": 3,
    "consult": 2,
    "consultant": 3,
    "consultation": 4,
    "consulting": 3,
    "consume": 2,
    "consumer": 3,
    "consumption": 3,
    "contact": 2,
    "contain": 2,
    "contained": 2,
    "container": 3,
    "contemplate": 3,
    "contemporary": 5,
    "contend": 2,
    "content": 2,
    "contents": 2,
    "contest": 2,
    "context": 2,
    "continent": 3,
    "continental": 4,
    "continue": 3,
    "continued": 3,
    "continuing": 3,
    "continuous": 4,
    "contract": 2,
    "contractor": 3,
    "contrast": 2,
    "contribute": 3,
    "contributed": 4,
    "contribution": 4,
    "contributor": 4,
    "control": 2,
    "controlled": 2,
    "controller": 3,
    "controversial": 4,
    "controversy": 4,
    "convened": 2,
    "convenience": 3,
    "convenient": 3,
    "convention": 3,
    "conventional": 4,
    "conversation": 4,
    "conversion": 3,
    "convert": 2,
    "converted": 3,
    "converter": 3,
    "convey": 2,
    "convict": 2,
    "conviction": 3,
    "convince": 2,
    "convinced": 2,
    "cook": 1,
    "cookie": 2,
    "cookies": 2,
    "cooking": 2,
    "cool": 1,
    "cooling": 2,
    "cooper": 2,
    "cooperate": 4,
    "cooperation": 5,
    "cooperative": 5,
    "coordinate": 4,
    "coordinator": 5,
    "cop": 1,
    "cope": 1,
    "copies": 2,
    "copper": 2,
    "copy": 2,
    "copyright": 3,
    "cord": 1,
    "core": 1,
    "corn": 1,
    "corner": 2,
    "corp": 1,
    "corporate": 3,
    "corporation": 4,
    "corps": 1,
    "correct": 2,
    "correction": 3,
    "correctly": 2,
    "correlation": 4,
    "correspondent": 4,
    "corresponding": 4,
    "corridor": 3,
    "corruption": 3,
    "cost": 1,
    "costa": 2,
    "costly": 2,
    "costs": 1,
    "costume": 2,
    "cottage": 2,
    "cotton": 2,
    "couch": 1,
    "could": 1,
    "council": 2,
    "counsel": 2,
    "counseling": 3,
    "counselor": 3,
    "count": 1,
    "counter": 2,
    "counterpart": 3,
    "counties": 2,
    "countries": 2,
    "country": 2,
    "counts": 1,
    "county": 2,
    "coup": 1,
    "couple": 2,
    "courage": 2,
    "course": 1,
    "court": 1,
    "courtroom": 2,
    "courts": 1,
    "cousin": 2,
    "cover": 2,
    "coverage": 3,
    "covered": 2,
    "covering": 3,
    "cow": 1,
    "cowboy": 2,
    "crack": 1,
    "craft": 1,
    "crafts": 1,
    "craig": 1,
    "crash": 1,
    "crawl": 1,
    "crazy": 2,
    "cream": 1,
    "create": 2,
    "created": 3,
    "creating": 3,
    "creation": 3,
    "creative": 3,
    "creativity": 5,
    "creator": 3,
    "creature": 2,
    "credibility": 5,
    "credit": 2,
    "creek": 1,
    "crew": 1,
    "cricket": 2,
    "crime": 1,
    "criminal": 3,
    "crisis": 2,
    "criteria": 4,
    "critic": 2,
    "critical": 3,
    "criticism": 3,
    "criticize": 3,
    "crop": 1,
    "cross": 1,
    "crowd": 1,
    "crowded": 2,
    "crown": 1,
    "crucial": 2,
    "cruel": 2,
    "cruise": 1,
    "crush": 1,
    "cry": 1,
    "crystal": 2,
    "cuba": 2,
    "cuban": 2,
    "cue": 1,
    "cultural": 3,
    "culture": 2,
    "cup": 1,
    "cure": 1,
    "curiosity": 5,
    "curious": 3,
    "currency": 3,
    "current": 2,
    "currently": 3,
    "curriculum": 4,
    "curtain": 2,
    "curve": 1,
    "custody": 3,
    "custom": 2,
    "customer": 3,
    "customize": 3,
    "customized": 3,
    "customs": 2,
    "cut": 1,
    "cute": 1,
    "cuts": 1,
    "cutting": 2,
    "cycle": 2,
    "cycling": 2,
    "cyprus": 2,
    "czech": 1,
    "dad": 1,
    "daily": 2,
    "dakota": 3,
    "dallas": 2,
    "dam": 1,
    "damage": 2,
    "damn": 1,
    "dan": 1,
    "dance": 1,
    "dancer": 2,
    "dancing": 1,
    "danger": 2,
    "dangerous": 3,
    "daniel": 2,
    "danny": 2,
    "dare": 1,
    "dark": 1,
    "darkness": 2,
    "data": 2,
    "database": 3,
    "date": 1,
    "dated": 2,
    "dates": 1,
    "dating": 2,
    "daughter": 2,
    "dave": 1,
    "david": 2,
    "davis": 2,
    "dawn": 1,
    "day": 1,
    "days": 1,
    "dead": 1,
    "deadline": 2,
    "deadly": 2,
    "deal": 1,
    "dealer": 2,
    "dealing": 2,
    "deals": 1,
    "dean": 1,
    "dear": 1,
    "death": 1,
    "deaths": 1,
    "debate": 2,
    "debris": 2,
    "debt": 1,
    "debut": 2,
    "decade": 2,
    "december": 3,
    "decent": 2,
    "decide": 2,
    "decided": 3,
    "decision": 3,
    "deck": 1,
    "declaration": 4,
    "declare": 2,
    "declared": 2,
    "decline": 2,
    "decor": 2,
    "decorate": 3,
    "decrease": 2,
    "dedicate": 3,
    "dedicated": 4,
    "deem": 1,
    "deep": 1,
    "deeply": 2,
    "deer": 1,
    "default": 2,
    "defeat": 2,
    "defence": 2,
    "defend": 2,
    "defendant": 3,
    "defender": 2,
    "defense": 2,
    "defensive": 3,
    "deficit": 3,
    "define": 2,
    "defined": 2,
    "definitely": 4,
    "definition": 4,
    "degree": 2,
    "delaware": 3,
    "delay": 2,
    "delete": 2,
    "deleted": 3,
    "deliberately": 4,
    "delicate": 3,
    "delight": 2,
    "deliver": 3,
    "delivery": 4,
    "dell": 1,
    "delta": 2,
    "deluxe": 2,
    "demand": 2,
    "demo": 2,
    "democracy": 4,
    "democrat": 3,
    "democratic": 4,
    "demographic": 4,
    "demonstrate": 3,
    "demonstrated": 4,
    "demonstration": 4,
    "denial": 3,
    "denied": 2,
    "denmark": 2,
    "dennis": 2,
    "dense": 1,
    "density": 3,
    "dental": 2,
    "denver": 2,
    "deny": 2,
    "depart": 2,
    "department": 3,
    "departure": 3,
    "depend": 2,
    "dependent": 3,
    "depending": 2,
    "depict": 2,
    "deploy": 2,
    "deposit": 3,
    "depressed": 2,
    "depression": 3,
    "depth": 1,
    "deputy": 3,
    "derive": 2,
    "derived": 2,
    "descend": 2,
    "describe": 2,
    "described": 2,
    "description": 3,
    "desert": 2,
    "deserve": 2,
    "design": 2,
    "designated": 4,
    "designed": 2,
    "designer": 3,
    "desire": 2,
    "desired": 2,
    "desk": 1,
    "desktop": 2,
    "desperate": 3,
    "desperately": 3,
    "despite": 2,
    "dessert": 2,
    "destination": 4,
    "destroy": 2,
    "destruction": 3,
    "detail": 2,
    "detailed": 2,
    "detect": 2,
    "detection": 3,
    "detective": 3,
    "determination": 5,
    "determine": 3,
    "determined": 3,
    "determining": 4,
    "detroit": 2,
    "devastating": 4,
    "develop": 3,
    "developed": 3,
    "developer": 4,
    "developing": 4,
    "development": 4,
    "developmental": 4,
    "device": 2,
    "devices": 3,
    "devil": 2,
    "devote": 2,
    "diabetes": 4,
    "diagnose": 3,
    "diagnosis": 4,
    "diagram": 3,
    "dial": 2,
    "dialogue": 3,
    "diameter": 4,
    "diamond": 2,
    "diary": 3,
    "dick": 1,
    "dictate": 2,
    "dictionary": 4,
    "did": 1,
    "die": 1,
    "died": 1,
    "diego": 3,
    "diesel": 2,
    "diet": 2,
    "diff": 1,
    "differ": 2,
    "difference": 3,
    "differences": 4,
    "different": 3,
    "differently": 3,
    "difficult": 3,
    "difficulty": 4,
    "dig": 1,
    "digest": 2,
    "digital": 3,
    "dignity": 3,
    "dildo": 2,
    "dilemma": 3,
    "dimension": 3,
    "diminish": 3,
    "dining": 1,
    "dinner": 2,
    "dip": 1,
    "diplomat": 3,
    "diplomatic": 4,
    "direct": 2,
    "directed": 3,
    "direction": 3,
    "directly": 3,
    "director": 3,
    "directories": 4,
    "directory": 4,
    "dirt": 1,
    "dirty": 2,
    "disabilities": 5,
    "disability": 5,
    "disabled": 3,
    "disagree": 3,
    "disappear": 3,
    "disappointed": 4,
    "disappointment": 4,
    "disaster": 3,
    "disc": 1,
    "discharge": 2,
    "discipline": 3,
    "disclaimer": 3,
    "disclose": 2,
    "disclosure": 3,
    "discount": 2,
    "discourage": 3,
    "discourse": 2,
    "discover": 3,
    "discovered": 3,
    "discovery": 4,
    "discrimination": 5,
    "discuss": 2,
    "discussed": 2,
    "discussion": 3,
    "discussions": 3,
    "disease": 2,
    "dish": 1,
    "disk": 1,
    "dismiss": 2,
    "disney": 2,
    "disorder": 3,
    "display": 2,
    "displayed": 2,
    "disposal": 3,
    "dispute": 2,
    "dissolve": 2,
    "distance": 2,
    "distant": 2,
    "distinct": 2,
    "distinction": 3,
    "distinctive": 3,
    "distinguish": 3,
    "distract": 2,
    "distribute": 3,
    "distributed": 4,
    "distribution": 4,
    "district": 2,
    "disturb": 2,
    "disturbing": 3,
    "diverse": 2,
    "diversity": 4,
    "divide": 2,
    "divided": 3,
    "divine": 2,
    "diving": 2,
    "division": 3,
    "divorce": 2,
    "dna": 1,
    "do": 1,
    "dock": 1,
    "doctor": 2,
    "doctrine": 2,
    "document": 3,
    "documentary": 5,
    "documentation": 5,
    "dodge": 1,
    "does": 1,
    "dog": 1,
    "dogs": 1,
    "doing": 2,
    "doll": 1,
    "dollar": 2,
    "dolls": 1,
    "domain": 2,
    "domestic": 3,
    "dominant": 3,
    "dominate": 3,
    "don": 1,
    "donald": 2,
    "donate": 2,
    "donation": 3,
    "done": 1,
    "donkey": 2,
    "donor": 2,
    "dont": 1,
    "door": 1,
    "doors": 1,
    "doorway": 2,
    "dose": 1,
    "dot": 1,
    "double": 2,
    "doubt": 1,
    "dough": 1,
    "douglas": 2,
    "down": 1,
    "download": 2,
    "downtown": 2,
    "dozen": 2,
    "dr.": 1,
    "draft": 1,
    "drag": 1,
    "dragon": 2,
    "drain": 1,
    "drama": 2,
    "dramatic": 3,
    "dramatically": 3,
    "draw": 1,
    "drawer": 2,
    "drawing": 2,
    "drawn": 1,
    "dream": 1,
    "dreams": 1,
    "dress": 1,
    "drew": 1,
    "dried": 1,
    "drift": 1,
    "drill": 1,
    "drink": 1,
    "drinking": 2,
    "drinks": 1,
    "drive": 1,
    "driven": 2,
    "driver": 2,
    "drivers": 2,
    "drives": 1,
    "driveway": 2,
    "driving": 2,
    "drop": 1,
    "dropped": 1,
    "drove": 1,
    "drown": 1,
    "drug": 1,
    "drugs": 1,
    "drum": 1,
    "drunk": 1,
    "dry": 1,
    "dual": 2,
    "dublin": 2,
    "duck": 1,
    "due": 1,
    "dug": 1,
    "duke": 1,
    "dull": 1,
    "dumb": 1,
    "dump": 1,
    "duration": 3,
    "durham": 2,
    "during": 2,
    "dust": 1,
    "dutch": 1,
    "duties": 2,
    "duty": 2,
    "dvds": 3,
    "dying": 2,
    "dynamic": 3,
    "dynamics": 3,
    "e-mail": 2,
    "each": 1,
    "eager": 2,
    "eagle": 2,
    "ear": 1,
    "earlier": 2,
    "early": 2,
    "earn": 1,
    "earned": 1,
    "earnings": 2,
    "earth": 1,
    "earthquake": 2,
    "ease": 1,
    "easier": 3,
    "easily": 3,
    "east": 1,
    "eastern": 2,
    "easy": 2,
    "eat": 1,
    "eaten": 2,
    "eating": 2,
    "ebay": 2,
    "ebony": 3,
    "echo": 2,
    "ecological": 4,
    "economic": 4,
    "economically": 6,
    "economics": 4,
    "economist": 4,
    "economy": 4,
    "ecosystem": 4,
    "eddy": 2,
    "edge": 1,
    "edinburgh": 3,
    "edit": 2,
    "edited": 3,
    "edition": 3,
    "editor": 3,
    "editorial": 5,
    "educate": 3,
    "education": 4,
    "educational": 5,
    "educator": 4,
    "edward": 2,
    "effect": 2,
    "effective": 3,
    "effectively": 3,
    "effectiveness": 3,
    "effects": 2,
    "efficiency": 4,
    "efficient": 3,
    "effort": 2,
    "egg": 1,
    "ego": 2,
    "egypt": 2,
    "eight": 1,
    "eighth": 1,
    "either": 2,
    "elaborate": 4,
    "elbow": 2,
    "elder": 2,
    "elderly": 3,
    "elect": 2,
    "elected": 3,
    "election": 3,
    "electric": 3,
    "electrical": 4,
    "electricity": 5,
    "electronic": 4,
    "electronics": 4,
    "elegant": 3,
    "element": 3,
    "elementary": 5,
    "elephant": 3,
    "elevator": 4,
    "eleven": 3,
    "eligibility": 6,
    "eligible": 4,
    "eliminate": 4,
    "elite": 2,
    "elizabeth": 4,
    "ellen": 2,
    "else": 1,
    "elsewhere": 2,
    "email": 2,
    "emails": 2,
    "embarrassed": 3,
    "embedded": 3,
    "embrace": 2,
    "emerge": 2,
    "emergency": 4,
    "emerging": 3,
    "emission": 3,
    "emotion": 3,
    "emotional": 4,
    "emotionally": 4,
    "emphasis": 3,
    "emphasize": 3,
    "empire": 2,
    "employ": 2,
    "employed": 2,
    "employee": 3,
    "employees": 3,
    "employer": 3,
    "employers": 3,
    "employment": 3,
    "empty": 2,
    "enable": 3,
    "enabled": 3,
    "enact": 2,
    "encounter": 3,
    "encourage": 3,
    "encouraged": 3,
    "encouraging": 3,
    "encyclopedia": 6,
    "end": 1,
    "ending": 2,
    "endless": 2,
    "endorse": 2,
    "ends": 1,
    "endure": 2,
    "enemy": 3,
    "energy": 3,
    "enforce": 2,
    "enforcement": 3,
    "engage": 2,
    "engaged": 2,
    "engagement": 3,
    "engine": 2,
    "engineer": 3,
    "engineering": 4,
    "england": 2,
    "english": 2,
    "enhance": 2,
    "enhanced": 2,
    "enjoy": 2,
    "enjoyed": 2,
    "enlarge": 2,
    "enormous": 3,
    "enough": 2,
    "enroll": 2,
    "enrollment": 3,
    "ensure": 2,
    "enter": 2,
    "enterprise": 3,
    "entertainment": 4,
    "enthusiasm": 4,
    "entire": 2,
    "entirely": 3,
    "entities": 3,
    "entitle": 3,
    "entitled": 3,
    "entity": 3,
    "entrance": 2,
    "entrepreneur": 4,
    "entries": 2,
    "entry": 2,
    "envelope": 3,
    "environment": 4,
    "environmental": 4,
    "envision": 3,
    "epidemic": 4,
    "episode": 3,
    "equal": 2,
    "equality": 4,
    "equally": 3,
    "equation": 3,
    "equator": 3,
    "equip": 2,
    "equipment": 3,
    "equipped": 2,
    "equity": 3,
    "equivalent": 4,
    "era": 2,
    "eric": 2,
    "ericsson": 3,
    "erotic": 3,
    "error": 2,
    "escape": 2,
    "especially": 4,
    "essay": 2,
    "essence": 2,
    "essential": 3,
    "essentially": 3,
    "establish": 3,
    "established": 3,
    "establishment": 4,
    "estate": 2,
    "estimate": 3,
    "estimated": 3,
    "etc": 1,
    "etc.": 1,
    "ethernet": 3,
    "ethical": 3,
    "ethics": 2,
    "ethnic": 2,
    "euro": 2,
    "europe": 2,
    "european": 4,
    "evaluate": 4,
    "evaluation": 5,
    "evans": 2,
    "even": 2,
    "evening": 2,
    "event": 2,
    "eventually": 5,
    "ever": 2,
    "every": 2,
    "everybody": 4,
    "everyday": 3,
    "everyone": 3,
    "everything": 3,
    "everywhere": 3,
    "evidence": 3,
    "evident": 3,
    "evil": 2,
    "evolution": 4,
    "evolve": 2,
    "exact": 2,
    "exactly": 3,
    "exam": 2,
    "examination": 5,
    "examine": 3,
    "example": 3,
    "exceed": 2,
    "excel": 2,
    "excellence": 3,
    "excellent": 3,
    "except": 2,
    "exception": 3,
    "excerpt": 2,
    "excess": 2,
    "excessive": 3,
    "exchange": 2,
    "excited": 3,
    "excitement": 3,
    "exciting": 3,
    "exclaimed": 2,
    "exclude": 2,
    "exclusive": 3,
    "exclusively": 3,
    "excuse": 2,
    "execute": 3,
    "execution": 4,
    "executive": 4,
    "exercise": 3,
    "exhaust": 2,
    "exhibit": 3,
    "exhibition": 4,
    "exist": 2,
    "existence": 3,
    "existing": 2,
    "exit": 2,
    "exotic": 3,
    "expand": 2,
    "expanded": 3,
    "expansion": 3,
    "expect": 2,
    "expectation": 4,
    "expectations": 4,
    "expected": 2,
    "expedition": 4,
    "expense": 2,
    "expensive": 3,
    "experience": 4,
    "experienced": 4,
    "experiences": 5,
    "experiment": 4,
    "experimental": 5,
    "expert": 2,
    "expertise": 3,
    "explain": 2,
    "explanation": 4,
    "explicit": 3,
    "explode": 2,
    "exploit": 2,
    "exploration": 4,
    "explore": 2,
    "explorer": 3,
    "explosion": 3,
    "export": 2,
    "expose": 2,
    "exposed": 2,
    "exposure": 3,
    "express": 2,
    "expression": 3,
    "extend": 2,
    "extended": 3,
    "extension": 3,
    "extensive": 3,
    "extent": 2,
    "external": 3,
    "extra": 2,
    "extraordinary": 5,
    "extreme": 2,
    "extremely": 3,
    "eye": 1,
    "eyebrow": 2,
    "eyes": 1,
    "fabric": 2,
    "face": 1,
    "facial": 2,
    "facilitate": 4,
    "facilities": 4,
    "facility": 4,
    "facing": 2,
    "fact": 1,
    "factor": 2,
    "factory": 3,
    "facts": 1,
    "faculty": 3,
    "fade": 1,
    "fail": 1,
    "failed": 1,
    "fails": 1,
    "failure": 2,
    "faint": 1,
    "fair": 1,
    "fairly": 2,
    "faith": 1,
    "faiths": 1,
    "fall": 1,
    "fallen": 2,
    "falling": 2,
    "falls": 1,
    "false": 1,
    "falsehood": 2,
    "falsehoods": 2,
    "fame": 1,
    "familiar": 3,
    "families": 3,
    "family": 3,
    "famous": 2,
    "fan": 1,
    "fans": 1,
    "fantastic": 3,
    "fantasy": 3,
    "faqs": 3,
    "far": 1,
    "fare": 1,
    "farm": 1,
    "farmer": 2,
    "farther": 2,
    "fascinating": 4,
    "fashion": 2,
    "fast": 1,
    "fastened": 2,
    "faster": 1,
    "fat": 1,
    "fatal": 2,
    "fate": 1,
    "father": 2,
    "fatigue": 2,
    "fault": 1,
    "favor": 2,
    "favorable": 4,
    "favorite": 3,
    "favourite": 3,
    "fear": 1,
    "feather": 2,
    "feathers": 2,
    "feature": 2,
    "featured": 2,
    "featuring": 3,
    "february": 4,
    "fed": 1,
    "federal": 3,
    "federation": 4,
    "fee": 1,
    "feed": 1,
    "feedback": 2,
    "feeding": 2,
    "feeds": 1,
    "feel": 1,
    "feeling": 2,
    "feels": 1,
    "fees": 1,
    "feet": 1,
    "fell": 1,
    "fellow": 2,
    "felt": 1,
    "female": 2,
    "feminist": 3,
    "fence": 1,
    "festival": 3,
    "fetish": 2,
    "fever": 2,
    "few": 1,
    "fewer": 2,
    "fiber": 2,
    "fiction": 2,
    "field": 1,
    "fields": 1,
    "fierce": 1,
    "fifteen": 2,
    "fifth": 1,
    "fifty": 2,
    "fight": 1,
    "fighter": 2,
    "fighting": 2,
    "figure": 2,
    "file": 1,
    "filed": 1,
    "files": 1,
    "filing": 2,
    "fill": 1,
    "filled": 1,
    "film": 1,
    "films": 1,
    "filter": 2,
    "filters": 2,
    "final": 2,
    "finally": 3,
    "finance": 2,
    "financial": 3,
    "financing": 3,
    "find": 1,
    "finder": 2,
    "finding": 2,
    "finds": 1,
    "fine": 1,
    "finest": 2,
    "finger": 2,
    "finish": 2,
    "finished": 2,
    "finland": 2,
    "fire": 1,
    "firefox": 2,
    "fireplace": 2,
    "firewall": 2,
    "firm": 1,
    "firmly": 1,
    "firms": 1,
    "first": 1,
    "fiscal": 2,
    "fish": 1,
    "fisherman": 3,
    "fishing": 2,
    "fist": 1,
    "fit": 1,
    "fitness": 2,
    "fits": 1,
    "five": 1,
    "fix": 1,
    "fixed": 1,
    "flag": 1,
    "flags": 1,
    "flame": 1,
    "flash": 1,
    "flashing": 2,
    "flat": 1,
    "flavor": 2,
    "flee": 1,
    "fleet": 1,
    "flesh": 1,
    "flew": 1,
    "flexibility": 3,
    "flexible": 3,
    "flies": 1,
    "flight": 1,
    "flights": 1,
    "flip": 1,
    "float": 1,
    "floating": 2,
    "flood": 1,
    "floor": 1,
    "florida": 3,
    "florist": 2,
    "flour": 1,
    "flow": 1,
    "flower": 2,
    "flowers": 2,
    "fluid": 2,
    "fly": 1,
    "flying": 2,
    "focus": 2,
    "focused": 2,
    "fog": 1,
    "fold": 1,
    "folder": 2,
    "folk": 1,
    "folks": 1,
    "follow": 2,
    "following": 3,
    "font": 1,
    "fonts": 1,
    "food": 1,
    "foods": 1,
    "fool": 1,
    "foot": 1,
    "football": 2,
    "for": 1,
    "forbid": 2,
    "force": 1,
    "forced": 1,
    "ford": 1,
    "forecast": 2,
    "forehead": 2,
    "foreign": 2,
    "foreigner": 3,
    "forest": 2,
    "forever": 3,
    "forget": 2,
    "forgive": 2,
    "forgot": 2,
    "forgotten": 3,
    "fork": 1,
    "form": 1,
    "formal": 2,
    "format": 2,
    "formation": 3,
    "formed": 1,
    "former": 2,
    "formerly": 3,
    "forms": 1,
    "formula": 3,
    "fort": 1,
    "forth": 1,
    "fortunately": 3,
    "fortune": 2,
    "forty": 2,
    "forum": 2,
    "forums": 2,
    "forward": 2,
    "foster": 2,
    "fought": 1,
    "found": 1,
    "foundation": 3,
    "founder": 2,
    "four": 1,
    "fourth": 1,
    "fox": 1,
    "fraction": 2,
    "fragile": 2,
    "fragment": 2,
    "frame": 1,
    "framed": 1,
    "frames": 1,
    "framework": 2,
    "france": 1,
    "franchise": 2,
    "francis": 2,
    "francisco": 3,
    "frank": 1,
    "franklin": 2,
    "frankly": 2,
    "fraud": 1,
    "fred": 1,
    "free": 1,
    "freedom": 2,
    "freely": 2,
    "freeware": 2,
    "freeze": 1,
    "french": 1,
    "frequency": 3,
    "frequent": 2,
    "frequently": 3,
    "fresh": 1,
    "freshman": 2,
    "friday": 2,
    "friend": 1,
    "friendly": 2,
    "friends": 1,
    "friendship": 2,
    "frighten": 2,
    "frog": 1,
    "from": 1,
    "front": 1,
    "frontier": 2,
    "frown": 1,
    "frozen": 2,
    "fruit": 1,
    "frustrate": 2,
    "frustration": 3,
    "fuck": 1,
    "fucked": 1,
    "fucking": 2,
    "fuel": 2,
    "full": 1,
    "full-time": 1,
    "fully": 2,
    "fun": 1,
    "function": 2,
    "functional": 3,
    "functionality": 5,
    "fund": 1,
    "fundamental": 4,
    "funded": 2,
    "funding": 1,
    "funds": 1,
    "funeral": 3,
    "funny": 2,
    "fur": 1,
    "furniture": 3,
    "further": 2,
    "furthermore": 3,
    "future": 2,
    "gain": 1,
    "galaxy": 3,
    "galleries": 3,
    "gallery": 3,
    "gambling": 2,
    "game": 1,
    "games": 1,
    "gaming": 2,
    "gamma": 2,
    "gang": 1,
    "gap": 1,
    "garage": 2,
    "garbage": 2,
    "garden": 2,
    "gardening": 3,
    "gardens": 2,
    "garlic": 2,
    "gary": 2,
    "gas": 1,
    "gasoline": 3,
    "gate": 1,
    "gateway": 2,
    "gather": 2,
    "gathering": 3,
    "gave": 1,
    "gay": 1,
    "gaze": 1,
    "gear": 1,
    "gender": 2,
    "gene": 1,
    "general": 3,
    "generally": 4,
    "generate": 3,
    "generated": 4,
    "generation": 4,
    "generator": 4,
    "generic": 3,
    "generous": 3,
    "genes": 1,
    "genetic": 3,
    "genetics": 3,
    "genius": 2,
    "genre": 2,
    "gentle": 2,
    "gentleman": 3,
    "gently": 2,
    "genuine": 3,
    "geographic": 4,
    "geography": 4,
    "george": 1,
    "georgia": 2,
    "german": 2,
    "germany": 3,
    "gesture": 2,
    "get": 1,
    "gets": 1,
    "getting": 1,
    "ghost": 1,
    "giant": 2,
    "gift": 1,
    "gifted": 2,
    "gifts": 1,
    "girl": 1,
    "girlfriend": 2,
    "girls": 1,
    "give": 1,
    "given": 2,
    "gives": 1,
    "giving": 1,
    "glad": 1,
    "glance": 1,
    "glasgow": 2,
    "glass": 1,
    "glimpse": 1,
    "global": 2,
    "globe": 1,
    "glory": 2,
    "glossary": 3,
    "glove": 1,
    "gloves": 1,
    "gnome": 1,
    "go": 1,
    "goal": 1,
    "goals": 1,
    "goat": 1,
    "god": 1,
    "goes": 1,
    "going": 2,
    "gold": 1,
    "golden": 2,
    "golf": 1,
    "gone": 1,
    "gonna": 2,
    "good": 1,
    "goods": 1,
    "google": 2,
    "goose": 1,
    "gordon": 2,
    "gospel": 2,
    "got": 1,
    "goto": 2,
    "gourmet": 2,
    "govern": 2,
    "governance": 3,
    "government": 3,
    "governments": 3,
    "governor": 3,
    "grab": 1,
    "grabbed": 1,
    "grace": 1,
    "grade": 1,
    "grades": 1,
    "gradually": 3,
    "graduate": 3,
    "graduation": 4,
    "graham": 2,
    "grain": 1,
    "grand": 1,
    "grandchild": 2,
    "grandfather": 3,
    "grandmother": 3,
    "grandparent": 3,
    "grant": 1,
    "grants": 1,
    "grape": 1,
    "graph": 1,
    "graphic": 2,
    "graphics": 2,
    "grasp": 1,
    "grass": 1,
    "grateful": 2,
    "gratis": 2,
    "grave": 1,
    "gravity": 3,
    "gray": 1,
    "great": 1,
    "greater": 2,
    "greatest": 1,
    "greatly": 2,
    "greece": 1,
    "greek": 1,
    "green": 1,
    "greet": 1,
    "greg": 1,
    "grew": 1,
    "grey": 1,
    "grid": 1,
    "grief": 1,
    "grin": 1,
    "grip": 1,
    "grocery": 3,
    "gross": 1,
    "ground": 1,
    "grounds": 1,
    "group": 1,
    "groups": 1,
    "grove": 1,
    "grow": 1,
    "growing": 2,
    "grown": 1,
    "growth": 1,
    "guarantee": 3,
    "guaranteed": 3,
    "guard": 1,
    "guardian": 3,
    "guess": 1,
    "guest": 1,
    "guests": 1,
    "guidance": 2,
    "guide": 1,
    "guideline": 2,
    "guides": 1,
    "guilt": 1,
    "guilty": 2,
    "guinea": 2,
    "guitar": 2,
    "gulf": 1,
    "gun": 1,
    "guns": 1,
    "gut": 1,
    "guy": 1,
    "guys": 1,
    "gym": 1,
    "ha": 1,
    "habit": 2,
    "habitat": 3,
    "had": 1,
    "hair": 1,
    "hairy": 2,
    "half": 1,
    "halfway": 2,
    "hall": 1,
    "halloween": 3,
    "hallway": 2,
    "hamilton": 3,
    "hampshire": 2,
    "hand": 1,
    "handbook": 2,
    "handful": 2,
    "handheld": 2,
    "handle": 2,
    "handling": 2,
    "hands": 1,
    "handsome": 2,
    "hang": 1,
    "happen": 2,
    "happened": 2,
    "happening": 3,
    "happens": 2,
    "happily": 3,
    "happiness": 3,
    "happy": 2,
    "harassment": 3,
    "harbor": 2,
    "hard": 1,
    "hardcore": 2,
    "hardcover": 3,
    "harder": 1,
    "hardly": 2,
    "hardware": 2,
    "harm": 1,
    "harmony": 3,
    "harris": 2,
    "harry": 2,
    "harsh": 1,
    "harvard": 2,
    "harvest": 2,
    "has": 1,
    "hat": 1,
    "hate": 1,
    "hats": 1,
    "haul": 1,
    "have": 1,
    "having": 1,
    "hawaii": 3,
    "hay": 1,
    "hazard": 2,
    "hazardous": 3,
    "he": 1,
    "head": 1,
    "headache": 2,
    "headed": 2,
    "header": 2,
    "heading": 2,
    "headline": 2,
    "headquarters": 3,
    "heads": 1,
    "headset": 2,
    "heal": 1,
    "healing": 2,
    "health": 1,
    "healthcare": 2,
    "healthy": 2,
    "hear": 1,
    "heard": 1,
    "hearing": 2,
    "heart": 1,
    "hearts": 1,
    "heat": 1,
    "heating": 2,
    "heaven": 2,
    "heavily": 3,
    "heavy": 2,
    "heel": 1,
    "height": 1,
    "heights": 1,
    "held": 1,
    "helicopter": 4,
    "hell": 1,
    "hello": 2,
    "helmet": 2,
    "help": 1,
    "helped": 1,
    "helpful": 2,
    "helping": 2,
    "helps": 1,
    "hence": 1,
    "henry": 2,
    "her": 1,
    "herald": 2,
    "herb": 1,
    "herd": 1,
    "here": 1,
    "herein": 2,
    "heritage": 3,
    "hero": 2,
    "hers": 1,
    "herself": 2,
    "hesitate": 3,
    "hey": 1,
    "hi": 1,
    "hidden": 2,
    "hide": 1,
    "high": 1,
    "high-tech": 1,
    "higher": 1,
    "highest": 1,
    "highlight": 2,
    "highly": 2,
    "highway": 2,
    "hike": 1,
    "hiking": 2,
    "hill": 1,
    "hills": 1,
    "him": 1,
    "himself": 2,
    "hint": 1,
    "hip": 1,
    "hire": 1,
    "his": 1,
    "hispanic": 3,
    "hist": 1,
    "historian": 4,
    "historic": 3,
    "historical": 4,
    "historically": 4,
    "history": 3,
    "hit": 1,
    "hits": 1,
    "hobbies": 2,
    "hockey": 2,
    "hold": 1,
    "holder": 2,
    "holding": 2,
    "holds": 1,
    "hole": 1,
    "holes": 1,
    "holiday": 3,
    "holland": 2,
    "hollow": 2,
    "hollywood": 3,
    "holy": 2,
    "home": 1,
    "homeland": 2,
    "homeless": 2,
    "homes": 1,
    "homework": 2,
    "honda": 2,
    "honest": 2,
    "honestly": 3,
    "honey": 2,
    "hong": 1,
    "honor": 2,
    "hook": 1,
    "hope": 1,
    "hopefully": 3,
    "horizon": 3,
    "hormone": 2,
    "horn": 1,
    "horny": 2,
    "horrible": 3,
    "horror": 2,
    "horse": 1,
    "horses": 2,
    "hospital": 3,
    "host": 1,
    "hostage": 2,
    "hostile": 2,
    "hosting": 2,
    "hosts": 1,
    "hot": 1,
    "hotel": 2,
    "hour": 1,
    "hours": 1,
    "house": 1,
    "household": 2,
    "houses": 2,
    "housewares": 2,
    "housing": 2,
    "houston": 2,
    "how": 1,
    "howard": 2,
    "however": 3,
    "html": 4,
    "http": 4,
    "hug": 1,
    "huge": 1,
    "huh": 1,
    "human": 2,
    "humanity": 4,
    "humor": 2,
    "hundred": 2,
    "hundreds": 2,
    "hung": 1,
    "hungary": 3,
    "hunger": 2,
    "hungry": 2,
    "hunt": 1,
    "hunter": 2,
    "hunting": 2,
    "hurricane": 3,
    "hurried": 2,
    "hurry": 2,
    "hurt": 1,
    "husband": 2,
    "hybrid": 2,
    "hypothesis": 4,
    "i": 1,
    "ice": 1,
    "icon": 2,
    "idaho": 3,
    "idea": 3,
    "ideal": 3,
    "ideas": 3,
    "identical": 4,
    "identification": 6,
    "identified": 4,
    "identify": 4,
    "identifying": 5,
    "identity": 4,
    "ideological": 6,
    "ideology": 5,
    "ie": 1,
    "ieee": 4,
    "if": 1,
    "ignore": 2,
    "ill": 1,
    "illegal": 3,
    "illinois": 3,
    "illness": 2,
    "illusion": 3,
    "illustrate": 3,
    "illustrated": 4,
    "image": 2,
    "imagination": 5,
    "imagine": 3,
    "imaging": 3,
    "immediate": 4,
    "immediately": 5,
    "immigrant": 3,
    "immigration": 4,
    "immune": 2,
    "impact": 2,
    "implement": 3,
    "implementation": 5,
    "implication": 4,
    "imply": 2,
    "import": 2,
    "importance": 3,
    "important": 3,
    "importantly": 3,
    "impose": 2,
    "impossible": 4,
    "impress": 2,
    "impression": 3,
    "impressive": 3,
    "improve": 2,
    "improved": 2,
    "improvement": 3,
    "improving": 3,
    "impulse": 2,
    "in": 1,
    "incentive": 3,
    "incest": 2,
    "inch": 1,
    "inches": 2,
    "incident": 3,
    "include": 2,
    "included": 3,
    "including": 2,
    "income": 2,
    "incorporate": 4,
    "incorporated": 5,
    "increase": 2,
    "increased": 2,
    "increases": 3,
    "increasing": 3,
    "increasingly": 3,
    "incredible": 4,
    "incredibly": 4,
    "indeed": 2,
    "independence": 4,
    "independent": 4,
    "index": 2,
    "india": 3,
    "indian": 3,
    "indiana": 4,
    "indicate": 3,
    "indicated": 4,
    "indication": 4,
    "indicator": 4,
    "indigenous": 4,
    "individual": 5,
    "individuals": 5,
    "indonesia": 4,
    "indoor": 2,
    "induced": 2,
    "industrial": 4,
    "industries": 3,
    "industry": 3,
    "inevitable": 5,
    "inevitably": 5,
    "infant": 2,
    "infection": 3,
    "inflation": 3,
    "influence": 3,
    "influential": 4,
    "info": 2,
    "inform": 2,
    "informal": 3,
    "information": 4,
    "informed": 2,
    "infrastructure": 4,
    "ingredient": 4,
    "inherent": 3,
    "inherit": 3,
    "initial": 3,
    "initially": 3,
    "initiate": 4,
    "initiative": 5,
    "injure": 2,
    "injuries": 3,
    "injury": 3,
    "inmate": 2,
    "inner": 2,
    "innocent": 3,
    "innovation": 4,
    "innovative": 4,
    "input": 2,
    "inquiry": 3,
    "insect": 2,
    "insert": 2,
    "inside": 2,
    "insider": 3,
    "insight": 2,
    "insist": 2,
    "inspection": 3,
    "inspector": 3,
    "inspiration": 4,
    "inspire": 2,
    "inspired": 2,
    "install": 2,
    "installation": 4,
    "installed": 2,
    "installing": 3,
    "instance": 2,
    "instant": 2,
    "instantly": 3,
    "instead": 2,
    "instinct": 2,
    "institute": 3,
    "institution": 4,
    "institutional": 5,
    "instruct": 2,
    "instruction": 3,
    "instructional": 3,
    "instructor": 3,
    "instrument": 3,
    "insurance": 3,
    "intact": 2,
    "integer": 3,
    "integrate": 3,
    "integrated": 4,
    "integration": 4,
    "integrity": 4,
    "intel": 2,
    "intellectual": 5,
    "intelligence": 4,
    "intelligent": 4,
    "intend": 2,
    "intended": 3,
    "intense": 2,
    "intensity": 4,
    "intent": 2,
    "intention": 3,
    "inter": 2,
    "interact": 3,
    "interaction": 4,
    "interactions": 4,
    "interactive": 4,
    "interest": 3,
    "interested": 4,
    "interesting": 4,
    "interface": 3,
    "interfere": 3,
    "interior": 4,
    "intermediate": 5,
    "internal": 3,
    "international": 5,
    "internet": 3,
    "interpret": 3,
    "interpretation": 5,
    "interracial": 4,
    "interrupt": 3,
    "interval": 3,
    "intervention": 4,
    "interview": 3,
    "intimate": 3,
    "into": 2,
    "intro": 2,
    "introduce": 3,
    "introduced": 3,
    "introduction": 4,
    "invade": 2,
    "invasion": 3,
    "invent": 2,
    "invented": 2,
    "invention": 3,
    "inventory": 4,
    "invest": 2,
    "investigate": 4,
    "investigation": 5,
    "investigator": 4,
    "investment": 3,
    "investor": 2,
    "invisible": 4,
    "invitation": 4,
    "invite": 2,
    "invited": 3,
    "involve": 2,
    "involved": 2,
    "involvement": 2,
    "involving": 3,
    "iowa": 3,
    "ipod": 2,
    "iran": 2,
    "iraq": 2,
    "iraqi": 3,
    "ireland": 2,
    "irish": 2,
    "iron": 2,
    "ironically": 4,
    "irony": 3,
    "is": 1,
    "islam": 2,
    "islamic": 2,
    "island": 2,
    "isolate": 3,
    "isolated": 4,
    "isolation": 4,
    "israel": 3,
    "israeli": 3,
    "issue": 2,
    "issued": 2,
    "it": 1,
    "italian": 3,
    "italy": 3,
    "item": 2,
    "its": 1,
    "itself": 2,
    "jack": 1,
    "jacket": 2,
    "jackson": 2,
    "jacksonville": 3,
    "jail": 1,
    "jamaica": 3,
    "james": 1,
    "jane": 1,
    "january": 4,
    "japan": 2,
    "japanese": 3,
    "jar": 1,
    "jason": 2,
    "java": 2,
    "jaw": 1,
    "jazz": 1,
    "jean": 1,
    "jeans": 1,
    "jeff": 1,
    "jefferson": 3,
    "jennifer": 3,
    "jerry": 2,
    "jersey": 2,
    "jessica": 3,
    "jesus": 2,
    "jet": 1,
    "jew": 1,
    "jewellery": 4,
    "jewelry": 3,
    "jewish": 2,
    "jim": 1,
    "jimmy": 2,
    "job": 1,
    "jobs": 1,
    "joe": 1,
    "john": 1,
    "johnny": 2,
    "johnson": 2,
    "join": 1,
    "joined": 1,
    "joint": 1,
    "joke": 1,
    "jokes": 1,
    "jonathan": 3,
    "jones": 1,
    "jordan": 2,
    "jose": 2,
    "joseph": 2,
    "journal": 2,
    "journalism": 3,
    "journalist": 3,
    "journey": 2,
    "joy": 1,
    "judge": 1,
    "judgment": 2,
    "judicial": 3,
    "juice": 1,
    "july": 2,
    "jump": 1,
    "june": 1,
    "jungle": 2,
    "junior": 2,
    "jurisdiction": 4,
    "juror": 2,
    "jury": 2,
    "just": 1,
    "justice": 2,
    "justify": 3,
    "kansas": 2,
    "karen": 2,
    "kate": 1,
    "katrina": 3,
    "keep": 1,
    "keeping": 2,
    "keeps": 1,
    "keith": 1,
    "kelly": 2,
    "kennedy": 3,
    "kent": 1,
    "kentucky": 3,
    "kenya": 2,
    "kept": 1,
    "kernel": 2,
    "kerry": 2,
    "kevin": 2,
    "key": 1,
    "keyboard": 2,
    "keys": 1,
    "keyword": 2,
    "kick": 1,
    "kid": 1,
    "kids": 1,
    "kill": 1,
    "killed": 1,
    "killer": 2,
    "killing": 2,
    "kind": 1,
    "kinds": 1,
    "king": 1,
    "kingdom": 2,
    "kings": 1,
    "kiss": 1,
    "kit": 1,
    "kitchen": 2,
    "kits": 1,
    "knee": 1,
    "kneel": 1,
    "knew": 1,
    "knife": 1,
    "knight": 1,
    "knock": 1,
    "know": 1,
    "knowing": 2,
    "knowledge": 2,
    "known": 1,
    "knows": 1,
    "kong": 1,
    "korea": 3,
    "korean": 3,
    "lab": 1,
    "label": 2,
    "labor": 2,
    "laboratory": 5,
    "labour": 2,
    "labs": 1,
    "lack": 1,
    "ladder": 2,
    "ladies": 2,
    "lady": 2,
    "laid": 1,
    "lake": 1,
    "lakes": 1,
    "lamp": 1,
    "land": 1,
    "landing": 2,
    "landmark": 2,
    "lands": 1,
    "landscape": 2,
    "lane": 1,
    "language": 2,
    "lap": 1,
    "laptop": 2,
    "large": 1,
    "largely": 2,
    "larger": 1,
    "largest": 1,
    "larry": 2,
    "laser": 2,
    "last": 1,
    "late": 1,
    "lately": 2,
    "later": 2,
    "latest": 2,
    "latin": 2,
    "latter": 2,
    "laugh": 1,
    "laughter": 2,
    "launch": 1,
    "launched": 1,
    "laundry": 2,
    "laura": 2,
    "law": 1,
    "lawmaker": 3,
    "lawn": 1,
    "lawrence": 2,
    "laws": 1,
    "lawsuit": 2,
    "lawyer": 2,
    "lawyers": 2,
    "lay": 1,
    "layer": 2,
    "layers": 2,
    "layout": 2,
    "lead": 1,
    "leader": 2,
    "leadership": 3,
    "leading": 2,
    "leads": 1,
    "leaf": 1,
    "league": 1,
    "lean": 1,
    "leap": 1,
    "learn": 1,
    "learned": 1,
    "learning": 2,
    "lease": 1,
    "least": 1,
    "leather": 2,
    "leave": 1,
    "leaves": 1,
    "leaving": 2,
    "lebanon": 3,
    "lecture": 2,
    "led": 1,
    "lee": 1,
    "left": 1,
    "leg": 1,
    "legacy": 3,
    "legal": 2,
    "legally": 2,
    "legend": 2,
    "legislation": 4,
    "legislative": 4,
    "legislator": 4,
    "legislature": 4,
    "legitimate": 4,
    "legs": 1,
    "leisure": 2,
    "lemon": 2,
    "lend": 1,
    "length": 1,
    "lens": 1,
    "lenses": 2,
    "lesbian": 3,
    "less": 1,
    "lesson": 2,
    "lessons": 2,
    "let": 1,
    "lets": 1,
    "letter": 2,
    "level": 2,
    "lewis": 2,
    "liability": 5,
    "liable": 3,
    "liberal": 3,
    "liberty": 3,
    "libraries": 3,
    "library": 3,
    "licence": 2,
    "license": 2,
    "licensed": 2,
    "licensing": 3,
    "lid": 1,
    "lie": 1,
    "lies": 1,
    "life": 1,
    "lifestyle": 2,
    "lifetime": 2,
    "lift": 1,
    "light": 1,
    "lighting": 2,
    "lightly": 2,
    "lightning": 2,
    "lights": 1,
    "like": 1,
    "liked": 1,
    "likelihood": 3,
    "likely": 2,
    "likewise": 2,
    "limb": 1,
    "limit": 2,
    "limitation": 4,
    "limited": 3,
    "lincoln": 2,
    "linda": 2,
    "line": 1,
    "linear": 3,
    "lines": 1,
    "lingerie": 3,
    "link": 1,
    "linked": 1,
    "links": 1,
    "linux": 2,
    "lion": 2,
    "lip": 1,
    "lips": 1,
    "liquid": 2,
    "list": 1,
    "listed": 2,
    "listen": 2,
    "listener": 2,
    "listing": 2,
    "lists": 1,
    "lite": 1,
    "literacy": 4,
    "literally": 4,
    "literary": 4,
    "literature": 4,
    "little": 2,
    "live": 1,
    "lived": 1,
    "liver": 2,
    "lives": 1,
    "living": 2,
    "load": 1,
    "loaded": 2,
    "loading": 2,
    "loan": 1,
    "loans": 1,
    "lobby": 2,
    "local": 2,
    "locate": 2,
    "located": 3,
    "location": 3,
    "lock": 1,
    "lodge": 1,
    "lodging": 2,
    "log": 1,
    "logged": 1,
    "logic": 2,
    "logical": 3,
    "login": 2,
    "logo": 2,
    "logos": 2,
    "london": 2,
    "lonely": 2,
    "long": 1,
    "long-term": 1,
    "longer": 1,
    "longtime": 2,
    "look": 1,
    "looked": 1,
    "looking": 2,
    "looks": 1,
    "lookup": 2,
    "loop": 1,
    "loose": 1,
    "lord": 1,
    "lose": 1,
    "losing": 2,
    "loss": 1,
    "losses": 2,
    "lost": 1,
    "lot": 1,
    "lots": 1,
    "lottery": 3,
    "loud": 1,
    "louis": 2,
    "louisiana": 5,
    "lounge": 1,
    "love": 1,
    "loved": 1,
    "lovely": 2,
    "lover": 2,
    "loves": 1,
    "low": 1,
    "lower": 2,
    "lowest": 2,
    "loyal": 2,
    "loyalty": 3,
    "luck": 1,
    "lucky": 2,
    "lunch": 1,
    "lung": 1,
    "lungs": 1,
    "luxury": 3,
    "lying": 2,
    "machine": 2,
    "machinery": 4,
    "machines": 2,
    "mad": 1,
    "made": 1,
    "madison": 3,
    "magazine": 3,
    "magic": 2,
    "magnanimity": 5,
    "magnanimous": 4,
    "magnet": 2,
    "magnetic": 3,
    "magnitude": 3,
    "mail": 1,
    "mailing": 2,
    "main": 1,
    "maine": 1,
    "mainly": 2,
    "mainstream": 2,
    "maintain": 2,
    "maintenance": 3,
    "major": 2,
    "majority": 4,
    "make": 1,
    "maker": 2,
    "makes": 1,
    "makeup": 2,
    "making": 2,
    "malaysia": 3,
    "male": 1,
    "mall": 1,
    "mama": 2,
    "man": 1,
    "manage": 2,
    "managed": 2,
    "management": 3,
    "manager": 3,
    "managers": 3,
    "managing": 2,
    "manchester": 3,
    "mandate": 2,
    "manhattan": 3,
    "manipulate": 4,
    "manner": 2,
    "mansion": 2,
    "manual": 3,
    "manufacturer": 5,
    "manufacturing": 4,
    "many": 2,
    "map": 1,
    "mapping": 2,
    "maps": 1,
    "marble": 2,
    "marc": 1,
    "march": 1,
    "margaret": 3,
    "margin": 2,
    "maria": 3,
    "marie": 2,
    "marine": 2,
    "mark": 1,
    "marked": 1,
    "marker": 2,
    "market": 2,
    "marketing": 3,
    "marketplace": 3,
    "markets": 2,
    "marks": 1,
    "marriage": 2,
    "married": 2,
    "marry": 2,
    "mars": 1,
    "marshall": 2,
    "martin": 2,
    "mary": 2,
    "maryland": 3,
    "mask": 1,
    "mass": 1,
    "massachusetts": 4,
    "massage": 2,
    "massive": 2,
    "master": 2,
    "masters": 2,
    "match": 1,
    "matching": 2,
    "mate": 1,
    "material": 4,
    "materials": 4,
    "math": 1,
    "mathematics": 4,
    "matrix": 2,
    "matt": 1,
    "matter": 2,
    "matthew": 2,
    "mature": 2,
    "maximum": 3,
    "may": 1,
    "maybe": 2,
    "mayor": 2,
    "me": 1,
    "meal": 1,
    "meals": 1,
    "mean": 1,
    "meaning": 2,
    "meaningful": 3,
    "means": 1,
    "meant": 1,
    "meantime": 2,
    "meanwhile": 2,
    "measure": 2,
    "measured": 2,
    "measurement": 3,
    "measuring": 3,
    "meat": 1,
    "mechanic": 3,
    "mechanical": 4,
    "mechanism": 3,
    "medal": 2,
    "media": 3,
    "median": 3,
    "medical": 3,
    "medicare": 3,
    "medication": 4,
    "medicine": 3,
    "medium": 3,
    "meet": 1,
    "meeting": 2,
    "meets": 1,
    "melbourne": 2,
    "melt": 1,
    "melted": 1,
    "member": 2,
    "membership": 3,
    "memorabilia": 6,
    "memorial": 4,
    "memories": 3,
    "memory": 3,
    "memphis": 2,
    "men": 1,
    "mens": 1,
    "mental": 2,
    "mentally": 3,
    "mention": 2,
    "mentor": 2,
    "menu": 2,
    "merchandise": 3,
    "merchant": 2,
    "mercury": 3,
    "mere": 1,
    "merely": 2,
    "merit": 2,
    "mesh": 1,
    "mess": 1,
    "message": 2,
    "messaging": 3,
    "messenger": 3,
    "met": 1,
    "meta": 2,
    "metabolism": 4,
    "metal": 2,
    "metaphor": 3,
    "meter": 2,
    "meters": 2,
    "method": 2,
    "methodology": 5,
    "metro": 2,
    "metropolitan": 5,
    "mexican": 3,
    "mexico": 3,
    "miami": 3,
    "mice": 1,
    "michael": 2,
    "michelle": 2,
    "michigan": 3,
    "micro": 2,
    "microsoft": 3,
    "middle": 2,
    "middle-class": 2,
    "midnight": 2,
    "midst": 1,
    "might": 1,
    "mighty": 2,
    "migration": 3,
    "mike": 1,
    "mild": 1,
    "mile": 1,
    "miles": 1,
    "military": 4,
    "milk": 1,
    "mill": 1,
    "miller": 2,
    "million": 2,
    "millions": 2,
    "mills": 1,
    "milwaukee": 3,
    "mind": 1,
    "mine": 1,
    "mineral": 3,
    "minerals": 3,
    "mini": 2,
    "minimal": 3,
    "minimize": 3,
    "minimum": 3,
    "mining": 2,
    "minister": 3,
    "ministry": 3,
    "minneapolis": 5,
    "minnesota": 4,
    "minor": 2,
    "minority": 4,
    "mint": 1,
    "minute": 2,
    "miracle": 3,
    "mirror": 2,
    "misc": 4,
    "miscellaneous": 5,
    "miss": 1,
    "missed": 1,
    "missile": 2,
    "missing": 2,
    "mission": 2,
    "missionary": 4,
    "mississippi": 4,
    "missouri": 3,
    "mistake": 2,
    "mitchell": 2,
    "mix": 1,
    "mixed": 1,
    "mixture": 2,
    "mobile": 2,
    "mode": 1,
    "model": 2,
    "modeling": 3,
    "modem": 2,
    "moderate": 3,
    "moderator": 4,
    "modern": 2,
    "modes": 1,
    "modest": 2,
    "modification": 5,
    "modified": 3,
    "modify": 3,
    "module": 2,
    "molecular": 4,
    "molecule": 3,
    "mom": 1,
    "moment": 2,
    "momentum": 3,
    "monday": 2,
    "money": 2,
    "monitor": 3,
    "monkey": 2,
    "monster": 2,
    "montana": 3,
    "month": 1,
    "monthly": 2,
    "months": 1,
    "montreal": 3,
    "monument": 3,
    "mood": 1,
    "moon": 1,
    "moore": 1,
    "moral": 2,
    "more": 1,
    "moreover": 3,
    "morgan": 2,
    "morning": 2,
    "morris": 2,
    "mortality": 4,
    "mortgage": 2,
    "most": 1,
    "mostly": 2,
    "mother": 2,
    "motion": 2,
    "motivate": 3,
    "motivation": 4,
    "motive": 2,
    "motor": 2,
    "motorcycle": 4,
    "mount": 1,
    "mountain": 2,
    "mountains": 2,
    "mounted": 2,
    "mouse": 1,
    "mouth": 1,
    "move": 1,
    "moved": 1,
    "movement": 2,
    "moves": 1,
    "movie": 2,
    "moving": 2,
    "mpeg": 2,
    "mr": 1,
    "mr.": 1,
    "mrs": 1,
    "mrs.": 1,
    "ms": 1,
    "much": 1,
    "mud": 1,
    "multimedia": 5,
    "multiple": 3,
    "municipal": 4,
    "murder": 2,
    "murray": 2,
    "muscle": 2,
    "museum": 3,
    "mushroom": 2,
    "music": 2,
    "musical": 3,
    "musician": 3,
    "muslim": 2,
    "must": 1,
    "mutter": 2,
    "mutual": 3,
    "my": 1,
    "myself": 2,
    "mysql": 4,
    "mysterious": 4,
    "mystery": 3,
    "myth": 1,
    "nail": 1,
    "nails": 1,
    "naked": 2,
    "name": 1,
    "named": 1,
    "names": 1,
    "nancy": 2,
    "narrative": 3,
    "narrow": 2,
    "nasa": 2,
    "nashville": 2,
    "nasty": 2,
    "nation": 2,
    "national": 3,
    "nationwide": 3,
    "native": 2,
    "natural": 3,
    "naturally": 4,
    "nature": 2,
    "naval": 2,
    "navigate": 3,
    "navigation": 4,
    "navy": 2,
    "ncaa": 4,
    "near": 1,
    "nearby": 2,
    "nearer": 1,
    "nearest": 1,
    "nearly": 2,
    "neat": 1,
    "nebraska": 3,
    "necessarily": 5,
    "necessary": 4,
    "necessity": 4,
    "neck": 1,
    "need": 1,
    "needed": 2,
    "needle": 2,
    "needs": 1,
    "negative": 3,
    "negotiate": 4,
    "negotiation": 5,
    "negro": 2,
    "neighbor": 2,
    "neighborhood": 3,
    "neighboring": 3,
    "neil": 1,
    "neither": 2,
    "nelson": 2,
    "nerve": 1,
    "nervous": 2,
    "nest": 1,
    "net": 1,
    "netherlands": 3,
    "network": 2,
    "networking": 3,
    "neutral": 2,
    "nevada": 3,
    "never": 2,
    "nevertheless": 4,
    "new": 1,
    "newest": 2,
    "newly": 2,
    "news": 1,
    "newsletter": 3,
    "newspaper": 3,
    "next": 1,
    "nice": 1,
    "nick": 1,
    "night": 1,
    "nightmare": 2,
    "nights": 1,
    "nike": 2,
    "nine": 1,
    "nintendo": 3,
    "no": 1,
    "noble": 2,
    "nobody": 3,
    "nod": 1,
    "nodded": 1,
    "node": 1,
    "nodes": 1,
    "noise": 1,
    "nomination": 4,
    "nominee": 3,
    "none": 1,
    "nonetheless": 3,
    "nonprofit": 3,
    "noon": 1,
    "nor": 1,
    "norm": 1,
    "normal": 2,
    "normally": 3,
    "north": 1,
    "northeast": 2,
    "northern": 2,
    "northwest": 2,
    "norway": 2,
    "nose": 1,
    "not": 1,
    "note": 1,
    "notebook": 2,
    "noted": 2,
    "notes": 1,
    "nothing": 2,
    "notice": 2,
    "noticed": 2,
    "notices": 3,
    "notification": 5,
    "notify": 3,
    "notion": 2,
    "noun": 1,
    "nova": 2,
    "novel": 2,
    "november": 3,
    "now": 1,
    "nowhere": 2,
    "nuclear": 3,
    "nude": 1,
    "null": 1,
    "number": 2,
    "numbers": 2,
    "numeral": 3,
    "numerous": 3,
    "nurse": 1,
    "nursing": 2,
    "nut": 1,
    "nutrient": 3,
    "nutrition": 3,
    "nuts": 1,
    "oak": 1,
    "oakland": 2,
    "object": 2,
    "objection": 3,
    "objective": 3,
    "obligation": 4,
    "observation": 4,
    "observe": 2,
    "observed": 2,
    "observer": 3,
    "obstacle": 3,
    "obtain": 2,
    "obvious": 3,
    "obviously": 3,
    "occasion": 3,
    "occasional": 4,
    "occasionally": 5,
    "occupation": 4,
    "occupational": 5,
    "occupy": 3,
    "occur": 2,
    "occurred": 2,
    "ocean": 2,
    "october": 3,
    "odd": 1,
    "odds": 1,
    "of": 1,
    "off": 1,
    "offender": 2,
    "offense": 2,
    "offensive": 3,
    "offer": 2,
    "offering": 3,
    "office": 2,
    "officer": 3,
    "officers": 3,
    "offices": 3,
    "official": 3,
    "officially": 3,
    "offline": 2,
    "offset": 2,
    "often": 2,
    "oh": 1,
    "ohio": 3,
    "oil": 1,
    "ok": 1,
    "okay": 2,
    "oklahoma": 4,
    "old": 1,
    "old-fashioned": 2,
    "older": 2,
    "oldest": 2,
    "olympic": 3,
    "olympics": 3,
    "on": 1,
    "once": 1,
    "one": 1,
    "ones": 1,
    "ongoing": 3,
    "onion": 2,
    "online": 2,
    "only": 2,
    "ontario": 4,
    "onto": 2,
    "open": 2,
    "opened": 2,
    "opening": 3,
    "openly": 2,
    "opera": 3,
    "operate": 3,
    "operated": 4,
    "operating": 4,
    "operation": 4,
    "operational": 5,
    "operator": 4,
    "opinion": 3,
    "opponent": 3,
    "opportunities": 5,
    "opportunity": 5,
    "oppose": 2,
    "opposed": 2,
    "opposite": 3,
    "opposition": 4,
    "opt": 1,
    "optical": 3,
    "optimistic": 4,
    "optimization": 5,
    "option": 2,
    "optional": 3,
    "or": 1,
    "oracle": 3,
    "oral": 2,
    "orange": 2,
    "orbit": 2,
    "orchestra": 3,
    "order": 2,
    "ordered": 2,
    "ordering": 3,
    "ordinary": 4,
    "oregon": 3,
    "organ": 2,
    "organic": 3,
    "organisation": 5,
    "organism": 3,
    "organization": 5,
    "organizational": 5,
    "organize": 3,
    "organized": 3,
    "orgy": 2,
    "orientation": 5,
    "origin": 3,
    "original": 4,
    "originally": 5,
    "orlando": 3,
    "orleans": 2,
    "other": 2,
    "others": 2,
    "otherwise": 3,
    "ottawa": 3,
    "ought": 1,
    "our": 1,
    "ours": 1,
    "ourselves": 2,
    "out": 1,
    "outcome": 2,
    "outdoor": 2,
    "outdoors": 2,
    "outer": 2,
    "outfit": 2,
    "outlet": 2,
    "outline": 2,
    "outlook": 2,
    "output": 2,
    "outside": 2,
    "outsider": 3,
    "outsourcing": 3,
    "outstanding": 3,
    "oven": 2,
    "over": 2,
    "overall": 3,
    "overcome": 3,
    "overlook": 3,
    "overnight": 3,
    "overseas": 3,
    "oversee": 3,
    "overview": 3,
    "overwhelm": 3,
    "overwhelming": 4,
    "owe": 1,
    "own": 1,
    "owned": 1,
    "owner": 2,
    "ownership": 3,
    "oxford": 2,
    "oxygen": 3,
    "pace": 1,
    "pacific": 3,
    "pack": 1,
    "package": 2,
    "packaging": 3,
    "packet": 2,
    "packs": 1,
    "pad": 1,
    "page": 1,
    "paid": 1,
    "pain": 1,
    "painful": 2,
    "paint": 1,
    "painter": 2,
    "painting": 2,
    "pair": 1,
    "pakistan": 3,
    "palace": 2,
    "pale": 1,
    "palestinian": 5,
    "palm": 1,
    "pan": 1,
    "panel": 2,
    "panic": 2,
    "pant": 1,
    "pants": 1,
    "papa": 2,
    "paper": 2,
    "paperback": 3,
    "para": 2,
    "parade": 2,
    "paradise": 3,
    "paragraph": 3,
    "parallel": 3,
    "parameter": 4,
    "parent": 2,
    "parental": 3,
    "parenting": 3,
    "parents": 2,
    "paris": 2,
    "parish": 2,
    "park": 1,
    "parker": 2,
    "parking": 2,
    "parks": 1,
    "parliament": 3,
    "part": 1,
    "partial": 2,
    "partially": 2,
    "participant": 4,
    "participate": 4,
    "participating": 5,
    "participation": 5,
    "particle": 3,
    "particles": 3,
    "particular": 4,
    "particularly": 5,
    "parties": 2,
    "partly": 2,
    "partner": 2,
    "partnership": 3,
    "parts": 1,
    "party": 2,
    "pass": 1,
    "passage": 2,
    "passed": 1,
    "passenger": 3,
    "passes": 2,
    "passing": 2,
    "passion": 2,
    "password": 2,
    "past": 1,
    "pasta": 2,
    "pastor": 2,
    "pat": 1,
    "patch": 1,
    "patent": 2,
    "path": 1,
    "patience": 2,
    "patient": 2,
    "patients": 2,
    "patrick": 2,
    "patrol": 2,
    "patron": 2,
    "pattern": 2,
    "paul": 1,
    "pause": 1,
    "pay": 1,
    "payday": 2,
    "paying": 2,
    "payment": 2,
    "pc": 1,
    "peace": 1,
    "peaceful": 2,
    "peak": 1,
    "peanut": 2,
    "pearl": 1,
    "peasant": 2,
    "peel": 1,
    "peer": 1,
    "pen": 1,
    "penalty": 3,
    "pencil": 2,
    "pending": 2,
    "penis": 2,
    "pennsylvania": 4,
    "pension": 2,
    "people": 2,
    "pepper": 2,
    "per": 1,
    "perceive": 2,
    "perceived": 2,
    "percent": 2,
    "percentage": 3,
    "perception": 3,
    "perfect": 2,
    "perfectly": 3,
    "perform": 2,
    "performance": 3,
    "performer": 2,
    "perfume": 2,
    "perhaps": 2,
    "period": 3,
    "perl": 1,
    "permanent": 3,
    "permission": 3,
    "permit": 2,
    "permitted": 3,
    "persian": 2,
    "persist": 2,
    "person": 2,
    "personal": 3,
    "personality": 5,
    "personalized": 4,
    "personally": 4,
    "personnel": 3,
    "perspective": 3,
    "persuade": 2,
    "peru": 2,
    "pervade": 2,
    "pervasive": 3,
    "pet": 1,
    "peter": 2,
    "petition": 3,
    "pets": 1,
    "pharmaceutical": 5,
    "pharmacy": 3,
    "phase": 1,
    "phenomenon": 4,
    "phil": 1,
    "philadelphia": 5,
    "philip": 2,
    "philippines": 3,
    "philosophical": 5,
    "philosophy": 4,
    "phoenix": 2,
    "phone": 1,
    "phones": 1,
    "photo": 2,
    "photograph": 3,
    "photographer": 4,
    "photographs": 3,
    "photography": 4,
    "photos": 2,
    "phrase": 1,
    "physical": 3,
    "physically": 4,
    "physician": 3,
    "physics": 2,
    "piano": 3,
    "pick": 1,
    "picked": 1,
    "picks": 1,
    "pickup": 2,
    "pics": 1,
    "picture": 2,
    "pictured": 2,
    "pie": 1,
    "piece": 1,
    "pierre": 2,
    "pig": 1,
    "pile": 1,
    "pill": 1,
    "pillow": 2,
    "pills": 1,
    "pilot": 2,
    "pin": 1,
    "pine": 1,
    "pink": 1,
    "pioneer": 3,
    "pipe": 1,
    "pipeline": 2,
    "pissing": 2,
    "pistol": 2,
    "pit": 1,
    "pitch": 1,
    "pitcher": 2,
    "pittsburgh": 2,
    "pizza": 2,
    "place": 1,
    "placed": 1,
    "placement": 2,
    "places": 2,
    "plain": 1,
    "plaintiff": 2,
    "plan": 1,
    "plane": 1,
    "planet": 2,
    "planned": 1,
    "planner": 2,
    "planning": 2,
    "plans": 1,
    "plant": 1,
    "plants": 1,
    "plasma": 2,
    "plastic": 2,
    "plate": 1,
    "plates": 1,
    "platform": 2,
    "platinum": 3,
    "play": 1,
    "played": 1,
    "player": 2,
    "playing": 2,
    "playoff": 1,
    "plays": 1,
    "plaza": 2,
    "plea": 1,
    "plead": 1,
    "pleasant": 2,
    "please": 1,
    "pleased": 1,
    "pleasure": 2,
    "plenty": 2,
    "plot": 1,
    "plug": 1,
    "plugin": 2,
    "plunge": 1,
    "plural": 2,
    "plus": 1,
    "pm": 1,
    "pocket": 2,
    "poem": 2,
    "poems": 2,
    "poet": 2,
    "poetry": 3,
    "point": 1,
    "pointer": 2,
    "points": 1,
    "poke": 1,
    "poker": 2,
    "poland": 2,
    "pole": 1,
    "police": 2,
    "policeman": 3,
    "policies": 3,
    "policy": 3,
    "polish": 2,
    "political": 4,
    "politically": 4,
    "politician": 4,
    "politics": 3,
    "poll": 1,
    "polls": 1,
    "pollution": 3,
    "polyphonic": 4,
    "pond": 1,
    "pony": 2,
    "pool": 1,
    "poor": 1,
    "pop": 1,
    "popular": 3,
    "popularity": 5,
    "population": 4,
    "porch": 1,
    "pork": 1,
    "porn": 1,
    "porno": 2,
    "port": 1,
    "portable": 3,
    "portal": 2,
    "portfolio": 4,
    "portion": 2,
    "portland": 2,
    "portrait": 2,
    "portray": 2,
    "ports": 1,
    "portugal": 3,
    "portuguese": 3,
    "pose": 1,
    "position": 3,
    "positive": 3,
    "possess": 2,
    "possession": 3,
    "possibility": 5,
    "possible": 3,
    "possibly": 3,
    "post": 1,
    "postage": 2,
    "postal": 2,
    "poster": 2,
    "posting": 2,
    "posts": 1,
    "pot": 1,
    "potato": 3,
    "potatoes": 3,
    "potential": 3,
    "potentially": 4,
    "potter": 2,
    "pottery": 3,
    "pound": 1,
    "pounds": 1,
    "pour": 1,
    "poverty": 3,
    "powder": 2,
    "power": 2,
    "powered": 2,
    "powerful": 3,
    "powers": 2,
    "practical": 3,
    "practically": 4,
    "practice": 2,
    "practitioner": 4,
    "praise": 1,
    "pray": 1,
    "prayer": 1,
    "preach": 1,
    "precious": 2,
    "precise": 2,
    "precisely": 2,
    "precision": 3,
    "predator": 3,
    "predict": 2,
    "prediction": 3,
    "prefer": 2,
    "preference": 3,
    "preferences": 4,
    "preferred": 2,
    "pregnancy": 3,
    "pregnant": 2,
    "preliminary": 5,
    "premier": 2,
    "premise": 2,
    "premium": 3,
    "preparation": 4,
    "prepare": 2,
    "prepared": 2,
    "preparing": 3,
    "prescription": 3,
    "presence": 2,
    "present": 2,
    "presentation": 4,
    "preserve": 2,
    "presidency": 4,
    "president": 3,
    "presidential": 4,
    "press": 1,
    "pressure": 2,
    "presumably": 4,
    "pretend": 2,
    "pretty": 2,
    "prev": 1,
    "prevail": 2,
    "prevent": 2,
    "prevention": 3,
    "preview": 2,
    "previous": 3,
    "previously": 3,
    "price": 1,
    "priced": 1,
    "pricing": 2,
    "pride": 1,
    "priest": 1,
    "primarily": 4,
    "primary": 3,
    "prime": 1,
    "primitive": 3,
    "prince": 1,
    "princess": 2,
    "principal": 3,
    "principle": 3,
    "print": 1,
    "printable": 3,
    "printed": 1,
    "printer": 2,
    "printing": 2,
    "prints": 1,
    "prior": 2,
    "priorities": 4,
    "priority": 4,
    "prison": 2,
    "prisoner": 3,
    "privacy": 3,
    "private": 2,
    "privately": 2,
    "privilege": 3,
    "prize": 1,
    "pro": 1,
    "probability": 5,
    "probably": 3,
    "problem": 2,
    "procedure": 3,
    "proceed": 2,
    "process": 2,
    "processes": 3,
    "processing": 2,
    "processor": 3,
    "proclaim": 2,
    "produce": 2,
    "produced": 2,
    "producer": 3,
    "producers": 3,
    "producing": 3,
    "product": 2,
    "production": 3,
    "productive": 3,
    "productivity": 5,
    "profession": 3,
    "professional": 4,
    "professor": 3,
    "profile": 2,
    "profit": 2,
    "profound": 2,
    "program": 2,
    "programme": 2,
    "programmes": 2,
    "programming": 3,
    "programs": 2,
    "progress": 2,
    "progressed": 2,
    "progression": 3,
    "progressive": 3,
    "prohibit": 3,
    "project": 2,
    "projection": 3,
    "prominent": 3,
    "promise": 2,
    "promised": 2,
    "promising": 3,
    "promote": 2,
    "promoting": 3,
    "promotion": 3,
    "promotional": 4,
    "prompt": 1,
    "proof": 1,
    "proper": 2,
    "properly": 2,
    "properties": 3,
    "property": 3,
    "proportion": 3,
    "proposal": 3,
    "propose": 2,
    "proposed": 2,
    "prosecution": 4,
    "prosecutor": 4,
    "prospect": 2,
    "protect": 2,
    "protection": 3,
    "protective": 3,
    "protein": 2,
    "protest": 2,
    "protocol": 3,
    "proud": 1,
    "prove": 1,
    "proven": 2,
    "provide": 2,
    "provided": 3,
    "provider": 3,
    "providing": 3,
    "province": 2,
    "provision": 3,
    "provoke": 2,
    "psychological": 5,
    "psychologist": 4,
    "psychology": 4,
    "public": 2,
    "publication": 4,
    "publicity": 4,
    "publicly": 3,
    "publish": 2,
    "publisher": 3,
    "publishing": 3,
    "pull": 1,
    "pulse": 1,
    "pump": 1,
    "punch": 1,
    "punish": 2,
    "punishment": 3,
    "punk": 1,
    "pupil": 2,
    "purchase": 2,
    "purchased": 2,
    "purchasing": 3,
    "pure": 1,
    "purple": 2,
    "purpose": 2,
    "purse": 1,
    "pursuant": 3,
    "pursue": 2,
    "pursuit": 2,
    "push": 1,
    "pussy": 2,
    "put": 1,
    "putting": 1,
    "puzzle": 2,
    "python": 2,
    "qualified": 3,
    "qualify": 3,
    "quality": 3,
    "quantity": 3,
    "quantum": 2,
    "quarter": 2,
    "quarterback": 3,
    "quarterly": 3,
    "quebec": 2,
    "queen": 1,
    "queries": 2,
    "query": 2,
    "quest": 1,
    "question": 2,
    "questionnaire": 3,
    "questions": 2,
    "quick": 1,
    "quickly": 2,
    "quiet": 2,
    "quietly": 2,
    "quit": 1,
    "quite": 1,
    "quiz": 1,
    "quote": 1,
    "quotes": 1,
    "rabbit": 2,
    "race": 1,
    "racial": 2,
    "racing": 2,
    "racism": 2,
    "rack": 1,
    "radar": 2,
    "radiation": 4,
    "radical": 3,
    "radio": 3,
    "rage": 1,
    "rail": 1,
    "railroad": 2,
    "rain": 1,
    "rainbow": 2,
    "raise": 1,
    "raised": 1,
    "raising": 2,
    "rally": 2,
    "ralph": 1,
    "ran": 1,
    "ranch": 1,
    "random": 2,
    "range": 1,
    "rank": 1,
    "ranking": 2,
    "rape": 1,
    "rapid": 2,
    "rapidly": 2,
    "rare": 1,
    "rarely": 2,
    "rat": 1,
    "rate": 1,
    "rated": 2,
    "rates": 1,
    "rather": 2,
    "rating": 2,
    "ratio": 2,
    "rational": 3,
    "raw": 1,
    "rays": 1,
    "re": 1,
    "reach": 1,
    "reached": 1,
    "react": 2,
    "reaction": 3,
    "read": 1,
    "reader": 2,
    "readily": 3,
    "reading": 2,
    "reads": 1,
    "ready": 2,
    "real": 2,
    "realistic": 4,
    "reality": 4,
    "realize": 3,
    "realized": 3,
    "really": 3,
    "realm": 1,
    "rear": 1,
    "reason": 2,
    "reasonable": 4,
    "rebel": 2,
    "rebuild": 2,
    "recall": 2,
    "receipt": 2,
    "receive": 2,
    "received": 2,
    "receiver": 3,
    "receiving": 3,
    "recent": 2,
    "recently": 2,
    "reception": 3,
    "recession": 3,
    "recipe": 3,
    "recipient": 4,
    "recognition": 4,
    "recognize": 3,
    "recognized": 3,
    "recommend": 3,
    "recommendation": 5,
    "reconcile": 4,
    "reconciled": 4,
    "reconciliation": 6,
    "record": 2,
    "recorded": 3,
    "recorder": 3,
    "recording": 3,
    "recover": 3,
    "recovery": 4,
    "recreation": 4,
    "recruit": 2,
    "recruitment": 3,
    "recycling": 3,
    "red": 1,
    "redeemed": 2,
    "reduce": 2,
    "reduced": 2,
    "reducing": 3,
    "reduction": 3,
    "reed": 1,
    "refer": 2,
    "reference": 3,
    "referred": 2,
    "refinance": 3,
    "refine": 2,
    "reflect": 2,
    "reflection": 3,
    "reform": 2,
    "refrigerator": 5,
    "refuge": 2,
    "refugee": 3,
    "refund": 2,
    "refuse": 2,
    "refused": 2,
    "regain": 2,
    "regard": 2,
    "regarding": 3,
    "regardless": 3,
    "regime": 2,
    "region": 2,
    "regional": 3,
    "register": 3,
    "registered": 3,
    "registration": 4,
    "registry": 3,
    "regret": 2,
    "regular": 3,
    "regularly": 4,
    "regulate": 3,
    "regulation": 4,
    "regulator": 4,
    "regulatory": 3,
    "rehabilitation": 5,
    "reinforce": 3,
    "reject": 2,
    "relate": 2,
    "related": 3,
    "relating": 3,
    "relation": 3,
    "relationship": 4,
    "relative": 3,
    "relatively": 4,
    "relax": 2,
    "release": 2,
    "released": 2,
    "relevant": 3,
    "reliability": 6,
    "reliable": 4,
    "relief": 2,
    "relieve": 2,
    "religion": 3,
    "religious": 3,
    "reluctant": 3,
    "rely": 2,
    "remain": 2,
    "remaining": 2,
    "remark": 2,
    "remarkable": 4,
    "remember": 3,
    "remind": 2,
    "reminder": 3,
    "remote": 2,
    "removal": 3,
    "remove": 2,
    "removed": 2,
    "render": 2,
    "rent": 1,
    "rental": 2,
    "repair": 2,
    "repeat": 2,
    "repeatedly": 3,
    "replace": 2,
    "replaced": 2,
    "replacement": 3,
    "replied": 2,
    "replies": 2,
    "reply": 2,
    "report": 2,
    "reportedly": 4,
    "reporter": 3,
    "reporting": 2,
    "represent": 3,
    "representation": 5,
    "representative": 5,
    "reproduction": 4,
    "republic": 3,
    "republican": 4,
    "reputation": 4,
    "request": 2,
    "require": 2,
    "required": 2,
    "requirement": 3,
    "requirements": 3,
    "requiring": 3,
    "rescue": 2,
    "research": 2,
    "researcher": 2,
    "resemble": 3,
    "reservation": 4,
    "reserve": 2,
    "reserved": 2,
    "residence": 3,
    "resident": 3,
    "residential": 4,
    "resign": 2,
    "resist": 2,
    "resistance": 3,
    "resistant": 3,
    "resolution": 4,
    "resolve": 2,
    "resort": 2,
    "resource": 2,
    "resources": 3,
    "respect": 2,
    "respective": 3,
    "respectively": 4,
    "respond": 2,
    "respondent": 3,
    "response": 2,
    "responsibilities": 6,
    "responsibility": 6,
    "responsible": 4,
    "rest": 1,
    "restaurant": 3,
    "restoration": 4,
    "restore": 2,
    "restrict": 2,
    "restricted": 3,
    "restriction": 3,
    "restrictions": 3,
    "result": 2,
    "resume": 2,
    "retail": 2,
    "retailer": 2,
    "retain": 2,
    "retire": 2,
    "retired": 2,
    "retirement": 3,
    "retreat": 2,
    "return": 2,
    "reveal": 2,
    "revelation": 4,
    "revenue": 3,
    "reverse": 2,
    "review": 2,
    "reviewer": 3,
    "revised": 2,
    "revision": 3,
    "revolution": 4,
    "revolutionary": 6,
    "reward": 2,
    "rhetoric": 3,
    "rhode": 1,
    "rhyme": 1,
    "rhythm": 1,
    "rib": 1,
    "ribbon": 2,
    "rice": 1,
    "rich": 1,
    "richard": 2,
    "richmond": 2,
    "rick": 1,
    "rico": 2,
    "rid": 1,
    "ride": 1,
    "rider": 2,
    "ridge": 1,
    "ridiculous": 4,
    "riding": 2,
    "rifle": 2,
    "right": 1,
    "rights": 1,
    "rim": 1,
    "ring": 1,
    "rings": 1,
    "riot": 2,
    "rip": 1,
    "rise": 1,
    "rising": 2,
    "risk": 1,
    "risks": 1,
    "risky": 2,
    "ritual": 3,
    "rival": 2,
    "river": 2,
    "rivers": 2,
    "road": 1,
    "roads": 1,
    "roar": 1,
    "robert": 2,
    "roberts": 2,
    "robin": 2,
    "robinson": 3,
    "robot": 2,
    "rock": 1,
    "rocket": 2,
    "rocks": 1,
    "rocky": 2,
    "rod": 1,
    "roger": 2,
    "role": 1,
    "roles": 1,
    "roll": 1,
    "rolling": 2,
    "roman": 2,
    "romance": 2,
    "romania": 4,
    "romantic": 3,
    "rome": 1,
    "roof": 1,
    "room": 1,
    "rooms": 1,
    "root": 1,
    "roots": 1,
    "rope": 1,
    "rose": 1,
    "roses": 2,
    "ross": 1,
    "rough": 1,
    "roughly": 1,
    "roulette": 2,
    "round": 1,
    "route": 1,
    "router": 2,
    "routine": 2,
    "routinely": 2,
    "row": 1,
    "royal": 2,
    "rub": 1,
    "rubbed": 1,
    "rubber": 2,
    "rugby": 2,
    "ruin": 2,
    "rule": 1,
    "ruler": 2,
    "rules": 1,
    "ruling": 2,
    "rumor": 2,
    "run": 1,
    "runner": 2,
    "running": 2,
    "runs": 1,
    "rural": 2,
    "rush": 1,
    "russell": 2,
    "russia": 2,
    "russian": 2,
    "ryan": 2,
    "sack": 1,
    "sacramento": 4,
    "sacred": 2,
    "sacrifice": 3,
    "sad": 1,
    "saddle": 2,
    "safari": 3,
    "safe": 1,
    "safely": 1,
    "safety": 2,
    "said": 1,
    "sail": 1,
    "saint": 1,
    "sake": 1,
    "salad": 2,
    "salary": 3,
    "sale": 1,
    "sales": 1,
    "sally": 2,
    "salmon": 2,
    "salt": 1,
    "sam": 1,
    "same": 1,
    "sample": 2,
    "sampling": 2,
    "samuel": 3,
    "sanction": 2,
    "sand": 1,
    "sandwich": 2,
    "sang": 1,
    "santa": 2,
    "sarah": 2,
    "sat": 1,
    "satellite": 3,
    "satellites": 3,
    "satisfaction": 4,
    "satisfied": 3,
    "satisfy": 3,
    "saturday": 3,
    "sauce": 1,
    "saudi": 2,
    "save": 1,
    "saved": 1,
    "saving": 2,
    "saw": 1,
    "say": 1,
    "saying": 2,
    "says": 1,
    "scale": 1,
    "scan": 1,
    "scandal": 2,
    "scanner": 2,
    "scare": 1,
    "scared": 1,
    "scary": 2,
    "scatter": 2,
    "scenario": 4,
    "scene": 1,
    "scenes": 1,
    "scent": 1,
    "schedule": 2,
    "scheduled": 2,
    "scheme": 1,
    "scholar": 2,
    "scholarship": 3,
    "school": 1,
    "schools": 1,
    "science": 2,
    "sciences": 3,
    "scientific": 4,
    "scientist": 3,
    "scientists": 3,
    "scope": 1,
    "score": 1,
    "scores": 1,
    "scotland": 2,
    "scott": 1,
    "scottish": 2,
    "scramble": 2,
    "scratch": 1,
    "scream": 1,
    "screeched": 1,
    "screen": 1,
    "screening": 2,
    "screw": 1,
    "script": 1,
    "scripts": 1,
    "sculpture": 2,
    "sea": 1,
    "seal": 1,
    "sealed": 1,
    "sean": 1,
    "search": 1,
    "searching": 2,
    "season": 2,
    "seasons": 2,
    "seat": 1,
    "seats": 1,
    "seattle": 3,
    "second": 2,
    "secondary": 4,
    "secret": 2,
    "secretary": 4,
    "section": 2,
    "sector": 2,
    "secular": 3,
    "secure": 2,
    "secured": 2,
    "securities": 4,
    "security": 4,
    "see": 1,
    "seed": 1,
    "seeds": 1,
    "seeing": 2,
    "seek": 1,
    "seeker": 2,
    "seeking": 2,
    "seem": 1,
    "seemed": 1,
    "seemingly": 2,
    "seems": 1,
    "seen": 1,
    "segment": 2,
    "seize": 1,
    "seldom": 2,
    "select": 2,
    "selected": 2,
    "selection": 3,
    "self": 1,
    "self-esteem": 2,
    "sell": 1,
    "seller": 2,
    "selling": 2,
    "semester": 3,
    "semi": 2,
    "seminar": 3,
    "senate": 2,
    "senator": 3,
    "send": 1,
    "sending": 2,
    "senior": 2,
    "sensation": 3,
    "sense": 1,
    "sensitive": 3,
    "sensitivity": 5,
    "sensor": 2,
    "sent": 1,
    "sentence": 2,
    "sentiment": 3,
    "separate": 3,
    "separation": 4,
    "september": 3,
    "sequence": 2,
    "serial": 3,
    "series": 2,
    "serious": 3,
    "seriously": 4,
    "servant": 2,
    "serve": 1,
    "served": 1,
    "server": 2,
    "serves": 1,
    "service": 2,
    "services": 3,
    "serving": 2,
    "session": 2,
    "sessions": 2,
    "set": 1,
    "sets": 1,
    "setting": 2,
    "settle": 2,
    "settlement": 3,
    "settlers": 2,
    "setup": 2,
    "seven": 2,
    "seventh": 2,
    "several": 3,
    "severe": 2,
    "severely": 2,
    "sex": 1,
    "sexual": 3,
    "sexuality": 5,
    "sexually": 3,
    "sexy": 2,
    "shade": 1,
    "shadow": 2,
    "shake": 1,
    "shaking": 2,
    "shall": 1,
    "shallow": 2,
    "shame": 1,
    "shape": 1,
    "share": 1,
    "shared": 1,
    "shareholder": 3,
    "shares": 1,
    "shareware": 2,
    "sharing": 2,
    "shark": 1,
    "sharon": 2,
    "sharp": 1,
    "sharply": 1,
    "shaved": 1,
    "she": 1,
    "shed": 1,
    "sheep": 1,
    "sheer": 1,
    "sheet": 1,
    "sheets": 1,
    "shelf": 1,
    "shell": 1,
    "shells": 1,
    "shelter": 2,
    "shift": 1,
    "shine": 1,
    "shinning": 1,
    "ship": 1,
    "shipped": 1,
    "shipping": 2,
    "ships": 1,
    "shirt": 1,
    "shirts": 1,
    "shit": 1,
    "shock": 1,
    "shoe": 1,
    "shoes": 1,
    "shoot": 1,
    "shooting": 1,
    "shop": 1,
    "shopper": 2,
    "shopping": 2,
    "shops": 1,
    "shore": 1,
    "short": 1,
    "short-term": 1,
    "shortage": 2,
    "shorter": 2,
    "shortly": 2,
    "shorts": 1,
    "shot": 1,
    "shots": 1,
    "should": 1,
    "shoulder": 2,
    "shout": 1,
    "shove": 1,
    "show": 1,
    "showed": 1,
    "shower": 2,
    "showing": 2,
    "shown": 1,
    "shows": 1,
    "shrimp": 1,
    "shrink": 1,
    "shrug": 1,
    "shut": 1,
    "shuttle": 2,
    "shy": 1,
    "sibling": 2,
    "sick": 1,
    "side": 1,
    "sides": 1,
    "sidewalk": 2,
    "siemens": 2,
    "sierra": 3,
    "sigh": 1,
    "sight": 1,
    "sigma": 2,
    "sign": 1,
    "signal": 2,
    "signature": 3,
    "signed": 1,
    "significance": 4,
    "significant": 4,
    "significantly": 4,
    "signs": 1,
    "silence": 2,
    "silent": 2,
    "silk": 1,
    "silly": 2,
    "silver": 2,
    "similar": 3,
    "similarity": 5,
    "similarly": 3,
    "simon": 2,
    "simple": 2,
    "simplest": 2,
    "simply": 2,
    "simpson": 2,
    "simulation": 4,
    "simultaneously": 5,
    "sin": 1,
    "since": 1,
    "sing": 1,
    "singapore": 3,
    "singer": 2,
    "single": 2,
    "singles": 2,
    "sink": 1,
    "sir": 1,
    "sister": 2,
    "sit": 1,
    "site": 1,
    "sites": 1,
    "sitting": 2,
    "situation": 4,
    "six": 1,
    "sixth": 1,
    "size": 1,
    "sized": 1,
    "ski": 1,
    "skiing": 2,
    "skill": 1,
    "skilled": 1,
    "skills": 1,
    "skin": 1,
    "skip": 1,
    "skirt": 1,
    "skull": 1,
    "sky": 1,
    "skype": 1,
    "slabs": 1,
    "slam": 1,
    "slap": 1,
    "slave": 1,
    "slavery": 3,
    "sleep": 1,
    "sleeping": 2,
    "sleeve": 1,
    "slept": 1,
    "slice": 1,
    "slide": 1,
    "slight": 1,
    "slightly": 1,
    "slip": 1,
    "slipped": 1,
    "slope": 1,
    "slot": 1,
    "slots": 1,
    "slow": 1,
    "slowly": 2,
    "small": 1,
    "smaller": 1,
    "smallest": 1,
    "smart": 1,
    "smell": 1,
    "smile": 1,
    "smith": 1,
    "smoke": 1,
    "smoking": 2,
    "smooth": 1,
    "snake": 1,
    "snap": 1,
    "sneak": 1,
    "snow": 1,
    "so": 1,
    "so-called": 1,
    "soak": 1,
    "soap": 1,
    "soar": 1,
    "soccer": 2,
    "social": 2,
    "socially": 2,
    "society": 4,
    "sock": 1,
    "socket": 2,
    "sodium": 3,
    "sofa": 2,
    "soft": 1,
    "soften": 2,
    "softly": 1,
    "software": 2,
    "soil": 1,
    "solar": 2,
    "sold": 1,
    "soldier": 2,
    "sole": 1,
    "solely": 2,
    "solid": 2,
    "solo": 2,
    "solution": 3,
    "solve": 1,
    "some": 1,
    "somebody": 3,
    "someday": 2,
    "somehow": 2,
    "someone": 2,
    "something": 2,
    "sometime": 2,
    "sometimes": 2,
    "somewhat": 2,
    "somewhere": 2,
    "son": 1,
    "song": 1,
    "songs": 1,
    "sons": 1,
    "soon": 1,
    "sophisticated": 5,
    "sorry": 2,
    "sort": 1,
    "sorted": 2,
    "sought": 1,
    "soul": 1,
    "sound": 1,
    "sounds": 1,
    "soup": 1,
    "source": 1,
    "south": 1,
    "southeast": 2,
    "southern": 2,
    "southwest": 2,
    "sovereignty": 4,
    "soviet": 3,
    "space": 1,
    "spaces": 2,
    "spain": 1,
    "spam": 1,
    "spanish": 2,
    "spanking": 2,
    "spare": 1,
    "spark": 1,
    "speak": 1,
    "speaker": 2,
    "speaking": 2,
    "special": 2,
    "specialist": 3,
    "specialize": 3,
    "specials": 1,
    "specialty": 3,
    "species": 2,
    "specific": 3,
    "specifically": 5,
    "specification": 5,
    "specified": 3,
    "specify": 3,
    "specs": 1,
    "spectacular": 4,
    "spectrum": 2,
    "speculate": 3,
    "speculation": 4,
    "speech": 1,
    "speed": 1,
    "spell": 1,
    "spend": 1,
    "spending": 1,
    "spent": 1,
    "sphere": 1,
    "spider": 2,
    "spill": 1,
    "spin": 1,
    "spine": 1,
    "spirit": 2,
    "spiritual": 4,
    "spit": 1,
    "spite": 1,
    "split": 1,
    "spoke": 1,
    "spoken": 2,
    "spokesman": 2,
    "sponsor": 2,
    "spoon": 1,
    "sport": 1,
    "sporting": 2,
    "sports": 1,
    "spot": 1,
    "spouse": 1,
    "spray": 1,
    "spread": 1,
    "spring": 1,
    "springs": 1,
    "sprinkle": 2,
    "spy": 1,
    "squad": 1,
    "square": 1,
    "squeeze": 1,
    "st.": 1,
    "stability": 4,
    "stable": 2,
    "stack": 1,
    "stadium": 3,
    "staff": 1,
    "stage": 1,
    "stainless": 2,
    "stair": 1,
    "stairs": 1,
    "stake": 1,
    "stamps": 1,
    "stance": 1,
    "stand": 1,
    "standard": 2,
    "standards": 2,
    "standing": 2,
    "stands": 1,
    "star": 1,
    "stare": 1,
    "stared": 1,
    "stars": 1,
    "start": 1,
    "started": 2,
    "starter": 2,
    "starting": 1,
    "starts": 1,
    "state": 1,
    "stated": 2,
    "statement": 2,
    "statements": 2,
    "states": 1,
    "static": 2,
    "station": 2,
    "statistical": 4,
    "statistics": 3,
    "stats": 1,
    "statue": 2,
    "status": 2,
    "statute": 2,
    "stay": 1,
    "steadily": 2,
    "steady": 2,
    "steak": 1,
    "steal": 1,
    "steam": 1,
    "steel": 1,
    "steep": 1,
    "steer": 1,
    "stem": 1,
    "stems": 1,
    "step": 1,
    "stephen": 2,
    "stepped": 1,
    "steps": 1,
    "stereo": 3,
    "stereotype": 4,
    "sterling": 2,
    "steve": 1,
    "steven": 2,
    "stewart": 2,
    "stick": 1,
    "stiff": 1,
    "still": 1,
    "stimulate": 3,
    "stimulus": 3,
    "stir": 1,
    "stock": 1,
    "stocks": 1,
    "stomach": 2,
    "stone": 1,
    "stones": 1,
    "stood": 1,
    "stop": 1,
    "stopped": 1,
    "storage": 2,
    "store": 1,
    "stored": 1,
    "stores": 1,
    "stories": 2,
    "storm": 1,
    "story": 2,
    "stove": 1,
    "straight": 1,
    "straighten": 2,
    "strain": 1,
    "strange": 1,
    "stranger": 2,
    "strap": 1,
    "strategic": 3,
    "strategies": 3,
    "strategy": 3,
    "straw": 1,
    "streak": 1,
    "stream": 1,
    "street": 1,
    "streets": 1,
    "strength": 1,
    "strengthen": 2,
    "stress": 1,
    "stretch": 1,
    "strict": 1,
    "strictly": 2,
    "strike": 1,
    "striking": 2,
    "string": 1,
    "strings": 1,
    "strip": 1,
    "stroke": 1,
    "strong": 1,
    "stronger": 1,
    "strongly": 2,
    "struck": 1,
    "struct": 1,
    "structural": 3,
    "structure": 2,
    "struggle": 2,
    "stuck": 1,
    "student": 2,
    "students": 2,
    "studied": 2,
    "studies": 2,
    "studio": 3,
    "studios": 3,
    "study": 2,
    "studying": 2,
    "stuff": 1,
    "stumble": 2,
    "stupid": 2,
    "style": 1,
    "styles": 1,
    "subject": 2,
    "sublime": 2,
    "submission": 3,
    "submit": 2,
    "submitted": 3,
    "subscribe": 2,
    "subscriber": 3,
    "subscription": 3,
    "subsection": 3,
    "subsequent": 3,
    "subsidy": 3,
    "substance": 2,
    "substantial": 3,
    "substantially": 3,
    "subtle": 2,
    "suburb": 2,
    "suburban": 3,
    "succeed": 2,
    "success": 2,
    "successful": 3,
    "successfully": 3,
    "such": 1,
    "suck": 1,
    "sudden": 2,
    "suddenly": 2,
    "sue": 1,
    "suffer": 2,
    "suffering": 3,
    "sufficient": 3,
    "sugar": 2,
    "suggest": 2,
    "suggested": 3,
    "suggestion": 3,
    "suggestions": 3,
    "suicide": 3,
    "suit": 1,
    "suitable": 3,
    "suite": 1,
    "suites": 1,
    "sum": 1,
    "summary": 3,
    "summer": 2,
    "summit": 2,
    "sun": 1,
    "sunday": 2,
    "sunlight": 2,
    "sunny": 2,
    "sunset": 2,
    "super": 2,
    "superior": 4,
    "supermarket": 4,
    "supervisor": 4,
    "supper": 2,
    "supplement": 3,
    "supplied": 2,
    "supplier": 2,
    "supplies": 2,
    "supply": 2,
    "support": 2,
    "supporter": 3,
    "supportive": 3,
    "suppose": 2,
    "supposed": 2,
    "supposedly": 2,
    "supreme": 2,
    "sure": 1,
    "surely": 2,
    "surf": 1,
    "surface": 2,
    "surgeon": 2,
    "surgery": 3,
    "surprise": 2,
    "surprised": 2,
    "surprising": 3,
    "surprisingly": 3,
    "surround": 2,
    "surrounded": 2,
    "surrounding": 3,
    "surveillance": 3,
    "survey": 2,
    "surveys": 2,
    "survival": 3,
    "survive": 2,
    "survivor": 3,
    "susan": 2,
    "suspect": 2,
    "suspend": 2,
    "suspension": 3,
    "suspicion": 3,
    "suspicious": 3,
    "sustain": 2,
    "sustainable": 4,
    "swallow": 2,
    "swam": 1,
    "swear": 1,
    "sweat": 1,
    "sweater": 2,
    "sweden": 2,
    "swedish": 2,
    "sweep": 1,
    "sweet": 1,
    "swell": 1,
    "swept": 1,
    "swim": 1,
    "swimming": 2,
    "swing": 1,
    "swiss": 1,
    "switch": 1,
    "switches": 2,
    "switzerland": 3,
    "sword": 1,
    "swung": 1,
    "sydney": 2,
    "syllable": 3,
    "symbol": 2,
    "symbolic": 3,
    "sympathy": 3,
    "symptom": 2,
    "symptoms": 2,
    "syndrome": 2,
    "syntax": 2,
    "system": 2,
    "t-shirt": 1,
    "table": 2,
    "tablespoon": 3,
    "tabs": 1,
    "tackle": 2,
    "tactic": 2,
    "tag": 1,
    "tags": 1,
    "tail": 1,
    "taiwan": 2,
    "take": 1,
    "taken": 2,
    "takes": 1,
    "taking": 2,
    "tale": 1,
    "talent": 2,
    "talented": 3,
    "tales": 1,
    "talk": 1,
    "talking": 2,
    "talks": 1,
    "tall": 1,
    "tampa": 2,
    "tank": 1,
    "tap": 1,
    "tape": 1,
    "target": 2,
    "targeted": 3,
    "task": 1,
    "tasks": 1,
    "taste": 1,
    "taught": 1,
    "tax": 1,
    "taxes": 2,
    "taxpayer": 3,
    "taylor": 2,
    "tea": 1,
    "teach": 1,
    "teacher": 2,
    "teaching": 2,
    "team": 1,
    "teammate": 2,
    "teams": 1,
    "tear": 1,
    "tears": 1,
    "teaspoon": 2,
    "tech": 1,
    "technical": 3,
    "technician": 3,
    "technique": 2,
    "technological": 5,
    "technology": 4,
    "teen": 1,
    "teenage": 2,
    "teenager": 3,
    "teens": 1,
    "teeth": 1,
    "telecom": 3,
    "telecommunications": 7,
    "telephone": 3,
    "telescope": 3,
    "television": 4,
    "tell": 1,
    "telling": 2,
    "tells": 1,
    "temp": 1,
    "temperature": 4,
    "template": 2,
    "temple": 2,
    "temporary": 4,
    "ten": 1,
    "tend": 1,
    "tendency": 3,
    "tender": 2,
    "tennessee": 3,
    "tennis": 2,
    "tension": 2,
    "tent": 1,
    "term": 1,
    "terminal": 3,
    "terms": 1,
    "terrain": 2,
    "terrible": 3,
    "terribly": 3,
    "terrific": 3,
    "territory": 4,
    "terror": 2,
    "terrorism": 3,
    "terrorist": 3,
    "terry": 2,
    "test": 1,
    "tested": 2,
    "testify": 3,
    "testimony": 4,
    "testing": 1,
    "tests": 1,
    "texas": 2,
    "text": 1,
    "textbook": 2,
    "textbooks": 2,
    "texts": 1,
    "texture": 2,
    "thai": 1,
    "thailand": 2,
    "than": 1,
    "thank": 1,
    "thanks": 1,
    "thanksgiving": 3,
    "that": 1,
    "the": 1,
    "theater": 3,
    "theatre": 3,
    "thee": 1,
    "theft": 1,
    "their": 1,
    "them": 1,
    "theme": 1,
    "themes": 1,
    "themselves": 2,
    "then": 1,
    "theological": 5,
    "theology": 4,
    "theoretical": 5,
    "theory": 3,
    "therapist": 3,
    "therapy": 3,
    "there": 1,
    "thereby": 2,
    "therefore": 2,
    "thermal": 2,
    "these": 1,
    "they": 1,
    "thick": 1,
    "thigh": 1,
    "thin": 1,
    "thing": 1,
    "things": 1,
    "think": 1,
    "thinking": 2,
    "third": 1,
    "thirty": 2,
    "this": 1,
    "thomas": 2,
    "thompson": 2,
    "thongs": 1,
    "thoroughly": 3,
    "those": 1,
    "thou": 1,
    "though": 1,
    "thought": 1,
    "thoughts": 1,
    "thousand": 2,
    "thousands": 2,
    "thread": 1,
    "threads": 1,
    "threat": 1,
    "threaten": 2,
    "three": 1,
    "threshold": 2,
    "threw": 1,
    "thrive": 1,
    "throat": 1,
    "through": 1,
    "throughout": 2,
    "throw": 1,
    "thrown": 1,
    "thumb": 1,
    "thumbnail": 2,
    "thumbs": 1,
    "thursday": 2,
    "thus": 1,
    "thy": 1,
    "ticket": 2,
    "tickets": 2,
    "tide": 1,
    "tie": 1,
    "tiffany": 3,
    "tiger": 2,
    "tight": 1,
    "tighten": 2,
    "tightly": 1,
    "tile": 1,
    "till": 1,
    "tim": 1,
    "timber": 2,
    "time": 1,
    "times": 1,
    "timing": 2,
    "tin": 1,
    "tiny": 2,
    "tion": 1,
    "tip": 1,
    "tips": 1,
    "tire": 1,
    "tired": 1,
    "tissue": 2,
    "title": 2,
    "tits": 1,
    "to": 1,
    "tobacco": 3,
    "today": 2,
    "toe": 1,
    "together": 3,
    "toilet": 2,
    "tokyo": 3,
    "told": 1,
    "tolerance": 3,
    "tolerate": 3,
    "toll": 1,
    "tomato": 3,
    "tomorrow": 3,
    "tone": 1,
    "tones": 1,
    "tongue": 1,
    "tonight": 2,
    "tons": 1,
    "tony": 2,
    "too": 1,
    "took": 1,
    "tool": 1,
    "tools": 1,
    "tooth": 1,
    "top": 1,
    "topic": 2,
    "tops": 1,
    "torn": 1,
    "toronto": 3,
    "torture": 2,
    "toss": 1,
    "total": 2,
    "totally": 3,
    "touch": 1,
    "touchdown": 2,
    "tough": 1,
    "tour": 1,
    "tourism": 2,
    "tourist": 2,
    "tournament": 3,
    "tours": 1,
    "toward": 2,
    "towards": 2,
    "towel": 2,
    "tower": 2,
    "town": 1,
    "towns": 1,
    "township": 2,
    "toxic": 2,
    "toy": 1,
    "toys": 1,
    "trace": 1,
    "track": 1,
    "tracked": 1,
    "tracker": 2,
    "tracking": 2,
    "tracks": 1,
    "trade": 1,
    "trademark": 2,
    "trading": 1,
    "tradition": 3,
    "traditional": 4,
    "traditionally": 4,
    "traffic": 2,
    "tragedy": 3,
    "tragic": 2,
    "trail": 1,
    "trailer": 2,
    "train": 1,
    "trained": 1,
    "trainer": 2,
    "training": 2,
    "trait": 1,
    "trans": 1,
    "transaction": 3,
    "transfer": 2,
    "transferred": 2,
    "transform": 2,
    "transformation": 4,
    "transit": 2,
    "transition": 3,
    "translate": 2,
    "translation": 3,
    "transmission": 3,
    "transmit": 2,
    "transport": 2,
    "transportation": 4,
    "trap": 1,
    "trash": 1,
    "trauma": 2,
    "travel": 2,
    "traveler": 3,
    "tray": 1,
    "treasure": 2,
    "treat": 1,
    "treated": 1,
    "treatment": 2,
    "treaty": 2,
    "tree": 1,
    "trees": 1,
    "tremendous": 3,
    "trend": 1,
    "trends": 1,
    "trial": 2,
    "triangle": 3,
    "tribal": 2,
    "tribe": 1,
    "trick": 1,
    "tried": 1,
    "trigger": 2,
    "trim": 1,
    "trip": 1,
    "triple": 2,
    "trips": 1,
    "triumph": 2,
    "troop": 1,
    "troops": 1,
    "tropical": 3,
    "trouble": 2,
    "troubled": 2,
    "truck": 1,
    "trucks": 1,
    "true": 1,
    "truly": 2,
    "trunk": 1,
    "trust": 1,
    "trusted": 2,
    "truth": 1,
    "try": 1,
    "trying": 2,
    "tube": 1,
    "tuck": 1,
    "tuesday": 2,
    "tumor": 2,
    "tune": 1,
    "tunnel": 2,
    "turkey": 2,
    "turkish": 2,
    "turn": 1,
    "turned": 1,
    "turning": 2,
    "turns": 1,
    "tutorial": 4,
    "tv": 1,
    "twelve": 1,
    "twentieth": 3,
    "twenty": 2,
    "twice": 1,
    "twin": 1,
    "twist": 1,
    "two": 1,
    "type": 1,
    "types": 1,
    "typical": 3,
    "typically": 3,
    "ugly": 2,
    "uh": 1,
    "ukraine": 2,
    "ultimate": 3,
    "ultimately": 3,
    "ultra": 2,
    "unable": 3,
    "uncertain": 3,
    "uncertainty": 4,
    "uncle": 2,
    "uncomfortable": 5,
    "uncover": 3,
    "under": 2,
    "undergo": 3,
    "undergraduate": 5,
    "underground": 3,
    "underline": 3,
    "underlying": 4,
    "undermine": 3,
    "understand": 3,
    "understanding": 4,
    "understood": 3,
    "undertake": 3,
    "unemployment": 4,
    "unexpected": 4,
    "unfair": 2,
    "unfold": 2,
    "unfortunately": 4,
    "unhappy": 3,
    "uniform": 3,
    "union": 2,
    "unique": 2,
    "unit": 2,
    "unite": 2,
    "united": 3,
    "unity": 3,
    "universal": 4,
    "universe": 3,
    "universities": 5,
    "university": 5,
    "unix": 2,
    "unknown": 2,
    "unless": 2,
    "unlike": 2,
    "unlikely": 3,
    "unlimited": 4,
    "unprecedented": 5,
    "until": 2,
    "unusual": 4,
    "up": 1,
    "upcoming": 3,
    "update": 2,
    "updated": 3,
    "upgrade": 2,
    "upload": 2,
    "upon": 2,
    "upper": 2,
    "upset": 2,
    "upstairs": 2,
    "upward": 2,
    "urban": 2,
    "urge": 1,
    "us": 1,
    "usage": 2,
    "use": 1,
    "used": 1,
    "useful": 2,
    "user": 2,
    "username": 3,
    "uses": 2,
    "using": 1,
    "usual": 3,
    "usually": 3,
    "utah": 2,
    "utilities": 4,
    "utility": 4,
    "utilize": 3,
    "vacation": 3,
    "vaccine": 2,
    "vacuum": 3,
    "valentine": 3,
    "valid": 2,
    "validity": 4,
    "valley": 2,
    "valuable": 4,
    "value": 2,
    "valve": 1,
    "van": 1,
    "vancouver": 3,
    "vanish": 2,
    "vapor": 2,
    "variable": 4,
    "variation": 4,
    "variety": 4,
    "various": 3,
    "vary": 2,
    "vast": 1,
    "vector": 2,
    "vegetable": 4,
    "vehicle": 3,
    "vehicles": 3,
    "vendor": 2,
    "venture": 2,
    "venue": 2,
    "verb": 1,
    "verbal": 2,
    "verdict": 2,
    "verification": 5,
    "verify": 3,
    "vermont": 2,
    "version": 2,
    "versus": 2,
    "vertical": 3,
    "very": 2,
    "vessel": 2,
    "vessels": 2,
    "veteran": 3,
    "via": 2,
    "viagra": 3,
    "vice": 1,
    "victim": 2,
    "victoria": 4,
    "victory": 3,
    "video": 3,
    "vietnam": 3,
    "view": 1,
    "viewed": 1,
    "viewer": 2,
    "viewing": 2,
    "views": 1,
    "villa": 2,
    "village": 2,
    "vincent": 2,
    "vintage": 2,
    "vinyl": 2,
    "violate": 3,
    "violation": 4,
    "violence": 3,
    "violent": 3,
    "virgin": 2,
    "virginia": 3,
    "virtual": 3,
    "virtually": 4,
    "virtue": 2,
    "virus": 2,
    "visa": 2,
    "visible": 3,
    "vision": 2,
    "visit": 2,
    "visiting": 3,
    "visitor": 3,
    "vista": 2,
    "visual": 3,
    "vital": 2,
    "vitamin": 3,
    "vocal": 2,
    "voice": 1,
    "voices": 2,
    "void": 1,
    "voip": 4,
    "voltage": 2,
    "volume": 2,
    "voluntary": 4,
    "volunteer": 3,
    "vote": 1,
    "voter": 2,
    "votes": 1,
    "voting": 1,
    "vowel": 2,
    "voyage": 2,
    "voyeur": 2,
    "vs": 1,
    "vulnerable": 4,
    "wage": 1,
    "wagon": 2,
    "waist": 1,
    "wait": 1,
    "waiting": 2,
    "wake": 1,
    "wales": 1,
    "walk": 1,
    "walker": 2,
    "walking": 2,
    "wall": 1,
    "wallpaper": 3,
    "wallpapers": 3,
    "walls": 1,
    "walter": 2,
    "wander": 2,
    "want": 1,
    "wants": 1,
    "war": 1,
    "ward": 1,
    "warehouse": 2,
    "warm": 1,
    "warmth": 1,
    "warn": 1,
    "warner": 2,
    "warning": 2,
    "warranty": 3,
    "warren": 2,
    "warrior": 3,
    "wars": 1,
    "was": 1,
    "wash": 1,
    "washington": 3,
    "waste": 1,
    "watch": 1,
    "watches": 2,
    "watching": 2,
    "water": 2,
    "waters": 2,
    "wave": 1,
    "waves": 1,
    "way": 1,
    "wayne": 1,
    "ways": 1,
    "we": 1,
    "weak": 1,
    "weaken": 2,
    "weakness": 2,
    "wealth": 1,
    "wealthy": 2,
    "weapon": 2,
    "wear": 1,
    "wearing": 2,
    "weather": 2,
    "weave": 1,
    "web": 1,
    "webcam": 2,
    "webmaster": 3,
    "website": 2,
    "wedding": 2,
    "wednesday": 2,
    "weed": 1,
    "week": 1,
    "weekend": 2,
    "weekly": 2,
    "weeks": 1,
    "weigh": 1,
    "weight": 1,
    "weird": 1,
    "welcome": 2,
    "welfare": 2,
    "well": 1,
    "well-being": 2,
    "well-known": 1,
    "wells": 1,
    "went": 1,
    "were": 1,
    "west": 1,
    "western": 2,
    "wet": 1,
    "whale": 1,
    "what": 1,
    "whatever": 3,
    "wheat": 1,
    "wheel": 1,
    "wheelchair": 2,
    "wheels": 1,
    "when": 1,
    "whenever": 3,
    "where": 1,
    "whereas": 2,
    "wherever": 3,
    "whether": 2,
    "which": 1,
    "while": 1,
    "whip": 1,
    "whisper": 2,
    "whispered": 2,
    "whistle": 2,
    "white": 1,
    "who": 1,
    "whoever": 3,
    "whole": 1,
    "wholesale": 2,
    "whom": 1,
    "whose": 1,
    "why": 1,
    "wide": 1,
    "widely": 2,
    "widespread": 2,
    "widow": 2,
    "width": 1,
    "wife": 1,
    "wild": 1,
    "wilderness": 3,
    "wildlife": 2,
    "will": 1,
    "william": 2,
    "williams": 2,
    "willing": 2,
    "willingness": 2,
    "wilson": 2,
    "win": 1,
    "wind": 1,
    "window": 2,
    "windows": 2,
    "winds": 1,
    "wine": 1,
    "wing": 1,
    "wings": 1,
    "winner": 2,
    "winning": 2,
    "wins": 1,
    "winter": 2,
    "wipe": 1,
    "wire": 1,
    "wireless": 2,
    "wisconsin": 3,
    "wisdom": 2,
    "wise": 1,
    "wish": 1,
    "with": 1,
    "withdraw": 2,
    "withdrawal": 3,
    "within": 2,
    "without": 2,
    "witness": 2,
    "wizard": 2,
    "wolf": 1,
    "woman": 2,
    "women": 2,
    "won": 1,
    "wonder": 2,
    "wonderful": 3,
    "wood": 1,
    "wooden": 2,
    "woods": 1,
    "wool": 1,
    "word": 1,
    "words": 1,
    "wore": 1,
    "work": 1,
    "worked": 1,
    "worker": 2,
    "workforce": 2,
    "working": 2,
    "workout": 2,
    "workplace": 2,
    "works": 1,
    "workshop": 2,
    "world": 1,
    "worldwide": 2,
    "worried": 2,
    "worry": 2,
    "worse": 1,
    "worship": 2,
    "worst": 1,
    "worth": 1,
    "would": 1,
    "wound": 1,
    "wow": 1,
    "wrap": 1,
    "wrapped": 1,
    "wright": 1,
    "wrist": 1,
    "write": 1,
    "writer": 2,
    "writes": 1,
    "writing": 2,
    "written": 2,
    "wrong": 1,
    "wrote": 1,
    "wyoming": 3,
    "xbox": 2,
    "yahoo": 2,
    "yard": 1,
    "yards": 1,
    "yeah": 1,
    "year": 1,
    "years": 1,
    "yell": 1,
    "yellow": 2,
    "yes": 1,
    "yesterday": 3,
    "yet": 1,
    "yield": 1,
    "yoga": 2,
    "york": 1,
    "you": 1,
    "young": 1,
    "younger": 2,
    "youngster": 2,
    "your": 1,
    "yours": 1,
    "yourself": 2,
    "youth": 1,
    "zealand": 2,
    "zero": 2,
    "zone": 1,
    "zoo": 1,
    "zoom": 1
}

In [None]:
"""
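Dale-Chall word list.

Note: entries keep their source capitalization (e.g. 'America', 'April', 'God')
and a few words are listed more than once (e.g. 'cuff', 'cup', 'draw', 'goodbye').
Membership tests against this tuple are therefore case-sensitive, and its length
over-counts the distinct words; normalize case / deduplicate if that matters
(TODO: confirm against the code that consumes this tuple).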
"""
DALE_CHALL_WORDS = (
    "a",
    "able",
    "aboard",
    "about",
    "above",
    "absent",
    "accept",
    "accident",
    "account",
    "ache",
    "aching",
    "acorn",
    "acre",
    "across",
    "act",
    "acts",
    "add",
    "address",
    "admire",
    "adventure",
    "afar",
    "afraid",
    "after",
    "afternoon",
    "afterward",
    "afterwards",
    "again",
    "against",
    "age",
    "aged",
    "ago",
    "agree",
    "ah",
    "ahead",
    "aid",
    "aim",
    "air",
    "airfield",
    "airplane",
    "airport",
    "airship",
    "airy",
    "alarm",
    "alike",
    "alive",
    "all",
    "alley",
    "alligator",
    "allow",
    "almost",
    "alone",
    "along",
    "aloud",
    "already",
    "also",
    "always",
    "am",
    "America",
    "American",
    "among",
    "amount",
    "an",
    "and",
    "angel",
    "anger",
    "angry",
    "animal",
    "another",
    "answer",
    "ant",
    "any",
    "anybody",
    "anyhow",
    "anyone",
    "anything",
    "anyway",
    "anywhere",
    "apart",
    "apartment",
    "ape",
    "apiece",
    "appear",
    "apple",
    "April",
    "apron",
    "are",
    "aren't",
    "arise",
    "arithmetic",
    "arm",
    "armful",
    "army",
    "arose",
    "around",
    "arrange",
    "arrive",
    "arrived",
    "arrow",
    "art",
    "artist",
    "as",
    "ash",
    "ashes",
    "aside",
    "ask",
    "asleep",
    "at",
    "ate",
    "attack",
    "attend",
    "attention",
    "August",
    "aunt",
    "author",
    "auto",
    "automobile",
    "autumn",
    "avenue",
    "awake",
    "awaken",
    "away",
    "awful",
    "awfully",
    "awhile",
    "ax",
    "axe",
    "baa",
    "babe",
    "babies",
    "back",
    "background",
    "backward",
    "backwards",
    "bacon",
    "bad",
    "badge",
    "badly",
    "bag",
    "bake",
    "baker",
    "bakery",
    "baking",
    "ball",
    "balloon",
    "banana",
    "band",
    "bandage",
    "bang",
    "banjo",
    "bank",
    "banker",
    "bar",
    "barber",
    "bare",
    "barefoot",
    "barely",
    "bark",
    "barn",
    "barrel",
    "base",
    "baseball",
    "basement",
    "basket",
    "bat",
    "batch",
    "bath",
    "bathe",
    "bathing",
    "bathroom",
    "bathtub",
    "battle",
    "battleship",
    "bay",
    "be",
    "beach",
    "bead",
    "beam",
    "bean",
    "bear",
    "beard",
    "beast",
    "beat",
    "beating",
    "beautiful",
    "beautify",
    "beauty",
    "became",
    "because",
    "become",
    "becoming",
    "bed",
    "bedbug",
    "bedroom",
    "bedspread",
    "bedtime",
    "bee",
    "beech",
    "beef",
    "beefsteak",
    "beehive",
    "been",
    "beer",
    "beet",
    "before",
    "beg",
    "began",
    "beggar",
    "begged",
    "begin",
    "beginning",
    "begun",
    "behave",
    "behind",
    "being",
    "believe",
    "bell",
    "belong",
    "below",
    "belt",
    "bench",
    "bend",
    "beneath",
    "bent",
    "berries",
    "berry",
    "beside",
    "besides",
    "best",
    "bet",
    "better",
    "between",
    "bib",
    "bible",
    "bicycle",
    "bid",
    "big",
    "bigger",
    "bill",
    "billboard",
    "bin",
    "bind",
    "bird",
    "birth",
    "birthday",
    "biscuit",
    "bit",
    "bite",
    "biting",
    "bitter",
    "black",
    "blackberry",
    "blackbird",
    "blackboard",
    "blackness",
    "blacksmith",
    "blame",
    "blank",
    "blanket",
    "blast",
    "blaze",
    "bleed",
    "bless",
    "blessing",
    "blew",
    "blind",
    "blindfold",
    "blinds",
    "block",
    "blood",
    "bloom",
    "blossom",
    "blot",
    "blow",
    "blue",
    "blueberry",
    "bluebird",
    "blush",
    "board",
    "boast",
    "boat",
    "bob",
    "bobwhite",
    "bodies",
    "body",
    "boil",
    "boiler",
    "bold",
    "bone",
    "bonnet",
    "boo",
    "book",
    "bookcase",
    "bookkeeper",
    "boom",
    "boot",
    "born",
    "borrow",
    "boss",
    "both",
    "bother",
    "bottle",
    "bottom",
    "bought",
    "bounce",
    "bow",
    "bowl",
    "bow-wow",
    "box",
    "boxcar",
    "boxer",
    "boxes",
    "boy",
    "boyhood",
    "bracelet",
    "brain",
    "brake",
    "bran",
    "branch",
    "brass",
    "brave",
    "bread",
    "break",
    "breakfast",
    "breast",
    "breath",
    "breathe",
    "breeze",
    "brick",
    "bride",
    "bridge",
    "bright",
    "brightness",
    "bring",
    "broad",
    "broadcast",
    "broke",
    "broken",
    "brook",
    "broom",
    "brother",
    "brought",
    "brown",
    "brush",
    "bubble",
    "bucket",
    "buckle",
    "bud",
    "buffalo",
    "bug",
    "buggy",
    "build",
    "building",
    "built",
    "bulb",
    "bull",
    "bullet",
    "bum",
    "bumblebee",
    "bump",
    "bun",
    "bunch",
    "bundle",
    "bunny",
    "burn",
    "burst",
    "bury",
    "bus",
    "bush",
    "bushel",
    "business",
    "busy",
    "but",
    "butcher",
    "butt",
    "butter",
    "buttercup",
    "butterfly",
    "buttermilk",
    "butterscotch",
    "button",
    "buttonhole",
    "buy",
    "buzz",
    "by",
    "bye",
    "cab",
    "cabbage",
    "cabin",
    "cabinet",
    "cackle",
    "cage",
    "cake",
    "calendar",
    "calf",
    "call",
    "caller",
    "calling",
    "came",
    "camel",
    "camp",
    "campfire",
    "can",
    "canal",
    "canary",
    "candle",
    "candlestick",
    "candy",
    "cane",
    "cannon",
    "cannot",
    "canoe",
    "can't",
    "canyon",
    "cap",
    "cape",
    "capital",
    "captain",
    "car",
    "card",
    "cardboard",
    "care",
    "careful",
    "careless",
    "carelessness",
    "carload",
    "carpenter",
    "carpet",
    "carriage",
    "carrot",
    "carry",
    "cart",
    "carve",
    "case",
    "cash",
    "cashier",
    "castle",
    "cat",
    "catbird",
    "catch",
    "catcher",
    "caterpillar",
    "catfish",
    "catsup",
    "cattle",
    "caught",
    "cause",
    "cave",
    "ceiling",
    "cell",
    "cellar",
    "cent",
    "center",
    "cereal",
    "certain",
    "certainly",
    "chain",
    "chair",
    "chalk",
    "champion",
    "chance",
    "change",
    "chap",
    "charge",
    "charm",
    "chart",
    "chase",
    "chatter",
    "cheap",
    "cheat",
    "check",
    "checkers",
    "cheek",
    "cheer",
    "cheese",
    "cherry",
    "chest",
    "chew",
    "chick",
    "chicken",
    "chief",
    "child",
    "childhood",
    "children",
    "chill",
    "chilly",
    "chimney",
    "chin",
    "china",
    "chip",
    "chipmunk",
    "chocolate",
    "choice",
    "choose",
    "chop",
    "chorus",
    "chose",
    "chosen",
    "christen",
    "Christmas",
    "church",
    "churn",
    "cigarette",
    "circle",
    "circus",
    "citizen",
    "city",
    "clang",
    "clap",
    "class",
    "classmate",
    "classroom",
    "claw",
    "clay",
    "clean",
    "cleaner",
    "clear",
    "clerk",
    "clever",
    "click",
    "cliff",
    "climb",
    "clip",
    "cloak",
    "clock",
    "close",
    "closet",
    "cloth",
    "clothes",
    "clothing",
    "cloud",
    "cloudy",
    "clover",
    "clown",
    "club",
    "cluck",
    "clump",
    "coach",
    "coal",
    "coast",
    "coat",
    "cob",
    "cobbler",
    "cocoa",
    "coconut",
    "cocoon",
    "cod",
    "codfish",
    "coffee",
    "coffeepot",
    "coin",
    "cold",
    "collar",
    "college",
    "color",
    "colored",
    "colt",
    "column",
    "comb",
    "come",
    "comfort",
    "comic",
    "coming",
    "company",
    "compare",
    "conductor",
    "cone",
    "connect",
    "coo",
    "cook",
    "cooked",
    "cooking",
    "cookie",
    "cookies",
    "cool",
    "cooler",
    "coop",
    "copper",
    "copy",
    "cord",
    "cork",
    "corn",
    "corner",
    "correct",
    "cost",
    "cot",
    "cottage",
    "cotton",
    "couch",
    "cough",
    "could",
    "couldn't",
    "count",
    "counter",
    "country",
    "county",
    "course",
    "court",
    "cousin",
    "cover",
    "cow",
    "coward",
    "cowardly",
    "cowboy",
    "cozy",
    "crab",
    "crack",
    "cracker",
    "cradle",
    "cramps",
    "cranberry",
    "crank",
    "cranky",
    "crash",
    "crawl",
    "crazy",
    "cream",
    "creamy",
    "creek",
    "creep",
    "crept",
    "cried",
    "croak",
    "crook",
    "crooked",
    "crop",
    "cross",
    "crossing",
    "cross-eyed",
    "crow",
    "crowd",
    "crowded",
    "crown",
    "cruel",
    "crumb",
    "crumble",
    "crush",
    "crust",
    "cry",
    "cries",
    "cub",
    "cuff",
    "cup",
    "cuff",
    "cup",
    "cupboard",
    "cupful",
    "cure",
    "curl",
    "curly",
    "curtain",
    "curve",
    "cushion",
    "custard",
    "customer",
    "cut",
    "cute",
    "cutting",
    "dab",
    "dad",
    "daddy",
    "daily",
    "dairy",
    "daisy",
    "dam",
    "damage",
    "dame",
    "damp",
    "dance",
    "dancer",
    "dancing",
    "dandy",
    "danger",
    "dangerous",
    "dare",
    "dark",
    "darkness",
    "darling",
    "darn",
    "dart",
    "dash",
    "date",
    "daughter",
    "dawn",
    "day",
    "daybreak",
    "daytime",
    "dead",
    "deaf",
    "deal",
    "dear",
    "death",
    "December",
    "decide",
    "deck",
    "deed",
    "deep",
    "deer",
    "defeat",
    "defend",
    "defense",
    "delight",
    "den",
    "dentist",
    "depend",
    "deposit",
    "describe",
    "desert",
    "deserve",
    "desire",
    "desk",
    "destroy",
    "devil",
    "dew",
    "diamond",
    "did",
    "didn't",
    "die",
    "died",
    "dies",
    "difference",
    "different",
    "dig",
    "dim",
    "dime",
    "dine",
    "ding-dong",
    "dinner",
    "dip",
    "direct",
    "direction",
    "dirt",
    "dirty",
    "discover",
    "dish",
    "dislike",
    "dismiss",
    "ditch",
    "dive",
    "diver",
    "divide",
    "do",
    "dock",
    "doctor",
    "does",
    "doesn't",
    "dog",
    "doll",
    "dollar",
    "dolly",
    "done",
    "donkey",
    "don't",
    "door",
    "doorbell",
    "doorknob",
    "doorstep",
    "dope",
    "dot",
    "double",
    "dough",
    "dove",
    "down",
    "downstairs",
    "downtown",
    "dozen",
    "drag",
    "drain",
    "drank",
    "draw",
    "drawer",
    "draw",
    "drawing",
    "dream",
    "dress",
    "dresser",
    "dressmaker",
    "drew",
    "dried",
    "drift",
    "drill",
    "drink",
    "drip",
    "drive",
    "driven",
    "driver",
    "drop",
    "drove",
    "drown",
    "drowsy",
    "drub",
    "drum",
    "drunk",
    "dry",
    "duck",
    "due",
    "dug",
    "dull",
    "dumb",
    "dump",
    "during",
    "dust",
    "dusty",
    "duty",
    "dwarf",
    "dwell",
    "dwelt",
    "dying",
    "each",
    "eager",
    "eagle",
    "ear",
    "early",
    "earn",
    "earth",
    "east",
    "eastern",
    "easy",
    "eat",
    "eaten",
    "edge",
    "egg",
    "eh",
    "eight",
    "eighteen",
    "eighth",
    "eighty",
    "either",
    "elbow",
    "elder",
    "eldest",
    "electric",
    "electricity",
    "elephant",
    "eleven",
    "elf",
    "elm",
    "else",
    "elsewhere",
    "empty",
    "end",
    "ending",
    "enemy",
    "engine",
    "engineer",
    "English",
    "enjoy",
    "enough",
    "enter",
    "envelope",
    "equal",
    "erase",
    "eraser",
    "errand",
    "escape",
    "eve",
    "even",
    "evening",
    "ever",
    "every",
    "everybody",
    "everyday",
    "everyone",
    "everything",
    "everywhere",
    "evil",
    "exact",
    "except",
    "exchange",
    "excited",
    "exciting",
    "excuse",
    "exit",
    "expect",
    "explain",
    "extra",
    "eye",
    "eyebrow",
    "fable",
    "face",
    "facing",
    "fact",
    "factory",
    "fail",
    "faint",
    "fair",
    "fairy",
    "faith",
    "fake",
    "fall",
    "false",
    "family",
    "fan",
    "fancy",
    "far",
    "faraway",
    "fare",
    "farmer",
    "farm",
    "farming",
    "far-off",
    "farther",
    "fashion",
    "fast",
    "fasten",
    "fat",
    "father",
    "fault",
    "favor",
    "favorite",
    "fear",
    "feast",
    "feather",
    "February",
    "fed",
    "feed",
    "feel",
    "feet",
    "fell",
    "fellow",
    "felt",
    "fence",
    "fever",
    "few",
    "fib",
    "fiddle",
    "field",
    "fife",
    "fifteen",
    "fifth",
    "fifty",
    "fig",
    "fight",
    "figure",
    "file",
    "fill",
    "film",
    "finally",
    "find",
    "fine",
    "finger",
    "finish",
    "fire",
    "firearm",
    "firecracker",
    "fireplace",
    "fireworks",
    "firing",
    "first",
    "fish",
    "fisherman",
    "fist",
    "fit",
    "fits",
    "five",
    "fix",
    "flag",
    "flake",
    "flame",
    "flap",
    "flash",
    "flashlight",
    "flat",
    "flea",
    "flesh",
    "flew",
    "flies",
    "flight",
    "flip",
    "flip-flop",
    "float",
    "flock",
    "flood",
    "floor",
    "flop",
    "flour",
    "flow",
    "flower",
    "flowery",
    "flutter",
    "fly",
    "foam",
    "fog",
    "foggy",
    "fold",
    "folks",
    "follow",
    "following",
    "fond",
    "food",
    "fool",
    "foolish",
    "foot",
    "football",
    "footprint",
    "for",
    "forehead",
    "forest",
    "forget",
    "forgive",
    "forgot",
    "forgotten",
    "fork",
    "form",
    "fort",
    "forth",
    "fortune",
    "forty",
    "forward",
    "fought",
    "found",
    "fountain",
    "four",
    "fourteen",
    "fourth",
    "fox",
    "frame",
    "free",
    "freedom",
    "freeze",
    "freight",
    "French",
    "fresh",
    "fret",
    "Friday",
    "fried",
    "friend",
    "friendly",
    "friendship",
    "frighten",
    "frog",
    "from",
    "front",
    "frost",
    "frown",
    "froze",
    "fruit",
    "fry",
    "fudge",
    "fuel",
    "full",
    "fully",
    "fun",
    "funny",
    "fur",
    "furniture",
    "further",
    "fuzzy",
    "gain",
    "gallon",
    "gallop",
    "game",
    "gang",
    "garage",
    "garbage",
    "garden",
    "gas",
    "gasoline",
    "gate",
    "gather",
    "gave",
    "gay",
    "gear",
    "geese",
    "general",
    "gentle",
    "gentleman",
    "gentlemen",
    "geography",
    "get",
    "getting",
    "giant",
    "gift",
    "gingerbread",
    "girl",
    "give",
    "given",
    "giving",
    "glad",
    "gladly",
    "glance",
    "glass",
    "glasses",
    "gleam",
    "glide",
    "glory",
    "glove",
    "glow",
    "glue",
    "go",
    "going",
    "goes",
    "goal",
    "goat",
    "gobble",
    "God",
    "god",
    "godmother",
    "gold",
    "golden",
    "goldfish",
    "golf",
    "gone",
    "good",
    "goods",
    "goodbye",
    "good-by",
    "goodbye",
    "good-bye",
    "good-looking",
    "goodness",
    "goody",
    "goose",
    "gooseberry",
    "got",
    "govern",
    "government",
    "gown",
    "grab",
    "gracious",
    "grade",
    "grain",
    "grand",
    "grandchild",
    "grandchildren",
    "granddaughter",
    "grandfather",
    "grandma",
    "grandmother",
    "grandpa",
    "grandson",
    "grandstand",
    "grape",
    "grapes",
    "grapefruit",
    "grass",
    "grasshopper",
    "grateful",
    "grave",
    "gravel",
    "graveyard",
    "gravy",
    "gray",
    "graze",
    "grease",
    "great",
    "green",
    "greet",
    "grew",
    "grind",
    "groan",
    "grocery",
    "ground",
    "group",
    "grove",
    "grow",
    "guard",
    "guess",
    "guest",
    "guide",
    "gulf",
    "gum",
    "gun",
    "gunpowder",
    "guy",
    "ha",
    "habit",
    "had",
    "hadn't",
    "hail",
    "hair",
    "haircut",
    "hairpin",
    "half",
    "hall",
    "halt",
    "ham",
    "hammer",
    "hand",
    "handful",
    "handkerchief",
    "handle",
    "handwriting",
    "hang",
    "happen",
    "happily",
    "happiness",
    "happy",
    "harbor",
    "hard",
    "hardly",
    "hardship",
    "hardware",
    "hare",
    "hark",
    "harm",
    "harness",
    "harp",
    "harvest",
    "has",
    "hasn't",
    "haste",
    "hasten",
    "hasty",
    "hat",
    "hatch",
    "hatchet",
    "hate",
    "haul",
    "have",
    "haven't",
    "having",
    "hawk",
    "hay",
    "hayfield",
    "haystack",
    "he",
    "head",
    "headache",
    "heal",
    "health",
    "healthy",
    "heap",
    "hear",
    "hearing",
    "heard",
    "heart",
    "heat",
    "heater",
    "heaven",
    "heavy",
    "he'd",
    "heel",
    "height",
    "held",
    "hell",
    "he'll",
    "hello",
    "helmet",
    "help",
    "helper",
    "helpful",
    "hem",
    "hen",
    "henhouse",
    "her",
    "hers",
    "herd",
    "here",
    "here's",
    "hero",
    "herself",
    "he's",
    "hey",
    "hickory",
    "hid",
    "hidden",
    "hide",
    "high",
    "highway",
    "hill",
    "hillside",
    "hilltop",
    "hilly",
    "him",
    "himself",
    "hind",
    "hint",
    "hip",
    "hire",
    "his",
    "hiss",
    "history",
    "hit",
    "hitch",
    "hive",
    "ho",
    "hoe",
    "hog",
    "hold",
    "holder",
    "hole",
    "holiday",
    "hollow",
    "holy",
    "home",
    "homely",
    "homesick",
    "honest",
    "honey",
    "honeybee",
    "honeymoon",
    "honk",
    "honor",
    "hood",
    "hoof",
    "hook",
    "hoop",
    "hop",
    "hope",
    "hopeful",
    "hopeless",
    "horn",
    "horse",
    "horseback",
    "horseshoe",
    "hose",
    "hospital",
    "host",
    "hot",
    "hotel",
    "hound",
    "hour",
    "house",
    "housetop",
    "housewife",
    "housework",
    "how",
    "however",
    "howl",
    "hug",
    "huge",
    "hum",
    "humble",
    "hump",
    "hundred",
    "hung",
    "hunger",
    "hungry",
    "hunk",
    "hunt",
    "hunter",
    "hurrah",
    "hurried",
    "hurry",
    "hurt",
    "husband",
    "hush",
    "hut",
    "hymn",
    "i",
    "ice",
    "icy",
    "i'd",
    "idea",
    "ideal",
    "if",
    "ill",
    "i'll",
    "i'm",
    "important",
    "impossible",
    "improve",
    "in",
    "inch",
    "inches",
    "income",
    "indeed",
    "indian",
    "indoors",
    "ink",
    "inn",
    "insect",
    "inside",
    "instant",
    "instead",
    "insult",
    "intend",
    "interested",
    "interesting",
    "into",
    "invite",
    "iron",
    "is",
    "island",
    "isn't",
    "it",
    "its",
    "it's",
    "itself",
    "i've",
    "ivory",
    "ivy",
    "jacket",
    "jacks",
    "jail",
    "jam",
    "January",
    "jar",
    "jaw",
    "jay",
    "jelly",
    "jellyfish",
    "jerk",
    "jig",
    "job",
    "jockey",
    "join",
    "joke",
    "joking",
    "jolly",
    "journey",
    "joy",
    "joyful",
    "joyous",
    "judge",
    "jug",
    "juice",
    "juicy",
    "July",
    "jump",
    "June",
    "junior",
    "junk",
    "just",
    "keen",
    "keep",
    "kept",
    "kettle",
    "key",
    "kick",
    "kid",
    "kill",
    "killed",
    "kind",
    "kindly",
    "kindness",
    "king",
    "kingdom",
    "kiss",
    "kitchen",
    "kite",
    "kitten",
    "kitty",
    "knee",
    "kneel",
    "knew",
    "knife",
    "knit",
    "knives",
    "knob",
    "knock",
    "knot",
    "know",
    "known",
    "lace",
    "lad",
    "ladder",
    "ladies",
    "lady",
    "laid",
    "lake",
    "lamb",
    "lame",
    "lamp",
    "land",
    "lane",
    "language",
    "lantern",
    "lap",
    "lard",
    "large",
    "lash",
    "lass",
    "last",
    "late",
    "laugh",
    "laundry",
    "law",
    "lawn",
    "lawyer",
    "lay",
    "lazy",
    "lead",
    "leader",
    "leaf",
    "leak",
    "lean",
    "leap",
    "learn",
    "learned",
    "least",
    "leather",
    "leave",
    "leaving",
    "led",
    "left",
    "leg",
    "lemon",
    "lemonade",
    "lend",
    "length",
    "less",
    "lesson",
    "let",
    "let's",
    "letter",
    "letting",
    "lettuce",
    "level",
    "liberty",
    "library",
    "lice",
    "lick",
    "lid",
    "lie",
    "life",
    "lift",
    "light",
    "lightness",
    "lightning",
    "like",
    "likely",
    "liking",
    "lily",
    "limb",
    "lime",
    "limp",
    "line",
    "linen",
    "lion",
    "lip",
    "list",
    "listen",
    "lit",
    "little",
    "live",
    "lives",
    "lively",
    "liver",
    "living",
    "lizard",
    "load",
    "loaf",
    "loan",
    "loaves",
    "lock",
    "locomotive",
    "log",
    "lone",
    "lonely",
    "lonesome",
    "long",
    "look",
    "lookout",
    "loop",
    "loose",
    "lord",
    "lose",
    "loser",
    "loss",
    "lost",
    "lot",
    "loud",
    "love",
    "lovely",
    "lover",
    "low",
    "luck",
    "lucky",
    "lumber",
    "lump",
    "lunch",
    "lying",
    "ma",
    "machine",
    "machinery",
    "mad",
    "made",
    "magazine",
    "magic",
    "maid",
    "mail",
    "mailbox",
    "mailman",
    "major",
    "make",
    "making",
    "male",
    "mama",
    "mamma",
    "man",
    "manager",
    "mane",
    "manger",
    "many",
    "map",
    "maple",
    "marble",
    "march",
    "March",
    "mare",
    "mark",
    "market",
    "marriage",
    "married",
    "marry",
    "mask",
    "mast",
    "master",
    "mat",
    "match",
    "matter",
    "mattress",
    "may",
    "May",
    "maybe",
    "mayor",
    "maypole",
    "me",
    "meadow",
    "meal",
    "mean",
    "means",
    "meant",
    "measure",
    "meat",
    "medicine",
    "meet",
    "meeting",
    "melt",
    "member",
    "men",
    "mend",
    "meow",
    "merry",
    "mess",
    "message",
    "met",
    "metal",
    "mew",
    "mice",
    "middle",
    "midnight",
    "might",
    "mighty",
    "mile",
    "milk",
    "milkman",
    "mill",
    "miler",
    "million",
    "mind",
    "mine",
    "miner",
    "mint",
    "minute",
    "mirror",
    "mischief",
    "miss",
    "Miss",
    "misspell",
    "mistake",
    "misty",
    "mitt",
    "mitten",
    "mix",
    "moment",
    "Monday",
    "money",
    "monkey",
    "month",
    "moo",
    "moon",
    "moonlight",
    "moose",
    "mop",
    "more",
    "morning",
    "morrow",
    "moss",
    "most",
    "mostly",
    "mother",
    "motor",
    "mount",
    "mountain",
    "mouse",
    "mouth",
    "move",
    "movie",
    "movies",
    "moving",
    "mow",
    "Mr.",
    "Mrs.",
    "much",
    "mud",
    "muddy",
    "mug",
    "mule",
    "multiply",
    "murder",
    "music",
    "must",
    "my",
    "myself",
    "nail",
    "name",
    "nap",
    "napkin",
    "narrow",
    "nasty",
    "naughty",
    "navy",
    "near",
    "nearby",
    "nearly",
    "neat",
    "neck",
    "necktie",
    "need",
    "needle",
    "needn't",
    "Negro",
    "neighbor",
    "neighborhood",
    "neither",
    "nerve",
    "nest",
    "net",
    "never",
    "nevermore",
    "new",
    "news",
    "newspaper",
    "next",
    "nibble",
    "nice",
    "nickel",
    "night",
    "nightgown",
    "nine",
    "nineteen",
    "ninety",
    "no",
    "nobody",
    "nod",
    "noise",
    "noisy",
    "none",
    "noon",
    "nor",
    "north",
    "northern",
    "nose",
    "not",
    "note",
    "nothing",
    "notice",
    "November",
    "now",
    "nowhere",
    "number",
    "nurse",
    "nut",
    "oak",
    "oar",
    "oatmeal",
    "oats",
    "obey",
    "ocean",
    "o'clock",
    "October",
    "odd",
    "of",
    "off",
    "offer",
    "office",
    "officer",
    "often",
    "oh",
    "oil",
    "old",
    "old-fashioned",
    "on",
    "once",
    "one",
    "onion",
    "only",
    "onward",
    "open",
    "or",
    "orange",
    "orchard",
    "order",
    "ore",
    "organ",
    "other",
    "otherwise",
    "ouch",
    "ought",
    "our",
    "ours",
    "ourselves",
    "out",
    "outdoors",
    "outfit",
    "outlaw",
    "outline",
    "outside",
    "outward",
    "oven",
    "over",
    "overalls",
    "overcoat",
    "overeat",
    "overhead",
    "overhear",
    "overnight",
    "overturn",
    "owe",
    "owing",
    "owl",
    "own",
    "owner",
    "ox",
    "pa",
    "pace",
    "pack",
    "package",
    "pad",
    "page",
    "paid",
    "pail",
    "pain",
    "painful",
    "paint",
    "painter",
    "painting",
    "pair",
    "pal",
    "palace",
    "pale",
    "pan",
    "pancake",
    "pane",
    "pansy",
    "pants",
    "papa",
    "paper",
    "parade",
    "pardon",
    "parent",
    "park",
    "part",
    "partly",
    "partner",
    "party",
    "pass",
    "passenger",
    "past",
    "paste",
    "pasture",
    "pat",
    "patch",
    "path",
    "patter",
    "pave",
    "pavement",
    "paw",
    "pay",
    "payment",
    "pea",
    "peas",
    "peace",
    "peaceful",
    "peach",
    "peaches",
    "peak",
    "peanut",
    "pear",
    "pearl",
    "peck",
    "peek",
    "peel",
    "peep",
    "peg",
    "pen",
    "pencil",
    "penny",
    "people",
    "pepper",
    "peppermint",
    "perfume",
    "perhaps",
    "person",
    "pet",
    "phone",
    "piano",
    "pick",
    "pickle",
    "picnic",
    "picture",
    "pie",
    "piece",
    "pig",
    "pigeon",
    "piggy",
    "pile",
    "pill",
    "pillow",
    "pin",
    "pine",
    "pineapple",
    "pink",
    "pint",
    "pipe",
    "pistol",
    "pit",
    "pitch",
    "pitcher",
    "pity",
    "place",
    "plain",
    "plan",
    "plane",
    "plant",
    "plate",
    "platform",
    "platter",
    "play",
    "player",
    "playground",
    "playhouse",
    "playmate",
    "plaything",
    "pleasant",
    "please",
    "pleasure",
    "plenty",
    "plow",
    "plug",
    "plum",
    "pocket",
    "pocketbook",
    "poem",
    "point",
    "poison",
    "poke",
    "pole",
    "police",
    "policeman",
    "polish",
    "polite",
    "pond",
    "ponies",
    "pony",
    "pool",
    "poor",
    "pop",
    "popcorn",
    "popped",
    "porch",
    "pork",
    "possible",
    "post",
    "postage",
    "postman",
    "pot",
    "potato",
    "potatoes",
    "pound",
    "pour",
    "powder",
    "power",
    "powerful",
    "praise",
    "pray",
    "prayer",
    "prepare",
    "present",
    "pretty",
    "price",
    "prick",
    "prince",
    "princess",
    "print",
    "prison",
    "prize",
    "promise",
    "proper",
    "protect",
    "proud",
    "prove",
    "prune",
    "public",
    "puddle",
    "puff",
    "pull",
    "pump",
    "pumpkin",
    "punch",
    "punish",
    "pup",
    "pupil",
    "puppy",
    "pure",
    "purple",
    "purse",
    "push",
    "puss",
    "pussy",
    "pussycat",
    "put",
    "putting",
    "puzzle",
    "quack",
    "quart",
    "quarter",
    "queen",
    "queer",
    "question",
    "quick",
    "quickly",
    "quiet",
    "quilt",
    "quit",
    "quite",
    "rabbit",
    "race",
    "rack",
    "radio",
    "radish",
    "rag",
    "rail",
    "railroad",
    "railway",
    "rain",
    "rainy",
    "rainbow",
    "raise",
    "raisin",
    "rake",
    "ram",
    "ran",
    "ranch",
    "rang",
    "rap",
    "rapidly",
    "rat",
    "rate",
    "rather",
    "rattle",
    "raw",
    "ray",
    "reach",
    "read",
    "reader",
    "reading",
    "ready",
    "real",
    "really",
    "reap",
    "rear",
    "reason",
    "rebuild",
    "receive",
    "recess",
    "record",
    "red",
    "redbird",
    "redbreast",
    "refuse",
    "reindeer",
    "rejoice",
    "remain",
    "remember",
    "remind",
    "remove",
    "rent",
    "repair",
    "repay",
    "repeat",
    "report",
    "rest",
    "return",
    "review",
    "reward",
    "rib",
    "ribbon",
    "rice",
    "rich",
    "rid",
    "riddle",
    "ride",
    "rider",
    "riding",
    "right",
    "rim",
    "ring",
    "rip",
    "ripe",
    "rise",
    "rising",
    "river",
    "road",
    "roadside",
    "roar",
    "roast",
    "rob",
    "robber",
    "robe",
    "robin",
    "rock",
    "rocky",
    "rocket",
    "rode",
    "roll",
    "roller",
    "roof",
    "room",
    "rooster",
    "root",
    "rope",
    "rose",
    "rosebud",
    "rot",
    "rotten",
    "rough",
    "round",
    "route",
    "row",
    "rowboat",
    "royal",
    "rub",
    "rubbed",
    "rubber",
    "rubbish",
    "rug",
    "rule",
    "ruler",
    "rumble",
    "run",
    "rung",
    "runner",
    "running",
    "rush",
    "rust",
    "rusty",
    "rye",
    "sack",
    "sad",
    "saddle",
    "sadness",
    "safe",
    "safety",
    "said",
    "sail",
    "sailboat",
    "sailor",
    "saint",
    "salad",
    "sale",
    "salt",
    "same",
    "sand",
    "sandy",
    "sandwich",
    "sang",
    "sank",
    "sap",
    "sash",
    "sat",
    "satin",
    "satisfactory",
    "Saturday",
    "sausage",
    "savage",
    "save",
    "savings",
    "saw",
    "say",
    "scab",
    "scales",
    "scare",
    "scarf",
    "school",
    "schoolboy",
    "schoolhouse",
    "schoolmaster",
    "schoolroom",
    "scorch",
    "score",
    "scrap",
    "scrape",
    "scratch",
    "scream",
    "screen",
    "screw",
    "scrub",
    "sea",
    "seal",
    "seam",
    "search",
    "season",
    "seat",
    "second",
    "secret",
    "see",
    "seeing",
    "seed",
    "seek",
    "seem",
    "seen",
    "seesaw",
    "select",
    "self",
    "selfish",
    "sell",
    "send",
    "sense",
    "sent",
    "sentence",
    "separate",
    "September",
    "servant",
    "serve",
    "service",
    "set",
    "setting",
    "settle",
    "settlement",
    "seven",
    "seventeen",
    "seventh",
    "seventy",
    "several",
    "sew",
    "shade",
    "shadow",
    "shady",
    "shake",
    "shaker",
    "shaking",
    "shall",
    "shame",
    "shan't",
    "shape",
    "share",
    "sharp",
    "shave",
    "she",
    "she'd",
    "she'll",
    "she's",
    "shear",
    "shears",
    "shed",
    "sheep",
    "sheet",
    "shelf",
    "shell",
    "shepherd",
    "shine",
    "shining",
    "shiny",
    "ship",
    "shirt",
    "shock",
    "shoe",
    "shoemaker",
    "shone",
    "shook",
    "shoot",
    "shop",
    "shopping",
    "shore",
    "short",
    "shot",
    "should",
    "shoulder",
    "shouldn't",
    "shout",
    "shovel",
    "show",
    "shower",
    "shut",
    "shy",
    "sick",
    "sickness",
    "side",
    "sidewalk",
    "sideways",
    "sigh",
    "sight",
    "sign",
    "silence",
    "silent",
    "silk",
    "sill",
    "silly",
    "silver",
    "simple",
    "sin",
    "since",
    "sing",
    "singer",
    "single",
    "sink",
    "sip",
    "sir",
    "sis",
    "sissy",
    "sister",
    "sit",
    "sitting",
    "six",
    "sixteen",
    "sixth",
    "sixty",
    "size",
    "skate",
    "skater",
    "ski",
    "skin",
    "skip",
    "skirt",
    "sky",
    "slam",
    "slap",
    "slate",
    "slave",
    "sled",
    "sleep",
    "sleepy",
    "sleeve",
    "sleigh",
    "slept",
    "slice",
    "slid",
    "slide",
    "sling",
    "slip",
    "slipped",
    "slipper",
    "slippery",
    "slit",
    "slow",
    "slowly",
    "sly",
    "smack",
    "small",
    "smart",
    "smell",
    "smile",
    "smoke",
    "smooth",
    "snail",
    "snake",
    "snap",
    "snapping",
    "sneeze",
    "snow",
    "snowy",
    "snowball",
    "snowflake",
    "snuff",
    "snug",
    "so",
    "soak",
    "soap",
    "sob",
    "socks",
    "sod",
    "soda",
    "sofa",
    "soft",
    "soil",
    "sold",
    "soldier",
    "sole",
    "some",
    "somebody",
    "somehow",
    "someone",
    "something",
    "sometime",
    "sometimes",
    "somewhere",
    "son",
    "song",
    "soon",
    "sore",
    "sorrow",
    "sorry",
    "sort",
    "soul",
    "sound",
    "soup",
    "sour",
    "south",
    "southern",
    "space",
    "spade",
    "spank",
    "sparrow",
    "speak",
    "speaker",
    "spear",
    "speech",
    "speed",
    "spell",
    "spelling",
    "spend",
    "spent",
    "spider",
    "spike",
    "spill",
    "spin",
    "spinach",
    "spirit",
    "spit",
    "splash",
    "spoil",
    "spoke",
    "spook",
    "spoon",
    "sport",
    "spot",
    "spread",
    "spring",
    "springtime",
    "sprinkle",
    "square",
    "squash",
    "squeak",
    "squeeze",
    "squirrel",
    "stable",
    "stack",
    "stage",
    "stair",
    "stall",
    "stamp",
    "stand",
    "star",
    "stare",
    "start",
    "starve",
    "state",
    "station",
    "stay",
    "steak",
    "steal",
    "steam",
    "steamboat",
    "steamer",
    "steel",
    "steep",
    "steeple",
    "steer",
    "stem",
    "step",
    "stepping",
    "stick",
    "sticky",
    "stiff",
    "still",
    "stillness",
    "sting",
    "stir",
    "stitch",
    "stock",
    "stocking",
    "stole",
    "stone",
    "stood",
    "stool",
    "stoop",
    "stop",
    "stopped",
    "stopping",
    "store",
    "stork",
    "stories",
    "storm",
    "stormy",
    "story",
    "stove",
    "straight",
    "strange",
    "stranger",
    "strap",
    "straw",
    "strawberry",
    "stream",
    "street",
    "stretch",
    "string",
    "strip",
    "stripes",
    "strong",
    "stuck",
    "study",
    "stuff",
    "stump",
    "stung",
    "subject",
    "such",
    "suck",
    "sudden",
    "suffer",
    "sugar",
    "suit",
    "sum",
    "summer",
    "sun",
    "Sunday",
    "sunflower",
    "sung",
    "sunk",
    "sunlight",
    "sunny",
    "sunrise",
    "sunset",
    "sunshine",
    "supper",
    "suppose",
    "sure",
    "surely",
    "surface",
    "surprise",
    "swallow",
    "swam",
    "swamp",
    "swan",
    "swat",
    "swear",
    "sweat",
    "sweater",
    "sweep",
    "sweet",
    "sweetness",
    "sweetheart",
    "swell",
    "swept",
    "swift",
    "swim",
    "swimming",
    "swing",
    "switch",
    "sword",
    "swore",
    "table",
    "tablecloth",
    "tablespoon",
    "tablet",
    "tack",
    "tag",
    "tail",
    "tailor",
    "take",
    "taken",
    "taking",
    "tale",
    "talk",
    "talker",
    "tall",
    "tame",
    "tan",
    "tank",
    "tap",
    "tape",
    "tar",
    "tardy",
    "task",
    "taste",
    "taught",
    "tax",
    "tea",
    "teach",
    "teacher",
    "team",
    "tear",
    "tease",
    "teaspoon",
    "teeth",
    "telephone",
    "tell",
    "temper",
    "ten",
    "tennis",
    "tent",
    "term",
    "terrible",
    "test",
    "than",
    "thank",
    "thanks",
    "thankful",
    "Thanksgiving",
    "that",
    "that's",
    "the",
    "theater",
    "thee",
    "their",
    "them",
    "then",
    "there",
    "these",
    "they",
    "they'd",
    "they'll",
    "they're",
    "they've",
    "thick",
    "thief",
    "thimble",
    "thin",
    "thing",
    "think",
    "third",
    "thirsty",
    "thirteen",
    "thirty",
    "this",
    "thorn",
    "those",
    "though",
    "thought",
    "thousand",
    "thread",
    "three",
    "threw",
    "throat",
    "throne",
    "through",
    "throw",
    "thrown",
    "thumb",
    "thunder",
    "Thursday",
    "thy",
    "tick",
    "ticket",
    "tickle",
    "tie",
    "tiger",
    "tight",
    "till",
    "time",
    "tin",
    "tinkle",
    "tiny",
    "tip",
    "tiptoe",
    "tire",
    "tired",
    "title",
    "to",
    "toad",
    "toadstool",
    "toast",
    "tobacco",
    "today",
    "toe",
    "together",
    "toilet",
    "told",
    "tomato",
    "tomorrow",
    "ton",
    "tone",
    "tongue",
    "tonight",
    "too",
    "took",
    "tool",
    "toot",
    "tooth",
    "toothbrush",
    "toothpick",
    "top",
    "tore",
    "torn",
    "toss",
    "touch",
    "tow",
    "toward",
    "towards",
    "towel",
    "tower",
    "town",
    "toy",
    "trace",
    "track",
    "trade",
    "train",
    "tramp",
    "trap",
    "tray",
    "treasure",
    "treat",
    "tree",
    "trick",
    "tricycle",
    "tried",
    "trim",
    "trip",
    "trolley",
    "trouble",
    "truck",
    "true",
    "truly",
    "trunk",
    "trust",
    "truth",
    "try",
    "tub",
    "Tuesday",
    "tug",
    "tulip",
    "tumble",
    "tune",
    "tunnel",
    "turkey",
    "turn",
    "turtle",
    "twelve",
    "twenty",
    "twice",
    "twig",
    "twin",
    "two",
    "ugly",
    "umbrella",
    "uncle",
    "under",
    "understand",
    "underwear",
    "undress",
    "unfair",
    "unfinished",
    "unfold",
    "unfriendly",
    "unhappy",
    "unhurt",
    "uniform",
    "United",
    "States",
    "unkind",
    "unknown",
    "unless",
    "unpleasant",
    "until",
    "unwilling",
    "up",
    "upon",
    "upper",
    "upset",
    "upside",
    "upstairs",
    "uptown",
    "upward",
    "us",
    "use",
    "used",
    "useful",
    "valentine",
    "valley",
    "valuable",
    "value",
    "vase",
    "vegetable",
    "velvet",
    "very",
    "vessel",
    "victory",
    "view",
    "village",
    "vine",
    "violet",
    "visit",
    "visitor",
    "voice",
    "vote",
    "wag",
    "wagon",
    "waist",
    "wait",
    "wake",
    "waken",
    "walk",
    "wall",
    "walnut",
    "want",
    "war",
    "warm",
    "warn",
    "was",
    "wash",
    "washer",
    "washtub",
    "wasn't",
    "waste",
    "watch",
    "watchman",
    "water",
    "watermelon",
    "waterproof",
    "wave",
    "wax",
    "way",
    "wayside",
    "we",
    "weak",
    "weakness",
    "weaken",
    "wealth",
    "weapon",
    "wear",
    "weary",
    "weather",
    "weave",
    "web",
    "we'd",
    "wedding",
    "Wednesday",
    "wee",
    "weed",
    "week",
    "we'll",
    "weep",
    "weigh",
    "welcome",
    "well",
    "went",
    "were",
    "we're",
    "west",
    "western",
    "wet",
    "we've",
    "whale",
    "what",
    "what's",
    "wheat",
    "wheel",
    "when",
    "whenever",
    "where",
    "which",
    "while",
    "whip",
    "whipped",
    "whirl",
    "whisky",
    "whiskey",
    "whisper",
    "whistle",
    "white",
    "who",
    "who'd",
    "whole",
    "who'll",
    "whom",
    "who's",
    "whose",
    "why",
    "wicked",
    "wide",
    "wife",
    "wiggle",
    "wild",
    "wildcat",
    "will",
    "willing",
    "willow",
    "win",
    "wind",
    "windy",
    "windmill",
    "window",
    "wine",
    "wing",
    "wink",
    "winner",
    "winter",
    "wipe",
    "wire",
    "wise",
    "wish",
    "wit",
    "witch",
    "with",
    "without",
    "woke",
    "wolf",
    "woman",
    "women",
    "won",
    "wonder",
    "wonderful",
    "won't",
    "wood",
    "wooden",
    "woodpecker",
    "woods",
    "wool",
    "woolen",
    "word",
    "wore",
    "work",
    "worker",
    "workman",
    "world",
    "worm",
    "worn",
    "worry",
    "worse",
    "worst",
    "worth",
    "would",
    "wouldn't",
    "wound",
    "wove",
    "wrap",
    "wrapped",
    "wreck",
    "wren",
    "wring",
    "write",
    "writing",
    "written",
    "wrong",
    "wrote",
    "wrung",
    "yard",
    "yarn",
    "year",
    "yell",
    "yellow",
    "yes",
    "yesterday",
    "yet",
    "yolk",
    "yonder",
    "you",
    "you'd",
    "you'll",
    "young",
    "youngster",
    "your",
    "yours",
    "you're",
    "yourself",
    "yourselves",
    "youth",
    "you've",
)

In [None]:
def syllapy_count(word: str) -> int:
    """Returns number of syllables in a word.

    The word is stripped of surrounding whitespace and punctuation and
    lower-cased before counting. Known words (including the numerals listed in
    ``WORD_DICT``) are answered from the dictionary; everything else falls back
    to the vowel-group heuristic in ``_syllables``.

    :param word: the word to count syllables for
    :return: the number of syllables in the word, or 0 when ``word`` is not a
        string, is empty after normalisation, or contains a digit and is not a
        ``WORD_DICT`` entry
    :rtype: int
    """
    # Explicit type guard instead of a blanket ``except AttributeError`` so that
    # genuine errors raised inside the helpers are not silently turned into 0.
    if not isinstance(word, str):
        return 0
    word = word.strip().lower().strip(punctuation)
    if not word:
        return 0
    # Consult the dictionary before rejecting digits: WORD_DICT carries entries
    # for numerals such as "7" or "100" that a digit check placed first would
    # make unreachable.
    if word in WORD_DICT:
        return WORD_DICT[word]
    if _contains_numbers(word):
        return 0
    return _syllables(word)

In [None]:
def _syllables(word: str) -> int:
    syllable_count = 0
    vowels = "aeiouy"
    if word[0] in vowels:
        syllable_count += 1
    for index in range(1, len(word)):
        if word[index] in vowels and word[index - 1] not in vowels:
            syllable_count += 1
    if word.endswith("e"):
        syllable_count -= 1
    if word.endswith("le") and len(word) > 2 and word[-3] not in vowels:
        syllable_count += 1
    if syllable_count == 0:
        syllable_count += 1
    return syllable_count

In [None]:
def _contains_numbers(word: str) -> bool:
    """Tell whether the ``NUMBERS`` pattern matches anywhere inside ``word``."""
    return NUMBERS.search(word) is not None

In [None]:
class Readability:
    """spaCy v2.0 pipeline component for calculating readability scores of text.

    Provides scores for Flesch-Kincaid grade level, Flesch-Kincaid reading ease,
    Dale-Chall, SMOG, Coleman-Liau, the Automated Readability Index and FORCAST.
    Each score is registered as a getter-based ``Doc`` extension, so it is
    computed when the matching ``doc._.<name>`` attribute is read; the component
    call itself returns the ``Doc`` unchanged.

    A token counts as a "word" when it is not punctuation and its text contains
    no apostrophe (the same filter ``_get_num_words`` applies).

    USAGE:
        >>> import spacy
        >>> from spacy_readability import Readability
        >>> nlp = spacy.load('en')
        >>> read = Readability()
        >>> nlp.add_pipe(read, last=True)
        >>> doc = nlp("I am some really difficult text. I use obnoxiously large words.")
        >>> print(doc._.flesch_kincaid_grade_level)
        >>> print(doc._.flesch_kincaid_reading_ease)
        >>> print(doc._.dale_chall)
        >>> print(doc._.smog)
        >>> print(doc._.coleman_liau_index)
        >>> print(doc._.automated_readability_index)
        >>> print(doc._.forcast)
    """

    name = "readability"

    def __init__(self):
        """Initialise the pipeline component.

        Registers one getter extension per score on ``Doc``. Extensions that
        already exist are left untouched, so creating the component twice does
        not raise.
        """
        extensions = (
            ("flesch_kincaid_grade_level", self.fk_grade),
            ("flesch_kincaid_reading_ease", self.fk_ease),
            ("dale_chall", self.dale_chall),
            ("smog", self.smog),
            ("coleman_liau_index", self.coleman_liau),
            ("automated_readability_index", self.ari),
            ("forcast", self.forcast),
        )
        for extension_name, getter in extensions:
            if not Doc.has_extension(extension_name):
                Doc.set_extension(extension_name, getter=getter)

    def __call__(self, doc):
        """Apply the pipeline component to a `Doc` object.
        doc (Doc): The `Doc` returned by the previous pipeline component.
        RETURNS (Doc): The modified `Doc` object.
        """
        return doc

    @staticmethod
    def _iter_words(doc):
        """Yield the tokens of ``doc`` that count as words (see class docstring)."""
        return (
            token for token in doc if not token.is_punct and "'" not in token.text
        )

    def _familiar_words(self):
        """Return the Dale-Chall familiar-word list as a lower-cased frozenset.

        Tokens are lower-cased before lookup, so the list has to be lower-cased
        as well; otherwise capitalised entries (e.g. "Friday") can never match.
        The set is built on first use and cached on the instance.
        """
        cached = getattr(self, "_familiar_words_cache", None)
        if cached is None:
            cached = frozenset(entry.lower() for entry in DALE_CHALL_WORDS)
            self._familiar_words_cache = cached
        return cached

    def fk_grade(self, doc):
        """Returns the Flesch-Kincaid grade for the document.

        Computed as ``11.8 * syllables/words + 0.39 * words/sentences - 15.59``.
        Returns 0 when the document has no sentences, words or syllables.
        """
        num_sentences = _get_num_sentences(doc)
        num_words = _get_num_words(doc)
        num_syllables = _get_num_syllables(doc)
        if num_sentences == 0 or num_words == 0 or num_syllables == 0:
            return 0
        return (
            (11.8 * num_syllables / num_words)
            + (0.39 * num_words / num_sentences)
            - 15.59
        )

    def fk_ease(self, doc):
        """Returns the Flesch-Kincaid Reading Ease score for the document.

        Computed as ``206.835 - 1.015 * words/sentences - 84.6 * syllables/words``.
        Returns 0 when the document has no sentences, words or syllables.
        """
        num_sentences = _get_num_sentences(doc)
        num_words = _get_num_words(doc)
        num_syllables = _get_num_syllables(doc)
        if num_sentences == 0 or num_words == 0 or num_syllables == 0:
            return 0
        words_per_sent = num_words / num_sentences
        syllables_per_word = num_syllables / num_words
        return 206.835 - (1.015 * words_per_sent) - (84.6 * syllables_per_word)

    def dale_chall(self, doc):
        """Returns the Dale-Chall score for the document.

        A word is "difficult" when neither its lower-cased text nor its
        lower-cased lemma is in the familiar-word list. Returns 0 when the
        document has no sentences or no words.
        """
        num_sentences = _get_num_sentences(doc)
        num_words = _get_num_words(doc)
        if num_sentences == 0 or num_words == 0:
            return 0

        familiar = self._familiar_words()
        diff_words_count = sum(
            1
            for word in self._iter_words(doc)
            if word.text.lower() not in familiar
            and word.lemma_.lower() not in familiar
        )

        percent_difficult_words = 100 * diff_words_count / num_words
        average_sentence_length = num_words / num_sentences
        grade = 0.1579 * percent_difficult_words + 0.0496 * average_sentence_length

        # If more than 5% of the words are difficult, apply the adjustment term.
        if percent_difficult_words > 5:
            grade += 3.6365
        return grade

    def smog(self, doc):
        """Returns the SMOG score for the document. If there are less than 30 sentences then
        it returns 0 because the formula significantly loses accuracy on small corpora.

        The formula uses the number of polysyllabic words (three or more
        syllables), normalised to a 30-sentence sample.
        """
        num_sentences = _get_num_sentences(doc)
        num_words = _get_num_words(doc)
        if num_sentences < 30 or num_words == 0:
            return 0
        # Count the polysyllabic words themselves; summing their syllables
        # (what _get_num_syllables(min_syllables=3) yields) inflates the score.
        num_poly = sum(
            1 for word in self._iter_words(doc) if syllapy_count(word.text) >= 3
        )
        return 1.0430 * np.sqrt(num_poly * 30 / num_sentences) + 3.1291

    def coleman_liau(self, doc):
        """Returns the Coleman-Liau index for the document.

        Returns 0 when the document has no words or no countable characters.
        """
        num_words = _get_num_words(doc)
        if num_words <= 0:
            return 0

        num_sentences = _get_num_sentences(doc)
        # Character count over non-punctuation, non-digit tokens.
        letter_count = sum(
            len(token) for token in doc if not token.is_punct and not token.is_digit
        )
        if letter_count <= 0:
            return 0
        letters_to_words = letter_count / num_words * 100
        sent_to_words = num_sentences / num_words * 100
        return 0.0588 * letters_to_words - 0.296 * sent_to_words - 15.8

    def ari(self, doc):
        """Returns the Automated Readability Index for the document.

        Returns 0 when the document has no words or no sentences.
        """
        num_sentences = _get_num_sentences(doc)
        num_words = _get_num_words(doc)
        # Guard both denominators, like the other scores do.
        if num_words <= 0 or num_sentences <= 0:
            return 0

        letter_count = sum(len(token) for token in doc if not token.is_punct)
        letter_to_words = letter_count / num_words
        words_to_sents = num_words / num_sentences
        return 4.71 * letter_to_words + 0.5 * words_to_sents - 21.43

    def forcast(self, doc):
        """Returns the Forcast score for the document.

        The score is ``20 - N / 10`` where ``N`` is the number of one-syllable
        words among the first 150 words. Returns 0 for documents with fewer
        than 150 words.
        """
        words = list(self._iter_words(doc))
        if len(words) < 150:
            return 0

        # Sample the first 150 *words*; indexing the raw doc would let
        # punctuation tokens take up slots in the sample.
        mono_syllabic = sum(
            1 for word in words[:150] if syllapy_count(word.text) == 1
        )
        return 20 - (mono_syllabic / 10)

In [None]:
def _get_num_sentences(doc: Doc):
    """Count the sentences that the document yields through ``doc.sents``."""
    return sum(1 for _ in doc.sents)

In [None]:
def _get_num_words(doc: Doc):
    """Count the word tokens of the document.

    A token counts as a word unless it is punctuation or its text contains an
    apostrophe (this drops contraction pieces such as "'s" or "n't").
    """
    return sum(1 for token in doc if not (token.is_punct or "'" in token.text))

In [None]:
def _get_num_syllables(doc: Doc, min_syllables: int = 1):
    """Return the total number of syllables in the words of the document.

    Punctuation and tokens containing an apostrophe are skipped. Only words with at
    least ``min_syllables`` syllables contribute, and each of them contributes its
    full syllable count (not 1).
    """
    total = 0
    for token in doc:
        if token.is_punct or "'" in token.text:
            continue
        syllables = syllapy_count(token.text)
        if syllables >= min_syllables:
            total += syllables
    return total

In [None]:
class ReadabilityTextVectorizer(BaseEstimator, TransformerMixin):
    """Convert raw texts into a dense matrix of readability features.

    Every row of the produced matrix is the concatenation of

    * quantitative characteristics computed with spaCy: paragraph, sentence, word,
      character, punctuation and out-of-vocabulary counts, mean sentence and word
      lengths, the mean/min/max over paragraphs of seven readability scores, and
      the mean word vector of the text;
    * TF-IDF weights of 1- to 5-grams over three tag sequences of the text
      (`token.cluster`, `token.pos_` and `token.dep_`), reduced by feature selection.

    Parameters
    ----------
    verbose : bool
        If True, progress information is printed.
    spacy_model : str
        Name of the spaCy model, loaded on first use.

    Attributes
    ----------
    feature_names_ : List[str]
        Names of the output columns, in output order.
    brown_vectorizer_, morpho_vectorizer_, syntax_vectorizer_ : TfidfVectorizer
        Fitted vectorizers of the three tag sequences.
    unsupervised_feature_selector_ : VarianceThreshold
        Drops TF-IDF columns with variance not above 1e-5.
    supervised_feature_selector_ : SelectKBest
        Present only if `fit` received targets and more than 1500 TF-IDF columns
        survived the unsupervised selection.
    """

    def __init__(self, verbose: bool, spacy_model: str = 'en_core_web_lg'):
        super().__init__()
        self.verbose = verbose
        self.spacy_model = spacy_model

    @staticmethod
    def _new_tfidf_vectorizer() -> TfidfVectorizer:
        """Create the TF-IDF vectorizer shared by all tag sequences (1- to 5-grams)."""
        return TfidfVectorizer(
            analyzer='word', lowercase=False, sublinear_tf=True,
            ngram_range=(1, 5),
            token_pattern=r'\S+'
        )

    @staticmethod
    def _vocabulary_names(vectorizer: TfidfVectorizer) -> List[str]:
        """Return the n-gram names of a fitted vectorizer on old and new scikit-learn."""
        # `get_feature_names` no longer exists in recent scikit-learn releases,
        # `get_feature_names_out` does not exist in old ones.
        if hasattr(vectorizer, 'get_feature_names_out'):
            return list(vectorizer.get_feature_names_out())
        return list(vectorizer.get_feature_names())

    def fit(self, X, y=None):
        """Fit the TF-IDF vectorizers and the feature selectors.

        Parameters
        ----------
        X : sequence of str
            Training texts.
        y : array-like, optional
            Targets aligned with `X`. When given (and more than 1500 TF-IDF features
            remain), the 1500 best features by `f_regression` are kept.

        Returns
        -------
        self
        """
        if self.verbose:
            print(f'Number of texts for training is {len(X)}.')
        # Forget the optional selector of a previous fit: `transform` applies it
        # whenever the attribute exists, so a stale one would corrupt the output.
        if hasattr(self, 'supervised_feature_selector_'):
            del self.supervised_feature_selector_
        brown, morpho, syntax, quantitative_characteristics, colnames = self._process_texts(X)
        assert len(quantitative_characteristics.shape) == 2
        assert quantitative_characteristics.shape[0] == len(brown)
        assert len(brown) == len(morpho)
        assert len(syntax) == len(morpho)
        # The word-vector columns must form one contiguous run at the very end.
        vector_columns = list(filter(lambda idx: colnames[idx].startswith('vector'),
                                     range(len(colnames))))
        assert vector_columns[-1] == (len(colnames) - 1)
        assert len(vector_columns) == (vector_columns[-1] - vector_columns[0] + 1)
        if self.verbose:
            print(f'Number of samples for training is {len(brown)}.')
        self.feature_names_ = colnames
        assert len(self.feature_names_) == quantitative_characteristics.shape[1]
        self.brown_vectorizer_ = self._new_tfidf_vectorizer().fit(brown)
        self.morpho_vectorizer_ = self._new_tfidf_vectorizer().fit(morpho)
        self.syntax_vectorizer_ = self._new_tfidf_vectorizer().fit(syntax)
        X_res = hstack(
            (
                self.brown_vectorizer_.transform(brown),
                self.morpho_vectorizer_.transform(morpho),
                self.syntax_vectorizer_.transform(syntax)
            )
        )
        # Same order as the blocks of the stacked matrix above.
        united_feature_names = (
            self._vocabulary_names(self.brown_vectorizer_)
            + self._vocabulary_names(self.morpho_vectorizer_)
            + self._vocabulary_names(self.syntax_vectorizer_)
        )
        del brown, morpho, syntax, quantitative_characteristics
        if self.verbose:
            print(f'Source TF-IDF vector size is {X_res.shape[1]}.')
        self.unsupervised_feature_selector_ = VarianceThreshold(threshold=1e-5).fit(X_res)
        X_res = self.unsupervised_feature_selector_.transform(X_res)
        united_feature_names = [united_feature_names[idx]
                                for idx in self.unsupervised_feature_selector_.get_support(True)]
        if self.verbose:
            print(f'TF-IDF vector size after unsupervised feature selection is {X_res.shape[1]}.')
        if (y is not None) and (X_res.shape[1] > 1500):
            self.supervised_feature_selector_ = SelectKBest(
                score_func=f_regression,
                k=1500
            ).fit(X_res, y)
            X_res = self.supervised_feature_selector_.transform(X_res)
            if self.verbose:
                print(f'TF-IDF vector size after supervised feature selection is {X_res.shape[1]}.')
            self.feature_names_ += [united_feature_names[idx]
                                    for idx in self.supervised_feature_selector_.get_support(True)]
        else:
            self.feature_names_ += united_feature_names
        del X_res
        return self

    def transform(self, X):
        """Compute the feature matrix of the texts.

        Parameters
        ----------
        X : sequence of str
            Texts to be vectorized.

        Returns
        -------
        np.ndarray
            Dense matrix with one row per text and one column per name in
            `feature_names_`: quantitative characteristics first, selected TF-IDF
            features after them.
        """
        check_is_fitted(self, ['feature_names_',
                               'brown_vectorizer_', 'morpho_vectorizer_', 'syntax_vectorizer_',
                               'unsupervised_feature_selector_'])
        brown, morpho, syntax, quantitative_characteristics, _ = self._process_texts(X)
        assert len(quantitative_characteristics.shape) == 2
        assert quantitative_characteristics.shape[0] == len(brown)
        assert len(brown) == len(morpho)
        assert len(syntax) == len(morpho)
        X_res = hstack(
            (
                self.brown_vectorizer_.transform(brown),
                self.morpho_vectorizer_.transform(morpho),
                self.syntax_vectorizer_.transform(syntax)
            )
        )
        del brown, morpho, syntax
        X_res = self.unsupervised_feature_selector_.transform(X_res)
        if hasattr(self, 'supervised_feature_selector_'):
            X_res = self.supervised_feature_selector_.transform(X_res)
        # NOTE(review): nothing in this class assigns `svd_`; the branch only takes
        # effect if a fitted reducer is attached to the instance from outside.
        if hasattr(self, 'svd_'):
            X_res = self.svd_.transform(X_res)
        else:
            X_res = X_res.toarray()
        assert len(self.feature_names_) == (X_res.shape[1] + quantitative_characteristics.shape[1])
        assert quantitative_characteristics.shape[0] == X_res.shape[0]
        return np.hstack((quantitative_characteristics, X_res))

    def fit_transform(self, X, y=None, **fit_params):
        """Fit on `X` (and `y`) and return the feature matrix of `X`."""
        return self.fit(X, y).transform(X)

    def _process_texts(self, X) -> Tuple[List[str], List[str], List[str], np.ndarray,
                                         List[str]]:
        """Run spaCy over the texts and collect tag sequences and numeric features.

        Every text is split into paragraphs (non-empty stripped lines) and every
        paragraph is processed as a separate spaCy document.

        Returns
        -------
        tuple
            ``(brown_clusters, pos_tags, dependency_labels, quantitative, names)``:
            three lists with one space-separated tag sequence per text (wrapped in
            ``<BOS>`` / ``<EOS>``), the matrix of quantitative characteristics with
            one row per text, and the names of the columns of that matrix.
        """
        if not hasattr(self, 'spacy_nlp_'):
            # The pipeline is created on first use and cached on the instance.
            self.spacy_nlp_ = spacy.load(self.spacy_model)
            self.spacy_nlp_.remove_pipe("ner")
            self.spacy_nlp_.add_pipe(Readability(), last=True)
        pos_tags_of_texts = []
        syntactical_structures = []
        brown_clusters = []
        par_counts = []
        sent_counts = []
        word_counts = []
        char_counts = []
        punct_count = []
        oov_counts = []
        word_lengths = []
        sent_lengths = []
        readability_characteristics = []
        text_vectors = []
        vector_size = 0
        for text in (tqdm(X) if self.verbose else X):
            char_counts.append(len(text.strip()))
            paragraphs = list(filter(
                lambda it2: len(it2) > 0,
                map(lambda it1: it1.strip(), text.split('\n'))
            ))
            par_counts.append(len(paragraphs))
            new_pos_tags = ['<BOS>']
            new_syntactical_struct = ['<BOS>']
            new_brown_cluster_list = ['<BOS>']
            instant_sent_counts = 0
            instant_word_counts = 0
            instant_word_lengths = 0
            instant_sent_lengths = 0
            instant_punct_count = 0
            instant_oov = 0
            instant_vectors = []
            instant_readability = []
            for doc in self.spacy_nlp_.pipe(paragraphs, n_process=-1, batch_size=4):
                sent_start_idx = -1
                instant_readability.append([
                    doc._.flesch_kincaid_grade_level,
                    doc._.flesch_kincaid_reading_ease,
                    doc._.dale_chall,
                    doc._.smog,
                    doc._.coleman_liau_index,
                    doc._.automated_readability_index,
                    doc._.forcast
                ])
                for token in doc:
                    token_text = token.text.strip()
                    if token.is_sent_start:
                        # A new sentence begins here, so the previous one (if any)
                        # is complete and its length in tokens is known.
                        if sent_start_idx >= 0:
                            instant_sent_counts += 1
                            instant_sent_lengths += (token.i - sent_start_idx)
                        sent_start_idx = token.i
                    if len(token_text) > 0:
                        # Whitespace-only tokens are excluded from counts and sequences.
                        instant_word_lengths += len(token_text)
                        instant_word_counts += 1
                        new_pos_tags.append(token.pos_)
                        new_syntactical_struct.append(token.dep_)
                        new_brown_cluster_list.append(str(token.cluster))
                        if token.is_punct:
                            instant_punct_count += 1
                    if token.has_vector:
                        if vector_size > 0:
                            assert vector_size == token.vector.shape[0]
                        else:
                            vector_size = token.vector.shape[0]
                            if self.verbose:
                                print(f'Vector size is {token.vector.shape}.')
                        instant_vectors.append(
                            np.reshape(token.vector, (1, vector_size))
                        )
                    else:
                        instant_oov += 1
                # Close the last sentence of the paragraph.
                if sent_start_idx >= 0:
                    instant_sent_counts += 1
                    instant_sent_lengths += (len(doc) - sent_start_idx)
            assert len(instant_readability) > 0
            # Aggregate the seven readability scores over the paragraphs of the text.
            instant_readability = np.array(instant_readability, dtype=np.float64)
            readability_characteristics.append(
                np.hstack((
                    np.mean(instant_readability, axis=0).reshape((1, 7)),
                    np.min(instant_readability, axis=0).reshape((1, 7)),
                    np.max(instant_readability, axis=0).reshape((1, 7))
                ))
            )
            new_pos_tags.append('<EOS>')
            new_syntactical_struct.append('<EOS>')
            new_brown_cluster_list.append('<EOS>')
            punct_count.append(instant_punct_count)
            sent_counts.append(instant_sent_counts)
            word_counts.append(instant_word_counts)
            sent_lengths.append(instant_sent_lengths / float(instant_sent_counts))
            word_lengths.append(instant_word_lengths / float(instant_word_counts))
            oov_counts.append(instant_oov)
            pos_tags_of_texts.append(' '.join(new_pos_tags).strip())
            syntactical_structures.append(' '.join(new_syntactical_struct).strip())
            if len(instant_vectors) > 0:
                text_vectors.append(np.mean(np.vstack(instant_vectors), axis=0))
            else:
                # The vector size may still be unknown; the gap is filled after the loop.
                text_vectors.append(None)
            brown_clusters.append(' '.join(new_brown_cluster_list).strip())
        assert vector_size > 0
        # Texts without any known word get a zero vector.
        text_vectors = np.vstack([
            np.zeros((1, vector_size)) if cur_vector is None
            else np.reshape(cur_vector, (1, vector_size))
            for cur_vector in text_vectors
        ])
        readability_characteristics = np.vstack(readability_characteristics)
        n_texts = len(brown_clusters)

        def as_column(values) -> np.ndarray:
            # One float64 column with a value per text.
            return np.array(values, dtype=np.float64).reshape((n_texts, 1))

        X = np.hstack(
            (
                as_column(par_counts),
                as_column(sent_counts),
                as_column(word_counts),
                as_column(char_counts),
                as_column(punct_count),
                as_column(oov_counts),
                as_column(sent_lengths),
                as_column(word_lengths),
                readability_characteristics,
                text_vectors
            )
        )
        # The names follow the order of the stacked blocks above.
        col_names = ['paragraphs', 'sentences', 'words', 'characters', 'punctuation', 'oov',
                     'mean sentence length', 'mean word length']
        readability_names = ['flesch_kincaid_grade_level', 'flesch_kincaid_reading_ease',
                             'dale_chall', 'smog', 'coleman_liau_index',
                             'automated_readability_index', 'forcast']
        col_names += [cur_name + '_mean' for cur_name in readability_names]
        col_names += [cur_name + '_min' for cur_name in readability_names]
        col_names += [cur_name + '_max' for cur_name in readability_names]
        col_names += [f'vector{idx}' for idx in range(vector_size)]
        return brown_clusters, pos_tags_of_texts, syntactical_structures, X, col_names

In [None]:
def load_data_for_training(fname: str) -> Dict[str, Tuple[str, float, float]]:
    """Load the training set from a CSV file.

    The first non-empty row is the header; it must contain the columns "excerpt",
    "id", "target" and "standard_error" (in any order, other columns are ignored).
    Every text is normalized: carriage returns become line feeds, each paragraph
    (line) is stripped and empty paragraphs are removed.

    A row whose standard error is not a positive number is skipped with a warning.
    Progress is printed every 100 rows and once more at the end.

    Args:
        fname: Path to the UTF-8 encoded CSV file.

    Returns:
        A dictionary that maps a sample ID to the tuple
        ``(text, target, standard_error)``.

    Raises:
        ValueError: If a required column is missing, a row is too short, a sample
            ID has surrounding whitespace or is repeated, a text is empty or
            repeated (case-insensitively), or a target is not a number.
    """
    required_columns = ('excerpt', 'id', 'target', 'standard_error')
    column_indices = dict()
    line_idx = 1
    data = dict()
    set_of_texts = set()
    # The csv module expects a text file opened with newline='': the reader then
    # sees the original line ends and only they can terminate a record.
    with open(fname, mode='r', encoding='utf-8', newline='') as fp:
        data_reader = csv.reader(fp, quotechar='"', delimiter=',')
        for row in data_reader:
            if len(row) > 0:
                err_msg = f'File {fname}: line {line_idx} is wrong!'
                if len(column_indices) == 0:
                    for column_name in required_columns:
                        # Index 0 is a valid position, so test membership instead
                        # of comparing a found index with zero.
                        if column_name not in row:
                            raise ValueError(
                                f'{err_msg} Field "{column_name}" is not found!'
                            )
                        column_indices[column_name] = row.index(column_name)
                else:
                    if len(row) <= max(column_indices.values()):
                        raise ValueError(err_msg + ' Some fields are absent!')
                    sample_id = row[column_indices['id']]
                    if sample_id != sample_id.strip():
                        raise ValueError(err_msg + f' {sample_id} is wrong sample ID!')
                    if sample_id in data:
                        raise ValueError(f'{err_msg} {sample_id} is not unique sample ID!')
                    text = row[column_indices['excerpt']].strip().replace('\r', '\n')
                    if len(text) == 0:
                        raise ValueError(err_msg + f' Text {sample_id} is empty!')
                    stripped_paragraphs = (cur.strip() for cur in text.split('\n'))
                    text = '\n'.join(cur for cur in stripped_paragraphs if len(cur) > 0)
                    if text.lower() in set_of_texts:
                        raise ValueError(err_msg + f' Text {sample_id} is not unique!')
                    set_of_texts.add(text.lower())
                    raw_target = row[column_indices['target']]
                    try:
                        target_val = float(raw_target)
                    except ValueError:
                        raise ValueError(
                            f'{err_msg} {raw_target} is wrong target for text {sample_id}.'
                        ) from None
                    raw_std = row[column_indices['standard_error']]
                    try:
                        std_val = float(raw_std)
                    except ValueError:
                        std_val = None
                    # `not (x > 0.0)` also rejects NaN.
                    if (std_val is None) or not (std_val > 0.0):
                        warnings.warn(
                            f'{err_msg} {raw_std} is wrong standard error'
                            f' for text {sample_id}.'
                        )
                    else:
                        data[sample_id] = (text, target_val, std_val)
            if line_idx % 100 == 0:
                print(f'{line_idx} lines are processed.')
            line_idx += 1
    if (line_idx - 1) != 0:
        print(f'{line_idx - 1} lines are processed.')
    return data

In [None]:
def mixup(X: np.ndarray, y_mean: np.ndarray, y_std: np.ndarray,
          mixup_coeff: float, n_samples: int) -> Tuple[np.ndarray, np.ndarray]:
    """Generate synthetic samples as fixed-weight blends of random sample pairs.

    For every new sample two source rows are drawn (with replacement) by the
    `random` module. The features are ``(1 - mixup_coeff) * first + mixup_coeff *
    second``; the target is the same blend of two noisy targets, each drawn by
    `np.random` from a normal distribution with the mean and standard deviation of
    the corresponding source row.

    Args:
        X: Feature matrix with one row per source sample.
        y_mean: 1-D array of target means, aligned with the rows of `X`.
        y_std: 1-D array of target standard deviations, aligned with `y_mean`.
        mixup_coeff: Weight of the second sample of a pair, strictly between 0 and 1.
        n_samples: Number of samples to generate.

    Returns:
        The pair ``(X_new, y_new)`` of float64 arrays with `n_samples` rows/items.
    """
    assert 0.0 < mixup_coeff < 1.0
    assert X.ndim == 2
    assert y_mean.ndim == 1
    assert y_std.ndim == 1
    assert X.shape[0] == y_mean.shape[0]
    assert y_mean.shape[0] == y_std.shape[0]
    last_source_idx = X.shape[0] - 1
    first_weight = 1.0 - mixup_coeff
    X_new = np.empty((n_samples, X.shape[1]), dtype=np.float64)
    y_new = np.empty((n_samples,), dtype=np.float64)
    for new_idx in range(n_samples):
        # Keep this order of random draws: it defines the results for a given seed.
        first = random.randint(0, last_source_idx)
        second = random.randint(0, last_source_idx)
        X_new[new_idx] = first_weight * X[first] + mixup_coeff * X[second]
        first_target = np.random.normal(loc=y_mean[first], scale=y_std[first])
        second_target = np.random.normal(loc=y_mean[second], scale=y_std[second])
        y_new[new_idx] = first_weight * first_target + mixup_coeff * second_target
    return X_new, y_new

In [None]:
def load_data_for_testing(fname: str, batch_size: int):
    """Lazily read the test CSV file and yield its texts in batches.

    The first non-empty row is the header; it must contain the columns "excerpt"
    and "id" (in any order, other columns are ignored). Every text is stripped and
    its carriage returns are replaced with line feeds.

    Args:
        fname: Path to the UTF-8 encoded CSV file.
        batch_size: Number of samples per yielded batch (the last batch may be
            smaller).

    Yields:
        Dictionaries that map a sample ID to its text. Every batch is a separate
        dictionary, so a consumer may keep the batches it received.

    Raises:
        ValueError: If a required column is missing, a row is too short, a sample
            ID has surrounding whitespace or is repeated within a batch, or a text
            is empty.
    """
    required_columns = ('excerpt', 'id')
    column_indices = dict()
    line_idx = 1
    data = dict()
    # The csv module expects a text file opened with newline='': the reader then
    # sees the original line ends and only they can terminate a record.
    with open(fname, mode='r', encoding='utf-8', newline='') as fp:
        data_reader = csv.reader(fp, quotechar='"', delimiter=',')
        for row in data_reader:
            if len(row) > 0:
                err_msg = f'File {fname}: line {line_idx} is wrong!'
                if len(column_indices) == 0:
                    for column_name in required_columns:
                        # Index 0 is a valid position, so test membership instead
                        # of comparing a found index with zero.
                        if column_name not in row:
                            raise ValueError(
                                f'{err_msg} Field "{column_name}" is not found!'
                            )
                        column_indices[column_name] = row.index(column_name)
                else:
                    if len(row) <= max(column_indices.values()):
                        raise ValueError(err_msg + ' Some fields are absent!')
                    sample_id = row[column_indices['id']]
                    if sample_id != sample_id.strip():
                        raise ValueError(err_msg + f' {sample_id} is wrong sample ID!')
                    # Only the current batch is remembered, so uniqueness of IDs is
                    # checked within a batch.
                    if sample_id in data:
                        raise ValueError(f'{err_msg} {sample_id} is not unique sample ID!')
                    text = row[column_indices['excerpt']].strip().replace('\r', '\n')
                    if len(text) == 0:
                        raise ValueError(err_msg + f' Text {sample_id} is empty!')
                    data[sample_id] = text
                    if len(data) >= batch_size:
                        yield data
                        # Start a new dictionary instead of clearing the yielded one.
                        data = dict()
            line_idx += 1
    if len(data) > 0:
        yield data

In [None]:
def do_predictions(regressor: catboost.CatBoostRegressor,
                   feature_extractor: ReadabilityTextVectorizer,
                   output_scaler: StandardScaler,
                   data: Union[Dict[str, Tuple[str, float, float]], Dict[str, str]],
                   identifiers: Union[List[str], None]=None) -> List[Tuple[str, float]]:
    """Predict the targets of the selected texts on the original target scale.

    Args:
        regressor: Trained regressor that predicts scaled targets from features.
        feature_extractor: Fitted vectorizer that turns texts into features.
        output_scaler: Fitted scaler whose `inverse_transform` restores the
            original target scale.
        data: Samples by ID; a sample is either a text or a tuple whose first
            item is the text.
        identifiers: IDs of the samples to predict; all IDs of `data` if None.

    Returns:
        List of ``(sample ID, prediction)`` pairs ordered by sample ID.
    """
    identifiers_ = sorted(data.keys() if identifiers is None else identifiers)
    texts = []
    for cur_id in identifiers_:
        sample = data[cur_id]
        texts.append(sample if isinstance(sample, str) else sample[0])
    print(f'Number of texts for prediction is {len(texts)}.')
    features = feature_extractor.transform(texts)
    print(f'Feature matrix size is {features.shape}.')
    n_samples = len(texts)
    assert n_samples == features.shape[0], f'{n_samples} != {features.shape[0]}'
    # The scaler works with a column, the caller gets a flat sequence.
    scaled_column = np.reshape(regressor.predict(features), (n_samples, 1))
    predictions = np.reshape(output_scaler.inverse_transform(scaled_column), (n_samples,))
    return [(cur_id, predictions[idx]) for idx, cur_id in enumerate(identifiers_)]

In [None]:
def print_info_about_data(
    data: Union[Dict[str, str], Dict[str, Tuple[str, float, float]]],
    identifiers: List[str]
):
    """Print the size of a dataset and the content of some of its samples.

    Args:
        data: Samples by ID. A sample is a tuple ``(text, target, standard error)``
            for training data and a plain text for submission data.
        identifiers: IDs of the samples to print. May be empty; then only the
            summary lines are printed.
    """
    # The kind of the dataset is recognized by one of its samples: a requested one
    # if there is any, otherwise an arbitrary one.
    if len(identifiers) > 0:
        probe = data[identifiers[0]]
    else:
        probe = next(iter(data.values()), None)
    for_training = isinstance(probe, tuple)
    if for_training:
        print(f'Number of samples for training is {len(data)}.')
    else:
        print(f'Number of samples for submission is {len(data)}.')
    print('')
    print(f'{len(identifiers)} random samples:')
    for cur_id in identifiers:
        print('')
        print(f'  Sample {cur_id}')
        if for_training:
            print('  Text:')
            print(f'    {data[cur_id][0]}')
            print('  Target:')
            print(f'    {data[cur_id][1]} +- {data[cur_id][2]}')
        else:
            print(f'  {data[cur_id]}')

In [None]:
# Seed both generators used by this notebook: `random` (sample preview, data split,
# pair selection in `mixup`) and `np.random` (target noise in `mixup`).
random.seed(42)
np.random.seed(42)

In [None]:
# Directory with the competition CSV files and directory for the produced submission.
DATA_DIR = '/kaggle/input/commonlitreadabilityprize'
MODEL_DIR = '/kaggle/working'
for checked_dir in (DATA_DIR, MODEL_DIR):
    # Each path is printed with a flag that tells whether the directory exists.
    print(f'{checked_dir} {os.path.isdir(checked_dir)}')

In [None]:
# Path of the training CSV file, printed with a flag telling whether the file exists.
trainset_name = os.path.join(DATA_DIR, 'train.csv')
print(f'{trainset_name} {os.path.isfile(trainset_name)}')

In [None]:
# Path of the test CSV file, printed with a flag telling whether the file exists.
testset_name = os.path.join(DATA_DIR, 'test.csv')
print(f'{testset_name} {os.path.isfile(testset_name)}')

In [None]:
# Path of the submission file to be written; the flag shows whether it already exists.
submission_name = os.path.join(MODEL_DIR, 'submission.csv')
print(f'{submission_name} {os.path.isfile(submission_name)}')

In [None]:
# Load the labeled data as {sample ID: (text, target, standard error)}.
data_for_training = load_data_for_training(trainset_name)
# Sanity check: the split into three parts below needs a reasonably large dataset.
assert len(data_for_training) > 100

In [None]:
# Sort the IDs (so that the seeded sampling is reproducible) and preview three
# randomly chosen samples.
all_IDs = sorted(data_for_training)
selected_IDs_for_training = random.sample(all_IDs, 3)
print_info_about_data(data_for_training, selected_IDs_for_training)

In [None]:
# Shuffle the IDs and cut them into three parts: 70 % for training, 15 % for
# validation and the remainder for the final test.
random.shuffle(all_IDs)
n_train_size = int(round(0.7 * len(all_IDs)))
n_val_size = int(round(0.15 * len(all_IDs)))
validation_end = n_train_size + n_val_size
IDs_for_training = all_IDs[:n_train_size]
IDs_for_validation = all_IDs[n_train_size:validation_end]
IDs_for_final_testing = all_IDs[validation_end:]

In [None]:
# Texts, targets and standard errors of the training part, aligned by position.
X_train = [data_for_training[cur_id][0] for cur_id in IDs_for_training]
y_train_mean = np.array(
    [data_for_training[cur_id][1] for cur_id in IDs_for_training],
    dtype=np.float64
)
y_train_std = np.array(
    [data_for_training[cur_id][2] for cur_id in IDs_for_training],
    dtype=np.float64
)
print(f'Data size for training is {y_train_mean.shape[0]}.')

In [None]:
# Texts and targets of the validation part, aligned by position.
X_val = []
y_val = []
for cur_id in IDs_for_validation:
    # Item 0 of a record is the whole text. Indexing it once more with [0] would
    # keep only the first character, and the validation features used for early
    # stopping would be computed from one-letter "texts".
    X_val.append(data_for_training[cur_id][0])
    y_val.append(data_for_training[cur_id][1])
y_val = np.array(y_val, dtype=np.float64)
print(f'Data size for validation is {y_val.shape[0]}.')

In [None]:
# Standardize the targets. The scaler is fitted on the training targets only and
# works with columns, hence the reshaping to (n, 1) and back to (n,).
label_scaler = StandardScaler().fit(y_train_mean.reshape((-1, 1)))
y_train = label_scaler.transform(y_train_mean.reshape((-1, 1))).reshape((-1,))
y_val = label_scaler.transform(y_val.reshape((-1, 1))).reshape((-1,))

In [None]:
# Fit the feature extractor on the training texts. The scaled targets are passed
# because the extractor may use them for supervised feature selection.
fe = ReadabilityTextVectorizer(
    verbose=True
).fit(X_train, y_train)

In [None]:
# Replace the lists of texts with their feature matrices. Note that the names are
# rebound, so this cell cannot be executed twice in a row.
X_train = fe.transform(X_train)
X_val = fe.transform(X_val)

In [None]:
# Show the feature names and the feature vectors of the first three training texts.
print(fe.feature_names_)
for it in X_train[:3].tolist():
    print(f'{it}\n')

In [None]:
# Generate 100000 synthetic training samples with `mixup`; the second sample of each
# pair gets the weight 0.01. `mixup` receives the unscaled target means, so the
# generated targets are scaled afterwards with the same `label_scaler`.
X_train_aug, y_train_aug = mixup(X=X_train, y_mean=y_train_mean, y_std=y_train_std,
                                 mixup_coeff=0.01, n_samples=100000)
y_train_aug = label_scaler.transform(
    y_train_aug.reshape((y_train_aug.shape[0], 1))
).reshape((y_train_aug.shape[0],))
print(f'Data size for training after augmentation is {X_train_aug.shape[0]}.')

In [None]:
# Mean and standard deviation of the scaled targets of the augmented training set.
print(y_train_aug.mean(), y_train_aug.std())

In [None]:
# Mean and standard deviation of the scaled validation targets, for comparison.
print(y_val.mean(), y_val.std())

In [None]:
# CatBoost pools: the augmented samples for training, the validation part for evaluation.
train_dataset = catboost.Pool(data=X_train_aug, label=y_train_aug)
eval_dataset = catboost.Pool(data=X_val, label=y_val)

In [None]:
# Train the regressor. The validation pool is the evaluation set for early stopping
# (100 rounds) and for keeping the best model (`use_best_model=True`).
boosted_regressor = catboost.CatBoostRegressor(
    eval_metric='RMSE', use_best_model=True, early_stopping_rounds=100,
    thread_count=-1, random_state=42, verbose=True,
).fit(train_dataset, eval_set=eval_dataset, plot=True)
# The pools were built from plain arrays, so the feature names are attached here.
boosted_regressor.set_feature_names(fe.feature_names_)

In [None]:
# Importance of every feature, computed on the validation pool.
importances = boosted_regressor.get_feature_importance(
    data=eval_dataset,
    prettified=False,
    type='PredictionValuesChange'
)
# Pair the values with the feature names and order the pairs by descending
# importance; equally important features are ordered by name.
feature_importances = sorted(
    ((fe.feature_names_[idx], cur_importance)
     for idx, cur_importance in enumerate(importances)),
    key=lambda it: (-it[1], it[0])
)

In [None]:
# The head of the list ordered by descending importance.
print('Top-50 most important features:')
for ft_name, ft_importance in feature_importances[:50]:
    print(f'{ft_name}\t{ft_importance}')

In [None]:
# The tail of the list ordered by descending importance.
print('Top-50 most unimportant features:')
for ft_name, ft_importance in feature_importances[-50:]:
    print(f'{ft_name}\t{ft_importance}')

In [None]:
# Names of the features whose importance exceeds 1e-5. The scan stops at the first
# smaller value, which relies on `feature_importances` being ordered by descending
# importance.
nonzero_features = []
for ft_name, ft_importance in feature_importances:
    if ft_importance <= 1e-5:
        break
    nonzero_features.append(ft_name)
# Alphabetical order for printing.
nonzero_features.sort()
print(f'There are {len(nonzero_features)}.')
print(f'They are: {nonzero_features}.')

In [None]:
# Predict the validation part from its texts; the predictions are returned on the
# original (unscaled) target scale.
pred = do_predictions(
    regressor=boosted_regressor, feature_extractor=fe, output_scaler=label_scaler, 
    data=data_for_training, identifiers=IDs_for_validation
)

In [None]:
# Root mean squared error between the predictions and the true (unscaled) targets
# of the validation part.
error = 0.0
for sample_ID, cur_prediction in pred:
    true_target = data_for_training[sample_ID][1]
    difference = cur_prediction - true_target
    error += difference * difference
error = np.sqrt(error / float(len(IDs_for_validation)))
print(f'RMSE on data for validation = {error}')

In [None]:
# Predict the held-out part, which took no part in training or in early stopping.
pred = do_predictions(
    regressor=boosted_regressor, feature_extractor=fe, output_scaler=label_scaler, 
    data=data_for_training, identifiers=IDs_for_final_testing
)

In [None]:
# Root mean squared error between the predictions and the true (unscaled) targets
# of the held-out part.
error = 0.0
for sample_ID, cur_prediction in pred:
    true_target = data_for_training[sample_ID][1]
    difference = cur_prediction - true_target
    error += difference * difference
error = np.sqrt(error / float(len(IDs_for_final_testing)))
print(f'Data size for testing is {len(IDs_for_final_testing)}.')
print(f'RMSE on data for testing = {error}')

In [None]:
# Release the training-time arrays and pools before the submission is produced;
# the submission cell needs only `boosted_regressor`, `fe` and `label_scaler`.
del X_train, X_val, y_train_mean, y_train_std, y_val
del train_dataset, eval_dataset
del X_train_aug, y_train_aug
gc.collect()

In [None]:
# Predict the test set in batches of 100 texts and write the `id,target` rows of
# the submission file batch by batch.
with codecs.open(submission_name, mode='w', encoding='utf-8') as fp:
    data_writer = csv.writer(fp, quotechar='"', delimiter=',')
    data_writer.writerow(['id', 'target'])
    for data_part in load_data_for_testing(testset_name, 100):
        predictions_for_submission = do_predictions(
            regressor=boosted_regressor,
            feature_extractor=fe, output_scaler=label_scaler,
            data=data_part
        )
        data_writer.writerows(
            [sample_ID, f'{cur_prediction}']
            for sample_ID, cur_prediction in predictions_for_submission
        )
        del predictions_for_submission
        print('')