In [1]:
import numpy as np
import torch

In [2]:
# Load the whole embedding matrix
embeddings_index = {}
with open('cleaned_glove.txt') as f:
    for line in f:
        values = line.split()
        word = values[0]
        embed = np.array(values[1:], dtype=np.float32)
        embeddings_index[word] = embed
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 326142 word vectors.


In [3]:
def get_word(query):
    return torch.FloatTensor(embeddings_index[query])

In [4]:
def closest(vec, n=100):
    """
    Find the closest words for a given vector
    """
    all_dists = [(w, torch.dist(vec, get_word(w))) for w in list(embeddings_index.keys())]
    return sorted(all_dists, key=lambda t: t[1])[:n]

In [5]:
def print_tuples(tuples):
    for tuple in tuples:
        print('(%.4f) %s' % (tuple[1], tuple[0]))

In [7]:
print_tuples(closest(get_word("watson")))

(0.0000) watson
(5.3011) svahng
(5.3217) wyatt
(5.3498) piyanart
(5.3500) srivalo
(5.3512) bulletinyyy
(5.3686) palmer
(5.4073) murphy
(5.4427) johnson
(5.4454) clarke
(5.4668) rohch
(5.4706) roberts
(5.4848) drohs
(5.4857) evans
(5.5083) rohsh
(5.5092) gibson
(5.5225) campbell
(5.5316) anderson
(5.5406) moore
(5.5657) holmes
(5.5729) tihg
(5.6076) skwy
(5.6091) kelly
(5.6121) walker
(5.6134) ooooooooooooooooooooooooooooooooooooooo
(5.6200) baker
(5.6238) willis
(5.6287) smith
(5.6397) thahl
(5.6488) grij
(5.6669) robinson
(5.6713) freeman
(5.6738) kahg
(5.6777) allen
(5.6786) rogers
(5.6865) prohertrib
(5.6913) davis
(5.6925) brohd
(5.6934) pyoot
(5.6952) devonengy
(5.6979) harris
(5.7077) irwin
(5.7117) barnes
(5.7136) nangk
(5.7176) howell
(5.7246) sihp
(5.7255) klehb
(5.7284) webb
(5.7298) davies
(5.7383) jahch
(5.7416) matthews
(5.7523) hoond
(5.7532) thompson
(5.7595) wilson
(5.7610) ironically
(5.7693) strahk
(5.7740) wrldcom
(5.7771) hnkj
(5.7833) biotechtrst
(5.7840) prihn
(5.

In [66]:
print_tuples(closest(get_word("data")))

(0.0000) data
(5.7147) information
(6.1718) analysis
(6.2668) database
(6.4859) suggest
(6.5133) provided
(6.5565) suggesting
(6.5594) suggests
(6.5640) tracking
(6.5644) indicating
(6.5652) indicate
(6.5681) instance
(6.5875) analyzed
(6.6227) moreover
(6.6400) analyzing
(6.6956) reports
(6.7160) showing
(6.7421) example
(6.7441) furthermore
(6.7809) using
(6.7886) statistics
(6.7888) indicated
(6.8095) same
(6.8201) measurements
(6.8454) provide
(6.8465) showed
(6.8505) analyses
(6.8636) indicates
(6.8693) numbers
(6.8832) databases
(6.8971) provides
(6.9024) however
(6.9130) study
(6.9168) available
(6.9365) according
(6.9381) indeed
(6.9608) findings
(6.9622) enabled
(6.9649) result
(6.9759) fact
(6.9800) additionally
(6.9889) addition
(7.0010) actual
(7.0033) comparison
(7.0037) surveys
(7.0039) computer
(7.0041) analyze
(7.0049) these
(7.0061) enables
(7.0166) obtained
(7.0169) uses
(7.0272) enabling
(7.0415) providing
(7.0447) detailed
(7.0528) particular
(7.0612) actually
(7.06

In [None]:
print_tuples(closest(get_word("")))

In [51]:
data = {
  'Adjectives': [line.rstrip() for line in list(open("english-adjectives.txt"))],
  'Nouns': [line.rstrip() for line in list(open("english-nouns.txt"))],
  'Verbs': [line.rstrip() for line in list(open("english-verbs.txt"))],
  'Names': [line.lower().rstrip() for line in list(open("english-names.txt"))],
}

In [52]:
categories = {word: key for key, words in data.items() for word in words}

In [53]:
data_embeddings = {key: value for key, value in embeddings_index.items() if key in categories.keys()}

In [54]:
# Processing the query
def process(query):
    adjectives_list = []
    verbs_list = []
    nouns_list = []
    names_list = []
    try: 
        query_embed = embeddings_index[query]
        scores = {}
        for word, embed in data_embeddings.items():
            category = categories[word]
            dist = query_embed.dot(embed)
            dist /= len(data[category])
            scores[category] = scores.get(category, 0) + dist
        if max(scores, key=scores.get) == "Adjectives":
            adjectives_list.append(query)
            with open("curated_adjectives.txt", "a") as f:
                f.write(query)
                f.write("\n")
        elif max(scores, key=scores.get) == "Verbs":
            verbs_list.append(query)
            with open("curated_verbs.txt", "a") as f:
                f.write(query)
                f.write("\n")
        elif max(scores, key=scores.get) == "Nouns":
            nouns_list.append(query)
            with open("curated_nouns.txt", "a") as f:
                f.write(query)
                f.write("\n")
        elif max(scores, key=scores.get) == "Names":
            names_list.append(query)
            with open("curated_names.txt", "a") as f:
                f.write(query)
                f.write("\n")
        return max(scores, key=scores.get)
    except:
        return None

In [10]:
for line in list(open("lemmas_60k.txt")):
    print(line.rstrip(), ": ", process(line.rstrip()))

of :  Nouns
do :  Verbs
they :  Verbs
she :  Verbs
about :  Verbs
there :  Verbs
him :  Verbs
could :  Verbs
no :  Verbs
man :  Nouns
very :  Verbs
work :  Verbs
call :  Verbs
in :  Nouns
as :  Verbs
help :  Verbs
every :  Verbs
yes :  Verbs
its :  Verbs
hand :  Verbs
about :  Verbs
without :  Verbs
story :  Nouns
begin :  Verbs
fact :  Verbs
maybe :  Verbs
issue :  Verbs
though :  Verbs
best :  Verbs
both :  Verbs
much :  Verbs
four :  Verbs
car :  Nouns
back :  Verbs
member :  Nouns
whether :  Verbs
stay :  Verbs
die :  Verbs
office :  Verbs
grow :  Verbs
second :  Nouns
low :  Verbs
whole :  Verbs
hard :  Verbs
police :  Verbs
break :  Verbs
listen :  Verbs
full :  Verbs
local :  Verbs
value :  Verbs
explain :  Verbs
movie :  Nouns
difference :  Verbs
effort :  Verbs
because :  Verbs
need :  Verbs
situation :  Verbs
share :  Verbs
simply :  Verbs
open :  Verbs
save :  Verbs
guess :  Verbs
election :  Verbs
certain :  Verbs
available :  Verbs
current :  Verbs
list :  Verbs
fire :  Ve

balloon :  Nouns
sudden :  Verbs
leak :  Verbs
usage :  Verbs
eyebrow :  Adjectives
align :  Verbs
predator :  Verbs
broker :  Verbs
wallet :  Adjectives
haul :  Verbs
rib :  Nouns
supporting :  Verbs
outbreak :  Verbs
weaken :  Verbs
editorial :  Nouns
authentic :  Adjectives
scent :  Adjectives
surrender :  Verbs
shh :  Adjectives
reasonably :  Verbs
taxi :  Nouns
utterly :  Adjectives
bow :  Verbs
plaintiff :  Verbs
lesser :  Verbs
frustrate :  Verbs
audio :  Nouns
subsequently :  Nouns
inclusion :  Verbs
miserable :  Adjectives
patron :  Nouns
absurd :  Adjectives
shrimp :  Verbs
developed :  Nouns
distress :  Verbs
metropolitan :  Nouns
isolated :  Verbs
swimming :  Nouns
flu :  Verbs
tweet :  Names
tenure :  Nouns
cooperative :  Verbs
integrated :  Verbs
egyptian :  Verbs
pumpkin :  Adjectives
tighten :  Verbs
contemplate :  Verbs
stove :  Nouns
coup :  Verbs
worldwide :  Verbs
startup :  Verbs
reason :  Verbs
supplement :  Verbs
circle :  Nouns
affection :  Adjectives
endorsemen

apocalypse :  Adjectives
shelter :  Verbs
inflate :  Verbs
affiliate :  Nouns
mingle :  Verbs
sexism :  Adjectives
shiite :  Verbs
inconsistency :  Adjectives
alphabet :  Nouns
stylish :  Adjectives
blackandwhite :  None
fm :  Nouns
nonfiction :  Adjectives
harness :  Verbs
fasten :  Verbs
lightweight :  Nouns
acknowledgment :  Adjectives
denomination :  Verbs
longevity :  Verbs
displacement :  Verbs
marxist :  Nouns
heightened :  Verbs
man :  Nouns
moderately :  Adjectives
augment :  Verbs
outnumber :  Verbs
deacon :  Names
interruption :  Verbs
ion :  Nouns
cosmetic :  Verbs
confidentiality :  Verbs
procession :  Adjectives
dane :  Names
memorandum :  Verbs
rum :  Adjectives
confide :  Names
polish :  Nouns
salty :  Adjectives
tar :  Nouns
autograph :  Adjectives
intimidating :  Adjectives
crib :  Adjectives
plainly :  Adjectives
metallic :  Adjectives
fuss :  Adjectives
divisive :  Adjectives
cleric :  Verbs
dispatch :  Verbs
ominous :  Adjectives
outpost :  Adjectives
boob :  Adjec

ratification :  Verbs
personalize :  Verbs
pouring :  Verbs
lengthen :  Verbs
swirl :  Adjectives
honored :  Nouns
fractured :  Adjectives
rescuer :  Names
peruvian :  Verbs
case :  Verbs
traffic :  Verbs
inward :  Adjectives
rendezvous :  Adjectives
galley :  Names
compulsive :  Adjectives
impossibly :  Adjectives
refill :  Verbs
misinterpret :  Adjectives
technologically :  Adjectives
nonverbal :  Adjectives
deteriorating :  Verbs
caramel :  Adjectives
crippling :  Verbs
filler :  Adjectives
bombard :  Verbs
troll :  Adjectives
detriment :  Verbs
meticulously :  Adjectives
mesmerize :  Adjectives
corner :  Nouns
wishful :  Adjectives
muchneeded :  None
scoot :  Names
prick :  Adjectives
inadequacy :  Adjectives
regularity :  Adjectives
comfy :  Adjectives
cola :  Nouns
cartoonist :  Names
pap :  Adjectives
dreaded :  Adjectives
showroom :  Adjectives
saturation :  Adjectives
tulip :  Names
stow :  Names
reckoning :  Adjectives
redevelopment :  Verbs
switching :  Verbs
meatball :  Adj

docking :  Verbs
nautical :  Nouns
maximal :  Adjectives
blockage :  Adjectives
revitalization :  Verbs
guatemalan :  Names
uncompromising :  Adjectives
flourishing :  Adjectives
rigorously :  Verbs
oncology :  Names
alchemy :  Adjectives
typify :  Adjectives
repugnant :  Adjectives
welleducated :  None
reassign :  Names
dingy :  Adjectives
headway :  Verbs
tint :  Adjectives
mirage :  Nouns
authoritarianism :  Adjectives
multimilliondollar :  None
aural :  Adjectives
timeframe :  Verbs
pluralistic :  Adjectives
trailhead :  Names
purr :  Adjectives
humbling :  Adjectives
unproven :  Adjectives
radial :  Adjectives
fluorescence :  Adjectives
accredit :  Names
daydream :  Adjectives
know :  Verbs
consign :  Verbs
forte :  Names
ballplayer :  Adjectives
vending :  Verbs
widower :  Names
hound :  Adjectives
institutionalized :  Adjectives
wickedness :  Adjectives
andean :  Adjectives
enthrall :  Names
springboard :  Verbs
pained :  Adjectives
horrified :  Adjectives
whoohoo :  None
superc

dusky :  Adjectives
curveball :  Adjectives
hyperactive :  Adjectives
airconditioning :  Names
oversimplify :  Adjectives
cheque :  Adjectives
myopic :  Adjectives
silhouette :  Adjectives
fortyone :  None
suffuse :  Adjectives
cheeky :  Adjectives
onetoone :  None
toolkit :  Names
desist :  Verbs
oneliner :  None
lumbar :  Adjectives
dioxin :  Adjectives
generalizability :  Names
unhook :  Names
fracturing :  Adjectives
irritant :  Adjectives
connective :  Adjectives
antiviral :  Verbs
disembodied :  Adjectives
lowrisk :  None
boson :  Names
peeling :  Adjectives
euphoric :  Adjectives
throwaway :  Adjectives
oneroom :  None
robustness :  Adjectives
methanol :  Adjectives
expeditionary :  Nouns
boat :  Nouns
helix :  Names
ream :  Names
snicker :  Adjectives
midlife :  Adjectives
agitator :  Names
rigging :  Verbs
carelessness :  Adjectives
grotto :  Names
metabolize :  Verbs
subliminal :  Adjectives
webmaster :  Names
oped :  Adjectives
generative :  Adjectives
chisel :  Adjectives
m

bop :  Adjectives
admittance :  Verbs
toy :  Nouns
prophylactic :  Adjectives
ruffled :  Adjectives
lowprofile :  None
reprise :  Names
cheyenne :  Names
ditty :  Adjectives
wench :  Adjectives
ocular :  Adjectives
integrator :  Adjectives
sealevel :  Names
bod :  Names
cymbal :  Adjectives
hoarder :  Adjectives
narrowminded :  None
upanddown :  None
obligated :  Verbs
bugle :  Names
anointed :  Names
monthly :  Verbs
unsay :  Names
conformation :  Adjectives
identically :  Adjectives
retrofit :  Adjectives
curate :  Names
framing :  Adjectives
soundness :  Adjectives
wellsuited :  None
peasantry :  Adjectives
plotline :  Adjectives
leaden :  Adjectives
pizzeria :  Names
prior :  Nouns
minotaur :  Names
scorching :  Adjectives
advance :  Verbs
flagstone :  Adjectives
carburetor :  Names
ungainly :  Adjectives
ageing :  Verbs
sunbathe :  Names
chipotle :  Names
councilor :  Names
binocular :  Adjectives
introduced :  Nouns
cityscape :  Adjectives
unimpeded :  Verbs
attire :  Adjectives


escapist :  Adjectives
minimumwage :  None
statefunded :  None
itching :  Adjectives
phishing :  Adjectives
schnapps :  Names
fasttrack :  Names
custombuilt :  None
coot :  Names
fashionably :  Adjectives
maimed :  Adjectives
hypodermic :  Adjectives
gaily :  Adjectives
frankness :  Adjectives
daunt :  Names
freestanding :  Adjectives
immunosuppressive :  Adjectives
litigious :  Adjectives
loveless :  Names
plover :  Adjectives
gettogether :  None
apostate :  Names
humbled :  Adjectives
twoheaded :  None
veep :  Names
bumbling :  Adjectives
selfguided :  None
intubation :  Names
arian :  Names
seventythree :  None
goodfaith :  None
remit :  Verbs
minty :  Names
ejaculate :  Adjectives
masochism :  Adjectives
readjustment :  Adjectives
cation :  Adjectives
mimosa :  Names
touchyfeely :  None
cohabit :  Adjectives
shiitake :  Adjectives
irreparably :  Adjectives
nonconference :  Adjectives
nonlocal :  Adjectives
speeder :  Adjectives
socialistic :  Adjectives
biome :  Adjectives
pillory 

shielding :  Verbs
filesharing :  Names
blueribbon :  None
bulgur :  Names
militate :  Adjectives
scarab :  Names
literalist :  Adjectives
ringed :  Adjectives
upright :  Adjectives
looky :  Names
selectman :  Names
blushing :  Names
diseasecausing :  None
fount :  Adjectives
wholehearted :  Adjectives
unrepresentative :  Adjectives
threeyearold :  None
skybox :  Adjectives
unglamorous :  Adjectives
gogetter :  None
oldworld :  None
bruschetta :  Adjectives
terrarium :  Adjectives
fahrenheit :  Nouns
armload :  Adjectives
backhanded :  Adjectives
southpaw :  Names
teething :  Adjectives
religionist :  None
chica :  Names
vexation :  Names
handspring :  Names
gab :  Adjectives
comefrombehind :  None
unwisely :  Names
wildtype :  Names
highefficiency :  None
offpeak :  None
brain :  Verbs
creative :  Verbs
schoolbook :  Adjectives
mastiff :  Names
druginduced :  None
impermanence :  Adjectives
gnosticism :  Names
perforate :  Adjectives
caulk :  Adjectives
vernal :  Names
gratuitously : 

halfcourt :  Adjectives
emasculate :  Adjectives
sevenmonth :  None
druggist :  Names
bifurcated :  Adjectives
shoeless :  Names
impulsiveness :  Adjectives
germanborn :  None
spitball :  Names
blowdry :  None
organza :  Adjectives
onebyone :  None
selfcontradictory :  None
halfcourt :  Adjectives
understudied :  Names
robocall :  Names
disused :  Adjectives
dross :  Names
wahhabi :  Adjectives
elastomer :  Adjectives
thiamine :  Names
fourthirty :  None
bookend :  Adjectives
discretely :  Adjectives
clearcutting :  Adjectives
verger :  Names
selfinstruction :  None
twoseater :  None
lamina :  Names
cogently :  Names
skincare :  Names
doityourselfer :  None
togo :  Names
fixerupper :  None
handsdown :  None
stevia :  Names
oarsman :  Names
dichotomize :  None
starforming :  None
misleadingly :  Adjectives
integrally :  Adjectives
twee :  Adjectives
weightbearing :  None
anodyne :  Adjectives
arpeggio :  Adjectives
niqab :  Names
rickets :  Names
subdiscipline :  Adjectives
interferomet

anthropomorphize :  Adjectives
neoplatonic :  Adjectives
barrenness :  Names
unifier :  Adjectives
glassenclosed :  None
maasai :  Names
pellmell :  None
streetwalker :  Names
orthopedics :  Names
detainment :  Names
notetaking :  None
chinesemade :  None
hidalgo :  Names
psychometrically :  None
uhuhuh :  None
tesseract :  Names
laryngoscope :  Names
dutybound :  None
neyo :  None
sixthround :  None
backroom :  Adjectives
threshing :  Adjectives
unethically :  Names
glucagon :  Names
headlining :  Adjectives
satisfyingly :  Adjectives
prole :  Names
stevedore :  Names
praetorian :  Names
nubby :  Adjectives
ptarmigan :  Names
squeamishness :  Adjectives
minimart :  Names
tapper :  Names
subscript :  Adjectives
antitoxin :  Adjectives
clusterfuck :  None
neocolonialism :  Names
silver :  Nouns
backhand :  Adjectives
bargainbasement :  None
shingled :  Adjectives
laminar :  Adjectives
endometrium :  Names
switchhitter :  None
wielder :  Names
craniofacial :  Names
stenciled :  Adjective

aircrew :  Adjectives
motheaten :  None
clintonian :  Adjectives
multivalent :  Adjectives
liberalleaning :  None
inebriation :  Adjectives
exsoldier :  None
jigger :  Names
wholesomeness :  Adjectives
constructivist :  Adjectives
fineboned :  None
herbarium :  Names
bungling :  Adjectives
centrum :  Names
chauffeured :  Names
orthodontic :  Adjectives
ohhoho :  None
cytosol :  Names
stepwise :  Adjectives
culturespecific :  None
minimalistic :  Adjectives
taoist :  Adjectives
insecticidal :  Names
unmonitored :  Adjectives
sharpening :  Adjectives
icehouse :  Names
natoled :  None
monotonically :  Adjectives
downsizing :  Verbs
axolotl :  Names
husk :  Names
beneficially :  Adjectives
duodenal :  Adjectives
underserved :  Adjectives
eighteenwheeler :  None
geoscience :  Names
twinkly :  Adjectives
milewide :  None
proteomic :  Adjectives
stemware :  Adjectives
publicworks :  None
centering :  Adjectives
posthuman :  Names
olivine :  Adjectives
datacollection :  None
springlike :  Adje

libration :  Names
weblike :  Adjectives
exothermic :  Adjectives
multifaith :  Adjectives
animallike :  None
naughtiness :  Adjectives
monstrance :  Adjectives
sixtwo :  None
affinal :  None
closefitting :  None
ascribed :  Adjectives
wellborn :  Names
rebuttable :  Adjectives
fleecelined :  None
instrumentalism :  Names
tatty :  Adjectives
windproof :  Names
semiformal :  Names
pricefixing :  None
firehose :  Names
stateimposed :  None
rager :  Names
bowlshaped :  None
outofseason :  None
catsup :  Names
ouzo :  Names
nonmonogamous :  None
bugbear :  Adjectives
straightedge :  Adjectives
highincidence :  None
propitiate :  Names
pendency :  Adjectives
gulley :  Names
goddamit :  None
interindividual :  None
flameout :  Adjectives
ineptly :  Adjectives
lifeforce :  Names
smouldering :  Adjectives
shakespearian :  Adjectives
beachy :  Names
rinkydink :  None
bacchanal :  Adjectives
slaver :  Names
backscatter :  Names
riskmanagement :  None
bearlike :  Names
phospholipid :  Names
gaste

renovator :  Names
chelator :  None
demimonde :  Adjectives
dilapidation :  Adjectives
nonsport :  None
debtreduction :  None
fretwork :  Adjectives
velar :  Adjectives
metafictive :  None
procommunist :  None
peritoneum :  Adjectives
screechy :  Adjectives
halfgrown :  None
harmlessness :  Adjectives
tenpound :  None
justpublished :  None
fistsize :  None
unitard :  Adjectives
estrange :  Names
fuelefficiency :  None
facedown :  Names
barrierfree :  None
hypervigilance :  Names
bionics :  Names
divining :  Adjectives
marketresearch :  None
actualized :  Adjectives
moog :  Names
colloid :  Adjectives
threemasted :  None
multiphasic :  Adjectives
dystonia :  Names
fab :  Names
ladyinwaiting :  None
ultramarine :  Adjectives
unsayable :  None
firsttier :  None
interwar :  Adjectives
isolette :  None
hubandspoke :  None
habitforming :  None
greatgreatgreat :  None
contingently :  None
perstudent :  None
feebleness :  Adjectives
forcedair :  None
puckering :  Names
bursa :  Names
distracti

dueling :  Adjectives
muscarinic :  Adjectives
amorite :  Names
coorganizer :  None
betterequipped :  None
atrioventricular :  Adjectives
draftday :  None
majoritywhite :  None
algebraically :  Adjectives
downonhisluck :  None
adlibbing :  None
moldavian :  Adjectives
blackskinned :  None
toiler :  Adjectives
humiliatingly :  Adjectives
durational :  Adjectives
photoop :  None
facerecognition :  None
laager :  Names
limpwristed :  None
uncirculated :  Names
userfriendliness :  None
sunyata :  Names
kickstart :  Verbs
testamentary :  Adjectives
hourbyhour :  None
womanliness :  None
savoirfaire :  None
sensitized :  Adjectives
proarab :  None
filmer :  Names
stanky :  Names
menonly :  None
hedger :  Names
sticklike :  None
pleasantlooking :  None
bordercrossing :  None
postnational :  None
onionskin :  None
deepvoiced :  None
celllike :  None
depositary :  Names
calculative :  None
postfeminist :  Adjectives
blustering :  Adjectives
positioner :  Names
radiometer :  Names
fortyminute : 

icepick :  Names
cowife :  None
blackpainted :  None
monoamine :  Names
slowcooker :  None
mangosteen :  Names
singleline :  None
motoric :  Adjectives
niggle :  Adjectives
bestorganized :  None
hydroquinone :  Adjectives
anarchical :  Adjectives
humorlessly :  None
assbackwards :  None
edibility :  Adjectives
seventeenyear :  None
smiler :  Names
underlay :  Names
rapini :  Names
inadvertence :  Adjectives
sailer :  Names
transposed :  Adjectives
teninch :  None
goodman :  Names
ultrafiltration :  Names
whitemale :  None
loworder :  None
sculler :  Names
selfacceptance :  None
consolingly :  None
finned :  Adjectives
splay :  Names
vibrance :  Names
hearingimpaired :  None
fourlevel :  None
clamping :  Adjectives
possessory :  Adjectives
oncewhite :  None
nucleolus :  Adjectives
outstay :  None
desultorily :  Adjectives
cetacean :  Names
parallelization :  Adjectives
reinsertion :  Names
directinjection :  None
nonreproductive :  None
bootlace :  None
occlusal :  Adjectives
pleura :  

In [55]:
import socket
print(socket.gethostbyname('sl.ug'))

gaierror: [Errno 8] nodename nor servname provided, or not known

In [56]:
print(socket.gethostbyname('shopi.fyi'))

gaierror: [Errno 8] nodename nor servname provided, or not known

In [68]:
def update_domains():
    
    tlds = ["ai", "app", "co", "io", "to", "me", "tech", "dev", "so", "xyz", "org", "net", "us", "vc", "fyi", "in", "link", "live", "de", "blog", "is", "gg"]
    
    for tld in tlds:

        categories = ["adjectives", "nouns", "verbs", "positive", "names", "tech", "spanish", "other"]

        for category in categories:
            txt_file = "data/"+tld+"/"+category+".txt"

            # getting the all words list
            all_txt_file = "data/"+tld+"/all.txt"
            all_words = []
            with open(all_txt_file, 'r+') as f:
                for word in f.read().splitlines():
                    all_words.append(word)

            word_list = []
            with open(txt_file, 'r+') as f:
                for word in f.read().splitlines():
                    if word[-1:] == "_":
                        try: 
                            socket.gethostbyname(word[:-1] + '.' + tld)
                            for entry in all_words:
                                if entry == word: 
                                    all_words.remove(word)
                        except: 
                            word_list.append(word)
                    else:
                        try: 
                            socket.gethostbyname(word + '.' + tld)
                            for entry in all_words:
                                if entry == word: 
                                    all_words.remove(word)
                        except: 
                            word_list.append(word)

            with open(txt_file, "w") as f:
                for word in word_list:
                    f.write(word)
                    f.write("\n")
            with open(all_txt_file, "w") as f:
                for entry in all_words:
                    f.write(entry)
                    f.write("\n")

            print("Done updating " + category + " for ." + tld + " domains!")

In [69]:
update_domains("org")

Done updating adjectives for .org domains!
Done updating nouns for .org domains!
Done updating verbs for .org domains!
Done updating positive for .org domains!
Done updating names for .org domains!
Done updating tech for .org domains!
Done updating spanish for .org domains!
Done updating other for .org domains!


In [22]:
positive_ly = []

for line in list(open("english-positive.txt")):
    if line.rstrip()[-2:] == "ly" and len(line.rstrip()) < 8:
        positive_ly.append(line.rstrip())

positive_ly    

['affably',
 'agilely',
 'amply',
 'audibly',
 'avidly',
 'capably',
 'cleanly',
 'clearly',
 'comely',
 'courtly',
 'eagerly',
 'evenly',
 'fairly',
 'finely',
 'fondly',
 'gaily',
 'gladly',
 'goodly',
 'handily',
 'happily',
 'holy',
 'ideally',
 'jolly',
 'justly',
 'keenly',
 'kindly',
 'lively',
 'lovably',
 'lovely',
 'lucidly',
 'merrily',
 'neatly',
 'nicely',
 'nobly',
 'notably',
 'openly',
 'orderly',
 'readily',
 'regally',
 'richly',
 'rightly',
 'safely',
 'sagely',
 'saintly',
 'smartly',
 'soundly',
 'stately',
 'suavely',
 'sweetly',
 'timely',
 'twinkly',
 'warmly',
 'wisely']

In [11]:
import requests
import time

def bulk_check_availability(domain_list):
    # Godaddy developer key and secret
    api_key = "e5CagPzPH4QQ_Pf8szkb7G9r8kpiw7yVmsn"
    secret_key = "QCfJ5wTB1UEwc9a4WuNe34"

    # API key and secret are sent in the header
    headers = {"Authorization" : "sso-key {}:{}".format(api_key, secret_key)}

    # Domain availability and appraisal end points with 
    # checkType parameter set to "FULL" instead of the default "FAST"
    url = "https://api.godaddy.com/v1/domains/available?checkType=FULL"
    
    domain_chunks = []

    # Split the original array into subarrays
    for i in range(0, len(domain_list), 30):
        domain_chunks.append(domain_list[i:i + 30])

    # Create empty list to store available domains
    available_domains = []

    for domains in domain_chunks:
        availability_res = requests.post(url, json=domains, headers=headers).json()
        for domain in availability_res["domains"]:
            if domain["available"] == True:
                domain_name = domain['domain']
                available_domains.append(domain_name)
                print(domain_name)
        print("-----------------------------------------------")
        # API call frequency should be ~ 20 calls per minute 
        time.sleep(10)
    return available_domains

In [88]:
noun_data = []
data_noun = []

for line in list(open("english-nouns.txt")):
    if len(line.rstrip()) == 4:
        noun_data.append(line.rstrip() + "data.com")
        data_noun.append("data" + line.rstrip() + ".com")

In [89]:
bulk_check_availability(noun_data)

wifedata.com
mooddata.com
ovendata.com
-----------------------------------------------
wifedata.com
poetdata.com
-----------------------------------------------
-----------------------------------------------
lackdata.com
-----------------------------------------------
beltdata.com
-----------------------------------------------
coatdata.com
nosedata.com
blowdata.com
-----------------------------------------------
blowdata.com
feardata.com
harmdata.com
ruindata.com
jokedata.com
-----------------------------------------------
jokedata.com
-----------------------------------------------
-----------------------------------------------
sinkdata.com
hurtdata.com
-----------------------------------------------
sinkdata.com
quitdata.com
teardata.com
suckdata.com
-----------------------------------------------


['wifedata.com',
 'mooddata.com',
 'ovendata.com',
 'wifedata.com',
 'poetdata.com',
 'lackdata.com',
 'beltdata.com',
 'coatdata.com',
 'nosedata.com',
 'blowdata.com',
 'blowdata.com',
 'feardata.com',
 'harmdata.com',
 'ruindata.com',
 'jokedata.com',
 'jokedata.com',
 'sinkdata.com',
 'hurtdata.com',
 'sinkdata.com',
 'quitdata.com',
 'teardata.com',
 'suckdata.com']

In [90]:
bulk_check_availability(data_noun)

-----------------------------------------------
-----------------------------------------------
-----------------------------------------------
datalack.com
-----------------------------------------------
-----------------------------------------------
datanail.com
-----------------------------------------------
dataharm.com
datajoke.com
dataknee.com
datajury.com
dataruin.com
-----------------------------------------------
datajury.com
datajoke.com
-----------------------------------------------
datawait.com
-----------------------------------------------
datawait.com
datahate.com
datahurt.com
datahang.com
-----------------------------------------------
dataquit.com
datamale.com
-----------------------------------------------


['datalack.com',
 'datanail.com',
 'dataharm.com',
 'datajoke.com',
 'dataknee.com',
 'datajury.com',
 'dataruin.com',
 'datajury.com',
 'datajoke.com',
 'datawait.com',
 'datawait.com',
 'datahate.com',
 'datahurt.com',
 'datahang.com',
 'dataquit.com',
 'datamale.com']

## PositiveLY Verbs

In [26]:
verb_list = ['know', 'make', 'take', 'give', 'find', 'like', 'keep', 'work', 'show', 'read', 'play', 'live', 'meet', 'move', 'talk', 'hope', 'wish', 'send', 'sell', 'open', 'join', 'save', 'deal', 'mind', 'draw', 'care', 'note', 'drop', 'sign', 'pray', 'list', 'copy', 'fund', 'race', 'snap', 'swap', 'book', 'date', 'chat', 'wave', 'post', 'file']

In [27]:
verb_4L_positive_ly = []

for verb in verb_list:
    for term in positive_ly:
        try: 
            socket.gethostbyname(verb + term + '.com')
        except: 
            verb_4L_positive_ly.append(verb + term + '.com')
            print(verb + term + '.com')

knowaffably.com
knowagilely.com
knowamply.com
knowaudibly.com
knowavidly.com
knowcapably.com
knowcleanly.com
knowclearly.com
knowcomely.com
knowcourtly.com
knoweagerly.com
knowevenly.com
knowfairly.com
knowfinely.com
knowfondly.com
knowgaily.com
knowgladly.com
knowgoodly.com
knowhandily.com
knowhappily.com
knowholy.com
knowideally.com
knowjolly.com
knowjustly.com
knowkeenly.com
knowkindly.com
knowlively.com
knowlovably.com
knowlovely.com
knowlucidly.com
knowmerrily.com
knowneatly.com
knownicely.com
knownobly.com
knownotably.com
knowopenly.com
knoworderly.com
knowreadily.com
knowregally.com
knowrichly.com
knowsafely.com
knowsagely.com
knowsaintly.com
knowsmartly.com
knowsoundly.com
knowstately.com
knowsuavely.com
knowsweetly.com
knowtimely.com
knowtwinkly.com
knowwarmly.com
knowwisely.com
makeaffably.com
makeagilely.com
makeamply.com
makeaudibly.com
makeavidly.com
makecapably.com
makecleanly.com
makeclearly.com
makecomely.com
makecourtly.com
makeeagerly.com
makeevenly.com
makefairly.com

liveholy.com
liveideally.com
livelovably.com
livemerrily.com
livenotably.com
livereadily.com
livesafely.com
livesoundly.com
livesuavely.com
livetimely.com
livetwinkly.com
meetaffably.com
meetagilely.com
meetamply.com
meetaudibly.com
meetavidly.com
meetcapably.com
meetcleanly.com
meetclearly.com
meetcomely.com
meetcourtly.com
meeteagerly.com
meetevenly.com
meetfairly.com
meetfinely.com
meetfondly.com
meetgaily.com
meetgladly.com
meetgoodly.com
meethandily.com
meethappily.com
meetholy.com
meetideally.com
meetjolly.com
meetjustly.com
meetkeenly.com
meetkindly.com
meetlovably.com
meetlucidly.com
meetmerrily.com
meetneatly.com
meetnobly.com
meetnotably.com
meetorderly.com
meetreadily.com
meetregally.com
meetrichly.com
meetrightly.com
meetsagely.com
meetsaintly.com
meetsoundly.com
meetstately.com
meetsuavely.com
meetsweetly.com
meettimely.com
meettwinkly.com
meetwarmly.com
meetwisely.com
moveaffably.com
moveagilely.com
moveamply.com
moveaudibly.com
moveavidly.com
movecapably.com
movecleanly.

dealsweetly.com
dealtwinkly.com
dealwarmly.com
dealwisely.com
mindaffably.com
mindagilely.com
mindamply.com
mindaudibly.com
mindavidly.com
mindcapably.com
mindcleanly.com
mindclearly.com
mindcomely.com
mindcourtly.com
mindeagerly.com
mindevenly.com
mindfairly.com
mindfinely.com
mindfondly.com
mindgaily.com
mindgladly.com
mindgoodly.com
mindhandily.com
mindhappily.com
mindholy.com
mindideally.com
mindjolly.com
mindjustly.com
mindkeenly.com
mindkindly.com
mindlovably.com
mindlovely.com
mindlucidly.com
mindmerrily.com
mindneatly.com
mindnicely.com
mindnobly.com
mindnotably.com
mindopenly.com
mindorderly.com
mindreadily.com
mindregally.com
mindrichly.com
mindrightly.com
mindsafely.com
mindsagely.com
mindsaintly.com
mindsmartly.com
mindsoundly.com
mindstately.com
mindsuavely.com
mindsweetly.com
mindtimely.com
mindtwinkly.com
mindwarmly.com
mindwisely.com
drawaffably.com
drawagilely.com
drawamply.com
drawaudibly.com
drawavidly.com
drawcapably.com
drawcleanly.com
drawclearly.com
drawcomely.co

racelovely.com
racelucidly.com
racemerrily.com
raceneatly.com
racenicely.com
racenobly.com
racenotably.com
raceopenly.com
raceorderly.com
racereadily.com
raceregally.com
racerichly.com
racerightly.com
racesagely.com
racesaintly.com
racesmartly.com
racesoundly.com
racestately.com
racesuavely.com
racesweetly.com
racetimely.com
racetwinkly.com
racewarmly.com
racewisely.com
snapaffably.com
snapagilely.com
snapamply.com
snapaudibly.com
snapavidly.com
snapcapably.com
snapcleanly.com
snapclearly.com
snapcomely.com
snapcourtly.com
snapeagerly.com
snapevenly.com
snapfairly.com
snapfinely.com
snapfondly.com
snapgaily.com
snapgladly.com
snapgoodly.com
snaphandily.com
snaphappily.com
snapholy.com
snapideally.com
snapjustly.com
snapkeenly.com
snapkindly.com
snaplively.com
snaplovably.com
snaplovely.com
snaplucidly.com
snapmerrily.com
snapneatly.com
snapnicely.com
snapnobly.com
snapnotably.com
snapopenly.com
snaporderly.com
snapreadily.com
snapregally.com
snaprichly.com
snaprightly.com
snapsagely.co

In [40]:
noun_4L = []

for line in list(open("english-nouns.txt")):
    if len(line.rstrip()) == 4:
        noun_4L.append(line.rstrip())

noun_4L

['meat',
 'year',
 'data',
 'food',
 'bird',
 'love',
 'fact',
 'idea',
 'area',
 'oven',
 'week',
 'exam',
 'army',
 'goal',
 'news',
 'user',
 'disk',
 'road',
 'role',
 'soup',
 'math',
 'wood',
 'unit',
 'cell',
 'lake',
 'mood',
 'city',
 'debt',
 'loss',
 'bath',
 'mall',
 'hair',
 'mode',
 'song',
 'town',
 'wife',
 'gate',
 'girl',
 'hall',
 'meal',
 'poem',
 'desk',
 'king',
 'menu',
 'beer',
 'dirt',
 'gene',
 'lady',
 'poet',
 'tale',
 'time',
 'work',
 'film',
 'game',
 'life',
 'form',
 'part',
 'fish',
 'back',
 'heat',
 'hand',
 'book',
 'type',
 'home',
 'body',
 'size',
 'card',
 'list',
 'mind',
 'line',
 'care',
 'risk',
 'word',
 'name',
 'boss',
 'page',
 'term',
 'test',
 'kind',
 'soil',
 'rate',
 'site',
 'case',
 'boat',
 'cash',
 'plan',
 'side',
 'rule',
 'head',
 'rock',
 'salt',
 'note',
 'rent',
 'bank',
 'half',
 'fire',
 'step',
 'face',
 'item',
 'room',
 'view',
 'ball',
 'gift',
 'tool',
 'wind',
 'sign',
 'task',
 'hope',
 'date',
 'link',
 'post',
 

In [4]:
import requests
import time
import xml.etree.ElementTree as ET

# Namecheap API
url = "https://api.namecheap.com/xml.response?ApiUser=steventey&ApiKey=aac5b2e14a4b49748787523cdf815edc&UserName=steventey&ClientIp=42.189.168.79&RequestIP=&Command=namecheap.domains.check"

# 40 names per chunk is the 10 below the limit of 50
chunk_size = 40

def check_available_domains(all_domains):

    domain_chunks = []

    # Split the original array into subarrays
    for i in range(0, len(all_domains), chunk_size):
        domain_chunks.append(all_domains[i:i + chunk_size])

    # Create empty list to store available domains
    available_domains = []

    for domains in domain_chunks:
        # Get availability and premium info by calling Namecheap API
        response = requests.post(url, data={'DomainList': domains})
        responseXml = ET.fromstring(response.content)
        # ET.dump(responseXml)

        # Get only available domains
        for domain in responseXml.iter('{http://api.namecheap.com/xml.response}DomainCheckResult'):
            # comment for debugging
            # print("Domain: " + domain.attrib['Domain'], "Available: " + domain.attrib['Available'], "Premium: " + domain.attrib['IsPremiumName'])
            if domain.attrib['Available'] == "true":
                domain_name = domain.attrib['Domain']
                # if it's a premium name
                if domain.attrib['IsPremiumName'] == "true":
                    available_domains.append(domain_name.replace('.com', '')+'_')
                
                # if it's not a premium name
                elif domain.attrib['IsPremiumName'] == "false":    
                    available_domains.append(domain_name.replace('.com', ''))
                print(domain_name)
        print("-----------------------------------------------")
        # API call frequency should be ~ 20 calls per minute 
        time.sleep(5)
    
    return available_domains

In [45]:
print(check_available_domains('combine.to'))

combine.to
-----------------------------------------------
['combine.to_']


In [37]:
whois_checked_verbs_positively = check_available_domains(verb_4L_positive_ly)

knowkindly.com
knowlively.com
knowlovably.com
knowlovely.com
knowlucidly.com
knowmerrily.com
knowneatly.com
knownicely.com
knownobly.com
knownotably.com
knowopenly.com
knoworderly.com
knowreadily.com
knowregally.com
knowrichly.com
knowaffably.com
knowagilely.com
knowamply.com
knowaudibly.com
knowavidly.com
knowcapably.com
knowcleanly.com
knowclearly.com
knowcomely.com
knowcourtly.com
knoweagerly.com
knowevenly.com
knowfairly.com
knowfinely.com
knowfondly.com
knowgaily.com
knowgladly.com
knowgoodly.com
knowhandily.com
knowhappily.com
knowholy.com
knowideally.com
knowjolly.com
knowjustly.com
knowkeenly.com
-----------------------------------------------
makefinely.com
makefondly.com
makegaily.com
makegladly.com
makegoodly.com
makehandily.com
makehappily.com
makeideally.com
makejolly.com
makejustly.com
makekeenly.com
makekindly.com
makelovably.com
makelucidly.com
makemerrily.com
knowsafely.com
knowsagely.com
knowsaintly.com
knowsmartly.com
knowsoundly.com
knowstately.com
knowsuavely.com
k

meetamply.com
meetaudibly.com
meetavidly.com
meetcapably.com
meetcleanly.com
meetclearly.com
meetcomely.com
meetcourtly.com
meeteagerly.com
meetevenly.com
meetfairly.com
meetfinely.com
meetfondly.com
meetgaily.com
meetgladly.com
liveamply.com
liveaudibly.com
livecapably.com
livecleanly.com
livecomely.com
livecourtly.com
livefinely.com
livefondly.com
livegaily.com
livegladly.com
livehandily.com
livelovably.com
livemerrily.com
livenotably.com
livereadily.com
livesuavely.com
livetwinkly.com
meetaffably.com
meetagilely.com
-----------------------------------------------
meetsweetly.com
meettimely.com
meettwinkly.com
meetwarmly.com
moveaffably.com
moveagilely.com
moveamply.com
moveaudibly.com
moveavidly.com
movecapably.com
movecleanly.com
moveclearly.com
movecomely.com
movecourtly.com
meetgoodly.com
meethandily.com
meethappily.com
meetideally.com
meetjolly.com
meetjustly.com
meetkeenly.com
meetlovably.com
meetlucidly.com
meetmerrily.com
meetneatly.com
meetnobly.com
meetnotably.com
meetorder

dealtwinkly.com
dealwarmly.com
mindaffably.com
mindagilely.com
mindamply.com
mindaudibly.com
mindavidly.com
mindcapably.com
mindcleanly.com
mindclearly.com
mindcomely.com
mindcourtly.com
mindeagerly.com
mindevenly.com
dealhappily.com
dealideally.com
dealjustly.com
dealkeenly.com
deallively.com
deallovably.com
deallovely.com
deallucidly.com
dealmerrily.com
dealneatly.com
dealnicely.com
dealnobly.com
dealnotably.com
dealorderly.com
dealreadily.com
dealregally.com
dealrichly.com
dealrightly.com
dealsafely.com
dealsagely.com
dealsaintly.com
dealsoundly.com
dealstately.com
dealsuavely.com
dealsweetly.com
-----------------------------------------------
mindregally.com
mindrichly.com
mindrightly.com
mindsafely.com
mindsagely.com
mindsaintly.com
mindsmartly.com
mindsoundly.com
mindstately.com
mindsuavely.com
mindsweetly.com
mindtimely.com
mindtwinkly.com
mindwarmly.com
mindwisely.com
mindfairly.com
mindfinely.com
mindfondly.com
mindgaily.com
mindgladly.com
mindgoodly.com
mindhandily.com
mindha

racefondly.com
racegaily.com
racegladly.com
racegoodly.com
racehandily.com
racehappily.com
raceholy.com
raceideally.com
racejolly.com
racejustly.com
racekeenly.com
racekindly.com
racelively.com
racelovably.com
racelovely.com
fundregally.com
fundrichly.com
fundsagely.com
fundsaintly.com
fundsoundly.com
fundstately.com
fundsuavely.com
fundsweetly.com
fundtimely.com
fundtwinkly.com
fundwarmly.com
raceaffably.com
raceagilely.com
raceamply.com
raceaudibly.com
raceavidly.com
racecapably.com
racecleanly.com
raceclearly.com
racecomely.com
racecourtly.com
raceeagerly.com
raceevenly.com
racefairly.com
racefinely.com
-----------------------------------------------
snapamply.com
snapaudibly.com
snapavidly.com
snapcapably.com
snapcleanly.com
snapclearly.com
snapcomely.com
snapcourtly.com
snapeagerly.com
snapevenly.com
snapfairly.com
snapfinely.com
snapfondly.com
snapgaily.com
snapgladly.com
racelucidly.com
racemerrily.com
raceneatly.com
racenicely.com
racenobly.com
racenotably.com
raceopenly.com
ra

## Spell check

In [39]:
with open("verbs_positively.txt", "w") as f:
    for term in whois_checked_verbs_positively:
        f.write(term)
        f.write("\n")

In [None]:
typo_list = ['wonderous', 'wonderously', 'supurb', 'supurbly', 'satisified', 'recomend', 'raptureous', 'prospros', 'jubiliant', 'invaluablely', 'inpressed', 'imaculate', 'futurestic', 'fecilitous', 'fancinating', 'exellent', 'excellant', 'excelent', 'examplar', 'examplary', 'excallent', 'exaltingly', 'ecenomical', 'deginified', 'convienient', 'convient', 'convience', 'beutifully', 'benifits', 'believeable', 'amiabily', 'accomodative']


good_version = []

for line in list(open("data/ai/positive.txt")):
    if line.rstrip() in typo_list:
        continue
    else
        good_version.append(line.rstrip())
        
good_version_no_dupes = list(dict.fromkeys(good_version))

with open("data/ai/positive.txt", "w") as f:
    for term in good_version_no_dupes:
        f.write(term)
        f.write("\n")


## Compound Words

In [37]:
compound4x4 = []

for line in list(open("CompoundWords.txt",encoding='cp1252')):
    if len(line.rstrip().partition(" ")[0]) == 4 and len(line.rstrip().partition(" ")[2]) == 5 and line.rstrip().count(" ") == 1:
        compound4x4.append(line.rstrip().replace(" ", "") + ".com")
        print(line.rstrip().replace(" ", "") + ".com")

1200hours.com
AGCAradar.com
AISIsteel.com
AbulAbbas.com
Adamtiler.com
AdarSheni.com
Agincourt.com
Aichmetal.com
AlanPaton.com
AlbaFlora.com
AlbaLonga.com
Almamater.com
Amenglass.com
AmerPicon.com
AminHafez.com
AmirDrory.com
Amurlilac.com
Amurmaple.com
AndyBenes.com
AndyRisau.com
Arabbread.com
ArgoNavis.com
Argonavis.com
AryaSamaj.com
AsiaMajor.com
AsiaMinor.com
AuntSally.com
AzizAhdab.com
BackRiver.com
Bailcourt.com
Bankstock.com
Bathbrick.com
Bathchair.com
Bathmetal.com
Bathstone.com
BearRiver.com
Bearstate.com
BeniHasan.com
Beniabbas.com
BillDoran.com
BillFusco.com
BillSikes.com
BinhXuyen.com
BintJbail.com
BlueCross.com
BlueEarth.com
BlueRider.com
BlueRidge.com
BlueScout.com
BlueVinny.com
Blueshirt.com
Brixscale.com
Buhlstage.com
BullMoose.com
Bullmoose.com
Burrtruss.com
Bushnegro.com
Caenstone.com
CapeDutch.com
CapeFlats.com
CapeVerde.com
CapeWrath.com
Capedutch.com
CarlIcahn.com
ChinHills.com
ChirRiver.com
CoalGrove.com
Cocklorel.com
CookInlet.com
CoteRotie.com
Crimtatar.com
CubaLi

golfgreen.com
golflinks.com
golfwidow.com
gonearrow.com
gonegoose.com
goodangel.com
goodbreak.com
goodcheap.com
goodcheer.com
goodchild.com
goodfaith.com
goodhumor.com
goodjudge.com
goodlooks.com
goodloser.com
goodmixer.com
goodnight.com
goodprice.com
goodsense.com
goodshape.com
goodsooth.com
goodspeed.com
goodsport.com
goodstory.com
goodsword.com
goodtaste.com
goodterms.com
goodthing.com
goodtimes.com
goodusage.com
goodvibes.com
goodwoman.com
goodworks.com
goonsquad.com
gowkstorm.com
grancassa.com
graybirch.com
graygoods.com
grayhairs.com
grayscale.com
grayskate.com
graytrout.com
graywhale.com
greyfriar.com
greygoose.com
greyheron.com
greywhale.com
gridmetal.com
gripblock.com
gripwheel.com
grosgrain.com
grospoint.com
growlight.com
growthick.com
growweary.com
growworse.com
grubplank.com
grubscrew.com
gulfrupee.com
gullgrass.com
guteNacht.com
hackboard.com
hackhouse.com
hackwatch.com
hairbraid.com
hairbrown.com
haircolor.com
hairglove.com
hairgrass.com
hairsheep.com
hairshirt.com
hairsi

skinlayer.com
skiphoist.com
slabhouse.com
sledknife.com
slipcloth.com
slipcoach.com
slipcover.com
slipgauge.com
slipglaze.com
slipjoint.com
slipnoose.com
slippanel.com
slipproof.com
slipshare.com
slipsheet.com
slitskirt.com
sloethorn.com
slopbasin.com
slopchest.com
slopchute.com
slopcloth.com
slopfrock.com
slowahead.com
slowcoach.com
slowloris.com
slowmarch.com
slowmatch.com
slugsnail.com
smutgrass.com
snagtooth.com
snapcatch.com
snapflask.com
snapgauge.com
snappoint.com
snaptable.com
snowapple.com
snowblast.com
snowboard.com
snowcover.com
snowcrust.com
snowdevil.com
snoweater.com
snowfence.com
snowfield.com
snowfinch.com
snowglare.com
snowglory.com
snowgoose.com
snowgrass.com
snowguard.com
snowknife.com
snowlight.com
snowlimit.com
snowmouse.com
snowplant.com
snowplume.com
snowpoppy.com
snowquail.com
snowscale.com
snowsheen.com
snowsleep.com
snowslope.com
snowslush.com
snowsmoke.com
snowsnake.com
snowstake.com
snowtrack.com
snowtrain.com
snowunder.com
snowwater.com
snowwhite.com
soapap

In [25]:
available_compound4x4 = bulk_check_availability(compound4x4)

AminHafez.com
AzizAhdab.com
AlanPaton.com
AmerPicon.com
Amurmaple.com
AGCAradar.com
AmirDrory.com
AndyBenes.com
Bailcourt.com
AndyRisau.com
Adamtiler.com
Aichmetal.com
Amurlilac.com
AdarSheni.com
-----------------------------------------------
BeniHasan.com
Burrtruss.com
Bathmetal.com
Beniabbas.com
Bushnegro.com
BinhXuyen.com
Brixscale.com
Buhlstage.com
-----------------------------------------------
DinoRadja.com
ChirRiver.com
DaveValle.com
CurtGiles.com
DaveFeitl.com
DedeAgach.com
DaveStieb.com
DeusRamos.com
Crimtatar.com
DVOPradar.com
Daunstage.com
Cocklorel.com
-----------------------------------------------
Finktruss.com
EricJelen.com
ElonMoreh.com
-----------------------------------------------
GaryRedus.com
GeneKeady.com
Granchimu.com
GlekBogaz.com
GarySuter.com
GregGagne.com
FoxeBasin.com
Hilograss.com
GlenBeall.com
-----------------------------------------------
JeanJadot.com
JohnDruce.com
JeanKibwe.com
JereGoyan.com
JohnDavys.com
Ionacross.com
HwaiRiver.com
------------------

folkcarol.com
flaxwheel.com
flaxolive.com
flatdrill.com
flueplate.com
flaxbrake.com
flaxplant.com
flealouse.com
-----------------------------------------------
footfront.com
footstove.com
forkcross.com
footmange.com
footscrew.com
fooltrick.com
forkmaker.com
-----------------------------------------------
froggrass.com
formfours.com
frogclock.com
formgenus.com
fowlgrass.com
-----------------------------------------------
fullvowel.com
fullrhyme.com
fusegauge.com
-----------------------------------------------
germgland.com
germlayer.com
gangdrill.com
gearoiler.com
germstock.com
gatetable.com
gangalong.com
gateshear.com
gangedger.com
gallmidge.com
gangtooth.com
-----------------------------------------------
gillcleft.com
gillframe.com
gillpouch.com
glennymph.com
gillscoop.com
gillplume.com
glueplant.com
goaljudge.com
gillhelix.com
gluebrush.com
-----------------------------------------------
gonearrow.com
-----------------------------------------------
goodsooth.com
gowkstorm.com
goodus

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [22]:
with open("testing.txt", "w") as f:
    for domain in available_compound4x4:
        f.write(domain.partition(".")[0].rstrip().lower())
        f.write("\n")

## Common Book Words

In [11]:
import socket
commonBookWords = []

for line in list(open("commonBookWords.txt",encoding='cp1252')):
    try: 
        socket.gethostbyname(line.rstrip() + '.com')
    except: 
        commonBookWords.append(line.rstrip())
        print(line.rstrip())

Decreasing Order of Occurrence

a
in
I
at
or
two
like
where
same
might
problem
Mrs.
night
something
room
boy
member
often
development
important
reach
rate
like
already
learn
federal
stop
strong
less
less
wear
committee
accept
station
suppose
product
assume
front
ten
Congress
rather
catch
achieve
length
western
inch
loss
fear
rest
respect
analysis
hall
director
wide
U.S.
division
rather
myself
authority


## Combining Adjective Files (failed)

In [2]:
old_adj = []
new_adj = []
        
with open("english-adjectives.txt", "r") as f:
    for line in f:
        old_adj.append(line.rstrip()) 

with open("curated_adjectives.txt", "r") as f:
    for line in f:
        if line.rstrip() not in old_adj:
            new_adj.append(line.rstrip())
        else:
            continue

print(len(new_adj))
print(len(old_adj))

2396
1411


In [3]:
new_adj

['shit',
 'smile',
 'dude',
 'stranger',
 'sauce',
 'idiot',
 'tomato',
 'narrator',
 'humor',
 'crap',
 'dumb',
 'goodness',
 'carbohydrate',
 'jaw',
 'sibling',
 'curiosity',
 'emotionally',
 'doorway',
 'thou',
 'eyebrow',
 'wallet',
 'scent',
 'shh',
 'utterly',
 'absurd',
 'pumpkin',
 'affection',
 'atheist',
 'sofa',
 'jerk',
 'tub',
 'obsession',
 'vinegar',
 'comet',
 'puppet',
 'mug',
 'peculiar',
 'overweight',
 'qualitative',
 'stink',
 'whisper',
 'satisfying',
 'mindset',
 'primitive',
 'inconsistent',
 'flashlight',
 'syrup',
 'dwarf',
 'remnant',
 'pod',
 'finely',
 'horrific',
 'strangely',
 'pancake',
 'prevalent',
 'unusually',
 'pasture',
 'pastry',
 'inhale',
 'awe',
 'obscure',
 'shining',
 'lace',
 'canned',
 'twisted',
 'razor',
 'righteous',
 'fungus',
 'ineffective',
 'disgust',
 'multitude',
 'woe',
 'respectable',
 'inflammation',
 'authoritarian',
 'wardrobe',
 'hue',
 'admittedly',
 'vegan',
 'intensely',
 'delusion',
 'stew',
 'adaptive',
 'vapor',
 'liber

## Fintech Names

In [36]:
pay_positive_ly = []
positive_ly = ['affably',
 'agilely',
 'amply',
 'audibly',
 'avidly',
 'capably',
 'cleanly',
 'clearly',
 'comely',
 'courtly',
 'eagerly',
 'evenly',
 'fairly',
 'finely',
 'fondly',
 'gaily',
 'gladly',
 'goodly',
 'handily',
 'happily',
 'holy',
 'ideally',
 'jolly',
 'justly',
 'keenly',
 'kindly',
 'lively',
 'lovably',
 'lovely',
 'lucidly',
 'merrily',
 'neatly',
 'nicely',
 'nobly',
 'notably',
 'openly',
 'orderly',
 'readily',
 'regally',
 'richly',
 'rightly',
 'safely',
 'sagely',
 'saintly',
 'smartly',
 'soundly',
 'stately',
 'suavely',
 'sweetly',
 'timely',
 'twinkly',
 'warmly',
 'wisely']

for line in positive_ly:
    pay_positive_ly.append("pay" + line.rstrip() + ".com")

pay_positive_ly

['payaffably.com',
 'payagilely.com',
 'payamply.com',
 'payaudibly.com',
 'payavidly.com',
 'paycapably.com',
 'paycleanly.com',
 'payclearly.com',
 'paycomely.com',
 'paycourtly.com',
 'payeagerly.com',
 'payevenly.com',
 'payfairly.com',
 'payfinely.com',
 'payfondly.com',
 'paygaily.com',
 'paygladly.com',
 'paygoodly.com',
 'payhandily.com',
 'payhappily.com',
 'payholy.com',
 'payideally.com',
 'payjolly.com',
 'payjustly.com',
 'paykeenly.com',
 'paykindly.com',
 'paylively.com',
 'paylovably.com',
 'paylovely.com',
 'paylucidly.com',
 'paymerrily.com',
 'payneatly.com',
 'paynicely.com',
 'paynobly.com',
 'paynotably.com',
 'payopenly.com',
 'payorderly.com',
 'payreadily.com',
 'payregally.com',
 'payrichly.com',
 'payrightly.com',
 'paysafely.com',
 'paysagely.com',
 'paysaintly.com',
 'paysmartly.com',
 'paysoundly.com',
 'paystately.com',
 'paysuavely.com',
 'paysweetly.com',
 'paytimely.com',
 'paytwinkly.com',
 'paywarmly.com',
 'paywisely.com']

In [37]:
bulk_check_availability(pay_positive_ly)

payagilely.com
payhandily.com
paycapably.com
paylucidly.com
payavidly.com
paylovably.com
paygoodly.com
paycourtly.com
payamply.com
paylovely.com
paygladly.com
payjustly.com
payevenly.com
payaudibly.com
paycleanly.com
payideally.com
paycomely.com
payfondly.com
payfinely.com
payholy.com
payhappily.com
payaffably.com
paygaily.com
paykeenly.com
paykindly.com
payeagerly.com
-----------------------------------------------
paysaintly.com
paynobly.com
paytwinkly.com
payregally.com
paysoundly.com
payrichly.com
payreadily.com
paynotably.com
paystately.com
paysuavely.com
paysagely.com
payneatly.com
paywarmly.com
paymerrily.com
paysweetly.com
paynicely.com
-----------------------------------------------


['payagilely.com',
 'payhandily.com',
 'paycapably.com',
 'paylucidly.com',
 'payavidly.com',
 'paylovably.com',
 'paygoodly.com',
 'paycourtly.com',
 'payamply.com',
 'paylovely.com',
 'paygladly.com',
 'payjustly.com',
 'payevenly.com',
 'payaudibly.com',
 'paycleanly.com',
 'payideally.com',
 'paycomely.com',
 'payfondly.com',
 'payfinely.com',
 'payholy.com',
 'payhappily.com',
 'payaffably.com',
 'paygaily.com',
 'paykeenly.com',
 'paykindly.com',
 'payeagerly.com',
 'paysaintly.com',
 'paynobly.com',
 'paytwinkly.com',
 'payregally.com',
 'paysoundly.com',
 'payrichly.com',
 'payreadily.com',
 'paynotably.com',
 'paystately.com',
 'paysuavely.com',
 'paysagely.com',
 'payneatly.com',
 'paywarmly.com',
 'paymerrily.com',
 'paysweetly.com',
 'paynicely.com']

In [16]:
prefix_word = []
word_suffix = []

for line in list(open("english-nouns.txt")):
    if len(line.rstrip()) <= 6:
        prefix_word.append(line.rstrip() + "ify.com")
        word_suffix.append("bank" + line.rstrip() + ".com")

In [17]:
available_adj_pay = bulk_check_availability(prefix_word)

theoryify.com
-----------------------------------------------
policyify.com
armyify.com
roleify.com
basisify.com
-----------------------------------------------
countyify.com
estateify.com
-----------------------------------------------
cousinify.com
actorify.com
entryify.com
memoryify.com
engineify.com
sampleify.com
singerify.com
agencyify.com
-----------------------------------------------
ratioify.com
poetryify.com
sectorify.com
farmerify.com
injuryify.com
volumeify.com
-----------------------------------------------
tongueify.com
affairify.com
pianoify.com
drawerify.com
potatoify.com
cookieify.com
-----------------------------------------------
uncleify.com
whileify.com
-----------------------------------------------
-----------------------------------------------
pieceify.com
-----------------------------------------------
figureify.com
ruleify.com
-----------------------------------------------
insideify.com
-----------------------------------------------
bottomify.com
mediumify.

In [64]:
available_pay_adj = bulk_check_availability(word_suffix)

bankeased.com
bankattune.com
bankdeadon.com
bankclassy.com
bankcajole.com
bankdote.com
bankdanke.com
bankblithe.com
bankcherub.com
bankcute.com
bankeases.com
bankedify.com
bankcatchy.com
bankavidly.com
bankcozy.com
bankbrainy.com
bankcushy.com
bankacumen.com
bankdanken.com
bankdazzle.com
bankamazes.com
bankamaze.com
bankaver.com
bankbeckon.com
bankcomfy.com
bankavid.com
bankcohere.com
-----------------------------------------------
bankexult.com
bankextoll.com
bankfeisty.com
bankhooray.com
bankflashy.com
bankfervid.com
bankelan.com
bankendear.com
banknobly.com
bankenrapt.com
banklaud.com
bankilu.com
bankgeeky.com
bankkudos.com
bankgooood.com
bankgoood.com
bankfav.com
bankfave.com
bankglitz.com
bankftw.com
bankgutsy.com
bankexcels.com
bankextol.com
bankelate.com
-----------------------------------------------
bankpep.com
bankposh.com
bankoutwit.com
banktrendy.com
banksucces.com
bankupbeat.com
bankswanky.com
banksnappy.com
bankproven.com
bankscenic.com
bankpoise.com
bankpeach.com
bankpep

## Misspelled Words

In [21]:
misspelled = []

for line in list(open("english-words.txt")):
    if len(line.rstrip()) <= 12:
        if line.rstrip()[-2:] == "er":
            misspelled.append(line.rstrip()[:-2] + "r.com")
        elif line.rstrip()[-3:] == "ure":
            misspelled.append(line.rstrip()[:-3] + "r.com")
        elif "ph" in line.rstrip():
            misspelled.append(line.rstrip()[:-3] + "r.com")
        else:
            continue

In [22]:
bulk_check_availability(misspelled)

miniatr.com
meagr.com
-----------------------------------------------
-----------------------------------------------
Xzavir.com
-----------------------------------------------
temperatr.com
literatr.com
grandmothr.com
Eliezr.com
Konnr.com
-----------------------------------------------
-----------------------------------------------
-----------------------------------------------
grandfathr.com
serialnumbr.com
devicedrivr.com
realnumbr.com
wholenumbr.com
-----------------------------------------------
remoteusr.com
-----------------------------------------------
agricultr.com
earlir.com
-----------------------------------------------
neithr.com
-----------------------------------------------
-----------------------------------------------
-----------------------------------------------
legislatr.com
soldir.com
expenditr.com
-----------------------------------------------
rochestr.com
-----------------------------------------------
viewpictr.com
treasurr.com
---------------------------

['miniatr.com',
 'meagr.com',
 'Xzavir.com',
 'temperatr.com',
 'literatr.com',
 'grandmothr.com',
 'Eliezr.com',
 'Konnr.com',
 'grandfathr.com',
 'serialnumbr.com',
 'devicedrivr.com',
 'realnumbr.com',
 'wholenumbr.com',
 'remoteusr.com',
 'agricultr.com',
 'earlir.com',
 'neithr.com',
 'legislatr.com',
 'soldir.com',
 'expenditr.com',
 'rochestr.com',
 'viewpictr.com',
 'treasurr.com',
 'remaindr.com',
 'enclosr.com',
 'thereaftr.com',
 'powersellr.com',
 'westminstr.com',
 'groundwatr.com',
 'practitionr.com',
 'worcestr.com',
 'reassr.com']

In [31]:
misspelled = []

for line in list(open("english-words.txt")):
    if len(line.rstrip()) <= 7 and len(line.rstrip()) > 5:
        if "i" in line.rstrip():
            misspelled.append(line.rstrip().replace("i", "y") + ".com")
        else:
            continue

In [34]:
bulk_check_availability(misspelled)

acydyc.com
anxyous.com
babyysh.com
amusyng.com
blaryng.com
boryng.com
bruysed.com
boylyng.com
achyng.com
buzzyng.com
-----------------------------------------------
foolysh.com
fryzzy.com
dotyng.com
dympled.com
genuyne.com
feysty.com
fyttyng.com
faylyng.com
frygyd.com
-----------------------------------------------
jyttery.com
guylty.com
ydyotyc.com
ynborn.com
ympysh.com
yllegal.com
lympyng.com
hummyng.com
glaryng.com
lastyng.com
ympure.com
leadyng.com
knowyng.com
-----------------------------------------------
rynged.com
putryd.com
noxyous.com
quyrky.com
pryckly.com
pytyful.com
seryous.com
shryll.com
-----------------------------------------------
teemyng.com
tryvyal.com
tedyous.com
stayned.com
spyffy.com
tragyc.com
-----------------------------------------------
foreygn.com
ynytyal.com
worryed.com
wyndyng.com
vycyous.com
yawnyng.com
untydy.com
unlyned.com
weyghty.com
warlyke.com
welllyt.com
untryed.com
wyllyng.com
-----------------------------------------------
Aalyyah.com
Lyllyan.co

varyyng.com
ynsulyn.com
smylyes.com
hyttyng.com
coatyng.com
beyngs.com
squyrt.com
framyng.com
varyes.com
-----------------------------------------------
pyerce.com
pushyng.com
wyryng.com
pyckyng.com
ynquyre.com
penguyn.com
nvydya.com
bryefs.com
-----------------------------------------------
obesyty.com
touryng.com
cardyac.com
skatyng.com
yyelds.com
paynful.com
lyberya.com
suyted.com
tytled.com
bufyng.com
blowyng.com
pullyng.com
ratyos.com
-----------------------------------------------
weldyng.com
assysts.com
exysted.com
myssyle.com
lockyng.com
pursuyt.com
maylman.com
fygured.com
statyng.com
backyng.com
vyewers.com
-----------------------------------------------
fabrycs.com
wyshyng.com
matyng.com
halyfax.com
arryves.com
quyetly.com
Russyan.com
jumpyng.com
yndyces.com
hygyene.com
vyolate.com
-----------------------------------------------
Jewysh.com
crytyc.com
aholyc.com
tronycs.com
Brytysh.com
bombyng.com
omatyc.com
sdyrect.com
-----------------------------------------------
forkyds.c

['acydyc.com',
 'anxyous.com',
 'babyysh.com',
 'amusyng.com',
 'blaryng.com',
 'boryng.com',
 'bruysed.com',
 'boylyng.com',
 'achyng.com',
 'buzzyng.com',
 'foolysh.com',
 'fryzzy.com',
 'dotyng.com',
 'dympled.com',
 'genuyne.com',
 'feysty.com',
 'fyttyng.com',
 'faylyng.com',
 'frygyd.com',
 'jyttery.com',
 'guylty.com',
 'ydyotyc.com',
 'ynborn.com',
 'ympysh.com',
 'yllegal.com',
 'lympyng.com',
 'hummyng.com',
 'glaryng.com',
 'lastyng.com',
 'ympure.com',
 'leadyng.com',
 'knowyng.com',
 'rynged.com',
 'putryd.com',
 'noxyous.com',
 'quyrky.com',
 'pryckly.com',
 'pytyful.com',
 'seryous.com',
 'shryll.com',
 'teemyng.com',
 'tryvyal.com',
 'tedyous.com',
 'stayned.com',
 'spyffy.com',
 'tragyc.com',
 'foreygn.com',
 'ynytyal.com',
 'worryed.com',
 'wyndyng.com',
 'vycyous.com',
 'yawnyng.com',
 'untydy.com',
 'unlyned.com',
 'weyghty.com',
 'warlyke.com',
 'welllyt.com',
 'untryed.com',
 'wyllyng.com',
 'Aalyyah.com',
 'Lyllyan.com',
 'Wyllyam.com',
 'Lylyana.com',
 'Ezekyel.

In [35]:
misspelled = []

for line in list(open("english-words.txt")):
    if len(line.rstrip()) <= 8:
        if line.rstrip()[-1:] == "k":
            misspelled.append(line.rstrip()[:-1] + "c.com")
        else:
            continue

In [36]:
bulk_check_availability(misspelled)

Mavericc.com
Kendricc.com
Derricc.com
-----------------------------------------------
Merricc.com
-----------------------------------------------
-----------------------------------------------
capslocc.com
numlocc.com
-----------------------------------------------
norfolc.com
cookbooc.com
bangkoc.com
handbooc.com
-----------------------------------------------
sbooc.com
-----------------------------------------------
-----------------------------------------------
overlooc.com
-----------------------------------------------


['Mavericc.com',
 'Kendricc.com',
 'Derricc.com',
 'Merricc.com',
 'capslocc.com',
 'numlocc.com',
 'norfolc.com',
 'cookbooc.com',
 'bangkoc.com',
 'handbooc.com',
 'sbooc.com',
 'overlooc.com']

## Commonly occuring phrases

In [57]:
bagofwords = []
word_length = 4

for line in list(open("startup_names/finance-processed.txt")):
    for i in range(len(line.rstrip()) - (word_length-1)):
        bagofwords.append(line.rstrip()[i] + line.rstrip()[i+1] + line.rstrip()[i+2] + line.rstrip()[i+3])

In [58]:
from collections import Counter

for i in range(20):
    print(Counter(bagofwords).most_common(20)[i][0])

fina
nanc
inan
ance
apit
capi
pita
ital
cred
redi
lend
ding
bank
edit
anci
tion
ment
ndin
oney
ncia


In [7]:
top10k_startups = []
word_length = 6

for line in list(open("top10kstartups.txt")):
    word = line.rstrip().lower().partition(".")[0]
    for i in range(len(word) - (word_length-1)):
        top10k_startups.append(word[i] + word[i+1] + word[i+2] + word[i+3] + word[i+4] + word[i+5])
    
len(top10k_startups)

50419

In [8]:
from collections import Counter

for i in range(20):
    print(Counter(top10k_startups).most_common(20)[i][0])

health
olutio
lution
soluti
utions
global
pharma
servic
ervice
onsult
energy
consul
partne
artner
capita
digita
rvices
apital
igital
nsulti
