In [17]:
#!pip install wordfreq
#!pip install syllables
#!pip install pandas

from wordfreq import zipf_frequency
import syllables
from pandas import DataFrame

# Selecting rare adverbs

### zipf frequencies:
- zipf frequency rare adverbs [1.0 - 3.0]
- zipf frequency common adverbs [3.0 - 5.0]

### addition criteria for common adverb selection:
- https://www.englishprofile.org/wordlists/evp
- filter settings:
    - proficiency levels: A1 - B2
    - category: words
    - word class: adverb
    - suffix: ly


### attention: the automatic split into syllables is not always correct!

In [18]:
# create a list of rare/difficult adverbs of latin origin
rare_words = ["equitably", 
    "furtively", 
    "affably", 
    "dexterously", 
    "tenaciously", 
    "incredulously", 
    "sedulously", 
    "audaciously", 
    "copiously", 
    "amiably", 
    "belligerently", 
    "candidly", 
    "colloquially", 
    "conspicuously", 
    "dexterously", 
    "diligently", 
    "egregiously", 
    "ferociously", 
    "insidiously", 
    "morbidly", 
    "mundanely", 
    "sedulously",
    "oppressively", 
    "petulantly", 
    "assiduously",
    "placidly", 
    "nefariously", 
    "sagaciously", 
    "tacitornly", 
    "vehemently", 
    "incessantly", 
    "abberantly", 
    "ardently", 
    "magnanimously", 
    "immutably", 
    "languidly", 
    "obliquely", 
    "vicariously"]

In [19]:
# create a dict of words and their frequencies
freqs_dict = {w: zipf_frequency(w, "en") for w in rare_words if zipf_frequency(w, "en") < 3.0 and zipf_frequency(w, "en") > 1.0}
print(freqs_dict)

# create a dict of words and their lengths
lengths_dict = {w: len(w) for w in rare_words if zipf_frequency(w, "en") < 3.0 and zipf_frequency(w, "en") > 1.0}
print(lengths_dict)

# create a dict of words and their syllable counts
syllables_dict = {w: syllables.estimate(w) for w in rare_words if zipf_frequency(w, "en") < 3.0 and zipf_frequency(w, "en") > 1.0}
print(syllables_dict)

print(len(freqs_dict))

{'equitably': 2.4, 'furtively': 2.23, 'affably': 1.68, 'dexterously': 1.43, 'tenaciously': 2.19, 'incredulously': 2.22, 'sedulously': 1.2, 'audaciously': 1.87, 'copiously': 2.05, 'amiably': 2.02, 'belligerently': 1.9, 'candidly': 2.76, 'colloquially': 2.69, 'conspicuously': 2.88, 'egregiously': 2.17, 'ferociously': 2.46, 'insidiously': 2.09, 'morbidly': 2.58, 'mundanely': 1.19, 'oppressively': 1.97, 'petulantly': 1.7, 'assiduously': 2.44, 'placidly': 1.8, 'nefariously': 1.35, 'incessantly': 2.91, 'ardently': 2.44, 'magnanimously': 1.73, 'immutably': 1.47, 'languidly': 2.04, 'obliquely': 2.7, 'vicariously': 2.72}
{'equitably': 9, 'furtively': 9, 'affably': 7, 'dexterously': 11, 'tenaciously': 11, 'incredulously': 13, 'sedulously': 10, 'audaciously': 11, 'copiously': 9, 'amiably': 7, 'belligerently': 13, 'candidly': 8, 'colloquially': 12, 'conspicuously': 13, 'egregiously': 11, 'ferociously': 11, 'insidiously': 11, 'morbidly': 8, 'mundanely': 9, 'oppressively': 12, 'petulantly': 10, 'ass

In [20]:
# create a dataframe of words, their frequencies, lengths, and syllable counts
df = DataFrame.from_dict(freqs_dict, orient='index', columns=['frequency'])
df.columns.name = 'rare_word'
df['length'] = df.index.map(lengths_dict)
df['syllables'] = df.index.map(syllables_dict)
df.sort_values(by=['length'], inplace=True)
df

rare_word,frequency,length,syllables
affably,1.68,7,3
amiably,2.02,7,3
morbidly,2.58,8,3
placidly,1.8,8,3
candidly,2.76,8,3
ardently,2.44,8,3
obliquely,2.7,9,4
mundanely,1.19,9,4
equitably,2.4,9,4
immutably,1.47,9,4


# Selecting common adverbs

In [21]:
common_words = [
    "sadly",
    "loudly",
    "secretly",
    "carefully",
    "possibly",
    "kindly",
    "personally",
    "jealously",
    "conveniently",
    "honestly",
    "suddenly",
    "actually",
    "finally",
    "really",
    "quickly",
    "easily",
    "simply",
    "leisurely",
    "carelessly",
    "creatively",
    "effortlessly",
    "enthusiastically",
    "basically",
    "absolutely",
    "definitely",
    "literally",
    "completely",
    "occasionally",
    "totally",
    "seriously",
    "constantly",
    "obviously",
    "clearly",
    "apparently",
    "hopefully",
    "unusually",
    "confidently",
    "regularly",
    "continuously",
    "nervously",
    "unexpectedly",
    "frequently",
    "independently",
    "foolishly",
    "carefully",
    "dangerously",
    "appropriately",
    "efficiently",
    "desperately",
    "unusually",
    "generally",
    "curiously",
    "impatiently",
    "suspiciously",
    "accidentally",
    "cheerfully",
    "happily",
    "calmly",
    "angrily",
    "permanently",
    "silently",
    "successfully",
    "simultaneously",
    "potentially", 
    "gently",
    "safely",
    "badly",
    "passionately",
    "individually"
]

In [22]:
# create a dict of words and their frequencies
freqs_dict_common = {w: zipf_frequency(w, "en") for w in common_words if zipf_frequency(w, "en") > 3.0}
print(freqs_dict_common)

# create a dict of words and their lengths
lengths_dict_common = {w: len(w) for w in common_words if zipf_frequency(w, "en") > 3.0}
print(lengths_dict_common)

# create a dict of words and their syllable counts
syllables_dict_common = {w: syllables.estimate(w) for w in common_words if zipf_frequency(w, "en") > 3.0}
print(syllables_dict_common)

{'sadly': 4.18, 'loudly': 3.79, 'secretly': 4.01, 'carefully': 4.46, 'possibly': 4.77, 'kindly': 4.0, 'personally': 4.57, 'conveniently': 3.58, 'honestly': 4.69, 'suddenly': 4.69, 'actually': 5.49, 'finally': 5.17, 'really': 5.97, 'quickly': 5.0, 'easily': 4.91, 'simply': 5.08, 'leisurely': 3.05, 'creatively': 3.1, 'effortlessly': 3.22, 'enthusiastically': 3.25, 'basically': 4.63, 'absolutely': 4.98, 'definitely': 4.96, 'literally': 4.91, 'completely': 5.0, 'occasionally': 4.39, 'totally': 4.83, 'seriously': 4.94, 'constantly': 4.53, 'obviously': 4.8, 'clearly': 4.92, 'apparently': 4.8, 'hopefully': 4.56, 'unusually': 3.76, 'confidently': 3.41, 'regularly': 4.52, 'continuously': 3.97, 'nervously': 3.27, 'unexpectedly': 3.71, 'frequently': 4.56, 'independently': 4.06, 'dangerously': 3.47, 'appropriately': 3.74, 'efficiently': 3.79, 'desperately': 3.98, 'generally': 4.95, 'curiously': 3.2, 'suspiciously': 3.08, 'accidentally': 4.22, 'happily': 4.11, 'calmly': 3.56, 'angrily': 3.34, 'perm

In [23]:
# create a dataframe of common words, their frequencies, lengths, and syllable counts
df_common = DataFrame.from_dict(freqs_dict_common, orient='index', columns=['frequency'])
df_common.columns.name = 'common_word'
df_common['length'] = df_common.index.map(lengths_dict_common)
df_common['syllables'] = df_common.index.map(syllables_dict_common)
df_common.sort_values(by=['length'], inplace=True)
df_common

common_word,frequency,length,syllables
sadly,4.18,5,2
badly,4.44,5,2
loudly,3.79,6,2
safely,4.22,6,3
gently,4.08,6,2
...,...,...,...
individually,3.90,12,5
appropriately,3.74,13,5
independently,4.06,13,5
simultaneously,4.17,14,5


In [24]:
# export the dataframes to csv files
df.to_csv('rare_words.csv')
df_common.to_csv('common_words.csv')

# export the dataframes to json files
df.to_json('rare_words.json')
df_common.to_json('common_words.json')

# export the dataframes to html files
df.to_html('rare_words.html')
df_common.to_html('common_words.html')

In [25]:
# test
zipf_frequency("sedulously", "en")
zipf_frequency("unexpectedly", "en", wordlist="large")

3.71

In [26]:
#  format: [rare_sentence, common_sentence]

pair_1 = ["They looked at each other amiably.", "They looked at each other happily."] # 7/3, 7/3
pair_2 = ["The experienced doctor performed the operation dexterously.", "The experienced doctor performed the operation sucessfully"] # 11/4, 12/4
pair_3 = ["She did not move, continuing to stare at me belligerently.", "She did not move, continuing to stare at me passionately."] # 13/5, 12/5
pair_4 = ["The dogs barked ferociously.", "The dogs barked unexpectedly."]
pair_5 = ["The man on the boat waved affably", "The man on the boat waved angrily."] # 7/3, 7/3
pair_6 = ["One important person has been conspicuously absent", "One important person has been continuously absent."] # 13/4, 12/4
pair_7 = ["By the swimming pool, the neighbour waited languidly.", "By the swimming pool, the neighbour waited nervously."] # 10/4, 10/4
pair_8 = ["It is so easy to be vicariously charitable.", "It is so easy to be occasionally charitable."] # 11/4, 12/5
pair_9 = ["She clings to her marriage tenaciously.", "She clings to her marriage desperately."] # 11/4, 11/5
pair_10 = ["In this country, racism has been spreading insidiously.", "In this country, racism is spreading constantly."]
pair_11 = ["The little girl screamed and stamped her foot petulantly.", "The little girl screamed and stamped her foot emotionally."] # 10/4, 11/5
pair_12 = ["The guests avoided all political discussion sedulously.", "The guests avoided all political discussion carefully."] # 10/4, 9/4
pair_13 = ["The day started mundanely.", "The day started normally."] # 9/3, 8/4
pair_14 = ["She grabbed the microphone and stepped onto the stage audaciously.", "She grabbed the microphone stepped onto the stage confidently."] # 11/4, 11/4
pair_15 = ["They have been misleading you most egregiously.", "They have been misleading you most unacceptably."]
pair_16 = ["To meet the deadline, the team worked assiduously.", "To meet the deadline, the team worked efficiently."]
pair_17 = ["The student studied for the exam sedulously.", "The student studied for the exam carefully."]

