In [17]:
import io
# import numpy as np
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer

def get_most_common_words(textStream, top_n=100):
    """Return the most frequent words of a text, English stop words excluded.

    Parameters
    ----------
    textStream : str
        The raw text of one document.
    top_n : int or None, optional
        Number of (word, count) pairs to return. Defaults to 100, the value
        that used to be hard-coded. ``None`` returns every word.

    Returns
    -------
    list of (str, int)
        (word, count) pairs ordered from most to least frequent, as produced
        by ``collections.Counter.most_common``.
    """

    #init vectorizer: single words only (unigrams), English stop words dropped
    ngram_vectors = CountVectorizer(analyzer='word',
                                    ngram_range=(1, 1),
                                    min_df=1,
                                    stop_words='english')

    #fit_transform expects an iterable of documents, so wrap the single text
    documents = [textStream]

    #build vectors
    X = ngram_vectors.fit_transform(documents)

    #build ngram vocabulary
    #get_feature_names() was removed in scikit-learn 1.2; use its replacement
    #when available and fall back to the old name on older installations
    if hasattr(ngram_vectors, 'get_feature_names_out'):
        vocab = ngram_vectors.get_feature_names_out()
    else:
        vocab = ngram_vectors.get_feature_names()

    #get ngram counts: sum over documents, flatten to 1-D, and convert to
    #plain Python ints so the result is directly JSON-serializable
    counts = X.sum(axis=0).A1.tolist()

    #get frequency distribution of all ngrams and their respective counts
    freq_distribution = Counter(dict(zip(vocab, counts)))

    #get the top_n most common ngrams, each with its respective count
    return freq_distribution.most_common(top_n)

def read_file(text_file):
    """Read a UTF-8 encoded text file and return its whole contents.

    Parameters
    ----------
    text_file : str
        Path of the file to read.

    Returns
    -------
    str
        The complete decoded contents of the file.
    """
    # The context manager closes the handle even when read() raises
    # (e.g. on a decoding error), which the manual open/close pair did not.
    with open(text_file, mode='r', encoding='UTF-8') as file:
        return file.read()

def format_tuple(input_tuple):
    """Convert a (word, count) pair into a word-cloud entry.

    The first element becomes the 'text' value unchanged; the second element
    is converted with ``str`` and stored under 'value'.
    """
    word = input_tuple[0]
    count = input_tuple[1]
    return {'text': word, 'value': str(count)}



In [23]:
#get vocab and counts for Kant texts
#each *_text holds one file's contents; each *_ngrams holds the result of
#get_most_common_words for that text

purereason_text = read_file('texts/kant/purereason.txt')
purereason_ngrams = get_most_common_words(purereason_text)

practicalreason_text = read_file('texts/kant/practicalreason.txt')
practicalreason_ngrams = get_most_common_words(practicalreason_text)

judgment_text = read_file('texts/kant/judgment.txt')
judgment_ngrams = get_most_common_words(judgment_text)

morals_text = read_file('texts/kant/metaphysicsofmorals.txt')
morals_ngrams = get_most_common_words(morals_text)

prolegomena_text = read_file('texts/kant/prolegomena.txt')
prolegomena_ngrams = get_most_common_words(prolegomena_text)

#add all kant texts into a single textstream
#join with a newline rather than bare '+': without a separator the last word
#of one file can fuse with the first word of the next into a bogus token
kant_text = '\n'.join([purereason_text,
                       practicalreason_text,
                       judgment_text,
                       morals_text,
                       prolegomena_text])
kant_ngrams = get_most_common_words(kant_text)

#build dictionaries: one list of {'text', 'value'} entries per work
purereason_dictlist = [format_tuple(item) for item in purereason_ngrams]
practicalreason_dictlist = [format_tuple(item) for item in practicalreason_ngrams]
morals_dictlist = [format_tuple(item) for item in morals_ngrams]
prolegomena_dictlist = [format_tuple(item) for item in prolegomena_ngrams]
judgment_dictlist = [format_tuple(item) for item in judgment_ngrams]
kant_dictlist = [format_tuple(item) for item in kant_ngrams]



In [32]:
#get vocab and counts for hume texts
#each *_text holds one file's contents; each *_ngrams holds the result of
#get_most_common_words for that text

enquiry_text = read_file('texts/hume/enquiryhumanunderstanding.txt')
enquiry_ngrams = get_most_common_words(enquiry_text)

humannature_text = read_file('texts/hume/humannature.txt')
humannature_ngrams = get_most_common_words(humannature_text)

naturalreligion_text = read_file('texts/hume/naturalreligion.txt')
naturalreligion_ngrams = get_most_common_words(naturalreligion_text)

humemorals_text = read_file('texts/hume/principlesofmorals.txt')
humemorals_ngrams = get_most_common_words(humemorals_text)

#add all hume texts into a single textstream
#join with a newline rather than bare '+': without a separator the last word
#of one file can fuse with the first word of the next into a bogus token
hume_text = '\n'.join([enquiry_text,
                       humannature_text,
                       naturalreligion_text,
                       humemorals_text])
hume_ngrams = get_most_common_words(hume_text)

#build dictionaries: one list of {'text', 'value'} entries per work
enquiry_dictlist = [format_tuple(item) for item in enquiry_ngrams]
humannature_dictlist = [format_tuple(item) for item in humannature_ngrams]
naturalreligion_dictlist = [format_tuple(item) for item in naturalreligion_ngrams]
humemorals_dictlist = [format_tuple(item) for item in humemorals_ngrams]
hume_dictlist = [format_tuple(item) for item in hume_ngrams]



In [34]:
import json

# Map each display label to the word/count entries built in the cells above.
# Insertion order is the order the labels appear in the written JSON.
json_file = {
    # --- Hume ---
    'Hume: All': hume_dictlist,
    'Hume: An Enquiry Concerning Human Understanding': enquiry_dictlist,
    'Hume: An Enquiry Concerning the Principles of Morals': humemorals_dictlist,
    'Hume: Dialogues Concerning Natural Religion': naturalreligion_dictlist,
    'Hume: Treatise of Human Nature': humannature_dictlist,
    # --- Kant ---
    'Kant: All': kant_dictlist,
    'Kant: Critique of Judgment': judgment_dictlist,
    'Kant: Critique of Pure Reason': purereason_dictlist,
    'Kant: Critique of Practical Reason': practicalreason_dictlist,
    'Kant: Grounding for a Metaphysics of Morals': morals_dictlist,
    # NOTE(review): "Metaphyics" looks like a typo for "Metaphysics"; kept
    # byte-for-byte because whatever reads wordcloud.json may key on this
    # label -- confirm before correcting.
    'Kant: Prolegomena for any Future Metaphyics': prolegomena_dictlist,
}

# Serialize with non-ASCII characters preserved and a 4-space indent, then
# write the result as UTF-8.
with open('static/wordcloud.json', 'w', encoding='UTF-8') as f:
    f.write(json.dumps(json_file, ensure_ascii=False, indent=4))
