In [1]:
import io
from collections import Counter

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

# Top-100 non-stop-word unigram counts for the Critique of Pure Reason.
infile = 'kanttext/purereason.txt'

# Unigram bag-of-words counter with scikit-learn's built-in English stop list.
ngram_vectorizer = CountVectorizer(analyzer='word',
                                   ngram_range=(1, 1),
                                   min_df=1,
                                   stop_words='english')

# Iterating the file handle feeds the vectorizer one line per "document";
# the handle is only needed while the matrix is being built.
with io.open(infile, 'r', encoding='utf8') as fin:
    X = ngram_vectorizer.fit_transform(fin)

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back to the old name on releases that predate it.
if hasattr(ngram_vectorizer, 'get_feature_names_out'):
    vocab = list(ngram_vectorizer.get_feature_names_out())
else:
    vocab = ngram_vectorizer.get_feature_names()

# Column sums of the line-by-term matrix = total occurrences of each term.
counts = X.sum(axis=0).A1

# .tolist() turns numpy integers into plain ints so the tuples print as
# ('reason', 1174) regardless of the installed numpy version.
freq_distribution = Counter(dict(zip(vocab, counts.tolist())))
purereason = freq_distribution.most_common(100)


[('reason', 1174), ('conception', 916), ('experience', 819), ('time', 720), ('object', 717), ('conceptions', 699), ('pure', 697), ('understanding', 682), ('intuition', 628), ('empirical', 600), ('possible', 593), ('phenomena', 584), ('priori', 580), ('existence', 574), ('given', 516), ('cognition', 510), ('transcendental', 509), ('unity', 495), ('objects', 484), ('nature', 474), ('merely', 451), ('things', 425), ('necessary', 395), ('relation', 392), ('conditions', 389), ('space', 379), ('means', 369), ('world', 365), ('consequently', 353), ('principles', 352), ('principle', 337), ('idea', 332), ('condition', 331), ('possibility', 325), ('series', 325), ('mere', 324), ('general', 320), ('case', 309), ('cause', 307), ('thing', 297), ('does', 292), ('synthesis', 291), ('thought', 291), ('form', 290), ('subject', 277), ('according', 275), ('far', 264), ('sense', 259), ('proposition', 239), ('sensuous', 239), ('reality', 232), ('representation', 231), ('say', 231), ('certain', 228), ('laws

In [None]:
def most_common_words(path, top_n=100, stop_words='english'):
    """Return the ``top_n`` most frequent unigrams in a UTF-8 text file.

    Mirrors the per-text cells in this notebook: the file is read line by
    line, each line is treated as one document by ``CountVectorizer``, and the
    per-term counts are summed over all lines.

    Parameters
    ----------
    path : str
        Path of the text file to analyse.
    top_n : int, optional
        Number of (word, count) pairs to return. Defaults to 100.
    stop_words : str or list or None, optional
        Passed straight to ``CountVectorizer(stop_words=...)``.

    Returns
    -------
    list of (str, int)
        Pairs ordered from most to least frequent.
    """
    vectorizer = CountVectorizer(analyzer='word',
                                 ngram_range=(1, 1),
                                 min_df=1,
                                 stop_words=stop_words)

    with io.open(path, 'r', encoding='utf8') as fin:
        term_matrix = vectorizer.fit_transform(fin)

    # get_feature_names() was removed in scikit-learn 1.2; use the
    # replacement when it exists and the old name otherwise.
    if hasattr(vectorizer, 'get_feature_names_out'):
        terms = list(vectorizer.get_feature_names_out())
    else:
        terms = vectorizer.get_feature_names()

    # Column sums give the total count per term; tolist() yields plain ints.
    totals = term_matrix.sum(axis=0).A1.tolist()
    return Counter(dict(zip(terms, totals))).most_common(top_n)

In [2]:
# Top-100 non-stop-word unigram counts for the Critique of Practical Reason.
infile = 'kanttext/practicalreason.txt'

# Unigram bag-of-words counter with scikit-learn's built-in English stop list.
ngram_vectorizer = CountVectorizer(analyzer='word',
                                   ngram_range=(1, 1),
                                   min_df=1,
                                   stop_words='english')

# Iterating the file handle feeds the vectorizer one line per "document";
# the handle is only needed while the matrix is being built.
with io.open(infile, 'r', encoding='utf8') as fin:
    X = ngram_vectorizer.fit_transform(fin)

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back to the old name on releases that predate it.
if hasattr(ngram_vectorizer, 'get_feature_names_out'):
    vocab = list(ngram_vectorizer.get_feature_names_out())
else:
    vocab = ngram_vectorizer.get_feature_names()

# Column sums of the line-by-term matrix = total occurrences of each term.
counts = X.sum(axis=0).A1

# .tolist() turns numpy integers into plain ints so the tuples print as
# ('reason', 609) regardless of the installed numpy version.
freq_distribution = Counter(dict(zip(vocab, counts.tolist())))
practicalreason = freq_distribution.most_common(100)
print(practicalreason)

[('reason', 609), ('law', 517), ('practical', 476), ('moral', 338), ('pure', 313), ('principle', 234), ('object', 174), ('world', 142), ('freedom', 140), ('principles', 140), ('nature', 138), ('possible', 127), ('determining', 122), ('respect', 122), ('existence', 121), ('good', 121), ('happiness', 118), ('causality', 116), ('rational', 113), ('use', 112), ('man', 110), ('knowledge', 106), ('priori', 106), ('merely', 104), ('feeling', 100), ('duty', 99), ('laws', 99), ('case', 98), ('summum', 98), ('time', 98), ('bonum', 97), ('paragraph', 95), ('action', 94), ('objects', 94), ('gutenberg', 93), ('speculative', 93), ('condition', 92), ('far', 88), ('order', 87), ('project', 87), ('means', 86), ('necessary', 86), ('objective', 86), ('notion', 85), ('possibility', 85), ('make', 84), ('morality', 82), ('sense', 82), ('consequently', 81), ('sensible', 81), ('subject', 78), ('faculty', 77), ('determination', 76), ('does', 76), ('self', 76), ('concept', 75), ('conception', 75), ('things', 75

In [3]:
# Top-100 non-stop-word unigram counts for the Metaphysics of Morals text.
infile = 'kanttext/metaphysicsofmorals.txt'

# Unigram bag-of-words counter with scikit-learn's built-in English stop list.
ngram_vectorizer = CountVectorizer(analyzer='word',
                                   ngram_range=(1, 1),
                                   min_df=1,
                                   stop_words='english')

# Iterating the file handle feeds the vectorizer one line per "document";
# the handle is only needed while the matrix is being built.
with io.open(infile, 'r', encoding='utf8') as fin:
    X = ngram_vectorizer.fit_transform(fin)

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back to the old name on releases that predate it.
if hasattr(ngram_vectorizer, 'get_feature_names_out'):
    vocab = list(ngram_vectorizer.get_feature_names_out())
else:
    vocab = ngram_vectorizer.get_feature_names()

# Column sums of the line-by-term matrix = total occurrences of each term.
counts = X.sum(axis=0).A1

# .tolist() turns numpy integers into plain ints so the (word, count) tuples
# print and serialize the same way regardless of the installed numpy version.
freq_distribution = Counter(dict(zip(vocab, counts.tolist())))
morals = freq_distribution.most_common(100)

In [4]:
# Union of the three top-100 lists.
# NOTE(review): members are (word, count) tuples, not bare words, so a word
# that is frequent in several texts is kept once per distinct count -- confirm
# this is intended before reading the size as "number of distinct words".
total_words = set(purereason + practicalreason + morals)

# A bare len(...) in the middle of a cell is evaluated and thrown away;
# print it so the size is actually shown alongside the set.
print(len(total_words))
print(total_words)



{('project', 87), ('universal', 59), ('object', 717), ('principles', 352), ('called', 57), ('objective', 190), ('philosophy', 49), ('value', 23), ('sphere', 213), ('conditions', 59), ('law', 174), ('just', 34), ('idea', 332), ('substance', 157), ('moral', 338), ('action', 94), ('say', 31), ('inclinations', 28), ('simply', 22), ('place', 169), ('shall', 149), ('representations', 188), ('does', 76), ('beings', 59), ('pure', 313), ('nature', 138), ('moral', 90), ('ends', 61), ('means', 86), ('priori', 37), ('nature', 474), ('summum', 98), ('determined', 56), ('form', 68), ('subject', 277), ('parts', 138), ('does', 55), ('different', 187), ('object', 174), ('maxim', 51), ('happiness', 44), ('consciousness', 67), ('necessary', 86), ('means', 61), ('internal', 197), ('things', 22), ('quantity', 145), ('ebook', 34), ('respect', 122), ('conception', 916), ('knowledge', 32), ('intuition', 51), ('regard', 203), ('common', 30), ('merely', 104), ('cause', 62), ('necessarily', 25), ('human', 35), (

In [13]:
def format_tuple(input_tuple):
    """Turn a (word, count) pair into a ``{'text': ..., 'value': ...}`` record.

    The first element is stored unchanged under ``'text'``; the second is
    converted with ``str()`` and stored under ``'value'``.
    """
    return dict(text=input_tuple[0], value=str(input_tuple[1]))

In [14]:
# Re-shape each (word, count) list into a list of {'text', 'value'} records,
# one list per text, preserving the most-common-first order.
purereason_dictlist = list(map(format_tuple, purereason))
practicalreason_dictlist = list(map(format_tuple, practicalreason))
morals_dictlist = list(map(format_tuple, morals))

In [15]:
import json

# Mapping of display title -> list of word records; keys are inserted in the
# order they should appear in the exported file.
json_file = {}
json_file['Kant: Critique of Pure Reason'] = purereason_dictlist
json_file['Kant: Critique of Practical Reason'] = practicalreason_dictlist
json_file['Kant: Grounding for a Metaphysics of Morals'] = morals_dictlist

# Write human-readable JSON; ensure_ascii=False keeps any non-ASCII
# characters as-is instead of \u escapes.
with open('kant.json', 'w', encoding='UTF-8') as f:
    json.dump(json_file, f, indent=4, ensure_ascii=False)
