Skip to content
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
57 lines (50 sloc) 2.14 KB
#!/usr/bin/env python
import re, numpy as np, codecs, os, nltk, collections, time
from string import digits, punctuation
import generatortools as gt
import sys
def main(input_folder, iter=1000, probability_cutoff=-35, threshold=10, tweet=True):
    """
    Build n-gram dictionaries from a folder of texts and generate sentences.

    input_folder is the directory whose non-hidden files are handed to the
    dictionary makers.
    iter sets the number of times to run the sentence builder.
    threshold is fed into the sentbuilder and sets the number of items to
    consider when choosing from the candidates for next word from a sorted list.
    probability_cutoff sets the lowest probability (log) of a sentence to be
    published.
    tweet, when true, logs in with the keys in ~/Downloads/twitter_keys.txt
    for every sentence that passes the cutoff.

    Each generated sentence is appended, with its score and per-word values,
    to aristotlelog_<mm_dd_YYYY>.txt (relative path).
    """
    success = 0
    log_name = "aristotlelog_" + time.strftime("%m_%d_%Y") + ".txt"
    # Use the input_folder parameter; the bare name `dir` would resolve to the builtin.
    files = [
        os.path.join(input_folder, i)
        for i in os.listdir(input_folder)
        if not i.startswith(".")
    ]
    #print("We're working with these files: ", files)
    print("Making the word dicti")
    worddicti = gt.dictmaker(files, 1)
    print("We have {} unique words and {} words total".format(len(worddicti), sum(worddicti.values())))
    print("Making the bigram dicti")
    bigramdicti = gt.dictmaker(files, 2)
    print("Making the trigram dicti")
    trigramdicti = gt.dictmaker(files, 3)
    # The context manager closes the log even if the sentence builder raises.
    with codecs.open(log_name, "a", "utf-8") as outputfile:
        for item in range(int(iter)):
            print("Starting the sentence builder")
            sent, probs = gt.sentbuilder(".", ".?!", threshold, worddicti, bigramdicti, trigramdicti)
            #here we set the probability cutoff (logged values)
            #note that we might want to exclude the first, "seed" item
            score = sum(probs[1:])
            if score > int(probability_cutoff):
                output = " ".join(sent).lstrip(".")
                print("Aristotle says: ")
                print("{} \n".format(output))
                print("Probability {} {}".format(score, probs))
                success += 1
                if tweet:
                    bot = gt.loginmachine(os.path.expanduser(os.path.join("~/Downloads", "twitter_keys.txt")))
                    print("tweeting")
                    # NOTE(review): the statement that posts `output` through `bot`
                    # is not present in this copy of the file; restore it from
                    # the original script. As written, `bot` is never used.
                    print("now sleeping")
                    # NOTE(review): presumably a pause followed this message in
                    # the original; it is also missing here -- confirm and restore.
            else:
                print("No good sentence found")
            # NOTE(review): logging every sentence (accepted or not) is inferred
            # from statement order in the damaged source -- confirm.
            outputfile.write(" ".join(sent) + "," + str(score) + "," + " ".join([str(i) for i in probs]) + "\n")
    print("{} successes".format(success))
# Script entry point: the guarded code runs only when the file is executed directly.
# NOTE(review): the statement(s) under this guard are not present in this copy
# of the file -- presumably a call to main(); restore from the original script.
if __name__ == "__main__":
You can’t perform that action at this time.