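# NOTE: the following lines are GitHub web-page text that was captured along
# with the source file; they are not part of the program.
# Permalink
# Switch branches/tags
# Nothing to show
# Find file Copy path
# Fetching contributors…
# Cannot retrieve contributors at this time
# 172 lines (132 sloc) 4.74 KB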
import os
import json
import requests
import cltk
from cltk.stem.latin.declension import CollatinusDecliner
from cltk.tag.pos import POSTag
import nltk
from nltk.tokenize.moses import MosesDetokenizer
import schedule
import time
import tweepy
# add cltk models to path
rel_path = os.path.join('/app/cltk_data/latin/model/latin_models_cltk/')
path = os.path.expanduser(rel_path)
# get Twitter access keys from env
consumer_key = os.environ.get('consumer_key')
consumer_secret = os.environ.get('consumer_secret')
access_token = os.environ.get('access_token')
access_token_secret = os.environ.get('access_token_secret')
# to ensure that the sentence ends with !?.
def repunctuate(str):
punctuation = ",-:;' "
translator = str.maketrans({key: "." for key in punctuation})
new_str = str.translate(translator)
return new_str
# catchall function to hit urls and return text from json data
def get_data(url):
r = requests.get(url)
# parse the json response
doc = r.json()
raw_text = doc['text'][0]
return raw_text
# make an owl sentence in English
def get_owl():
raw_text = get_data("http://api.aeneid.eu/sortes")
# break the sentence down into words, then parts of speech
text = nltk.word_tokenize(raw_text)
tokenized_list = nltk.pos_tag(text)
# find the first noun so we can replace it, and keep track of where it's at
replacement_index = ''
for index, token in enumerate(tokenized_list):
if token[1][:1] == 'N':
replacement_index = index
break
# replace that word with 'owl' using the index
if replacement_index == 0:
#capitalize if first word
text[replacement_index] = 'Owl'
else:
text[replacement_index] = 'owl'
# put the sentence back together again
detokenizer = MosesDetokenizer()
raw_sentence = detokenizer.detokenize(text, return_str=True)
sentence = clean_sentence(raw_sentence)
print (sentence)
return sentence
# make an owl sentence in Latin
def get_latin_owl():
raw_text = get_data("http://api.aeneid.eu/sortes?version=latin")
# break down sentence into a list of words with syntax info
tagger = POSTag('latin')
tagged_sentence = tagger.tag_ngram_123_backoff(raw_text)
# create an array showing the various forms of bubo with parts of speech
decliner = CollatinusDecliner()
declined_owl = decliner.decline("bubo")
#create variables to collect the word to be replaced, and the replacement form
replacement_str = ''
commutandum = ''
# loop through the list of tagged words, with
for item in tagged_sentence:
# some tags return None, hand with try/except
try:
#get the tag info
syntax_str = item[1]
# check the part of speech, number, and case
if syntax_str[0] == 'N' and syntax_str[2] and syntax_str[7]:
commutandum = item[0]
number = syntax_str[2]
case = syntax_str[7]
# find the matching case and number for bubo
for owl in declined_owl:
owl_syntax = owl[1]
if owl_syntax[2].capitalize() == number and owl_syntax[7].capitalize() == case:
replacement_str = owl[0]
print (owl[0])
# stop after the first one, so we only replace one word
break
else:
pass
except:
pass
#replace the word
raw_sentence = raw_text.replace(commutandum, replacement_str)
sentence = clean_sentence(raw_sentence)
print (sentence)
return sentence
def clean_sentence(sentence):
#replace non-final punctuation and add owls
punct = sentence[-1:]
punct = repunctuate(punct)
sentence = sentence[:-1]
sentence = sentence + punct
sentence = emojify(sentence)
return sentence
def emojify(str):
return u'\U0001F989' + str + u'\U0001F989'
def make_tweet(str):
# get Twitter access
try:
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
except:
print("Error: Authentication failed")
# tweet the string
api.update_status(str)
# #trial sentences for testing
# get_owl()
# get_latin_owl()
#define tweet functions as jobs for scheduler
def tweet_latin_owl():
make_tweet(get_latin_owl())
def tweet_owl():
make_tweet(get_owl())
# schedule time - 6/7 hrs = CST, also it uses 24-hr time
schedule.every().day.at("13:30").do(tweet_owl)
schedule.every().day.at("18:00").do(tweet_latin_owl)
schedule.every().day.at("22:30").do(tweet_owl)
schedule.every().day.at("06:00").do(tweet_latin_owl)
while True:
schedule.run_pending()
time.sleep(1)