import os
import json
import requests
import cltk
from cltk.stem.latin.declension import CollatinusDecliner
from cltk.tag.pos import POSTag
import nltk
from nltk.tokenize.moses import MosesDetokenizer
import schedule
import time
import tweepy
# Directory holding the CLTK Latin models; `path` is the same location with
# any leading "~" expanded.
rel_path = '/app/cltk_data/latin/model/latin_models_cltk/'
path = os.path.expanduser(rel_path)
# Twitter credentials are read from the environment; each is None when unset.
consumer_key = os.getenv('consumer_key')
consumer_secret = os.getenv('consumer_secret')
access_token = os.getenv('access_token')
access_token_secret = os.getenv('access_token_secret')
def repunctuate(str):
    """Return *str* with soft punctuation turned into periods.

    Every comma, hyphen, colon, semicolon, apostrophe and space becomes
    ".". All other characters, including "!", "?" and ".", pass through
    unchanged.
    """
    # str.translate accepts a mapping keyed by code point.
    to_period = dict.fromkeys(map(ord, ",-:;' "), ".")
    return str.translate(to_period)
# catchall function to hit urls and return text from json data
def get_data(url, timeout=10):
    """Fetch *url* and return the first entry of its JSON ``text`` field.

    Args:
        url: Address of an endpoint whose JSON body has a ``text`` sequence.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``doc['text'][0]`` from the decoded JSON document.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response status.
    """
    # Without an explicit timeout requests may wait on the server forever.
    r = requests.get(url, timeout=timeout)
    # Fail on an error status instead of trying to parse an error page.
    r.raise_for_status()
    # parse the json response
    doc = r.json()
    return doc['text'][0]
# make an owl sentence in English
def get_owl():
    """Fetch a sentence, replace its first noun with "owl", and return it.

    The text comes from get_data, is tokenized and POS-tagged with NLTK,
    detokenized again, passed through clean_sentence, printed and returned.
    When no token is tagged as a noun the words are left unchanged.

    Returns:
        The cleaned sentence.
    """
    # NOTE(review): the URL passed here is empty -- confirm the intended endpoint.
    raw_text = get_data("")
    # break the sentence down into words, then parts of speech
    text = nltk.word_tokenize(raw_text)
    tokenized_list = nltk.pos_tag(text)
    # position of the first token whose tag starts with "N" (a noun), if any
    replacement_index = next(
        (index for index, token in enumerate(tokenized_list)
         if token[1][:1] == 'N'),
        None,
    )
    if replacement_index is not None:
        # capitalize only when the noun opens the sentence
        text[replacement_index] = 'Owl' if replacement_index == 0 else 'owl'
    # put the sentence back together again
    detokenizer = MosesDetokenizer()
    raw_sentence = detokenizer.detokenize(text, return_str=True)
    sentence = clean_sentence(raw_sentence)
    print(sentence)
    return sentence
# make an owl sentence in Latin
def get_latin_owl():
    """Fetch a Latin sentence and swap one noun for the matching form of "bubo".

    The sentence is POS-tagged with CLTK. The first noun for which the
    declension table of "bubo" offers a form with the same number and case
    is replaced (first occurrence only). If no such noun exists the text is
    left unchanged. The result is passed through clean_sentence, printed
    and returned.

    Returns:
        The cleaned sentence.
    """
    # NOTE(review): the URL passed here is empty -- confirm the intended endpoint.
    raw_text = get_data("")
    # break down sentence into a list of words with syntax info
    tagger = POSTag('latin')
    tagged_sentence = tagger.tag_ngram_123_backoff(raw_text)
    # every form of bubo, each paired with its syntax tag
    decliner = CollatinusDecliner()
    declined_owl = decliner.decline("bubo")
    # the word to be replaced, and the form that replaces it
    replacement_str = ''
    commutandum = ''
    for item in tagged_sentence:
        syntax_str = item[1]
        # some words come back untagged (None); a tag must also be long
        # enough to carry the number (index 2) and case (index 7) slots
        if not syntax_str or len(syntax_str) < 8 or syntax_str[0] != 'N':
            continue
        number = syntax_str[2]
        case = syntax_str[7]
        # find the form of bubo with the same number and case
        owl_form = next(
            (owl[0] for owl in declined_owl
             if owl[1][2].capitalize() == number
             and owl[1][7].capitalize() == case),
            None,
        )
        if owl_form is None:
            # no usable form for this noun; try the next one instead of
            # deleting the word from the sentence
            continue
        commutandum = item[0]
        replacement_str = owl_form
        print(owl_form)
        # stop after the first one, so we only replace one word
        break
    # replace the word (first occurrence only); nothing to do without a match
    if commutandum:
        raw_sentence = raw_text.replace(commutandum, replacement_str, 1)
    else:
        raw_sentence = raw_text
    sentence = clean_sentence(raw_sentence)
    print(sentence)
    return sentence
def clean_sentence(sentence):
    """Normalize the last character of *sentence* and wrap it in owl emoji.

    Only the final character is passed through repunctuate; everything
    before it is kept as-is. The rejoined text is then handed to emojify.
    """
    body, ending = sentence[:-1], sentence[-1:]
    return emojify(body + repunctuate(ending))
def emojify(str):
    """Return *str* with an owl emoji (U+1F989) added at both ends."""
    owl = u'\U0001F989'
    return u''.join((owl, str, owl))
def make_tweet(str):
    """Post *str* as a status update using the module-level Twitter keys.

    Prints "Error: Authentication failed" and returns without posting when
    the credentials are rejected.

    Args:
        str: Text of the tweet.
    """
    # get Twitter access
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    # tweepy's base exception is TweepError in 3.x and TweepyException in 4.x;
    # resolve whichever the installed version provides.
    tweepy_error = (getattr(tweepy, "TweepyException", None)
                    or getattr(tweepy, "TweepError", Exception))
    try:
        # rejected keys yield a falsy result (tweepy 3.x) or an exception (4.x)
        authenticated = api.verify_credentials()
    except tweepy_error:
        authenticated = False
    if not authenticated:
        print("Error: Authentication failed")
        return
    # tweet the string
    api.update_status(str)
# #trial sentences for testing
# get_owl()
# get_latin_owl()
#define tweet functions as jobs for scheduler
def tweet_latin_owl():
def tweet_owl():
# schedule time - 6/7 hrs = CST, also it uses 24-hr time
while True: