-
Notifications
You must be signed in to change notification settings - Fork 0
/
owlbot.py
171 lines (132 loc) · 4.74 KB
/
owlbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import json
import os
import re
import time

import cltk
import nltk
import requests
import schedule
import tweepy
from cltk.stem.latin.declension import CollatinusDecliner
from cltk.tag.pos import POSTag
from nltk.tokenize.moses import MosesDetokenizer
# Location of the CLTK Latin models. The string is already absolute, so
# join() and expanduser() return it unchanged; the value is only stored here.
rel_path = os.path.join('/app/cltk_data/latin/model/latin_models_cltk/')
path = os.path.expanduser(rel_path)

# Twitter API credentials, read from environment variables of the same name.
# Each one is None when its variable is not set.
consumer_key, consumer_secret, access_token, access_token_secret = (
    os.environ.get(variable)
    for variable in (
        'consumer_key',
        'consumer_secret',
        'access_token',
        'access_token_secret',
    )
)
# Turn separator-like characters into full stops.
def repunctuate(str):
    """Return a copy of ``str`` with , - : ; ' and space each replaced by ".".

    Every other character (including ``!`` and ``?``) passes through
    unchanged.

    The parameter shadows the builtin ``str``; the name is kept because it is
    part of the function's signature.
    """
    # translate() accepts a mapping from code point to replacement text.
    swap_table = dict.fromkeys(map(ord, ",-:;' "), ".")
    return str.translate(swap_table)
# Catch-all helper: hit a URL and return the text from its JSON payload.
def get_data(url, timeout=10):
    """Fetch ``url`` and return the first entry of the response's ``text`` list.

    Args:
        url: Address of a JSON endpoint whose body has the shape
            ``{"text": [<string>, ...]}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The value of ``doc['text'][0]``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status.
        ValueError: If the body is not valid JSON.
        KeyError, IndexError: If the payload has no ``text`` entry.
    """
    response = requests.get(url, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()
    doc = response.json()
    return doc['text'][0]
# make an owl sentence in English
def get_owl():
    """Fetch a line from the sortes endpoint and swap its first noun for "owl".

    The line is tokenized and POS-tagged with NLTK; the first token whose tag
    starts with ``N`` is replaced by ``owl`` (``Owl`` when it is the first
    token). If no token is tagged as a noun the line is left as it is, rather
    than failing on an invalid list index. The tokens are then detokenized,
    passed through ``clean_sentence`` and printed.

    Returns:
        The finished sentence as returned by ``clean_sentence``.
    """
    raw_text = get_data("http://api.aeneid.eu/sortes")
    # break the sentence down into words, then parts of speech
    words = nltk.word_tokenize(raw_text)
    tagged_words = nltk.pos_tag(words)
    # position of the first noun, or None when the line has no noun at all
    noun_index = next(
        (index for index, (_, tag) in enumerate(tagged_words) if tag[:1] == 'N'),
        None,
    )
    if noun_index is not None:
        # capitalize if first word
        words[noun_index] = 'Owl' if noun_index == 0 else 'owl'
    # put the sentence back together again
    detokenizer = MosesDetokenizer()
    raw_sentence = detokenizer.detokenize(words, return_str=True)
    sentence = clean_sentence(raw_sentence)
    print(sentence)
    return sentence
# find the form of the declined noun that agrees in number and case
def _matching_owl_form(declined_owl, number, case):
    """Return the first declined form agreeing with ``number`` and ``case``.

    Args:
        declined_owl: Iterable of ``(form, tag)`` pairs as returned by the
            decliner; position 2 of the tag is compared with ``number`` and
            position 7 with ``case`` after capitalizing.
        number: Number code taken from the tagged word.
        case: Case code taken from the tagged word.

    Returns:
        The matching form, or ``None`` when nothing agrees.
    """
    # NOTE(review): if several forms share a number/case, the first one
    # listed is used -- confirm that is the preferred form.
    for form, owl_tag in declined_owl:
        if len(owl_tag) < 8:
            continue
        if owl_tag[2].capitalize() == number and owl_tag[7].capitalize() == case:
            return form
    return None


# make an owl sentence in Latin
def get_latin_owl():
    """Fetch a Latin line and swap one noun for the matching form of "bubo".

    The line is POS-tagged with CLTK. The first word whose tag starts with
    ``N`` and for which a form of "bubo" with the same number and case exists
    is replaced. Only the first whole-word occurrence is substituted, so
    other words that merely contain the same letters are left alone. If no
    noun can be matched, the line is used unchanged instead of having a word
    deleted from it. The result is passed through ``clean_sentence`` and
    printed.

    Returns:
        The finished sentence as returned by ``clean_sentence``.
    """
    raw_text = get_data("http://api.aeneid.eu/sortes?version=latin")
    # break down sentence into a list of words with syntax info
    tagger = POSTag('latin')
    tagged_sentence = tagger.tag_ngram_123_backoff(raw_text)
    # all the forms of bubo, each paired with its tag
    decliner = CollatinusDecliner()
    declined_owl = decliner.decline("bubo")

    # the word to be replaced, and the form that replaces it
    commutandum = ''
    replacement_str = ''
    for word, tag in tagged_sentence:
        # Some words come back with no tag (None); tags shorter than eight
        # characters have no number (index 2) / case (index 7) to compare.
        if not tag or len(tag) < 8 or tag[0] != 'N':
            continue
        form = _matching_owl_form(declined_owl, tag[2], tag[7])
        if form:
            commutandum, replacement_str = word, form
            print(form)
            # stop after the first one, so we only replace one word
            break

    if commutandum:
        # Whole-word match, first occurrence only. The replacement is given
        # as a callable so it is inserted literally.
        pattern = r'\b' + re.escape(commutandum) + r'\b'
        raw_sentence = re.sub(
            pattern, lambda _match: replacement_str, raw_text, count=1
        )
    else:
        raw_sentence = raw_text
    sentence = clean_sentence(raw_sentence)
    print(sentence)
    return sentence
def clean_sentence(sentence):
    """Normalize the final character of ``sentence`` and wrap it in owl emoji.

    Only the last character is passed through ``repunctuate``; the rest of the
    sentence is kept untouched. The result is then handed to ``emojify``.
    An empty sentence yields just the emoji pair.
    """
    # split off the closing character so only it gets repunctuated
    body, closing = sentence[:-1], sentence[-1:]
    return emojify(body + repunctuate(closing))
def emojify(str):
    """Return ``str`` with an owl emoji (U+1F989) added at both ends."""
    owl = u'\U0001F989'
    return ''.join((owl, str, owl))
def make_tweet(str):
    """Post ``str`` as a status update through the Twitter API.

    Builds a tweepy client from the module-level credentials. If building the
    client fails, an error line is printed and nothing is posted; previously
    execution continued and failed on the undefined client object. Errors
    raised while posting are not caught here.

    Args:
        str: Text of the tweet. The name shadows the builtin but is kept
            because it is part of the signature.
    """
    # get Twitter access
    try:
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still work.
        print("Error: Authentication failed")
        return
    # tweet the string
    api.update_status(str)
# #trial sentences for testing
# get_owl()
# get_latin_owl()
#define tweet functions as jobs for scheduler
def tweet_latin_owl():
    """Scheduler job: build a Latin owl sentence and tweet it."""
    latin_sentence = get_latin_owl()
    make_tweet(latin_sentence)
def tweet_owl():
    """Scheduler job: build an English owl sentence and tweet it."""
    english_sentence = get_owl()
    make_tweet(english_sentence)
# Daily posting slots, in 24-hour "HH:MM" form, registered in this order.
# NOTE(review): the original note reads "6/7 hrs = CST", which suggests the
# times are on a clock 6-7 hours ahead of US Central -- confirm against the
# host's timezone.
for _slot, _job in (
    ("13:30", tweet_owl),
    ("18:00", tweet_latin_owl),
    ("22:30", tweet_owl),
    ("06:00", tweet_latin_owl),
):
    schedule.every().day.at(_slot).do(_job)

# Run forever, checking once a second whether a job is due.
while True:
    schedule.run_pending()
    time.sleep(1)