-
Notifications
You must be signed in to change notification settings - Fork 0
/
update-bot.py
executable file
·142 lines (121 loc) · 4.81 KB
/
update-bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/home/siddharth/venv/bin/python
#########################################################################
#
# Script to automate tweeting of blog update notifications to
# the timeline of @SidDarthious. Uses GNU Make to create
# formatted tweets of new blog/updated posts, which include title,
# URL, excerpt and hashtags.
#
# Siddharth Maddali
# siddharth.mv@protonmail.com
# May 2020
#
#########################################################################
import twitter # for auto-tweeting
import subprocess # to access pass through python
import re
from bs4 import BeautifulSoup # to extract Twitter handles from HTML anchors
# Get a searchable string for the blog post permalink
def getPermalinkSubstring( mystr ):
    """
    Turn a Jekyll post filename such as '_posts/2020-05-01-my-post.md'
    into the permalink fragment '2020/05/01/my-post' that can be searched
    for in the generated RSS feed links.
    """
    basename = mystr.split( '/' )[-1].replace( '.md', '' )
    pieces = basename.split( '-' )
    date_part = '/'.join( pieces[:3] )   # YYYY/MM/DD
    slug_part = '-'.join( pieces[3:] )   # the-post-slug
    return '%s/%s'%( date_part, slug_part )
# Given a searchable string, mine the xml object for the
# post title, excerpt and permalink that will go into the
# final tweet.
def getTweetContent( xmlobj, pl_substr ):
    """
    Return ( title, cleaned-up excerpt, permalink ) for the first feed
    item whose link contains pl_substr.

    xmlobj:    xmltodict-style nested dict of the parsed RSS feed
    pl_substr: permalink fragment from getPermalinkSubstring()

    Raises ValueError with a readable message when no feed item matches,
    instead of the opaque IndexError the previous [...][0] indexing gave.
    """
    # next() stops at the first match instead of materializing every
    # matching item the way the old list comprehension did.
    match = next(
        (
            this for this in xmlobj[ 'rss' ][ 'channel' ][ 'item' ]
            if pl_substr in this[ 'link' ]
        ),
        None
    )
    if match is None:
        raise ValueError( 'no feed item matching "%s"'%pl_substr )
    return (
        match[ 'title' ],
        cleanUpDescription( match[ 'description' ] ),
        match[ 'link' ]
    )
# Extracts blog excerpt from description, which contains
# other HTML tags like <script>, puts in Twitter handles
# extracted from HTML anchors
def cleanUpDescription( entry ):
    """
    Take the last line of the RSS 'description' field, drop the wrapping
    <p>...</p> tags, and replace each <a name="#handle">...</a> anchor
    with the bare Twitter handle '@handle'.
    """
    excerpt = entry.split( '\n' )[-1]
    # The old str.strip( '<p>' ) stripped the *character set* {<, p, >}
    # from both ends, which could also eat a leading/trailing 'p' that
    # belongs to the excerpt text itself.  Remove the tags explicitly.
    excerpt = re.sub( r'^\s*<p>', '', excerpt )
    excerpt = re.sub( r'</p>\s*$', '', excerpt )
    # Explicit parser: a bare BeautifulSoup( excerpt ) call guesses one
    # (GuessedAtParserWarning), and soup.body is None under html.parser,
    # which made the old soup.body.find_all crash -- search the soup
    # object directly instead.
    soup = BeautifulSoup( excerpt, 'html.parser' )
    for anchor in soup.find_all( name='a' ):
        excerpt = excerpt.replace(
            '<a name="%s">%s</a>'%( anchor.attrs[ 'name' ], anchor.text ),
            anchor.attrs[ 'name' ].replace( '#', '@' )
        )
    return excerpt
# Extracts tags from the input markdown file, and parses
# them as hashtags to be tweeted.
def extractTags( mdfile ):
    """
    Read the 'tags: [ ... ]' front-matter line of a post and turn it into
    a list of tweetable strings.

    A tag written as '\\@Handle' (Jekyll rejects a bare leading @, so the
    posts escape it) becomes the Twitter mention '@Handle'; every other
    tag loses its internal spaces and gains a leading '#'.

    Returns [] when the file has no tags line.  The previous version fell
    through the loop and mis-parsed whatever the last line of the file
    was (or raised NameError on an empty file).
    """
    tag_line = None
    with open( mdfile ) as mdf:
        for line in mdf:
            if re.match( 'tags: ', line ):
                tag_line = line
                break
    if tag_line is None:
        return []
    tags = [
        hashtag[1:] if '\\@' in hashtag else '#%s'%( hashtag.replace( ' ', '' ) )
        for hashtag in tag_line.split( ': ' )[-1].strip().strip( '[' ).strip( ']' ).split( ', ' )
    ]
    # Jekyll doesn't like blog tags beginning with @, which I want to use to tag
    # people on Twitter, so I need to prefix each with \@ and then teach this script
    # to ignore the leading \
    # TODO: create new HTML element to use in future .md files, that reads something like this:
    #  ... in collaboration with <twitter-person id="ComixLab">the COMIX lab</twitter-person> in Marseille...
    # This script should be able to extract "ComixLab" and replace the above instance with:
    #  ... in collaboration with @ComixLab in Marseille...
    # in the final tweet. This is a much better way of tagging people.
    return tags
# Creates the update bot and authenticates.
def getAuthenticatedBot(
        root='social/twitter/siddarthious/update-bot',
        labels=( 'access-token', 'access-token-secret', 'api-key', 'api-secret-key' )
    ):
    """
    Fetch the four OAuth secrets from the pass(1) password store and
    return an authenticated twitter.Twitter client.

    root:   pass folder holding the secrets
    labels: entry names, in the fixed order
            ( access token, access token secret, consumer key, consumer secret )

    The default for labels is now a tuple: a mutable list default is
    shared across calls (classic Python pitfall).  Callers that pass
    their own list still work.
    """
    tokens = [
        # subprocess.run replaces the raw Popen/communicate pair.
        # stderr used to be merged into stdout (stderr=STDOUT), so any
        # pass error text silently corrupted the token value; discard it
        # instead and keep stdout as the secret alone.
        subprocess.run(
            [ 'pass', '%s/%s'%( root, thislabel ) ],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL
        ).stdout.strip().decode( 'utf-8' )
        for thislabel in labels
    ]
    mybot = twitter.Twitter(
        auth=twitter.OAuth(
            token=tokens[0],
            token_secret=tokens[1],
            consumer_key=tokens[2],
            consumer_secret=tokens[3]
        )
    )
    tokens = [] # rebind so the secrets are not kept alive in this frame
    return mybot
if __name__=="__main__":
    import sys      # command line args
    import xmltodict # to parse rss feed
    import datetime # for logging and makefile tracking purposes
    # (the duplicate "import re" that used to live here is gone -- re is
    # already imported at module level)

    # The feed is read from the locally built Jekyll site; its links
    # point at the localhost preview server and are rewritten to the
    # public root before tweeting.
    rss = '/home/smaddali/local/siddharth-maddali.github.io/_site/feed.xml'
    root = 'https://siddharth-maddali.github.io'
    fakeroot = 'http://localhost:4000/'

    # Usage: update-bot.py <post.md> <logfile>
    mdfile = sys.argv[1]
    logfile = sys.argv[2]

    tags = extractTags( mdfile )
    with open( rss ) as fid:
        feed = xmltodict.parse( fid.read() )
    pl_substr = getPermalinkSubstring( mdfile )
    title, desc, link = getTweetContent( feed, pl_substr )
    link = link.replace( fakeroot, root )
    tweet = 'Blog update: %s \n%s \n%s '%(
        desc,
        link,
        ' '.join( tags )
    )
    timestamp = datetime.datetime.now().strftime( '%Y-%m-%d, %H:%M:%S' )
    # Log the tweet before posting, so a record exists even if the
    # API call below fails.
    with open( logfile, 'a' ) as lf:
        lf.write( 'Tweeted on: %s\n'%timestamp )
        lf.write( tweet )
        lf.write( '\n' )
    bot = getAuthenticatedBot()
    bot.statuses.update( status=tweet )