Permalink
Browse files

Changing twitter.py to look at the most recent two statuses.

Previously, we only looked at one status.  However, BeijingAir occasionally posts two statuses in quick succession, with the final one being a summary of recent air quality, rather than a new update.  If the /tasks/updateStatus cron job encountered that status, it would attempt to parse it and add it to the database, but it would miss the real update for that hour.  This typically happens at noon.

Now that we have de-duplicating code, it's ok to just always get the most recent two statuses.  We don't have to worry about re-adding the previous one during the hours that only have one update, and we need to make sure we get the second-to-last update in those hours that have the summary tweet.
  • Loading branch information...
1 parent 68b22a3 commit 7a678fac8822c566509d73f7505377299f7e3276 @mpdaugherty committed Mar 12, 2010
Showing with 33 additions and 27 deletions.
  1. +27 −25 main.py
  2. +6 −2 twitter.py
View
52 main.py
@@ -38,32 +38,34 @@ def getCurrentPollutantHTML(self, pollutant):
class UpdateStatusFromTwitter(webapp.RequestHandler):
def get(self):
- status = twitter.getStatus()
- # Example status: "03-05-2010; 13:00; PM2.5; 17.0; 55; Moderate // Ozone; 43.3; 36; Good"
- statusArray = status.split(";")
- statusArray.insert(5, statusArray[5].split("//")[0])
- statusArray[6] = statusArray[6].split("//")[1]
- statusArray = map((lambda str: str.strip()),statusArray)
- year = int(statusArray[0].split("-")[2])
- day = int(statusArray[0].split("-")[1])
- month = int(statusArray[0].split("-")[0])
- hour = int(statusArray[1][0:2])
- updateDateTime = datetime.datetime(year, month, day, hour)
- for i in range(2, len(statusArray), 4):
- newPollutantStatus = PollutantStatus(
- type = statusArray[i],
- date = updateDateTime,
- description = statusArray[i+3],
- concentration = float(statusArray[i+1]),
- aqi = int(statusArray[i+2]))
+ statuses = twitter.getRecentStatuses()
- # Ensure that we aren't accidentally adding a duplicate status to
- # the database
- if PollutantStatus.gql("WHERE type = :1 AND date = :2", newPollutantStatus.type, newPollutantStatus.date).count() < 1:
- newPollutantStatus.put()
- else:
- self.response.out.write("Duplicate status detected<br />")
- logging.info("Duplicate status detected: "+newPollutantStatus.type+" "+str(newPollutantStatus.date));
+ for status in statuses:
+ # Example status: "03-05-2010; 13:00; PM2.5; 17.0; 55; Moderate // Ozone; 43.3; 36; Good"
+ statusArray = status.split(";")
+ statusArray.insert(5, statusArray[5].split("//")[0])
+ statusArray[6] = statusArray[6].split("//")[1]
+ statusArray = map((lambda str: str.strip()),statusArray)
+ year = int(statusArray[0].split("-")[2])
+ day = int(statusArray[0].split("-")[1])
+ month = int(statusArray[0].split("-")[0])
+ hour = int(statusArray[1][0:2])
+ updateDateTime = datetime.datetime(year, month, day, hour)
+ for i in range(2, len(statusArray), 4):
+ newPollutantStatus = PollutantStatus(
+ type = statusArray[i],
+ date = updateDateTime,
+ description = statusArray[i+3],
+ concentration = float(statusArray[i+1]),
+ aqi = int(statusArray[i+2]))
+
+ # Ensure that we aren't accidentally adding a duplicate status to
+ # the database
+ if PollutantStatus.gql("WHERE type = :1 AND date = :2", newPollutantStatus.type, newPollutantStatus.date).count() < 1:
+ newPollutantStatus.put()
+ else:
+ self.response.out.write("Duplicate status detected<br />")
+ logging.info("Duplicate status detected: "+newPollutantStatus.type+" "+str(newPollutantStatus.date));
self.response.out.write("Updated with "+status)
application = webapp.WSGIApplication(
View
@@ -1,8 +1,12 @@
import urllib
import simplejson
-def getStatus():
- return simplejson.load(urllib.urlopen("http://api.twitter.com/users/show/15527964.json"))['status']['text']
+def getRecentStatuses():
+ statuses = simplejson.load(urllib.urlopen("http://api.twitter.com/1/statuses/user_timeline/beijingair.json?count=2"))
+ cleanStatuses = []
+ for status in statuses:
+ cleanStatuses.append(status['text'])
+ return cleanStatuses
# The following line is for testing on my localhost, which is in Beijing and therefore Twitter is blocked.
# return "03-05-2010; 13:00; PM2.5; 88.0; 55; Moderate // Ozone; 43.3; 36; Good"

0 comments on commit 7a678fa

Please sign in to comment.