From f2545a01fbba47be1306d77d1ee18d567a157ce8 Mon Sep 17 00:00:00 2001
From: "Michael P. Daugherty"
Date: Fri, 12 Mar 2010 15:37:23 +0800
Subject: [PATCH] Adding some duplicate status detection and prevention - keep our data clean!

---
 main.py    | 13 ++++++++++---
 twitter.py |  4 ++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/main.py b/main.py
index e8e1bfd..e9f287e 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
-import cgi, urllib, os, datetime
+import cgi, urllib, os, datetime, logging
 import simplejson
 
 from google.appengine.ext.webapp import template
@@ -56,8 +56,15 @@ def get(self):
                 description = statusArray[i+3],
                 concentration = float(statusArray[i+1]),
                 aqi = int(statusArray[i+2]))
-            newPollutantStatus.put()
-        self.response.out.write("Updated")
+
+            # Ensure that we aren't accidentally adding a duplicate status to
+            # the database
+            if PollutantStatus.gql("WHERE type = :1 AND date = :2", newPollutantStatus.type, newPollutantStatus.date).count() < 1:
+                newPollutantStatus.put()
+            else:
+                self.response.out.write("Duplicate status detected\n")
+                logging.info("Duplicate status detected: "+newPollutantStatus.type+" "+str(newPollutantStatus.date));
+        self.response.out.write("Updated with "+status)
 
 application = webapp.WSGIApplication(
     [('/', MainPage),
diff --git a/twitter.py b/twitter.py
index e599758..a1d1872 100644
--- a/twitter.py
+++ b/twitter.py
@@ -2,7 +2,7 @@
 import simplejson
 
 def getStatus():
-    return simplejson.load(urllib.urlopen("http://api.twitter.com/users/show/15527964.json"))['status']['text']
+# return simplejson.load(urllib.urlopen("http://api.twitter.com/users/show/15527964.json"))['status']['text']
 
 # The following line is for testing on my localhost, which is in Beijing and therefore Twitter is blocked.
-# return "03-05-2010; 13:00; PM2.5; 88.0; 55; Moderate // Ozone; 43.3; 36; Good"
+    return "03-05-2010; 13:00; PM2.5; 88.0; 55; Moderate // Ozone; 43.3; 36; Good"