Permalink
Browse files

Fix usfirst legacy datafeed. Fixes #983

  • Loading branch information...
1 parent 884c0c0 commit 63090e3caefee1d900b4efc1d3cbafc41df67abc @fangeugene fangeugene committed May 8, 2014
Showing with 38 additions and 4 deletions.
  1. +3 −1 datafeeds/datafeed_base.py
  2. +35 −3 datafeeds/datafeed_usfirst_legacy.py
@@ -8,7 +8,7 @@ class DatafeedBase(object):
Provides structure for fetching and parsing pages from websites.
Other Datafeeds inherit from here.
"""
- def parse(self, url, parser):
+ def parse(self, url, parser, usfirst_session_key=None):
headers = {
'Cache-Control': 'no-cache, max-age=10',
'Pragma': 'no-cache',
@@ -17,6 +17,8 @@ def parse(self, url, parser):
# FIRST is now checking the 'Referer' header for the string 'usfirst.org'.
# See https://github.com/patfair/frclinks/commit/051bf91d23ca0242dad5b1e471f78468173f597f
headers['Referer'] = 'usfirst.org'
+ if usfirst_session_key is not None:
+ headers['Cookie'] = usfirst_session_key
result = urlfetch.fetch(url,
headers=headers,
@@ -18,18 +18,50 @@
class DatafeedUsfirstLegacy(DatafeedUsfirst):
+ # Page requested by getSessionKey() solely to read the 'Set-Cookie' response header.
+ SESSION_KEY_GENERATING_PATTERN = "https://my.usfirst.org/myarea/index.lasso?event_type=FRC&year=%s" # % (year)
+
+ # The patterns below each take a single %s value:
+ #   EVENT_DETAILS_URL_PATTERN  % (first_eid)
+ #   EVENT_TEAMS_URL_PATTERN    % (first_eid)
+ #   TEAM_DETAILS_URL_PATTERN   % (team.first_tpid)
EVENT_DETAILS_URL_PATTERN = "https://my.usfirst.org/myarea/index.lasso?page=event_details&eid=%s"
EVENT_TEAMS_URL_PATTERN = "https://my.usfirst.org/myarea/index.lasso?page=event_teamlist&results_size=250&eid=%s"
TEAM_DETAILS_URL_PATTERN = "https://my.usfirst.org/myarea/index.lasso?page=team_details&tpid=%s"
def __init__(self, *args, **kw):
+ # Per-instance cache of session keys, keyed by integer year; filled by getSessionKey().
+ self._session_key = {}
super(DatafeedUsfirstLegacy, self).__init__(*args, **kw)
+ def getSessionKey(self, year):
+ """
+ Grab a page from FIRST so we can get a session key out the response header.
+ """
+ year = int(year)
+
+ if self._session_key.get(year, False):
+ return self._session_key.get(year)
+
+ memcache_key = "usfirst_session_key_%s" % year
+ session_key = memcache.get(memcache_key)
+ if session_key is not None:
+ self._session_key[year] = session_key
+ return self._session_key.get(year)
+
+ result = urlfetch.fetch(self.SESSION_KEY_GENERATING_PATTERN % year, headers={'Referer': 'usfirst.org'}, deadline=60)
+ if result.status_code == 200:
+ session_key = result.headers.get('Set-Cookie', None)
+ if session_key is not None:
+ if tba_config.CONFIG["memcache"]:
+ memcache.set(memcache_key, session_key, 60 * 5)
+ self._session_key[year] = session_key
+ return self._session_key[year]
+ logging.error('Unable to get USFIRST session key for %s.' % year)
+ return None
+ else:
+ logging.error('HTTP code %s. Unable to retreive url: %s' %
+ (result.status_code, self.SESSION_KEY_GENERATING_URL))
+
def getEventDetails(self, year, first_eid):
if type(year) is not int:
raise TypeError("year must be an integer")
url = self.EVENT_DETAILS_URL_PATTERN % (first_eid)
- event, _ = self.parse(url, UsfirstLegacyEventDetailsParser)
+ event, _ = self.parse(url, UsfirstLegacyEventDetailsParser, self.getSessionKey(year))
if event is None:
return None
@@ -57,7 +89,7 @@ def getEventTeams(self, year, first_eid):
if type(year) is not int:
raise TypeError("year must be an integer")
url = self.EVENT_TEAMS_URL_PATTERN % (first_eid)
- teams, _ = self.parse(url, UsfirstLegacyEventTeamsParser)
+ teams, _ = self.parse(url, UsfirstLegacyEventTeamsParser, self.getSessionKey(year))
if teams is None:
return None
@@ -73,7 +105,7 @@ def getTeamDetails(self, team):
if hasattr(team, 'first_tpid'):
if team.first_tpid:
url = self.TEAM_DETAILS_URL_PATTERN % (team.first_tpid)
- team_dict, _ = self.parse(url, UsfirstLegacyTeamDetailsParser)
+ team_dict, _ = self.parse(url, UsfirstLegacyTeamDetailsParser, self.getSessionKey(team.first_tpid_year))
if team_dict is not None and "team_number" in team_dict:
return Team(

0 comments on commit 63090e3

Please sign in to comment.