Skip to content

Commit

Permalink
Start implementing tests for update scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
jpatokal committed Nov 30, 2018
1 parent 588ec61 commit f6cdf22
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 199 deletions.
163 changes: 0 additions & 163 deletions data/update-airports.py

This file was deleted.

70 changes: 34 additions & 36 deletions data/update-airlines.py → data/update_airlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
# virtualenv env
# source env/bin/activate
# curl https://bootstrap.pypa.io/get-pip.py | python
# pip install mysql-connector
# (unittest ships with the Python standard library; no pip install is needed for it)

import argparse
import codecs
import mysql.connector
import sys
import urllib2
from collections import defaultdict

# Needed to allow piping UTF-8 (srsly Python wtf)
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
Expand All @@ -23,8 +25,8 @@ def __init__(self):
self.cursor = self.read_cnx.cursor(dictionary=True)
self.write_cnx = self.connect(host, pw)
self.write_cursor = self.write_cnx.cursor(dictionary=True)
self.of_iata = {}
self.of_icao = {}
self.of_iata = defaultdict(list)
self.of_icao = defaultdict(list)

def connect(self, host, pw):
cnx = mysql.connector.connect(user='openflights', database=self.DB, host=host, password=pw)
Expand All @@ -36,20 +38,21 @@ def load_all_airlines(self):
for row in dbc.cursor:
if row['iata'] == "":
row['iata'] = None
self.of_iata[row['iata']] = row
self.of_icao[row['icao']] = row

def find_by_iata(self, code):
    """Return whatever self.of_iata holds under ``code``, or None if absent."""
    # dict.get never inserts a key, so this is a pure lookup even when
    # of_iata is a defaultdict.
    return self.of_iata.get(code)

def find_by_icao(self, code):
    """Return whatever self.of_icao holds under ``code``, or None if absent."""
    # dict.get never inserts a key, so this is a pure lookup even when
    # of_icao is a defaultdict.
    return self.of_icao.get(code)
self.of_iata[row['iata']].append(row)
self.of_icao[row['icao']].append(row)

def match_airline(self, match):
    """Find the already-loaded airline row that corresponds to ``match``.

    ``match`` is a dict with 'icao', 'iata', 'callsign' and 'country' keys.
    Rows sharing the candidate's ICAO code (self.of_icao) are tried first;
    one is accepted when its IATA code, callsign or country agrees with the
    candidate. Failing that, rows sharing the IATA code (self.of_iata) are
    tried and accepted only on a callsign or country agreement.

    A value that is missing (None/empty) on the candidate is never treated
    as agreement, so two records that both lack a callsign or country are
    not matched on that absence alone.

    Returns the first accepted row, or None when nothing matches.
    """
    icao, iata = match['icao'], match['iata']
    callsign, country = match['callsign'], match['country']

    def corroborated(airline):
        # Guard each comparison the same way the IATA comparison is guarded:
        # without it, None == None would count as a match.
        return bool((callsign and airline['callsign'] == callsign)
                    or (country and airline['country'] == country))

    if icao:
        for airline in self.of_icao.get(icao, ()):
            if (iata and airline['iata'] == iata) or corroborated(airline):
                return airline

    if iata:
        for airline in self.of_iata.get(iata, ()):
            if corroborated(airline):
                # Single-argument parenthesised print: same output on
                # Python 2, and still valid syntax on Python 3.
                print("IATA MATCH %s, %s" % (airline, match))
                return airline

    return None

def safe_execute(self, sql, params, live_run):
if live_run:
Expand Down Expand Up @@ -85,17 +88,16 @@ def safe_execute(self, sql, params, live_run):

def parse_airline(block):
    """Turn one table row into an airline record and append it to ``airlines``.

    The first five cells of ``block`` are cleaned with ``clean`` and read,
    in order, as IATA code, ICAO code, name, callsign and country. The
    record is stored as a dict that carries its own 'icao' key, because
    ``airlines`` is a list rather than a mapping keyed by ICAO code.
    """
    iata, icao, name, callsign, country = [clean(x) for x in block[0:5]]
    # Only the list-append form is kept: assigning airlines[icao] on a list
    # raises TypeError for a string (or None) index.
    airlines.append({
        'icao': icao,
        'iata': iata,
        'name': name,
        'callsign': callsign,
        'country': country,
    })

def clean(x):
    """Strip wiki link/emphasis markup from one table cell.

    Examples of the intended mapping:
      | [[Foo|Bar]]      -> Bar
      | ''[[Foo|Bar]]''  -> Bar

    Only the text after the last '|' is kept, square brackets are removed,
    and the '' italics marker is dropped. A single apostrophe inside a name
    is preserved. Byte strings are decoded as UTF-8 so the result is always
    text.

    Returns the cleaned text, or None when nothing is left.
    """
    if isinstance(x, bytes):
        # On Python 2 ``bytes`` is ``str``, so raw input is decoded exactly
        # once here; the characters stripped below are ASCII, so decoding
        # first gives the same result as decoding last.
        x = x.decode('utf-8')
    x = x.split('|')[-1]
    for marker in ('[', ']', "''"):
        x = x.replace(marker, '')
    if x == '':
        return None
    return x

airlines = {}

airlines = []
airline_url = 'https://en.wikipedia.org/w/api.php?action=query&titles=List_of_airline_codes_(%s)&prop=revisions&rvprop=content&format=php'
response = urllib2.urlopen(airline_url % 'A').read()
block = []
Expand All @@ -113,21 +115,17 @@ def clean(x):
count = 0
updated = 0
added = 0
for icao, airline in airlines.iteritems():
of_airline = dbc.find_by_icao(icao)
for airline in airlines:
of_airline = dbc.match_airline(airline)
if of_airline:
# If ICAO matches and IATA *or* callsign are the same, the two are a match
if of_airline['iata'] == airline['iata'] or of_airline['callsign'] == airline['callsign']:
if of_airline['name'] != airline['name']:
print 'MATCH %s: update name %s to %s' % (icao, of_airline['name'], airline['name'])
updated += 1
if airline['callsign'] != None and of_airline['callsign'] != airline['callsign']:
print 'MATCH %s: update callsign %s to %s' % (icao, of_airline['callsign'], airline['callsign'])
updated += 1
else:
print 'MISMATCH %s: %s, %s' % (icao, of_airline, airline)
else:
print 'NEW', icao, airline
if of_airline['name'] != airline['name']:
print 'MATCH %s/%s: update name %s to %s' % (of_airline['iata'], of_airline['icao'], of_airline['name'], airline['name'])
updated += 1
if airline['callsign'] != None and of_airline['callsign'] != airline['callsign']:
print 'MATCH %s/%s: update callsign %s to %s' % (of_airline['iata'], of_airline['icao'], of_airline['callsign'], airline['callsign'])
updated += 1
else:
print 'NEW', airline
added += 1
count += 1

Expand Down
Loading

0 comments on commit f6cdf22

Please sign in to comment.