In [2]:
from collections import namedtuple
import re


class ParseError(Exception):
    """Raised when a line of constellation data cannot be parsed."""
    

def load_constellation_data(path='../data/constellationship.fab'):
    """
    Returns the raw text of constellationship.fab, a list of hipparcos catalog constellations.

    This data is taken from the wackymorningdj.users.sourceforge.net/ha_rey_stellarium.zip

    Args:
        path: Location of the data file. The default is a relative path, so it
            is resolved against the current working directory; pass an explicit
            path when running from somewhere else.

    Returns:
        The complete file contents as a single string.

    Warning:
        The license for this data is not yet known.
    """
    # `data_file` rather than `file`, which shadows a builtin on Python 2.
    with open(path, 'r') as data_file:
        return data_file.read()

    
def get_constellation_data():
    """
    Returns a list of Constellation namedtuples, one per parsable line of data.

    Each Constellation namedtuple has an `abreviation` attrib (the field name is
    spelled that way in the code) and a `line_segments` attrib, which is a list
    of star coordinates kept as strings (they are not converted to int).

    The constellation data is formatted as:
    <Three letter abbreviation> <number of segments> <star coordinate> where each field is
    white space separated. Constellation lines appear to be defined by start and end
    coordinates, eg:

    Sge 3 98337 97365 97365 96837 97365 96757

    Blank lines are skipped silently. Any other line that cannot be parsed is
    reported on stdout and left out of the result.
    """
    cleaned_data = []
    # splitlines() handles both '\n' and '\r\n' endings and does not produce a
    # spurious empty entry for the file's trailing newline.
    for constellation_string in load_constellation_data().splitlines():
        if not constellation_string.strip():
            continue
        try:
            constellation_tuple = parse_constellation_string(constellation_string)
        except ParseError:
            # Function-call form works on both Python 2 and Python 3.
            print('Failed to parse: {const_string}'.format(const_string=constellation_string))
        else:
            cleaned_data.append(constellation_tuple)
    return cleaned_data
    
# Built once: namedtuple() generates a brand new class on every call, so
# creating it per parsed line is wasteful and gives each result an unrelated type.
# The field name 'abreviation' (sic) is kept so existing attribute access still works.
_Constellation = namedtuple('Constellation', ['abreviation', 'line_segments'])

# "<abbreviation> <segment count>", separated by any run of whitespace.
_CONSTELLATION_HEADER = re.compile(r'(?P<abbrev>[A-Za-z]+)\s+(?P<segs>\d+)')


def parse_constellation_string(constellation_string):
    """
    Returns a namedtuple of the constellation's abbreviation and line segments.

    Args:
        constellation_string: One line of constellation data, eg
            'Sge 3 98337 97365 97365 96837 97365 96757'.

    Returns:
        A Constellation namedtuple whose `abreviation` field is the letters
        matched at the start of the record and whose `line_segments` field is
        the list of whitespace-separated tokens that follow the segment count,
        kept as strings.

    Raises:
        ParseError: If no '<letters> <digits>' header is found in the line.

    A warning is printed (and the result still returned) when the number of
    tokens is not exactly twice the declared segment count.
    """
    match = _CONSTELLATION_HEADER.search(constellation_string)
    if match is None:
        raise ParseError("Failed to parse: {const_string}".format(const_string=constellation_string))

    abbreviation = match.group('abbrev')
    number_of_segments = int(match.group('segs'))
    # Take the tokens from the end of the matched header rather than from a
    # fixed split index, so repeated spaces, tabs or leading whitespace cannot
    # shift the fields or leave empty strings in the result.
    line_segments = constellation_string[match.end():].split()
    # Multiply instead of dividing: integer division would let an odd number of
    # coordinates slip through unnoticed on Python 2.
    if len(line_segments) != 2 * number_of_segments:
        print("Segs don't match for: {}".format(constellation_string))

    return _Constellation(abbreviation, line_segments)
    
# Notebook cell expression: the value returned by this call is what the cell
# displays as its output (the list of Constellation tuples).
get_constellation_data()



Failed to parse: 80
Segs don't match for: CrA 6 92989 93174 93174 93825 93825 94114 94114 94160 94160 94005 94005 93542 93542 92953
Failed to parse: 


[Constellation(abreviation='GSq', line_segments=['113963', '113881', '113881', '677', '677', '1067', '1067', '113963']),
 Constellation(abreviation='Peg', line_segments=['113963', '113881', '113881', '1067', '1067', '113963', '113963', '112440', '112440', '112748', '112748', '112158', '112158', '109410', '114520', '113963', '113963', '114144', '113963', '112447', '112447', '112029', '112029', '109427', '112029', '107315', '112447', '109176', '109176', '107348', '112440', '109176', '109176', '107354', '107354', '105502']),
 Constellation(abreviation='Cyg', line_segments=['95853', '97165', '97165', '100453', '100453', '102098', '102098', '104060', '102098', '99675', '99675', '96441', '96441', '95853', '100453', '102488', '102488', '104732', '104732', '103413', '103413', '102098', '103413', '105102', '105102', '104887', '100453', '98110', '98110', '96683', '96683', '95947']),
 Constellation(abreviation='Lyr', line_segments=['91262', '91926', '91926', '92862', '92862', '94481', '94481', '9