In [5]:
import csv
from collections import defaultdict
from io import TextIOWrapper
from zipfile import ZipFile

from utilities.constants import *

# Path of the GTFS archive opened by the loading cell; switch feeds by
# (un)commenting one of the two assignments below.
# NOTE(review): RAW_DATA_PATH is presumably supplied by the star import from
# utilities.constants, and the '(A)' / '(T)' tags in later cells presumably
# refer to these two file suffixes - confirm.
#loc = RAW_DATA_PATH.joinpath('GTFS_KRK_A.zip')
loc = RAW_DATA_PATH.joinpath('GTFS_KRK_T.zip')

class Stop:
    """A stop record: identifier, display name and coordinates."""

    def __init__(self, stop_id, name, lat, lon):
        # The arguments are stored unchanged under same-named attributes.
        self.stop_id, self.name = stop_id, name
        self.lat, self.lon = lat, lon
        
        
def find_stops_by_name(name):
    """Return the ids of all stops in the global ``stop_list`` named ``name``.

    The comparison is an exact string match. The result is a set because
    more than one stop may carry the same name; it is empty when none does.
    """
    return {key for key, candidate in stop_list.items() if candidate.name == name}
        
    
class Route:
    """A line (route) together with the ids of the trips registered on it."""

    def __init__(self, route_id, name):
        self.route_id, self.name = route_id, name
        # Populated through add_trip(); being a set, repeated ids collapse.
        self.trip_ids = set()

    def add_trip(self, trip_id):
        """Register ``trip_id`` as belonging to this route."""
        self.trip_ids.add(trip_id)

    def get_trip_ids_beg_in(self, some_stop_list):
        """Return ids of this route's trips whose first stop id is in ``some_stop_list``.

        Trips are resolved through the global ``trip_list``.
        """
        return {
            trip_id
            for trip_id in self.trip_ids
            if trip_list[trip_id].beg_stop_id() in some_stop_list
        }

    def get_trip_ids_end_in(self, some_stop_list):
        """Return ids of this route's trips whose last stop id is in ``some_stop_list``.

        Trips are resolved through the global ``trip_list``.
        """
        return {
            trip_id
            for trip_id in self.trip_ids
            if trip_list[trip_id].end_stop_id() in some_stop_list
        }

    def get_trip_ids_beg_or_end_in(self, some_stop_list):
        """Return ids of trips that either start or finish at one of the given stops."""
        starting = self.get_trip_ids_beg_in(some_stop_list)
        finishing = self.get_trip_ids_end_in(some_stop_list)
        return starting.union(finishing)
        

def find_routes_by_name(name):
    """Return the ids of all routes in the global ``route_list`` named ``name``.

    The comparison is an exact string match; the returned set is empty when
    no route carries that name.
    """
    return {key for key, candidate in route_list.items() if candidate.name == name}

def find_route_by_name(name):
    """Return the route object from ``route_list`` whose name is exactly ``name``.

    When several routes share the name, the one with the smallest route id is
    returned, so the choice does not depend on set iteration order.

    Raises:
        KeyError: if no route carries that name.
    """
    matches = find_routes_by_name(name)
    if not matches:
        # Same exception type as before, but naming the missing route instead
        # of the opaque "pop from an empty set".
        raise KeyError(f'no route named {name!r} in the loaded feed')
    return route_list[min(matches)]
    
class Trip:
    """One trip of a route together with its timed stops.

    ``stop_times`` is a list of ``(time, stop_seq, stop_id)`` tuples in the
    order they were added; ``finalise()`` puts them into travel order.
    """

    def __init__(self, trip_id, route_id, service_id, trip_headsign, direction_id, block_id):
        self.trip_id = trip_id
        self.route_id = route_id
        self.service_id = service_id
        self.trip_headsign = trip_headsign
        self.direction_id = direction_id
        self.block_id = block_id

        self.stop_times = []

    def add_stop(self, time, stop_seq, stop_id):
        """Append one stop visit; call ``finalise()`` once all have been added."""
        self.stop_times.append((time, stop_seq, stop_id))

    def finalise(self):
        """Order the stops by sequence number and register the trip on its route.

        The route is looked up in the global ``route_list``.
        """
        # stop_seq may be a string, so compare it numerically: as text '10'
        # sorts before '9', which misplaced stops sharing the same time.
        # The time value only breaks ties between equal sequence numbers.
        self.stop_times.sort(key=lambda visit: (int(visit[1]), visit[0]))
        route_list[self.route_id].add_trip(self.trip_id)

    def nth_stop_id(self, n):
        """Return the stop id at position ``n`` (0-based; negative indices allowed)."""
        return self.stop_times[n][2]

    def beg_stop_id(self):
        """Return the id of the first stop; raises IndexError if there are no stops."""
        return self.stop_times[0][2]

    def end_stop_id(self):
        """Return the id of the last stop; raises IndexError if there are no stops."""
        return self.stop_times[-1][2]

    def __str__(self):
        """Render a header (line name, trip id, headsign, direction, service) and one line per stop.

        Names are resolved through the globals ``route_list``, ``services_list``
        and ``stop_list``.
        """
        lines = [
            f'{route_list[self.route_id].name}: {self.trip_id}',
            f'{self.trip_headsign} (dir: {self.direction_id}), {services_list[self.service_id]}',
            '',
        ]
        lines.extend(
            f'{time} {stop_list[stop_id].name}({seq})'
            for time, seq, stop_id in self.stop_times
        )
        lines.append('')
        return '\n'.join(lines) + '\n'

In [6]:
#load .zip file
# Every table is read with csv.reader instead of str.split(','), so quoted
# fields that contain commas stay in one piece and surrounding quotes are
# removed by the parser. The text is decoded explicitly as 'utf-8-sig' (UTF-8
# with an optional byte-order mark dropped) rather than with the platform's
# default encoding. Blank lines are skipped instead of ending the table.
# For each table, the *_desc dict maps a header column name to its index.
with ZipFile(loc) as my_zip:
    #load stops
    stop_list = dict()
    stop_desc = dict()
    with TextIOWrapper(my_zip.open('stops.txt'), encoding='utf-8-sig', newline='') as file:
        reader = csv.reader(file)
        for i, column in enumerate(next(reader, [])):
            stop_desc[column] = i

        for s in reader:
            if not s:
                continue
            stop_id = s[stop_desc['stop_id']]
            name = s[stop_desc['stop_name']]
            lat = float(s[stop_desc['stop_lat']])
            lon = float(s[stop_desc['stop_lon']])

            stop_list[stop_id] = Stop(stop_id, name, lat, lon)

    #load routes
    route_list = dict()
    route_desc = dict()
    with TextIOWrapper(my_zip.open('routes.txt'), encoding='utf-8-sig', newline='') as file:
        reader = csv.reader(file)
        for i, column in enumerate(next(reader, [])):
            route_desc[column] = i

        for s in reader:
            if not s:
                continue
            route_id = s[route_desc['route_id']]
            name = s[route_desc['route_short_name']]

            route_list[route_id] = Route(route_id, name)

    #load trips
    trip_list = dict()
    trip_desc = dict()
    with TextIOWrapper(my_zip.open('trips.txt'), encoding='utf-8-sig', newline='') as file:
        reader = csv.reader(file)
        for i, column in enumerate(next(reader, [])):
            trip_desc[column] = i

        for s in reader:
            if not s:
                continue
            trip_id = s[trip_desc['trip_id']]
            route_id = s[trip_desc['route_id']]
            service_id = s[trip_desc['service_id']]
            trip_headsign = s[trip_desc['trip_headsign']]
            direction_id = int(s[trip_desc['direction_id']])
            block_id = s[trip_desc['block_id']]

            trip_list[trip_id] = Trip(trip_id, route_id, service_id, trip_headsign, direction_id, block_id)

    #load stop times
    stop_times_desc = dict()
    with TextIOWrapper(my_zip.open('stop_times.txt'), encoding='utf-8-sig', newline='') as file:
        reader = csv.reader(file)
        for i, column in enumerate(next(reader, [])):
            stop_times_desc[column] = i

        for s in reader:
            if not s:
                continue
            trip_id = s[stop_times_desc['trip_id']]
            arrival_time = s[stop_times_desc['arrival_time']]
            departure_time = s[stop_times_desc['departure_time']]
            stop_sequence = s[stop_times_desc['stop_sequence']]
            stop_id = s[stop_times_desc['stop_id']]

            # Only one time per stop is stored on the trip, so a row whose
            # arrival and departure differ aborts the load.
            if not arrival_time == departure_time:
                raise Exception('arrival_time != departure_time : ' + str(s))

            trip_list[trip_id].add_stop(arrival_time, stop_sequence, stop_id)

    #load service types
    services_list = dict()
    sevices_desc = dict()  # (sic) spelling kept in case other cells read this name
    weekdays = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday')
    # Monday..Sunday flags -> readable label; any other pattern is shown as
    # the space-joined flags themselves.
    service_labels = {
        tuple('1111111'): 'Week',
        tuple('1111100'): 'Workweek',
        tuple('0000011'): 'Weekend',
        tuple('0000010'): 'Saturday',
        tuple('0000001'): 'Sunday',
        tuple('0000000'): 'None',
    }
    with TextIOWrapper(my_zip.open('calendar.txt'), encoding='utf-8-sig', newline='') as file:
        reader = csv.reader(file)
        for i, column in enumerate(next(reader, [])):
            sevices_desc[column] = i

        for s in reader:
            if not s:
                continue
            service_id = s[sevices_desc['service_id']]
            # Pick each weekday column by name rather than slicing
            # monday..sunday, so the result does not depend on column order.
            days = tuple(s[sevices_desc[day]] for day in weekdays)
            val = service_labels.get(days, ' '.join(days))

            services_list[service_id] = val

#finalise loading trips
# Done only after every stop time has been read: finalise() sorts each trip's
# stops and registers the trip on its route.
for trip_id, trip in trip_list.items():
    trip.finalise()

In [7]:
for route_id, route in route_list.items():
    print(f'line: {route.name}\n')
    # Per-line tallies keyed by stop id: how many trips start at a stop (beg),
    # finish at it (end), or pass it strictly in between (norm).
    beg = defaultdict(int)
    norm = defaultdict(int)
    end = defaultdict(int)
    for trip_id in route.trip_ids:
        trip = trip_list[trip_id]

        beg[trip.beg_stop_id()] += 1
        end[trip.end_stop_id()] += 1

        # Everything except the first and the last visit of the trip.
        for _, _, stop_id in trip.stop_times[1:-1]:
            norm[stop_id] += 1

    # Stops that serve as both a first and a last stop on this line.
    for stop_id in beg.keys() & end.keys():
        print(f'{stop_list[stop_id].name}: beg:{beg[stop_id]}, end:{end[stop_id]}, norm:{norm[stop_id]}')
    print()
    # Stops that only ever appear as a first stop.
    for stop_id in beg.keys() - end.keys():
        print(f'{stop_list[stop_id].name}: beg:{beg[stop_id]}, norm:{norm[stop_id]}')
    print()
    # Stops that only ever appear as a last stop.
    for stop_id in end.keys() - beg.keys():
        print(f'{stop_list[stop_id].name}: end:{end[stop_id]}, norm:{norm[stop_id]}')
    print('\n')

line: 1

Salwator: beg:236, end:242, norm:0
Wzgórza Krzesławickie: beg:231, end:253, norm:0

Salwator: beg:11, norm:236
Jarzębiny: beg:22, norm:231
Brożka (nż): beg:11, norm:0
Elektromontaż (nż): beg:17, norm:0

PH: end:22, norm:0
PT: end:11, norm:0


line: 19

Dworzec Towarowy: beg:177, end:183, norm:0
Borek Fałęcki: beg:177, end:189, norm:0

Politechnika: beg:6, norm:177
Elektromontaż (nż): beg:6, norm:0
Łagiewniki: beg:12, norm:0
Borek Fałęcki I: beg:12, norm:177

PH: end:6, norm:0
PT: end:12, norm:0


line: 22

Brożka (nż): beg:6, end:3, norm:177
Borek Fałęcki: beg:177, end:184, norm:0
Wzgórza Krzesławickie: beg:160, end:160, norm:0
Kombinat: beg:20, end:20, norm:160

Zajezdnia Nowa Huta: beg:20, norm:160
Zajezdnia Nowa Huta: beg:20, norm:160
Łagiewniki: beg:4, norm:180
Borek Fałęcki I: beg:7, norm:177

Kombinat: end:20, norm:160
PT: end:7, norm:0
PH: end:20, norm:0


line: 3

Krowodrza Górka: beg:217, end:226, norm:0
Nowy Bieżanów P+R: beg:229, end:246, norm:0
Dworzec Towarowy: be

In [8]:
#first of these does not actually exist (T)
# Prints every trip of the route named '18' whose last stop is named 'PT'.
# NOTE(review): '(T)' presumably refers to the GTFS_KRK_T.zip feed - confirm.
r18 = find_route_by_name('18')
pt = find_stops_by_name('PT')  # set of stop ids; several stops may share a name
strange_trips = r18.get_trip_ids_end_in(pt)
for trip_id in strange_trips:
    trip = trip_list[trip_id]
    print(trip)  # formatted by Trip.__str__

18: block_167_trip_20_service_2
Zajezdnia Podgórze (dir: 0), Saturday

22:42:00 Chmieleniec(2)
22:43:00 Kampus UJ(3)
22:44:00 Ruczaj(4)
22:45:00 Norymberska(5)
22:47:00 Grota-Roweckiego(6)
22:49:00 Lipińskiego(7)
22:50:00 Borsucza(8)
22:51:00 Brożka (nż)(9)
22:52:00 Łagiewniki(10)
22:55:00 PT(11)


18: block_64_trip_18_service_3
Zajezdnia Podgórze (dir: 0), Sunday

24:27:00 Chmieleniec(2)
24:28:00 Kampus UJ(3)
24:29:00 Ruczaj(4)
24:30:00 Norymberska(5)
24:32:00 Grota-Roweckiego(6)
24:34:00 Lipińskiego(7)
24:35:00 Borsucza(8)
24:36:00 Brożka (nż)(9)
24:37:00 Łagiewniki(10)
24:40:00 PT(11)


18: block_332_trip_22_service_2
Zajezdnia Podgórze (dir: 0), Saturday

24:07:00 Chmieleniec(2)
24:08:00 Kampus UJ(3)
24:09:00 Ruczaj(4)
24:10:00 Norymberska(5)
24:12:00 Grota-Roweckiego(6)
24:14:00 Lipińskiego(7)
24:15:00 Borsucza(8)
24:16:00 Brożka (nż)(9)
24:17:00 Łagiewniki(10)
24:20:00 PT(11)


18: block_166_trip_19_service_2
Zajezdnia Podgórze (dir: 1), Saturday

21:05:00 Bratysławska(2)
21:06:0

In [4]:
#at least some of the trips below actually exist (A)
# Prints every trip of the route named '133' whose first stop is named
# 'Prokocim Szpital'.
# NOTE(review): this cell's execution count (4) is lower than the loading
# cell's (6), and '(A)' presumably refers to GTFS_KRK_A.zip while `loc`
# currently selects GTFS_KRK_T.zip - confirm the cell still finds route '133'
# after Restart & Run All.
r133 = find_route_by_name('133')
prokocim = find_stops_by_name('Prokocim Szpital')  # set of stop ids
strange_trips = r133.get_trip_ids_beg_in(prokocim)
for trip_id in strange_trips:
    trip = trip_list[trip_id]
    print(trip)  # formatted by Trip.__str__

133: block_402_trip_2_service_2
Bieżanów Potrzask (dir: 1), Saturday

04:58:00 Prokocim Szpital(1)
04:59:00 Teligi(2)
05:00:00 Nowy Prokocim(3)
05:02:00 Ćwiklińskiej(4)
05:03:00 Nowy Bieżanów P+R(5)
05:04:00 Mała Góra (nż)(6)
05:05:00 Cmentarz Bieżanów(7)
05:06:00 Ks.Łaczka (nż)(8)
05:08:00 Bieżanów Drożdżownia SKA(9)
05:09:00 Drożdżowa(10)
05:10:00 Ślósarczyka(11)
05:11:00 Bieżanów Kaim(12)
05:12:00 Bieżanów Potrzask(13)


133: block_910_trip_2_service_1
Bieżanów Potrzask (dir: 1), Workweek

04:46:00 Prokocim Szpital(1)
04:47:00 Teligi(2)
04:48:00 Nowy Prokocim(3)
04:50:00 Ćwiklińskiej(4)
04:51:00 Nowy Bieżanów P+R(5)
04:52:00 Mała Góra (nż)(6)
04:53:00 Cmentarz Bieżanów(7)
04:54:00 Ks.Łaczka (nż)(8)
04:56:00 Bieżanów Drożdżownia SKA(9)
04:57:00 Drożdżowa(10)
04:58:00 Ślósarczyka(11)
04:59:00 Bieżanów Kaim(12)
05:00:00 Bieżanów Potrzask(13)


133: block_147_trip_2_service_3
Bieżanów Potrzask (dir: 1), Sunday

04:58:00 Prokocim Szpital(1)
04:59:00 Teligi(2)
05:00:00 Nowy Prokocim(3)
05

In [3]:
#at least some of the trips below actually exist (A)
# Prints every trip of the route named '204' whose first stop is named
# 'Bieżanowska'.
# NOTE(review): this cell's execution count (3) is lower than the loading
# cell's (6), and '(A)' presumably refers to GTFS_KRK_A.zip while `loc`
# currently selects GTFS_KRK_T.zip - confirm the cell still finds route '204'
# after Restart & Run All.
r204 = find_route_by_name('204')
biezankowska = find_stops_by_name('Bieżanowska')  # set of stop ids
strange_trips = r204.get_trip_ids_beg_in(biezankowska)
for trip_id in strange_trips:
    trip = trip_list[trip_id]
    print(trip)  # formatted by Trip.__str__

204: block_121_trip_2_service_2
Wieliczka Miasto (dir: 0), Saturday

04:47:00 Bieżanowska(1)
04:48:00 Wlotowa(2)
04:50:00 Prokocim Szpital(3)
04:52:00 Szpital Uniwersytecki / Instytut Pediatr(4)
04:54:00 Jerzmanowskiego(5)
04:55:00 Węzeł Wielicki (nż)(6)
04:57:00 Wielicka Granica Miasta(7)
04:59:00 Wieliczka Stacja Paliw(8)
05:01:00 Wieliczka Klasztor(9)
05:03:00 Wieliczka Cmentarz(10)
05:05:00 Wieliczka Miasto(11)


204: block_933_trip_2_service_1
Wieliczka Miasto (dir: 0), Workweek

05:18:00 Bieżanowska(1)
05:19:00 Wlotowa(2)
05:21:00 Prokocim Szpital(3)
05:23:00 Szpital Uniwersytecki / Instytut Pediatr(4)
05:25:00 Jerzmanowskiego(5)
05:26:00 Węzeł Wielicki (nż)(6)
05:28:00 Wielicka Granica Miasta(7)
05:30:00 Wieliczka Stacja Paliw(8)
05:32:00 Wieliczka Klasztor(9)
05:34:00 Wieliczka Cmentarz(10)
05:36:00 Wieliczka Miasto(11)


204: block_857_trip_2_service_2
Czerwone Maki P+R (dir: 1), Saturday

05:12:00 Bieżanowska(1)
05:14:00 Dauna(2)
05:16:00 Piaski Nowe(3)
05:17:00 Nowosądecka(4