## Strætó EDA

### Having a look at the files in "gtfs.zip" found at https://opendata.straeto.is/data/gtfs/.

In [5]:
import pandas as pd
import os

data_path = 'data'

files = os.listdir(data_path)

for file in files:
    print(file)

agency.txt
calendar_dates.txt
stop_times.txt
shapes.txt
trips.txt
stops.txt
routes.txt


In [8]:
dataframes = {}

for file in files:
    if file.endswith('.txt'):
        filename = os.path.splitext(file)[0]
        dataframes[filename] = pd.read_csv(os.path.join(data_path, file), sep= ',')

In [15]:
for name, df in list(dataframes.items()):
    print(f"Showing head and shape for {name}.txt:\n")
    display(df.head())
    display(df.shape)

Showing head and shape for agency.txt:



Unnamed: 0,agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone,agency_fare_url
0,1,Strætó BS,http://straeto.is/,Atlantic/Reykjavik,is,(+354)5402700,https://www.straeto.is/verslun
1,2,Reykjanesbær,http://straeto.is/,Atlantic/Reykjavik,is,(+354)5402700,http://straeto.is/verslun
2,3,Akureyri,http://straeto.is/,Atlantic/Reykjavik,is,(+354)5402700,http://straeto.is/verslun


(3, 7)

Showing head and shape for calendar_dates.txt:



Unnamed: 0,service_id,date,exception_type
0,20220522_MTWTF--_1,20230628,1
1,20220522_MTWTF--_3,20230628,1
2,20220904_--W----_90,20230628,1
3,20220904_--WT---_10,20230628,1
4,20220904_M-W-F--_11,20230628,1


(2162, 3)

Showing head and shape for stop_times.txt:



Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type
0,509532,07:25:00,07:25:00,60000001,1,Glerárgata / Eiðsvallagata,0
1,509532,07:26:00,07:26:00,60000069,2,Glerárgata / Eyrarvegur,0
2,509532,07:26:00,07:26:00,60000070,3,Þórunnarstræti / Lögreglustöð,0
3,509532,07:28:00,07:28:00,60000054,4,Þórunnarstræti / Hamarstígur,0
4,509532,07:29:00,07:29:00,60000052,5,Þórunnarstræti / Íþróttahöll,0


(336963, 7)

Showing head and shape for shapes.txt:



Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence
0,103_1006480_1_9,64.143304,-21.914677,1
1,103_1006480_1_9,64.143445,-21.914596,2
2,103_1006480_1_9,64.143513,-21.914589,3
3,103_1006480_1_9,64.143563,-21.914743,4
4,103_1006480_1_9,64.143734,-21.915586,5


(462677, 4)

Showing head and shape for trips.txt:



Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id
0,AF.91,20220904_-----S-_26,516175,,,0,6193_91-A,91_1006320_1_30
1,AF.91,20220904_-----S-_26,516183,,,1,6193_91-A,91_1006320_2_72
2,AF.91,20220904_MTWTF--_27,516176,,,0,6189_91-A,91_1006320_1_30
3,AF.91,20220904_MTWTF--_27,516179,,,0,6189_91-A,91_1006320_1_30
4,AF.91,20220904_MTWTF--_27,516180,,,1,6189_91-A,91_1006320_2_67


(11130, 8)

Showing head and shape for stops.txt:



Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,location_type
0,10000802,Hamraborg,64.111089,-21.908351,0
1,10000804,Sunnuhlíð,64.109082,-21.909596,0
2,10000805,Kópavogslaug,64.109198,-21.917279,0
3,10000806,Kársnesskóli,64.109192,-21.924001,0
4,10000807,Kópavör,64.109299,-21.930667,0


(1320, 5)

Showing head and shape for routes.txt:



Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_type
0,AF.91,1,91,Egilsstaðir <-> Norðfjörður,3
1,AF.92,1,92,Breiðdalsvík <-> Fáskrúðsfjörður,3
2,AF.93,1,93,Seyðisfjörður <-> Egilsstaðir,3
3,AF.94,1,94,Breiðdalsvík <-> Höfn,3
4,AF.95,1,95,Egilsstaðir <-> Borgarfjörður,3


(70, 5)

In [17]:
schema = {}

for name, df in dataframes.items():
    schema[name] = df.dtypes

for name, dtypes in schema.items():
    print(f"Schema for {name}:\n")
    print(dtypes)

Schema for agency:

agency_id           int64
agency_name        object
agency_url         object
agency_timezone    object
agency_lang        object
agency_phone       object
agency_fare_url    object
dtype: object
Schema for calendar_dates:

service_id        object
date               int64
exception_type     int64
dtype: object
Schema for stop_times:

trip_id            int64
arrival_time      object
departure_time    object
stop_id            int64
stop_sequence      int64
stop_headsign     object
pickup_type        int64
dtype: object
Schema for shapes:

shape_id              object
shape_pt_lat         float64
shape_pt_lon         float64
shape_pt_sequence      int64
dtype: object
Schema for trips:

route_id            object
service_id          object
trip_id              int64
trip_headsign       object
trip_short_name    float64
direction_id         int64
block_id            object
shape_id            object
dtype: object
Schema for stops:

stop_id            int64
stop_name  

### By creating a schema, we can see the relationship between tables

![title](data/schema.png)

## Now using the stops table, we can visualize all stops first in the Reykjavik area, then the whole country.

In [19]:
import folium

map_reykjavik = folium.Map(location=[64.1265, -21.8174], zoom_start=12)

stops_df = dataframes["stops"]


for index, row in stops_df.iterrows():
    folium.Marker([row['stop_lat'], row['stop_lon']], popup=row['stop_name']).add_to(map_reykjavik)


map_reykjavik

In [66]:
map_iceland = folium.Map(location=[64.9631, -19.0208], zoom_start=7)


for index, row in stops_df.iterrows():
    folium.Marker([row['stop_lat'], row['stop_lon']], popup=row['stop_name']).add_to(map_iceland)
    
map_iceland

In [50]:
routes_df = dataframes['routes']
trips_df = dataframes['trips']
stop_times_df = dataframes['stop_times']
stops_df = dataframes['stops']


merged_df = pd.merge(routes_df, trips_df, on='route_id')  
merged_df = pd.merge(merged_df, stop_times_df, on='trip_id') 
merged_df = pd.merge(merged_df, stops_df, on='stop_id')

merged_df.sort_values(by=['route_id', 'trip_id', 'stop_sequence'], inplace=True)

grouped = merged_df.groupby(['route_id', 'trip_id']).agg({'stop_lat': list, 'stop_lon': list, 'stop_name': list}).reset_index()

grouped

Unnamed: 0,route_id,trip_id,stop_lat,stop_lon,stop_name
0,AF.91,516175,"[65.2748, 65.2587083533151, 65.0344, 65.0323, ...","[-14.4062, -14.4069628003902, -14.2278, -14.21...","[Egilsstaðir - Flugvöllur, Egilsstaðir - Tjald..."
1,AF.91,516176,"[65.2748, 65.2587083533151, 65.0344, 65.0323, ...","[-14.4062, -14.4069628003902, -14.2278, -14.21...","[Egilsstaðir - Flugvöllur, Egilsstaðir - Tjald..."
2,AF.91,516179,"[65.2748, 65.2587083533151, 65.0344, 65.0323, ...","[-14.4062, -14.4069628003902, -14.2278, -14.21...","[Egilsstaðir - Flugvöllur, Egilsstaðir - Tjald..."
3,AF.91,516180,"[65.1485, 65.1489, 65.1484, 65.1479, 65.1475, ...","[-13.668, -13.6785, -13.69, -13.6982, -13.7069...","[Norðfjörður - Nesbakki, Norðfjörður - VA, Nor..."
4,AF.91,516181,"[65.1485, 65.1489, 65.1484, 65.1479, 65.1475, ...","[-13.668, -13.6785, -13.69, -13.6982, -13.7069...","[Norðfjörður - Nesbakki, Norðfjörður - VA, Nor..."
...,...,...,...,...,...
11125,VL.84,516082,"[65.659872, 65.8242363570201]","[-20.2769, -20.300123841206]","[Blönduós, Skagaströnd]"
11126,VL.84,516083,"[65.659872, 65.8242363570201]","[-20.2769, -20.300123841206]","[Blönduós, Skagaströnd]"
11127,VL.84,516084,"[65.659872, 65.8242363570201]","[-20.2769, -20.300123841206]","[Blönduós, Skagaströnd]"
11128,VL.84,516085,"[65.659872, 65.8242363570201]","[-20.2769, -20.300123841206]","[Blönduós, Skagaströnd]"


In [49]:
merged_df

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_type,service_id,trip_id,trip_headsign,trip_short_name,direction_id,...,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,stop_name,stop_lat,stop_lon,location_type
0,AF.91,1,91,Egilsstaðir <-> Norðfjörður,3,20220904_-----S-_26,516175,,,0,...,10:00:00,10:00:00,76207002,1,,0,Egilsstaðir - Flugvöllur,65.274800,-14.406200,0
16,AF.91,1,91,Egilsstaðir <-> Norðfjörður,3,20220904_-----S-_26,516175,,,0,...,10:02:00,10:02:00,76200005,2,,0,Egilsstaðir - Tjaldsvæðið,65.258708,-14.406963,0
40,AF.91,1,91,Egilsstaðir <-> Norðfjörður,3,20220904_-----S-_26,516175,,,0,...,10:30:00,10:30:00,73007354,3,,0,Reyðarfjörður - Orkuskálinn,65.034400,-14.227800,0
46,AF.91,1,91,Egilsstaðir <-> Norðfjörður,3,20220904_-----S-_26,516175,,,0,...,10:35:00,10:35:00,73007353,4,,0,Reyðarfjörður - Molinn,65.032300,-14.218600,0
64,AF.91,1,91,Egilsstaðir <-> Norðfjörður,3,20220904_-----S-_26,516175,,,0,...,10:37:00,10:37:00,73007558,5,,0,Reyðarfjörður - Barkur,65.031219,-14.206566,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
336960,VL.84,1,84,Skagaströnd <-> Blönduós,3,20220904_MTWTF--_16,516084,Skagaströnd,,1,...,13:46:00,13:46:00,56110001,2,,2,Skagaströnd,65.824236,-20.300124,0
336576,VL.84,1,84,Skagaströnd <-> Blönduós,3,20220904_MTWTF--_16,516085,Skagaströnd,,1,...,18:38:00,18:38:00,56040001,1,,2,Blönduós,65.659872,-20.276900,0
336961,VL.84,1,84,Skagaströnd <-> Blönduós,3,20220904_MTWTF--_16,516085,Skagaströnd,,1,...,19:08:00,19:08:00,56110001,2,,2,Skagaströnd,65.824236,-20.300124,0
336577,VL.84,1,84,Skagaströnd <-> Blönduós,3,20220904_MTWTF--_16,516086,Skagaströnd,,1,...,21:47:00,21:47:00,56040001,1,,2,Blönduós,65.659872,-20.276900,0


### Now we can view things like the route that connects Keflavik Airport to Reykjavik.

In [61]:
route55_df = grouped[grouped['route_id'] == 'SN.55']

map_airport = folium.Map(location=[64.05745145190305, -22.204075292344328], zoom_start=11)

for index, row in route55_df.iterrows():
    points = list(zip(row['stop_lat'], row['stop_lon'], row['stop_name']))
    polyline_points = list(zip(row['stop_lat'], row['stop_lon']))
    for lat, lon, name in points:
        folium.Marker([lat, lon], popup=name).add_to(map_airport)
        
    folium.PolyLine(polyline_points, color="blue", weight=2.5, opacity=1).add_to(map_airport)
    
map_airport