# Exploration of the data

- Useful variables
- Visualising the data

In [67]:
import pandas as pd
import matplotlib.pyplot as plt
import folium

In [68]:
# Load the station and connection tables for both problem instances:
# part 1 covers North- and South-Holland, part 2 the whole of the Netherlands.
stations_holland = pd.read_csv('data/StationsHolland.csv')
stations_national = pd.read_csv('data/StationsNationaal.csv')
connections_holland = pd.read_csv('data/ConnectiesHolland.csv')
connections_national = pd.read_csv('data/ConnectiesNationaal.csv')

# Summary for part 1: row counts and the mean of the `distance` column.
print("Part 1: North- and South-Holland")
print(f"Stations in North- and South-Holland: {len(stations_holland)}")
print(f"Connections in North- and South-Holland: {len(connections_holland)}")
print(f"Mean distance of connections in North- and South-Holland: {connections_holland['distance'].mean()}")
print("________________________________________________________________________")

# Summary for part 2. The mean-distance label previously said
# "North- and South-Holland" although the value comes from the national table.
print("Part 2: the Netherlands")
print(f"Stations in the Netherlands: {len(stations_national)}")
print(f"Connections in the Netherlands: {len(connections_national)}")
print(f"Mean distance of connections in the Netherlands: {connections_national['distance'].mean()}")

Part 1: North- and South-Holland
Stations in North- and South-Holland: 22
Connections in North- and South-Holland: 28
Mean distance of connections in North- and South-Holland: 13.607142857142858
________________________________________________________________________
Part 2: the Netherlands
Stations in the Netherlands: 61
Connections in the Netherlands: 89
Mean distance of connections in North- and South-Holland: 17.426966292134832


## Where are the stations located?

For plotting the maps, we use the Folium library. <br>
[Folium documentation](https://python-visualization.github.io/folium/)

GitHub does not render the interactive maps, so to view them please open this notebook in nbviewer via [this link](https://nbviewer.org/github/maiklarooij/Trainspotters/blob/main/exploration.ipynb).

In [69]:
# Create an interactive folium map; the initial view is centred on
# Amsterdam Central station.
map_of_holland = folium.Map(location=[52.37888718, 4.900277615], tiles='cartodbpositron', zoom_start=9)

# Add one train marker per station.
# itertuples(index=False) yields only the row values, so no index variable is
# bound. The previous loop named that unused index `id`, which shadowed the
# builtin id() for every later cell in the notebook.
for station in stations_holland.itertuples(index=False):
    folium.Marker(
        # folium expects [latitude, longitude]; the `x` column is passed first.
        # NOTE(review): presumably `x` holds latitude and `y` longitude - confirm against the CSV.
        location=[station.x, station.y],
        popup=f"Station {station.station}. Location: ({station.x}, {station.y})",
        icon=folium.Icon(color='darkblue', icon='train', icon_color='#FEBE00', prefix='fa'),
    ).add_to(map_of_holland)

# Inject a centred HTML heading into the map's root document as its title.
title_holland = '''
             <h3 align="center" style="font-size:20px"><b>Stations located in North- and South-Holland</b></h3>
             '''
map_of_holland.get_root().html.add_child(folium.Element(title_holland))

# Last expression of the cell: renders the map as the cell output.
map_of_holland

In [70]:
# Create an interactive folium map; the initial view is centred on a point
# in the middle of the Netherlands.
map_of_netherlands = folium.Map(location=[52.243333, 5.634167], tiles='cartodbpositron', zoom_start=8)

# Add one train marker per station.
# itertuples(index=False) yields only the row values, so no index variable is
# bound. The previous loop named that unused index `id`, which shadowed the
# builtin id() for every later cell in the notebook.
for station in stations_national.itertuples(index=False):
    folium.Marker(
        # folium expects [latitude, longitude]; the `x` column is passed first.
        # NOTE(review): presumably `x` holds latitude and `y` longitude - confirm against the CSV.
        location=[station.x, station.y],
        popup=f"Station {station.station}. Location: ({station.x}, {station.y})",
        icon=folium.Icon(color='darkblue', icon='train', icon_color='#FEBE00', prefix='fa'),
    ).add_to(map_of_netherlands)

# Inject a centred HTML heading into the map's root document as its title.
title_nl = '''
             <h3 align="center" style="font-size:20px"><b>Stations located in the Netherlands</b></h3>
             '''
map_of_netherlands.get_root().html.add_child(folium.Element(title_nl))

# Last expression of the cell: renders the map as the cell output.
map_of_netherlands