In [80]:
%%writefile busdists.py

from mrjob.job import MRJob
from math import radians, cos, sin, asin, sqrt

def haversine(p1, p2):  # p1, p2 coordinate points of form (latitude, longitude).
    # Convert decimal degrees to radians (also ensures values are floats).
    pr1 = (radians(float(p1[0])), radians(float(p1[1])))
    pr2 = (radians(float(p2[0])), radians(float(p2[1])))

    # Haversine formula
    dlat = pr2[0] - pr1[0]   # Difference of latitudes.
    dlon = pr2[1] - pr1[1]   # Difference of longitudes.
    a = sin(dlat/2)**2 + cos(pr1[0]) * cos(pr2[0]) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers.
    return c * r

class BusDistances(MRJob):
    
    def mapper(self, _, line):
        import os
        file_name = os.getenv('mapreduce_map_input_file')
        key = file_name[-6:-4]
        stats = line.split(",")
        try:
            bus = stats[9]
            lon = float(stats[4])
            lat = float(stats[5])
            time = stats[0]
            yield key, (bus, time, (lat,lon))
        except:
            pass
       
            
    def combiner(self, key, value):
        buses = {}
        for v in value:
            if v[0] not in buses.keys():
                buses[v[0]] = [(v[1], v[2])]
            else:
                buses[v[0]].append((v[1], v[2]))
        yield key, buses
    
    def reducer(self, key, value):
        for buses in value:
            dist = 0
            for bus in buses.values():
                for i in range(len(bus)-1):
                    dist += haversine(bus[i][1],bus[i+1][1])
        yield key, dist
        

if __name__ == '__main__':
    BusDistances.run()

Overwriting busdists.py


In [81]:
%run busdists.py bus250315/journeys_2015-03-25-*.csv

No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configuration
No configs found; falling back on auto-configu

job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620\output
job output is in C:\Users\harju\AppData\

"00"	3270.1954674275
"01"	4394.8628428908
"03"	4169.965705799
"04"	8361.9378498356
"05"	50154.3303925624
"06"	112480.9419672559
"07"	16393.6568169924
"08"	1164.3450837225
"09"	63192.1021711036
"10"	46636.2472907115
"11"	62229.6196268488
"12"	165270.832944228
"13"	121871.5587314992
"14"	2369.8816920621
"15"	122710.3179147632
"16"	48.8529954643
"17"	62740.3424322282
"18"	44408.8383602345
"19"	43663.9899317591
"20"	81484.5700752441
"21"	65626.5183210537
"22"	27759.8157207253
"23"	95238.7428033509


Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing temp directory C:\Users\harju\AppData\Local\Temp\busdists.harju.20191128.172201.534620...
Removing t