In [None]:
# Import the required libraries

import pymongo
import datetime
import collections

import pandas as pd
import scipy.stats

import folium
import uuid

from bson.objectid import ObjectId
from bson.son import SON

In [None]:
# Connect to the local MongoDB server, select the accidents database, and bind
# short names to the three collections used below: accidents, labels and roads.
client = pymongo.MongoClient('mongodb://localhost:27351/')

db = client['accidents']
accidents = db['accidents']
labels = db['labels']
roads = db['roads']

In [None]:
# Build two in-memory lookup tables from the labels collection:
#   expanded_name[label]   -> the expanded name stored for that label
#   label_of[label, code]  -> the human-readable text stored for that code
# Both are defaultdict(str), so an unknown key yields '' rather than raising.
expanded_name = collections.defaultdict(str)
for label_doc in labels.find({'expanded': {"$exists": True}}):
    expanded_name[label_doc['label']] = label_doc['expanded']

label_of = collections.defaultdict(str)
for label_doc in labels.find({'codes': {"$exists": True}}):
    for code, meaning in label_doc['codes'].items():
        # Codes that look like integers are keyed by the int; anything else
        # keeps its original key.
        try:
            code_key = int(code)
        except ValueError:
            code_key = code
        label_of[label_doc['label'], code_key] = meaning

# Activity 1

In [None]:
# Solution
def normalise_road_name(road_class, road_number):
    """Turn an accident's road class and road number into a road-name string.

    Returns:
        * None when road_number is 0, or when road_class is not 1 to 5;
        * 'M<number>' for class 1;
        * 'A1(M)' for class 2 with number 1, otherwise 'A<number>M' for class 2;
        * the label_of text for ('2nd_Road_Class', road_class) followed by the
          number for classes 3 to 5.
    """
    # A road number of zero never produces a name, whatever the class.
    if road_number == 0:
        return None

    number_text = str(road_number)
    if road_class == 1:
        return 'M' + number_text
    if road_class == 2:
        # Number 1 is the only class-2 road written with brackets.
        return 'A1(M)' if road_number == 1 else 'A' + number_text + 'M'
    if 3 <= road_class <= 5:
        # The same '2nd_Road_Class' label table supplies the prefix for either road.
        return label_of[('2nd_Road_Class', road_class)] + number_text
    return None

In [None]:
# Smoke test: show the (first road, second road) normalised names for 20 accidents
[
    (
        normalise_road_name(accident['1st_Road_Class'], accident['1st_Road_Number']),
        normalise_road_name(accident['2nd_Road_Class'], accident['2nd_Road_Number'])
    )
    for accident in accidents.find(limit=20)
]

# Activity 2

In [None]:
# Solution
def _nearest_road_results(road_name, location):
    """Return the geoNear 'results' list for the closest road called road_name.

    Runs the geoNear command against the `roads` collection, restricted to
    documents whose 'Road' field equals road_name and limited to one result.

    NOTE(review): the `geoNear` database command is only available on older
    MongoDB servers (it was deprecated and later removed) -- confirm the
    server version before reusing this elsewhere.
    """
    response = db.command(SON([('geoNear', 'roads'),
                               ('near', location),
                               ('spherical', True),
                               ('query', {'Road': road_name}),
                               ('limit', 1)]))
    return response['results']


def road_for_accident(accident):
    """Find the `roads` document nearest to an accident on one of its named roads.

    The accident's first and second road class/number pairs are converted to
    road names with normalise_road_name. For each name that exists, the closest
    `roads` document with that 'Road' value (measured from accident['loc']) is
    looked up, and the nearer of the candidates is returned.

    Args:
        accident: an accident document providing '1st_Road_Class',
            '1st_Road_Number', '2nd_Road_Class', '2nd_Road_Number' and 'loc'.

    Returns:
        The matching road document, or None when neither road has a usable
        name or no road document matched.
    """
    first_road_name = normalise_road_name(accident['1st_Road_Class'], accident['1st_Road_Number'])
    second_road_name = normalise_road_name(accident['2nd_Road_Class'], accident['2nd_Road_Number'])

    # Look each distinct name up once: when both roads normalise to the same
    # name, a second query would only repeat the first one's answer.
    road_names = [first_road_name]
    if second_road_name != first_road_name:
        road_names.append(second_road_name)

    candidates = []
    for road_name in road_names:
        if road_name:
            candidates.extend(_nearest_road_results(road_name, accident['loc']))

    if not candidates:
        return None

    # min() returns the first of equally distant candidates, so the first road
    # still wins ties exactly as a stable sort would have given.
    nearest = min(candidates, key=lambda result: result['dis'])
    return nearest['obj']

In [None]:
# Test it works: look up the nearest matching road for a single accident document.
# The cell displays the road document returned, or nothing if the result is None.
road_for_accident(accidents.find_one())

In [None]:
# Bulk check over 20 accidents: each row pairs the matched road's 'Road' and 'CP'
# values (None, None when no road was found) with the two normalised road names.
res = []
for a in accidents.find(limit=20):
    rfa = road_for_accident(a)
    matched_road = (rfa['Road'], rfa['CP']) if rfa else (None, None)
    res.append(matched_road + (
        normalise_road_name(a['1st_Road_Class'], a['1st_Road_Number']),
        normalise_road_name(a['2nd_Road_Class'], a['2nd_Road_Number'])))
res

# Activity 3

In [None]:
# Solution

# Start from a clean slate: drop every road's cached list of accident indexes
# and reset its accident counter to zero.
roads.update_many(
    {},
    {
        '$unset': {'nearby_accidents': True},
        '$set': {'nearby_accident_count': 0}
    })

# Attach each accident's index to its nearest road document, keeping a running
#   count of the accidents recorded against that road.
for a in accidents.find():
    rfa = road_for_accident(a)
    if not rfa:
        # No road could be matched to this accident.
        continue
    if a['Accident_Index'] in rfa.get('nearby_accidents', []):
        # Already recorded against this road; do not count it twice.
        continue
    roads.update_one(
        {'_id': rfa['_id']},
        {
            '$push': {'nearby_accidents': a['Accident_Index']},
            '$inc': {'nearby_accident_count': 1}
        })

In [None]:
# Inspect the outcome: list every road document whose 'CP' field equals 16854.
# NOTE(review): 16854 looks like an arbitrary sample value -- confirm why this one was chosen.
list(roads.find({'CP': 16854}))