# 15.4_allocating_accidents_to_roads_REVIEW

In [5]:
# Import the required libraries
import pymongo
import datetime
import collections

import pandas as pd
import scipy.stats

import folium
import uuid

from bson.objectid import ObjectId
from bson.son import SON

In [2]:
# Connect to the local Mongo server, select the accidents database, and keep
# handles on the accidents, labels and roads collections.
client = pymongo.MongoClient('mongodb://localhost:27351/')

db = client['accidents']
accidents = db['accidents']
labels = db['labels']
roads = db['roads']

Bugfix: there's an index on the `roads` collection that shouldn't be there. This will remove the index if it exists.

In [3]:
# Drop the unwanted index from the roads collection, but only if it is present,
# so that this cell can be re-run safely.
stale_index_name = 'Road_1_loc_2dsphere'
existing_road_indexes = roads.index_information()
if stale_index_name in existing_road_indexes:
    roads.drop_index(stale_index_name)

In [4]:
# Build two in-memory lookup tables from the labels collection:
#   expanded_name: label -> expanded name of that key
#   label_of:      (label, code) -> human-readable meaning of that code
# Both are defaultdicts, so an unknown key yields an empty string.
expanded_name = collections.defaultdict(str)
for expanded_doc in labels.find({'expanded': {"$exists": True}}):
    expanded_name[expanded_doc['label']] = expanded_doc['expanded']

label_of = collections.defaultdict(str)
for coded_doc in labels.find({'codes': {"$exists": True}}):
    field_name = coded_doc['label']
    for raw_code, meaning in coded_doc['codes'].items():
        # Codes that parse as integers are keyed by the int; any other code
        # (e.g. 'TR') is keyed by its original value.
        try:
            code_key = int(raw_code)
        except ValueError:
            code_key = raw_code
        label_of[field_name, code_key] = meaning

To start work on the _danger_ of each road section, we need to identify which accidents took place on which road.

The simple approach is to associate each accident with the nearest road census point. However, this has two problems. One is that the accident may be on a minor road that isn't associated with the traffic census point. The other is that the road on which the accident occurs may not be the one with the closest road census point (for instance, if the accident occurs near a junction).

Therefore, we need to annotate each road census document with the accidents that occur on that road section. To avoid too much duplication, we'll just annotate the `road` document with the relevant accident indexes. If we want more details about the accidents, we can look them up directly from the `accidents` collection.

## Activity 1

Convert the road information in the accident document into the standard name as used in the `road` document.

We are starting with information like this:

In [6]:
# Preview the road class/number fields of the first five accident documents.
accident_road_fields = ['1st_Road_Class', '1st_Road_Number',
                        '2nd_Road_Class', '2nd_Road_Number']
accident_road_sample = list(accidents.find({}, accident_road_fields, limit=5))
pd.DataFrame(accident_road_sample, columns=accident_road_fields)


Unnamed: 0,1st_Road_Class,1st_Road_Number,2nd_Road_Class,2nd_Road_Number
0,3,308,5,0
1,4,412,6,0
2,3,3220,6,0
3,5,0,6,0
4,4,325,6,0


... and this ...


In [7]:
# List every (code, meaning) pair recorded for the '2nd_Road_Class' field.
sorted((code, meaning)
       for (field_name, code), meaning in label_of.items()
       if field_name == '2nd_Road_Class')

[(0, 'Not at junction or within 20 metres'),
 (1, 'Motorway'),
 (2, 'A(M)'),
 (3, 'A'),
 (4, 'B'),
 (5, 'C'),
 (6, 'Unclassified')]

and you have to convert it to the 'typical' road numbers like this:

In [8]:
# Preview the road name and road category of the first five road documents.
road_fields = ['Road', 'RCat']
road_sample = list(roads.find({}, road_fields, limit=5))
pd.DataFrame(road_sample, columns=road_fields)

Unnamed: 0,Road,RCat
0,A5,TR
1,A5,TR
2,A40,TR
3,A40,TR
4,A40,TR


In [9]:
# List every (code, meaning) pair recorded for the 'RCat' (road category) field.
sorted((code, meaning)
       for (field_name, code), meaning in label_of.items()
       if field_name == 'RCat')

[('BR', 'Rural B road'),
 ('BU', 'Urban B road'),
 ('CR', 'Rural C road'),
 ('CU', 'Urban C road'),
 ('PM', 'Principal motorway'),
 ('PR', 'Rural principal road'),
 ('PU', 'Urban principal road'),
 ('TM', 'Trunk motorway'),
 ('TR', 'Rural trunk road'),
 ('TU', 'Urban trunk road'),
 ('UR', 'Rural U road'),
 ('UU', 'Urban U road')]

In [10]:
# Name prefix for the road classes that are written as <letter><number>.
# The values mirror the '1st/2nd_Road_Class' code labels (3: 'A', 4: 'B', 5: 'C').
# They are held locally so the function does not silently produce prefix-less
# names when the `label_of` lookup table has not been loaded.
_ROAD_CLASS_PREFIXES = {3: 'A', 4: 'B', 5: 'C'}


def normalise_road_name(road_class, road_number):
    """Convert an accident's road class and number into a road name.

    Parameters
    ----------
    road_class : int
        Road class code from an accident document
        (1 = Motorway, 2 = A(M), 3 = A, 4 = B, 5 = C, 6 = Unclassified).
    road_number : int
        Road number from an accident document; 0 means there is no number.

    Returns
    -------
    str or None
        A name such as 'M4', 'A1(M)', 'A3M', 'A308', 'B412' or 'C7', or
        None when the road has no usable number or the class has no
        named form (e.g. unclassified roads, or "not at junction").
    """
    # 0 marks "no road number". Negative values and None are treated the same
    # way rather than producing names like 'A-1'
    # (assumed to be missing-data sentinels -- TODO confirm against the data).
    if road_number is None or road_number <= 0:
        return None

    if road_class == 1:
        return 'M' + str(road_number)

    if road_class == 2:
        # A(M) roads: road 1 is spelled 'A1(M)', all others as 'A<number>M'
        # (presumably matching the names in the roads collection -- TODO confirm).
        if road_number == 1:
            return 'A1(M)'
        return 'A' + str(road_number) + 'M'

    # A, B and C roads; any other class (including None) has no road name.
    prefix = _ROAD_CLASS_PREFIXES.get(road_class)
    if prefix is None:
        return None
    return prefix + str(road_number)


In [12]:
# Smoke test: normalise the first and second road of the first 20 accidents.
road_name_pairs = []
for accident in accidents.find(limit=20):
    first_road = normalise_road_name(accident['1st_Road_Class'],
                                     accident['1st_Road_Number'])
    second_road = normalise_road_name(accident['2nd_Road_Class'],
                                      accident['2nd_Road_Number'])
    road_name_pairs.append((first_road, second_road))
road_name_pairs

[('A308', None),
 ('B412', None),
 ('A3220', None),
 (None, None),
 ('B325', None),
 ('A308', 'A3220'),
 ('A3216', 'A4'),
 ('B450', None),
 (None, None),
 (None, None),
 ('A315', None),
 ('A315', 'A3220'),
 ('A402', 'A4206'),
 ('B415', 'B450'),
 ('B450', 'B412'),
 ('A3217', None),
 (None, None),
 ('A3220', 'A3220'),
 ('B316', None),
 ('A4204', None)]

## Activity 2
When given an accident, find the closest road census document for the same road. Return `None` if there isn't one for this road.

Note that the relevant road could be for either the accident's first or second recorded road.

Also note that you'll need to add

```
from bson.son import SON
```

and use the direct MongoDB command `geoNear` to find the nearest road segment to a given point. 

```
road_result = db.command(SON([('geoNear', 'roads'), 
                ('near', <the given point>),
                ('spherical', True),
                ('query', <document for additional features on road segment>),
                ('limit', 1)]))
```
For example, the cell below will pick an arbitrary accident, then find the motorway segment nearest to it.

In [13]:
from bson.son import SON

# Take an arbitrary accident and show its index and location.
a = accidents.find_one()
print(a['Accident_Index'], a['loc'])

# Ask MongoDB for the single trunk-motorway ('TM') road document that is
# nearest to the accident's location.
geo_near_command = SON([
    ('geoNear', 'roads'),
    ('near', a['loc']),
    ('spherical', True),
    ('query', {'RCat': 'TM'}),
    ('limit', 1),
])
nearest_road_result = db.command(geo_near_command)

# The matched road document sits under 'obj' in the first result entry.
nearest_road = nearest_road_result['results'][0]['obj']
print(nearest_road['CP'],
      nearest_road['ONS LA Name'],
      nearest_road['Road'])

201201BS70001 {'coordinates': [-0.169101, 51.493429], 'type': 'Point'}
47892 Hounslow M4


In [15]:
# Display one complete accident document to show which fields are available.
accidents.find_one()

{'1st_Road_Class': 3,
 '1st_Road_Number': 308,
 '2nd_Road_Class': 5,
 '2nd_Road_Number': 0,
 'Accident_Index': '201201BS70001',
 'Accident_Severity': 3,
 'Carriageway_Hazards': 0,
 'Casualties': [{'Age_Band_of_Casualty': 7,
   'Bus_or_Coach_Passenger': 0,
   'Car_Passenger': 0,
   'Casualty_Class': 1,
   'Casualty_Home_Area_Type': 1,
   'Casualty_Reference': 1,
   'Casualty_Severity': 3,
   'Casualty_Type': 1,
   'Pedestrian_Location': 0,
   'Pedestrian_Movement': 0,
   'Pedestrian_Road_Maintenance_Worker': 0,
   'Sex_of_Casualty': 1,
   'Vehicle_Reference': 2}],
 'Date': '19/01/2012',
 'Datetime': datetime.datetime(2012, 1, 19, 20, 35),
 'Day_of_Week': 5,
 'Did_Police_Officer_Attend_Scene_of_Accident': 1,
 'Junction_Control': 2,
 'Junction_Detail': 6,
 'LSOA_of_Accident_Location': 'E01002821',
 'Latitude': 51.493429,
 'Light_Conditions': 4,
 'Local_Authority_(District)': 12,
 'Local_Authority_(Highway)': 'E09000020',
 'Location_Easting_OSGR': 527200,
 'Location_Northing_OSGR': 178760,