# VI: Second Practical Work

**Authors:** Gerard Comas & Marc Franquesa.


## Data Processing
This notebook processes all the raw datasets in `./original-data/` and writes the cleaned versions to `./processed-data/`.

In [None]:
# Initial imports
import pandas as pd
import numpy as np
import altair as alt
import geopandas as gpd
import warnings
from shapely.geometry import shape, Point

# Ignore every FutureWarning raised from here on (presumably to keep the
# cell outputs free of deprecation noise from the imported libraries).
warnings.simplefilter(action="ignore", category=FutureWarning)

### Collisions dataset

In [2]:
# Load the raw collisions table from disk
collisions_source_path = "./original-data/collisions.csv"
collisions = pd.read_csv(collisions_source_path)

In [3]:
# Keep only the crashes that happened during 2018. The comparison is done on
# the raw strings, so it relies on 'CRASH DATETIME' being stored in ISO order
# (YYYY-MM-DD ...) -- TODO confirm against the raw file. The lower bound is
# explicit so that rows from earlier years can never slip through.
crash_datetime = collisions['CRASH DATETIME']
in_2018 = (crash_datetime >= '2018-01-01') & (crash_datetime < '2019-01-01')

# Select only the columns we need. `.copy()` detaches the result from the
# frame it was sliced from, so adding columns to it later does not raise
# SettingWithCopyWarning.
collisions = collisions.loc[in_2018, ['CRASH DATETIME', 'BOROUGH', 'ORIGINAL VEHICLE']].copy()

In [4]:
# List every distinct raw vehicle label, in order of first appearance
original_vehicle_labels = collisions['ORIGINAL VEHICLE']
original_vehicle_labels.unique()

array(['Sedan', 'Pick-up Truck', 'Station Wagon/Sport Utility Vehicle',
       'Taxi', 'Tractor Truck Diesel', 'Bus', 'Bike', 'Box Truck', 'PK',
       'Motorcycle', 'Van', 'Garbage or Refuse', 'Dump', 'Flat Bed',
       'Carry All', 'SCOOT', 'Convertible', 'Tractor Truck Gasoline',
       'unknown', 'FREIG', 'Chassis Cab', 'Lift Boom', 'Ambulance',
       'Multi-Wheeled Vehicle', 'Tanker', 'AMBUL', 'VAN T', 'MINI',
       'Tow Truck / Wrecker', 'Garba', 'motor', 'Lunch Wagon', 'E-Bik',
       'Ambul', 'FDNY', 'SCHOO', 'Motorbike', 'Flat Rack',
       'Concrete Mixer', 'Open Body', 'Stake or Rack', 'DUMP', 'Comm',
       'Armored Truck', 'FLAT', 'Fire', 'Motorscooter', 'van', 'Sanit',
       'mail', 'RV', 'Tow Truck', 'AMBU', 'GARBA', 'ambul', 'FIRET',
       'TRUCK', 'FIRE', 'Van Camper', 'SELF', 'WORKH', 'FORKL', 'Tract',
       '3-Door', 'bus', 'freig', 'DELIV', 'DELV', 'Refrigerated Van',
       'Beverage Truck', 'trail', 'PICKU', 'Dumps', 'forkl', 'fire',
       'TRK', 'Moped', 'E

In [5]:
# Raw vehicle labels copied from the output of the previous cell. Kept as a
# reference list: any label that appears in the data but is missing here is
# still classified below, so no row keeps its raw label by accident.
vehicles = [
    'Sedan', 'Pick-up Truck', 'Station Wagon/Sport Utility Vehicle',
    'Taxi', 'Tractor Truck Diesel', 'Bus', 'Bike', 'Box Truck', 'PK',
    'Motorcycle', 'Van', 'Garbage or Refuse', 'Dump', 'Flat Bed',
    'Carry All', 'SCOOT', 'Convertible', 'Tractor Truck Gasoline',
    'unknown', 'FREIG', 'Chassis Cab', 'Lift Boom', 'Ambulance',
    'Multi-Wheeled Vehicle', 'Tanker', 'AMBUL', 'VAN T', 'MINI',
    'Tow Truck / Wrecker', 'Garba', 'motor', 'Lunch Wagon', 'E-Bik',
    'Ambul', 'FDNY', 'SCHOO', 'Motorbike', 'Flat Rack',
    'Concrete Mixer', 'Open Body', 'Stake or Rack', 'DUMP', 'Comm',
    'Armored Truck', 'FLAT', 'Fire', 'Motorscooter', 'van', 'Sanit',
    'mail', 'RV', 'Tow Truck', 'AMBU', 'GARBA', 'ambul', 'FIRET',
    'TRUCK', 'FIRE', 'Van Camper', 'SELF', 'WORKH', 'FORKL', 'Tract',
    '3-Door', 'bus', 'freig', 'DELIV', 'DELV', 'Refrigerated Van',
    'Beverage Truck', 'trail', 'PICKU', 'Dumps', 'forkl', 'fire',
    'TRK', 'Moped', 'ELECT', '2- to', 'Pedicab', 'BROOM', 'USPS',
    'TRAIL', 'EBIKE', 'Trail', 'Glass Rack', 'Motorized Home',
    'US POSTAL', 'BLOCK', 'pas', 'COM', 'CONCR', 'CHERR', 'UNK', 'UTV',
    'MOTOR', 'MTA B', 'TRACT', 'NYC', 'MOPED', 'UHAUL', 'School Bus',
    'TRAILER', 'scoot', 'FED E', 'Livestock Rack', 'COMME', 'TRLR',
    'LOADE', 'Minibike', 'rv', 'TOWER', 'Pick', 'SWT', 'SCOOTER',
    'AMB', 'NS AM', 'UNKNO', 'NEW Y', 'TOW T', 'GRAY', 'tract',
    'STREE', 'MAIL', 'e-bik', 'unk', 'PSD', 'box t', 'CRANE', 'garba',
    'Pickup with mounted Camper', 'FRONT', 'Sprin', 'delv', 'POWER',
    'CAMP', 'Enclosed Body - Removable Enclosure', 'RGS', 'GOVER',
    'FORK', 'UTILI', 'POSTO', 'firet', 'WORK', 'R/V C', 'sgws',
    'Cat 9', 'BACKH', 'STAK', 'E-MOT', 'MACK', 'SPC', 'fork', 'OMR',
    'moped', 'semi', 'FORK-', 'Wheel', 'Utili', 'E-BIK', 'fd tr',
    'SWEEP', 'BOX T', 'CASE', 'FD TR', 'Work', 'LIBER', 'fdny', 'COMB',
    'HEAVY', 'DUMPS', 'MTA b', 'Hopper', 'UTIL', 'R/V', 'FOOD',
    'FD tr', 'Spc', 'Bulk Agriculture', 'BED T', 'Pallet', 'comme',
    'UPS T', 'Minicycle', 'PAS', 'BICYC', 'Subn', 'WHEEL', 'truck',
    'Util', 'ACCES', 'e sco', 'BOBCA', 'TANK', 'TRACK', 'utili',
    'DEMA-', 'tow', 'dump', 'Elect', 'deliv', 'Backh', '99999',
    'BULLD', 'seagr', 'schoo', 'CONST', 'self', 'BK', 'Semi', 'Scoot',
    'NYPD'
]

# Case-insensitive substrings that identify each category of interest.
# 'Other' has no keywords: it is the fallback for every unmatched label.
# This dict is only read, never modified, by the classification below.
categories = {
    'Taxi': ['Taxi'],
    'Ambulance': ['Ambulance', 'AMBUL', 'Ambul', 'ambul', 'AMB', 'AMBU', 'AMBULANCE'],
    'Fire truck': ['Fire', 'FIRET', 'FIRE', 'FDNY', 'fdny', 'FD tr', 'fd tr', 'firet', 'fire'],
    'Other': []
}


def classify_vehicle(label):
    """Return the category a raw vehicle label belongs to.

    Args:
        label: a raw value of the 'ORIGINAL VEHICLE' column. It is converted
            with ``str`` before matching, so non-string values are accepted.

    Returns:
        The first category of ``categories`` (in insertion order) that has a
        keyword contained, ignoring case, in ``label``; 'Other' when no
        keyword matches.
    """
    text = str(label).lower()
    for category, keywords in categories.items():
        if any(keyword.lower() in text for keyword in keywords):
            return category
    return 'Other'


# Classify the reference labels plus every label actually present in the
# data. dict.fromkeys drops duplicates while preserving the order.
observed_labels = collisions["ORIGINAL VEHICLE"].dropna().unique()
classified_vehicles = {
    label: classify_vehicle(label)
    for label in dict.fromkeys([*vehicles, *observed_labels])
}

# Every non-missing label has an entry in the mapping; missing labels stay
# missing. `assign` builds a new frame instead of writing into a possibly
# sliced one, which avoids SettingWithCopyWarning and keeps the cell
# re-runnable.
collisions = collisions.assign(
    VEHICLE=collisions["ORIGINAL VEHICLE"].map(classified_vehicles)
)

In [6]:
# Show the distinct categories produced by the classification
vehicle_categories = collisions['VEHICLE']
vehicle_categories.unique()

array(['Other', 'Taxi', 'Ambulance', 'Fire truck'], dtype=object)

In [7]:
# Write the processed collisions table to disk, leaving out the index
collisions_output_path = "./processed-data/collisions.csv"
collisions.to_csv(collisions_output_path, index=False)

### Weather dataset

In [8]:
# Load the raw weather table from disk
weather_source_path = "./original-data/weather2018.csv"
weather = pd.read_csv(weather_source_path)

In [9]:
# Keep just the 'datetime' and 'icon' columns, then list the distinct icons
weather_columns = ['datetime', 'icon']
weather = weather.loc[:, weather_columns]
weather.loc[:, 'icon'].unique()

array(['rain', 'partly-cloudy-day', 'clear-day', 'cloudy'], dtype=object)

In [10]:
# Write the processed weather table to disk, leaving out the index
weather_output_path = "./processed-data/weather.csv"
weather.to_csv(weather_output_path, index=False)