## NYC Collision Analysis - Part 2

### For each borough, find the distribution of collision scale: how many vehicles were involved in each collision (one, two, three, or more)? (From 2015 to present)

In [1]:
import numpy as np
import pandas as pd
import os
import datetime

In [2]:
# Load the raw collision records into a dataframe.
# index_col=3 makes the fourth CSV column the index; the preview below shows
# that this column is BOROUGH.
collisions_csv_path = 'Data/vehicle_collisions.csv'
df = pd.read_csv(collisions_csv_path, index_col=3)
df.head()

Unnamed: 0_level_0,UNIQUE KEY,DATE,TIME,ZIP CODE,LATITUDE,LONGITUDE,LOCATION,ON STREET NAME,CROSS STREET NAME,OFF STREET NAME,...,VEHICLE 1 TYPE,VEHICLE 2 TYPE,VEHICLE 3 TYPE,VEHICLE 4 TYPE,VEHICLE 5 TYPE,VEHICLE 1 FACTOR,VEHICLE 2 FACTOR,VEHICLE 3 FACTOR,VEHICLE 4 FACTOR,VEHICLE 5 FACTOR
BOROUGH,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
QUEENS,3146911,1/1/15,0:20,11358.0,40.751847,-73.787862,"(40.7518471, -73.787862)",47 AVENUE,193 STREET,,...,SPORT UTILITY/STATION WAGON,,,,,TRAFFIC CONTROL DISREGARDED,,,,
,3146180,1/1/15,0:20,,40.771289,-73.946693,"(40.7712888, -73.9466928)",,,,...,PASSENGER VEHICLE,,,,,ANIMALS ACTION,,,,
BROOKLYN,3146384,1/1/15,0:21,11205.0,40.689445,-73.955121,"(40.6894449, -73.9551212)",BEDFORD AVENUE,LAFAYETTE AVENUE,,...,PASSENGER VEHICLE,UNKNOWN,,,,FATIGUED/DROWSY,UNSPECIFIED,,,
BROOKLYN,3146013,1/1/15,0:30,11213.0,40.673845,-73.92508,"(40.6738445, -73.9250801)",BUFFALO AVENUE,SAINT MARKS AVENUE,,...,BUS,PASSENGER VEHICLE,,,,LOST CONSCIOUSNESS,,,,
,3146120,1/1/15,0:33,,,,,RICHMOND TERRACE,SOUTH AVENUE,,...,UNKNOWN,PASSENGER VEHICLE,,,,UNSPECIFIED,UNSPECIFIED,,,


In [3]:
# Keep only the columns describing the vehicles involved in each collision.
# The borough stays attached to every row because it is the dataframe index.
columns_to_keep = [
    'VEHICLE 1 TYPE',
    'VEHICLE 2 TYPE',
    'VEHICLE 3 TYPE',
    'VEHICLE 4 TYPE',
    'VEHICLE 5 TYPE',
]
df = df[columns_to_keep]
df.head()

Unnamed: 0_level_0,VEHICLE 1 TYPE,VEHICLE 2 TYPE,VEHICLE 3 TYPE,VEHICLE 4 TYPE,VEHICLE 5 TYPE
BOROUGH,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
QUEENS,SPORT UTILITY/STATION WAGON,,,,
,PASSENGER VEHICLE,,,,
BROOKLYN,PASSENGER VEHICLE,UNKNOWN,,,
BROOKLYN,BUS,PASSENGER VEHICLE,,,
,UNKNOWN,PASSENGER VEHICLE,,,


In [4]:
# Count, for each collision (row), how many of the five "VEHICLE n TYPE" slots
# are filled in. Any non-null entry counts as a vehicle, including 'UNKNOWN'.
#
# Only the vehicle-type columns are counted rather than every column of df:
# counting all columns would also count TOTAL_VEHICLES itself when this cell is
# re-run, inflating every total by one.
vehicle_type_columns = [
    'VEHICLE 1 TYPE',
    'VEHICLE 2 TYPE',
    'VEHICLE 3 TYPE',
    'VEHICLE 4 TYPE',
    'VEHICLE 5 TYPE',
]
# assign() returns a new dataframe instead of writing a column into the subset
# selected in the previous cell, which avoids pandas' SettingWithCopyWarning.
df = df.assign(TOTAL_VEHICLES=df[vehicle_type_columns].notna().sum(axis=1))
df.head()

Unnamed: 0_level_0,VEHICLE 1 TYPE,VEHICLE 2 TYPE,VEHICLE 3 TYPE,VEHICLE 4 TYPE,VEHICLE 5 TYPE,TOTAL_VEHICLES
BOROUGH,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
QUEENS,SPORT UTILITY/STATION WAGON,,,,,1
,PASSENGER VEHICLE,,,,,1
BROOKLYN,PASSENGER VEHICLE,UNKNOWN,,,,2
BROOKLYN,BUS,PASSENGER VEHICLE,,,,2
,UNKNOWN,PASSENGER VEHICLE,,,,2


In [5]:
# Turn the BOROUGH index back into a regular column, then keep only the two
# columns needed for the aggregation step.
borough_as_column = df.reset_index()
df = borough_as_column[['BOROUGH', 'TOTAL_VEHICLES']]
df.head()

Unnamed: 0,BOROUGH,TOTAL_VEHICLES
0,QUEENS,1
1,,1
2,BROOKLYN,2
3,BROOKLYN,2
4,,2


In [6]:
# Build the borough x vehicle-count frequency table: one row per borough, one
# column per number of vehicles involved, each cell holding the number of
# collisions.
#
# pd.crosstab is used instead of pivot_table(aggfunc=len): at this point df has
# only the BOROUGH and TOTAL_VEHICLES columns and both are used as keys, so
# pivot_table has no value column left to aggregate and, depending on the
# pandas version, may return an empty table instead of counts.
# Combinations that never occur are reported as 0, and collisions whose
# BOROUGH is missing are not included in the table.
df = pd.crosstab(df['BOROUGH'], df['TOTAL_VEHICLES'])

# Replace the numeric vehicle counts with descriptive column names;
# index=str makes sure the borough labels are plain strings.
vehicle_count_labels = {
    0: "NO_VEHICLE_INVOLVED",
    1: "ONE_VEHICLE_INVOLVED",
    2: "TWO_VEHICLES_INVOLVED",
    3: "THREE_VEHICLES_INVOLVED",
    4: "FOUR_VEHICLES_INVOLVED",
    5: "FIVE_VEHICLES_INVOLVED",
}
df = df.rename(index=str, columns=vehicle_count_labels)

In [7]:
# Display the finished borough-by-vehicle-count table as the cell output
df

TOTAL_VEHICLES,NO_VEHICLE_INVOLVED,ONE_VEHICLE_INVOLVED,TWO_VEHICLES_INVOLVED,THREE_VEHICLES_INVOLVED,FOUR_VEHICLES_INVOLVED,FIVE_VEHICLES_INVOLVED
BOROUGH,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BRONX,525,8627,34385,1962,452,173
BROOKLYN,787,17375,80207,4980,1151,461
MANHATTAN,801,13318,66958,2024,372,129
QUEENS,515,12962,70260,4498,1106,314
STATEN ISLAND,105,2343,10676,759,133,32


In [8]:
# Make sure the Output directory exists before writing to it;
# exist_ok=True keeps this from raising when the directory is already there
os.makedirs('Output', exist_ok=True)

In [9]:
# Save the table as CSV (to_csv writes the index as the first column by default)
df.to_csv('Output/Q1Part2.csv')

In [10]:
# Tell the reader the analysis is finished and where the result was written.
completion_message = (
    'Completed the analysis for calculating the distribution of collision scale '
    'to show the number of vehicles involved for each collision on each borough. '
    'Output is saved as a csv file in the location Output/Q1Part2.csv'
)
print(completion_message)

Completed the analysis for calculating the distribution of collision scale to show the number of vehicles involved for each collision on each borough. Output is saved as a csv file in the location Output/Q1Part2.csv
