# Elevator Exploration and Visualization

The objective of this notebook is to explore and visualize the elevator data of New York City.

In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

from mpl_toolkits.basemap import Basemap

import seaborn as sns
# Global seaborn theme: white background, "muted" palette; color_codes=True remaps
# matplotlib's single-letter colour shorthands ("b", "g", "r", ...) to that palette.
sns.set(style="white", palette="muted", color_codes=True)

# Initialise plotly's offline notebook mode so that iplot() can render figures inline.
init_notebook_mode()


## 1. Read data and basic exploration

Use pandas to read the input file and explore its basic contents.

In [None]:
# Load the raw elevator data. low_memory=False makes pandas parse the file in one
# pass instead of in chunks, which avoids mixed-dtype inference within a column.
df = pd.read_csv("../input/nyc-elevators.csv", low_memory = False)
# Peek at the first two records.
df.head(2)

Null values for some columns already show up in the first 2 records. Let's check the missing values for each column.

In [None]:
def missing_values_table(df):
    """Summarise the missing values of every column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.DataFrame
        Indexed by the column names of ``df`` with two columns:
        'Missing Values' (number of nulls) and '% of Total Values'
        (nulls as a percentage of the number of rows).
    """
    null_counts = df.isnull().sum()
    null_percent = 100 * null_counts / len(df)
    # Concatenating two unnamed Series side by side yields columns 0 and 1,
    # which are then given readable names.
    summary = pd.concat([null_counts, null_percent], axis=1)
    return summary.rename(columns={0: 'Missing Values', 1: '% of Total Values'})
    
# Display the per-column missing-value counts and percentages for the raw data.
missing_values_table(df)

The percentage of missing values shows that one column (**Unnamed: 26**) can be removed entirely, as it contains no data at all; I will drop this column. Some other columns also have a high ratio of missing values (between 60% and 80%, e.g. *DV_MANUFACTURER*).

The rest of the data will be kept and explored in the next sections.


In [None]:
# 'Unnamed: 26' contains no data at all, so remove it.
# errors='ignore' keeps this cell idempotent: re-running it after the column has
# already been dropped no longer raises a KeyError.
df = df.drop(columns=['Unnamed: 26'], errors='ignore')

## 2. Data Exploration

In this section, I will drill down to understand the data and dig out insights from this dataset using visualization techniques.

Let's first examine the type of the elevator.

In [None]:
def group_by_cnt(df, col_nm):
    """Count the rows of ``df`` for each distinct value of column ``col_nm``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    col_nm : str
        Name of the column to group by. Rows where this column is null are
        excluded by ``groupby`` and therefore not counted.

    Returns
    -------
    pandas.DataFrame
        One row per distinct value, sorted by ascending count, with columns:
        ``col_nm``, 'Counts', 'Percent' (share of the counted rows, 0-100) and
        'Text' (a "<value>: <count>" label for plotting).
    """
    out = (
        df.groupby(col_nm)
        .size()
        .reset_index(name='Counts')
        .sort_values('Counts', ascending=True)
        .reset_index(drop=True)
    )
    out['Percent'] = out['Counts'] / out['Counts'].sum() * 100
    # Cast the group key to str so the label can also be built for numeric or
    # other non-string columns (str + int would raise otherwise).
    out['Text'] = out[col_nm].astype(str) + ": " + out['Counts'].astype(str)
    return out

In [None]:
# Derive the elevator type as the text before the first '(' of 'Device Type'
# (presumably values look like "Name (Code)" -- verify against the data).
# NOTE: the result is not stripped, so it keeps any whitespace that preceded the
# '('; the map cells further down filter on names with a trailing space.
df['ELEVATOR_TYPE'] = df['Device Type'].str.rstrip(')').str.split('(').str[0]
# Count the elevators of each type and display the table.
ele_type = group_by_cnt(df, 'ELEVATOR_TYPE')
ele_type

In [None]:
# Horizontal bar chart: number of elevators per elevator type.
# Each bar is labelled with "<type>: <count>" taken from the 'Text' column.
data = [go.Bar(x=ele_type['Counts'],
               y=ele_type['ELEVATOR_TYPE'],
               text=ele_type['Text'],
               textposition='auto',
               marker=dict(color='rgba(55, 128, 191, 0.7)'),
               orientation='h')]
layout = dict(
    title='The elevator type in New York',
    # Log-scaled count axis with grid, lines, ticks and tick labels all hidden;
    # the counts are shown as bar text instead.
    # tickmode='auto' replaces the legacy `autotick=True`, which is not a valid
    # axis attribute in current plotly and is rejected by figure validation.
    xaxis=dict(
        type='log',
        autorange=True,
        showgrid=False,
        zeroline=False,
        showline=False,
        tickmode='auto',
        ticks='',
        showticklabels=False
    ),
    # Category axis: keep the type names, hide grid, lines and tick marks.
    yaxis=dict(
        autorange=True,
        showgrid=False,
        zeroline=False,
        showline=False,
        tickmode='auto',
        ticks='',
    ),
    font=dict(color="black", size=10),
    autosize=True)
fig = dict(data=data, layout=layout)
iplot(fig, filename='Elevator-type')

We can clearly see that most of the elevators (88%) are passenger elevators. This is somewhat predictable, as NY has lots of skyscrapers housing apartments, residential areas and offices.

Next, one variable which may be worth exploring is the **status**. ***Device Status*** and ***DV_DEVICE_STATUS_DESCRIPTION*** essentially carry the same meaning; I will use the description since it is easier to understand.

In [None]:
# Count the elevators per status description and display the table.
ele_status = group_by_cnt(df, 'DV_DEVICE_STATUS_DESCRIPTION')
ele_status

In [None]:
# Horizontal bar chart: number of elevators per device status.
# Each bar is labelled with "<status>: <count>" taken from the 'Text' column.
data = [go.Bar(x=ele_status['Counts'],
               y=ele_status['DV_DEVICE_STATUS_DESCRIPTION'],
               text=ele_status['Text'],
               textposition='auto',
               marker=dict(color='rgba(55, 128, 191, 0.7)'),
               orientation='h')]
layout = dict(
    title='The elevator status in New York',
    # Log-scaled count axis with grid, lines, ticks and tick labels all hidden;
    # the counts are shown as bar text instead.
    # tickmode='auto' replaces the legacy `autotick=True`, which is not a valid
    # axis attribute in current plotly and is rejected by figure validation.
    xaxis=dict(
        type='log',
        autorange=True,
        showgrid=False,
        zeroline=False,
        showline=False,
        tickmode='auto',
        ticks='',
        showticklabels=False
    ),
    # Category axis: keep the status names, hide grid, lines and tick marks.
    yaxis=dict(
        autorange=True,
        showgrid=False,
        zeroline=False,
        showline=False,
        tickmode='auto',
        ticks='',
    ),
    font=dict(color="black", size=10),
    autosize=True)
fig = dict(data=data, layout=layout)
iplot(fig, filename='Elevator-status')

Looking at the status, how many elevators of each type are still operating (or what is their status)?

In [None]:
# Number of elevators for every (elevator type, status description) pair,
# in long format: one row per pair that occurs in the data.
type_status_counts = (
    df.groupby(['ELEVATOR_TYPE', 'DV_DEVICE_STATUS_DESCRIPTION'])
    .size()
    .reset_index(name='Counts')
)

# Ten-step diverging colour scale running from dark red (low) to dark blue (high).
colorscale = [
    [0.0, 'rgb(165,0,38)'],
    [0.1111111111111111, 'rgb(215,48,39)'],
    [0.2222222222222222, 'rgb(244,109,67)'],
    [0.3333333333333333, 'rgb(253,174,97)'],
    [0.4444444444444444, 'rgb(254,224,144)'],
    [0.5555555555555556, 'rgb(224,243,248)'],
    [0.6666666666666666, 'rgb(171,217,233)'],
    [0.7777777777777778, 'rgb(116,173,209)'],
    [0.8888888888888888, 'rgb(69,117,180)'],
    [1.0, 'rgb(49,54,149)'],
]

# x, y and z are passed as equally long columns (one entry per type/status pair).
data = [go.Heatmap(z=type_status_counts['Counts'],
                   x=type_status_counts['ELEVATOR_TYPE'],
                   y=type_status_counts['DV_DEVICE_STATUS_DESCRIPTION'],
                   colorscale=colorscale)]

layout = go.Layout(title='Operating status of each elevator type in NY',
                   xaxis=dict(ticks='', nticks=45),
                   yaxis=dict(ticks=''))

fig = go.Figure(data=data, layout=layout)
# File name aligned with the other charts ('Elevator-type', 'Elevator-status');
# the previous 'datetime-heatmap' was unrelated to this figure.
iplot(fig, filename='Elevator-type-status-heatmap')

## 3. Plot the map

Let's visualize the locations of the elevators. I pick some of the active elevator types to see where they are and how densely they are distributed.

In [None]:
# Bounding box (in degrees) used for the maps below.
west, south, east, north = -74.03, 40.63, -73.77, 40.85

# Keep only the rows whose coordinates lie strictly inside the box: latitude is
# checked against south/north and longitude against west/east. (The previous
# version compared longitude with `north` and latitude with `west`, so only the
# `LATITUDE > south` test had any effect.)
# Rows with missing coordinates fail the comparisons and are dropped as well.
tmp = df[
    (df.LATITUDE > south) & (df.LATITUDE < north) &
    (df.LONGITUDE > west) & (df.LONGITUDE < east)
]

First, let's look at the most convenient and most common options: passenger elevators and escalators.

In [None]:
# Side-by-side scatter maps (longitude vs. latitude) of the active passenger
# elevators and the active escalators, sharing both axes.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True, figsize=(20, 10))

# Status code 'A' is the one plotted under the "Active" titles below.
is_active = tmp['Device Status'] == 'A'

# (axes, ELEVATOR_TYPE value, marker size, title) for each panel.
# The type names keep their trailing space on purpose: ELEVATOR_TYPE is derived
# without stripping whitespace.
panels = [
    (ax1, 'Passenger Elevator ', .05, "Active Passenger Elevator Only"),
    (ax2, 'Escalator ', .5, "Active Escalator Only"),
]

for ax, elevator_type, marker_size, title in panels:
    # Draw directly onto the given axes. `subplots=True` was dropped: it is
    # meaningless for a single scatter on an explicit `ax` and can make pandas
    # clear the figure to create its own subplot grid.
    tmp.loc[is_active & (tmp['ELEVATOR_TYPE'] == elevator_type)].plot(
        kind='scatter',
        x='LONGITUDE',
        y='LATITUDE',
        color='black', s=marker_size, alpha=.75,
        ax=ax)
    ax.set_title(title, fontsize=18)
    ax.set_facecolor('#f9f9f9')

plt.show()

Next, I'm interested in: 

* Active public and private elevators
* Active handicap lift only

In [None]:
# Side-by-side scatter maps (longitude vs. latitude) of the active public and
# private elevators and the active handicap lifts, sharing both axes.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True, figsize=(20, 10))

# Status code 'A' is the one plotted under the "Active" titles below.
is_active = tmp['Device Status'] == 'A'

# (axes, ELEVATOR_TYPE values, title) for each panel.
# The type names keep their trailing space on purpose: ELEVATOR_TYPE is derived
# without stripping whitespace.
panels = [
    (ax1, ['Private Elevator ', 'Public Elevator '], "Active Public and Private Elevators"),
    (ax2, ['Handicap Lift '], "Active Handicap Lift Only"),
]

for ax, elevator_types, title in panels:
    # Draw directly onto the given axes. `subplots=True` was dropped: it is
    # meaningless for a single scatter on an explicit `ax` and can make pandas
    # clear the figure to create its own subplot grid.
    tmp.loc[is_active & tmp['ELEVATOR_TYPE'].isin(elevator_types)].plot(
        kind='scatter',
        x='LONGITUDE',
        y='LATITUDE',
        color='black',
        ax=ax)
    ax.set_title(title, fontsize=18)
    ax.set_facecolor('#f9f9f9')

plt.show();