# This Notebook contains all the data cleaning and preprocessing on the dataset "VACANT BUILDINGS" which was grabbed from the open baltimore website

# IMPORT BLOCK 

In [1]:
#All the imports for the notebook can be found here 
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import folium as folium
import gmaps
import gmaps.datasets

In [2]:
df = pd.read_csv('Vacant_Buildings.csv') # Reading the data into the dataframe df

In [3]:
df.head()        #checking the first 5 rows in the dataset

Unnamed: 0,ReferenceID,Block,Lot,BuildingAddress,NoticeDate,Neighborhood,PoliceDistrict,CouncilDistrict,Location
0,0002 019 031016,2,19,1909 W NORTH AVE,03/10/2016,EASTERWOOD,WESTERN,7,"(39.30952601, -76.64848892)"
1,0007 057 031116,7,57,1734 APPLETON ST,03/11/2016,EASTERWOOD,WESTERN,7,"(39.30856758, -76.64921157)"
2,0125 012 030816,125,12,522 N CAREY ST,03/08/2016,HARLEM PARK,WESTERN,9,"(39.29482152, -76.63877572)"
3,0151 009 030716,151,9,317 N GILMOR ST,03/07/2016,FRANKLIN SQUARE,WESTERN,9,"(39.29261888, -76.64231678)"
4,0074 010 030918,74,10,945 N MOUNT ST,03/09/2018,Sandtown-Winchester,Western,9,"(39.29975212, -76.64415784)"


In [4]:
len(df) #Total 16.6k rows in the dataset 

16692

# Each column description with null values

In [5]:
df.isnull().sum(axis=0)

ReferenceID        0
Block              0
Lot                0
BuildingAddress    1
NoticeDate         0
Neighborhood       0
PoliceDistrict     0
CouncilDistrict    0
Location           0
dtype: int64

There are not many null values in the dataset only one row in which there is not building address, so lets go ahead and delete the row which has a null value. To make the dataset clean.

In [6]:
df.describe()

Unnamed: 0,CouncilDistrict
count,16692.0
mean,9.364186
std,2.775971
min,1.0
25%,7.0
50%,9.0
75%,12.0
max,14.0


In [7]:
df[df['BuildingAddress'].isnull()] 

Unnamed: 0,ReferenceID,Block,Lot,BuildingAddress,NoticeDate,Neighborhood,PoliceDistrict,CouncilDistrict,Location
5193,0626 026 080217,626,26,,08/02/2017,Poppleton,Western,11,"(39.28929887, -76.62864465)"


In [8]:
df = df[df['BuildingAddress'].notnull()]

In [9]:
df.isnull().sum(axis=0)

ReferenceID        0
Block              0
Lot                0
BuildingAddress    0
NoticeDate         0
Neighborhood       0
PoliceDistrict     0
CouncilDistrict    0
Location           0
dtype: int64

In [10]:
df['CouncilDistrict'].unique()

array([ 7,  9, 11, 10, 12, 13,  8,  6,  5,  4, 14,  2,  1,  3],
      dtype=int64)

In [11]:
df['PoliceDistrict'].unique()   #its considering the coloumn as type sensitive,convert into a single case for no confusion 

array(['WESTERN', 'Western', 'SOUTHERN', 'Southwestern', 'CENTRAL',
       'Southern', 'EASTERN', 'SOUTHEASTERN', 'SOUTHWESTERN', 'Eastern',
       'Southeastern', 'Northwestern', 'NORTHWESTERN', 'NORTHERN',
       'Notheastern', 'Central', 'Northern', 'NORTHEASTERN'], dtype=object)

In [12]:
df['PoliceDistrict']=df['PoliceDistrict'].str.lower()

In [13]:
df['Neighborhood'].unique()

array(['EASTERWOOD', 'HARLEM PARK', 'FRANKLIN SQUARE',
       'Sandtown-Winchester', 'CARROLLTON RIDGE', 'Boyd-Booth',
       'DOWNTOWN', 'WASHINGTON VILLAGE/PIGTOWN', 'Union Square',
       'Hollins Market', 'OLIVER', 'BROADWAY EAST',
       'New Southwest/Mount Clare', 'MCELDERRY PARK',
       'ELLWOOD PARK/MONUMENT', 'Easterwood',
       'COPPIN HEIGHTS/ASH-CO-EAST', 'Franklin Square',
       'NORTHWEST COMMUNITY ACTION', 'Poppleton', 'ROSEMONT',
       'FRANKLINTOWN ROAD', 'EDGEWOOD', 'Penrose/Fayette Street Outreach',
       'Broadway East', 'Madison-Eastend', 'McElderry Park',
       'Ellwood Park/Monument', 'Franklintown Road', 'Rosemont',
       'East Baltimore Midway', 'Central Park Heights', 'Brooklyn',
       'IRVINGTON', 'CONCERNED CITIZENS OF FOREST PARK', 'FOREST PARK',
       'HANLON-LONGWOOD', 'PARK CIRCLE', 'WOODBERRY', 'BARCLAY',
       'PIMLICO GOOD NEIGHBORS', 'MID-GOVANS',
       'Coldstream Homestead Montebello', 'Oliver', 'BROOKLYN', 'Berea',
       'CARE', 'Bela

In [14]:
df['PoliceDistrict'].unique()

array(['western', 'southern', 'southwestern', 'central', 'eastern',
       'southeastern', 'northwestern', 'northern', 'notheastern',
       'northeastern'], dtype=object)

In [15]:
len(df['Lot'].unique())

505

In [16]:
df['Block'].unique()

array(['0002', '0007', '0125', ..., '8358', '8422', '8434F'], dtype=object)

In [None]:
mapit = None
for i in range(0,len(df)):
    LatLong  = df['Location'].iloc[i].split(",")
    Lat = LatLong[0][1:]
    Long = LatLong[1][:len(LatLong[1])-2]
    mapit = folium.Map( location=[ float(Lat), float(Long) ] )

In [None]:
mapit

In [None]:
df.head()

In [None]:
dfLocation = df[['Location','Lot','CouncilDistrict']]

In [None]:
dfLocation

In [None]:
for i in range(0,10):
    LatLong  = dfLocation['Location'].iloc[i].split(",")
    Lat = LatLong[0][1:]
    Long = LatLong[1][:len(LatLong[1])-2]
    print(i)
    dfLocation['Lot'].iloc[i]=float(Lat)
    dfLocation['CouncilDistrict'].iloc[i]=float(Long)

In [None]:
dfLocation

In [None]:
gmaps.configure(api_key="AIzaSyBJKywgJYuW5tSDTgMty-XHU-LhKJPg9tM")
location_df = dfLocation[['Lot', 'CouncilDistrict']][:10]

starbucks_layer = gmaps.symbol_layer(
    location_df, fill_color="green", stroke_color="green", scale=2
)
fig = gmaps.figure()
fig.add_layer(starbucks_layer)
fig

In [None]:
fig

In [None]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import gmaps
import gmaps.datasets
gmaps.configure(api_key="AI...") # Your Google API key

df = gmaps.datasets.load_dataset_as_df('starbucks_kfc_uk')

starbucks_df = df[df['chain_name'] == 'starbucks']
starbucks_df = starbucks_df[['latitude', 'longitude']]

starbucks_layer = gmaps.symbol_layer(
    starbucks_df, fill_color="green", stroke_color="green", scale=2
)
fig = gmaps.figure()
fig.add_layer(starbucks_layer)
fig

In [None]:
%matplotlib inline
fig

In [None]:
from matplotlib.cm import viridis
from matplotlib.colors import to_hex

In [None]:
type(fig)

In [None]:
import ipywidgets as widgets
widgets.IntSlider()

In [None]:
jupyter nbextension list

In [17]:
df

Unnamed: 0,ReferenceID,Block,Lot,BuildingAddress,NoticeDate,Neighborhood,PoliceDistrict,CouncilDistrict,Location
0,0002 019 031016,0002,019,1909 W NORTH AVE,03/10/2016,EASTERWOOD,western,7,"(39.30952601, -76.64848892)"
1,0007 057 031116,0007,057,1734 APPLETON ST,03/11/2016,EASTERWOOD,western,7,"(39.30856758, -76.64921157)"
2,0125 012 030816,0125,012,522 N CAREY ST,03/08/2016,HARLEM PARK,western,9,"(39.29482152, -76.63877572)"
3,0151 009 030716,0151,009,317 N GILMOR ST,03/07/2016,FRANKLIN SQUARE,western,9,"(39.29261888, -76.64231678)"
4,0074 010 030918,0074,010,945 N MOUNT ST,03/09/2018,Sandtown-Winchester,western,9,"(39.29975212, -76.64415784)"
5,0275 060 030716,0275,060,319 S MONROE ST,03/07/2016,CARROLLTON RIDGE,southern,9,"(39.28383911, -76.64647246)"
6,0206 035 031418,0206,035,2023 W BALTIMORE ST,03/14/2018,Boyd-Booth,southwestern,9,"(39.28783131, -76.64939781)"
7,0599 008 031116,0599,008,235 PARK AVE,03/11/2016,DOWNTOWN,central,11,"(39.29280154, -76.6178644)"
8,0767 117 030816,0767,117,1329 SARGEANT ST,03/08/2016,WASHINGTON VILLAGE/PIGTOWN,southern,10,"(39.28035426, -76.63645611)"
9,0206 049 031418,0206,049,8 S PAYSON ST,03/14/2018,Boyd-Booth,southwestern,9,"(39.2875711, -76.64891445)"
