In [1]:
import matplotlib.pyplot as plt
import gmaps
import pandas as pd
import requests
import numpy as np
import datetime

In [2]:
# Download the dataset as JSON from the City of Chicago data portal.
url = "https://data.cityofchicago.org/resource/tdab-kixi.json"
# A timeout keeps this cell from hanging indefinitely if the server never answers.
r = requests.get(url, timeout=30)
# Fail right here on an HTTP error status instead of later while parsing the body.
r.raise_for_status()
r

<Response [200]>

In [3]:
# Decode the response body as JSON.
# NOTE(review): the variable name `json` is also the name of the standard-library
# module; it is kept because the following cells refer to it by this name.
json = r.json()

In [4]:
# Inspect the top-level Python type of the decoded JSON payload
type(json)

list

In [74]:
# Load the decoded JSON records into a DataFrame and preview the first two rows
df = pd.DataFrame(data=json)
df.iloc[:2]

Unnamed: 0,landmark_name,id,address,landmark_designation_date,latitude,longitude,location,:@computed_region_rpca_8um6,:@computed_region_vrxf_vc4k,:@computed_region_6mkv_f3dw,:@computed_region_bdys_3d7i,:@computed_region_43wa_7qmu,:@computed_region_awaf_s7ux,date_built,architect
0,Vassar Swiss Underwear Company Building,L-265,2543 - 2545 W Diversey Av,2008-07-30T07:00:00.000Z,41.9316266084,-87.6921000957,"{'latitude': '41.9316266084', 'longitude': '-8...",1,23,22535,240,40,24,,
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991-10-02T07:00:00.000Z,41.819255751,-87.6027879992,"{'latitude': '41.819255751', 'longitude': '-87...",36,2,4301,220,10,1,1886.0,Adler & Sullivan


In [75]:
# Drop the unnecessary ':@computed_region_*' columns.
# They are selected by name prefix rather than by position, so the cell keeps
# removing the right columns even if the API returns them in a different order
# or changes how many of them there are.
computed_region_columns = [
    column for column in df.columns
    if str(column).startswith(':@computed_region_')
]
df2 = df.drop(columns=computed_region_columns)
df2.head(2)


Unnamed: 0,landmark_name,id,address,landmark_designation_date,latitude,longitude,location,date_built,architect
0,Vassar Swiss Underwear Company Building,L-265,2543 - 2545 W Diversey Av,2008-07-30T07:00:00.000Z,41.9316266084,-87.6921000957,"{'latitude': '41.9316266084', 'longitude': '-8...",,
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991-10-02T07:00:00.000Z,41.819255751,-87.6027879992,"{'latitude': '41.819255751', 'longitude': '-87...",1886.0,Adler & Sullivan


In [76]:
# Reduce the designation timestamps to plain calendar dates (time of day dropped)
designation_timestamps = pd.to_datetime(df2['landmark_designation_date'])
df2['landmark_designation_date'] = designation_timestamps.dt.date
df2.head(2)

Unnamed: 0,landmark_name,id,address,landmark_designation_date,latitude,longitude,location,date_built,architect
0,Vassar Swiss Underwear Company Building,L-265,2543 - 2545 W Diversey Av,2008-07-30,41.9316266084,-87.6921000957,"{'latitude': '41.9316266084', 'longitude': '-8...",,
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991-10-02,41.819255751,-87.6027879992,"{'latitude': '41.819255751', 'longitude': '-87...",1886.0,Adler & Sullivan


In [77]:
# Derive the year of designation from the designation date
designation_dates = pd.DatetimeIndex(df2['landmark_designation_date'])
df2['designation_year'] = designation_dates.year
df2.head(2)

Unnamed: 0,landmark_name,id,address,landmark_designation_date,latitude,longitude,location,date_built,architect,designation_year
0,Vassar Swiss Underwear Company Building,L-265,2543 - 2545 W Diversey Av,2008-07-30,41.9316266084,-87.6921000957,"{'latitude': '41.9316266084', 'longitude': '-8...",,,2008.0
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991-10-02,41.819255751,-87.6027879992,"{'latitude': '41.819255751', 'longitude': '-87...",1886.0,Adler & Sullivan,1991.0


In [78]:
# Render 'designation_year' as a whole-number string (e.g. 2008.0 -> '2008').
# Missing years are left as NaN rather than being converted to the text 'nan',
# so the later dropna() still recognises them as missing values.
df2['designation_year'] = df2['designation_year'].map(
    lambda year: str(int(year)), na_action='ignore'
)
df2.head(2)

Unnamed: 0,landmark_name,id,address,landmark_designation_date,latitude,longitude,location,date_built,architect,designation_year
0,Vassar Swiss Underwear Company Building,L-265,2543 - 2545 W Diversey Av,2008-07-30,41.9316266084,-87.6921000957,"{'latitude': '41.9316266084', 'longitude': '-8...",,,2008
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991-10-02,41.819255751,-87.6027879992,"{'latitude': '41.819255751', 'longitude': '-87...",1886.0,Adler & Sullivan,1991


In [79]:
# Count the rows of the original frame whose date_built value is missing
missing_built = df['date_built'].isna()
count_nan = missing_built.sum()
print('Count of NaN: ' + str(count_nan))

Count of NaN: 40


In [80]:
# Treat empty strings as missing, then remove every row that has any missing value.
# Both steps modify df2 in place.
nan_value = float('nan')
df2.replace(to_replace='', value=nan_value, inplace=True)
df2.dropna(axis=0, how='any', inplace=True)
df2.head(2)

Unnamed: 0,landmark_name,id,address,landmark_designation_date,latitude,longitude,location,date_built,architect,designation_year
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991-10-02,41.819255751,-87.6027879992,"{'latitude': '41.819255751', 'longitude': '-87...",1886,Adler & Sullivan,1991
2,Manhattan Building,L-139,431 S Dearborn St,1978-07-07,41.8760657234,-87.6289644505,"{'latitude': '41.8760657234', 'longitude': '-8...",1891,William LeBaron Jenney,1978


In [82]:
# Confirm every column now holds the same number of non-null entries
df2.notna().sum()

landmark_name                270
id                           270
address                      270
landmark_designation_date    270
latitude                     270
longitude                    270
location                     270
date_built                   270
architect                    270
designation_year             270
dtype: int64

In [83]:
# Normalise the separators found in 'date_built' to a single '-' so the value
# can be split on '-' afterwards. The replacements run in this order, so the
# two-character forms (', ' and '& ') are handled before the bare ',' and '&'.
built_text = df2['date_built'].astype(str)
for separator in (', ', '& ', '&', ',', '`'):
    built_text = built_text.str.replace(separator, '-', regex=False)
df2['date_built'] = built_text
df2.head(4)

Unnamed: 0,landmark_name,id,address,landmark_designation_date,latitude,longitude,location,date_built,architect,designation_year
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991-10-02,41.819255751,-87.6027879992,"{'latitude': '41.819255751', 'longitude': '-87...",1886,Adler & Sullivan,1991
2,Manhattan Building,L-139,431 S Dearborn St,1978-07-07,41.8760657234,-87.6289644505,"{'latitude': '41.8760657234', 'longitude': '-8...",1891,William LeBaron Jenney,1978
3,Machinery Hall at Illinois Institute of Techno...,L- 12,100 W 33rd St,2004-05-26,41.8351614122,-87.6292212235,"{'latitude': '41.8351614122', 'longitude': '-8...",1901,"Patton, Fisher & Miller",2004
4,Melissa Ann Elam House,L- 88,4726 S Dr Martin Luther King Jr Dr,1979-03-21,41.808529769,-87.6172043949,"{'latitude': '41.808529769', 'longitude': '-87...",1903,Henry L. Newhouse,1979


In [84]:
# Split 'date_built' on '-' into at most three parts.
# n=2 caps the split at three pieces (any extra text stays in the third one) and
# reindex pads the result to exactly three columns, so the assignment below works
# whether the data contains fewer or more than three '-'-separated parts.
# Parts that do not exist for a row are left null.
date_parts = (
    df2['date_built']
    .astype(str)
    .str.split('-', n=2, expand=True)
    .reindex(columns=range(3))
)
for position, column in enumerate(['start_built_year', 'end_built_year', 'y3']):
    df2[column] = date_parts[position]
df2.head(4)

Unnamed: 0,landmark_name,id,address,landmark_designation_date,latitude,longitude,location,date_built,architect,designation_year,start_built_year,end_built_year,y3
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991-10-02,41.819255751,-87.6027879992,"{'latitude': '41.819255751', 'longitude': '-87...",1886,Adler & Sullivan,1991,1886,,
2,Manhattan Building,L-139,431 S Dearborn St,1978-07-07,41.8760657234,-87.6289644505,"{'latitude': '41.8760657234', 'longitude': '-8...",1891,William LeBaron Jenney,1978,1891,,
3,Machinery Hall at Illinois Institute of Techno...,L- 12,100 W 33rd St,2004-05-26,41.8351614122,-87.6292212235,"{'latitude': '41.8351614122', 'longitude': '-8...",1901,"Patton, Fisher & Miller",2004,1901,,
4,Melissa Ann Elam House,L- 88,4726 S Dr Martin Luther King Jr Dr,1979-03-21,41.808529769,-87.6172043949,"{'latitude': '41.808529769', 'longitude': '-87...",1903,Henry L. Newhouse,1979,1903,,


In [85]:
# Put the columns into the order used for the rest of the analysis
column_order = [
    'landmark_name',
    'id',
    'address',
    'designation_year',
    'start_built_year',
    'architect',
    'location',
    'latitude',
    'longitude',
    'landmark_designation_date',
    'date_built',
    'end_built_year',
    'y3',
]
df2 = df2.reindex(columns=column_order)
df2.head(4)

Unnamed: 0,landmark_name,id,address,designation_year,start_built_year,architect,location,latitude,longitude,landmark_designation_date,date_built,end_built_year,y3
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991,1886,Adler & Sullivan,"{'latitude': '41.819255751', 'longitude': '-87...",41.819255751,-87.6027879992,1991-10-02,1886,,
2,Manhattan Building,L-139,431 S Dearborn St,1978,1891,William LeBaron Jenney,"{'latitude': '41.8760657234', 'longitude': '-8...",41.8760657234,-87.6289644505,1978-07-07,1891,,
3,Machinery Hall at Illinois Institute of Techno...,L- 12,100 W 33rd St,2004,1901,"Patton, Fisher & Miller","{'latitude': '41.8351614122', 'longitude': '-8...",41.8351614122,-87.6292212235,2004-05-26,1901,,
4,Melissa Ann Elam House,L- 88,4726 S Dr Martin Luther King Jr Dr,1979,1903,Henry L. Newhouse,"{'latitude': '41.808529769', 'longitude': '-87...",41.808529769,-87.6172043949,1979-03-21,1903,,


In [86]:
# Show the dtype of every column in df2
df2.dtypes

landmark_name                object
id                           object
address                      object
designation_year             object
start_built_year             object
architect                    object
location                     object
latitude                     object
longitude                    object
landmark_designation_date    object
date_built                   object
end_built_year               object
y3                           object
dtype: object

In [87]:
# Build clean_df by discarding the columns that are no longer needed
unused_columns = ['date_built', 'end_built_year', 'y3']
clean_df = df2.drop(columns=unused_columns)
clean_df.head(5)

Unnamed: 0,landmark_name,id,address,designation_year,start_built_year,architect,location,latitude,longitude,landmark_designation_date
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991,1886,Adler & Sullivan,"{'latitude': '41.819255751', 'longitude': '-87...",41.819255751,-87.6027879992,1991-10-02
2,Manhattan Building,L-139,431 S Dearborn St,1978,1891,William LeBaron Jenney,"{'latitude': '41.8760657234', 'longitude': '-8...",41.8760657234,-87.6289644505,1978-07-07
3,Machinery Hall at Illinois Institute of Techno...,L- 12,100 W 33rd St,2004,1901,"Patton, Fisher & Miller","{'latitude': '41.8351614122', 'longitude': '-8...",41.8351614122,-87.6292212235,2004-05-26
4,Melissa Ann Elam House,L- 88,4726 S Dr Martin Luther King Jr Dr,1979,1903,Henry L. Newhouse,"{'latitude': '41.808529769', 'longitude': '-87...",41.808529769,-87.6172043949,1979-03-21
5,(Former) Pioneer Trust and Savings Bank Building,L-318,4000 W. North Ave.,2012,1924,Karl M. Vitzthum,"{'latitude': '41.9101921054', 'longitude': '-8...",41.9101921054,-87.7266173415,2012-06-06


In [88]:
# Parse the two year columns (stored as objects) into numeric values
for year_column in ('designation_year', 'start_built_year'):
    clean_df[year_column] = pd.to_numeric(clean_df[year_column])
clean_df.head(4)

Unnamed: 0,landmark_name,id,address,designation_year,start_built_year,architect,location,latitude,longitude,landmark_designation_date
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991,1886,Adler & Sullivan,"{'latitude': '41.819255751', 'longitude': '-87...",41.819255751,-87.6027879992,1991-10-02
2,Manhattan Building,L-139,431 S Dearborn St,1978,1891,William LeBaron Jenney,"{'latitude': '41.8760657234', 'longitude': '-8...",41.8760657234,-87.6289644505,1978-07-07
3,Machinery Hall at Illinois Institute of Techno...,L- 12,100 W 33rd St,2004,1901,"Patton, Fisher & Miller","{'latitude': '41.8351614122', 'longitude': '-8...",41.8351614122,-87.6292212235,2004-05-26
4,Melissa Ann Elam House,L- 88,4726 S Dr Martin Luther King Jr Dr,1979,1903,Henry L. Newhouse,"{'latitude': '41.808529769', 'longitude': '-87...",41.808529769,-87.6172043949,1979-03-21


In [90]:
# Show the dtype of every column in clean_df after the numeric conversion
clean_df.dtypes

landmark_name                object
id                           object
address                      object
designation_year              int64
start_built_year              int64
architect                    object
location                     object
latitude                     object
longitude                    object
landmark_designation_date    object
dtype: object

In [94]:
# Years between the start of construction and the landmark designation
year_designated = clean_df['designation_year']
year_built = clean_df['start_built_year']
clean_df['recognize_time'] = year_designated - year_built
clean_df.head(5)

Unnamed: 0,landmark_name,id,address,designation_year,start_built_year,architect,location,latitude,longitude,landmark_designation_date,recognize_time
1,Mathilde Eliel House,L- 89,4122 S Ellis Av,1991,1886,Adler & Sullivan,"{'latitude': '41.819255751', 'longitude': '-87...",41.819255751,-87.6027879992,1991-10-02,105
2,Manhattan Building,L-139,431 S Dearborn St,1978,1891,William LeBaron Jenney,"{'latitude': '41.8760657234', 'longitude': '-8...",41.8760657234,-87.6289644505,1978-07-07,87
3,Machinery Hall at Illinois Institute of Techno...,L- 12,100 W 33rd St,2004,1901,"Patton, Fisher & Miller","{'latitude': '41.8351614122', 'longitude': '-8...",41.8351614122,-87.6292212235,2004-05-26,103
4,Melissa Ann Elam House,L- 88,4726 S Dr Martin Luther King Jr Dr,1979,1903,Henry L. Newhouse,"{'latitude': '41.808529769', 'longitude': '-87...",41.808529769,-87.6172043949,1979-03-21,76
5,(Former) Pioneer Trust and Savings Bank Building,L-318,4000 W. North Ave.,2012,1924,Karl M. Vitzthum,"{'latitude': '41.9101921054', 'longitude': '-8...",41.9101921054,-87.7266173415,2012-06-06,88


In [95]:
# Longest, shortest and mean recognition time, each converted to int
# (int() truncates the fractional part of the mean)
recognition_years = clean_df['recognize_time']
longest, shortest, average = (
    int(statistic)
    for statistic in (
        recognition_years.max(),
        recognition_years.min(),
        recognition_years.mean(),
    )
)


In [96]:
# Report the maximum recognition time computed above
print(f"the longest period of time for a landmark to be recognized was {longest} years")


the longest period of time for a landmark to be recognized was 164 years


In [97]:
# Report the minimum recognition time computed above
print(f"the shortest period of time for a landmark to be recognized was {shortest} years")


the shortest period of time for a landmark to be recognized was 29 years


In [98]:
# Report the mean recognition time computed above (already truncated to an int)
print(f"the average period of time for a landmark to be recognized was {average} years")

the average period of time for a landmark to be recognized was 90 years


In [99]:
# Show the landmark with the longest recognition time.
# Selecting a single label with .loc yields one row as a Series.
longest_label = clean_df['recognize_time'].idxmax()
longest_df = clean_df.loc[longest_label]
longest_df

landmark_name                      Illinois-Indiana State Line Boundary Marker
id                                                                       L-118
address                                                Avenue G, near 103rd St
designation_year                                                          2002
start_built_year                                                          1838
architect                         Office of the United States Surveyor-General
location                     {'latitude': '41.7088646706', 'longitude': '-8...
latitude                                                        41.70886467060
longitude                                                      -87.52622268910
landmark_designation_date                                           2002-09-04
recognize_time                                                             164
Name: 100, dtype: object

In [100]:
# Show the landmark with the shortest recognition time.
# Selecting a single label with .loc yields one row as a Series.
shortest_label = clean_df['recognize_time'].idxmin()
shortest_df = clean_df.loc[shortest_label]
shortest_df

landmark_name                 Site of the 1st Self-Sustain Cont. Nuclear Chain
id                                                                       L-184
address                        East Side of S Ellis Ave between 56th & 57th St
designation_year                                                          1971
start_built_year                                                          1942
architect                      Commern. sculpture,"Nuclear Energy" Henry Moore
location                     {'latitude': '41.7921621211', 'longitude': '-8...
latitude                                                        41.79216212110
longitude                                                      -87.60086989510
landmark_designation_date                                           1971-10-27
recognize_time                                                              29
Name: 215, dtype: object