## Imports

In [17]:
import pandas as pd
import numpy as np
import folium
import os
import tabula
import googlemaps

## Data read-in

In [2]:
dfs = tabula.read_pdf('118165938811.pdf',pages='all')

In [3]:
len(dfs)

7

In [4]:
cleaned_dfs = [df.replace({r'\r':' '}, regex=True) for df in dfs]

In [5]:
df = pd.concat(cleaned_dfs)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 76 entries, 0 to 7
Data columns (total 9 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   No.                              69 non-null     float64
 1   Unnamed: 0                       0 non-null      float64
 2   Title/Description of Lease       69 non-null     object 
 3   Property Address                 69 non-null     object 
 4   Landlord / Counterparty          69 non-null     object 
 5   Unnamed: 1                       0 non-null      float64
 6   Landlord / CounterpartyAddress  69 non-null     object 
 7   RejectionEffective Date         69 non-null     object 
 8   Abandoned Personal Property      69 non-null     object 
dtypes: float64(3), object(6)
memory usage: 5.9+ KB


In [7]:
df.columns

Index(['No.', 'Unnamed: 0', 'Title/Description of Lease', 'Property Address',
       'Landlord / Counterparty', 'Unnamed: 1',
       'Landlord / Counterparty\rAddress', 'Rejection\rEffective Date',
       'Abandoned Personal Property'],
      dtype='object')

In [8]:
# Remove empty columns
drop_columns = ['Unnamed: 0','Unnamed: 1']
df = df.drop(columns=drop_columns)

# Replace '\r' with single whitespace
df.columns = [col.replace('\r',' ') for col in df.columns]

In [15]:
df = df.dropna(axis=0,subset='Property Address')

In [16]:
df

Unnamed: 0,No.,Title/Description of Lease,Property Address,Landlord / Counterparty,Landlord / Counterparty Address,Rejection Effective Date,Abandoned Personal Property
1,1.0,Terminated Lease with Signed Termination Agree...,"3000 S Robertson Blvd Los Angeles, CA 90034",3000 S Robertson Property Owner LLC,"4 Park Plaza, Suite 400, Irvine, CA, 92614",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip..."
2,2.0,Unexpired Lease,"1 Little W 12th St New York, NY 10014",2 Ninth Avenue Partners LLC,"177 Christopher Street, New York, NY, 10014",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip..."
3,3.0,Unexpired Lease,"1 Union Square West New York, NY 10003","Union Square Associates, LLC","One Union Square West, New York, NY, 10003",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip..."
4,4.0,Unexpired Lease,"10 East 38th Street New York, NY 10016","10 East 38th Street Company, L.L.C.","34-09 Queens Boulevard, Long Island City, NY, ...",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip..."
5,5.0,Unexpired Lease,"10 East 40th Street New York, NY 10016",Ronbet 40th Street LLC,"9 East 40th Street, 8th Floor, New York, NY, 1...",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip..."
...,...,...,...,...,...,...,...
3,65.0,"Unexpired Lease For the avoidance of doubt, De...","77 Sands St Brooklyn, NY 11201",RFR/K 77 Sands Owner LLC,"375 Park Avenue, 10th Floor, New York, NY, 10152",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip..."
4,66.0,Unexpired License Agreement,"54 W 40th St New York, NY 10018","Blue Bottle Coffee, Inc.","300 Webster Street, Oakland, CA, 94607",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip..."
5,67.0,Unexpired Storage License,"311 W 43rd St. New York, NY 10036","DWF V 311 W 43rd, LLC","200 State Street, 12th Floor, Boston, MA, 02109",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip..."
6,68.0,Unexpired Sublease,"437 Madison Avenue New York, NY 10022","Santander Holdings USA, Inc.","2 Morrissey Boulevard, Dorchester, MA, 02125",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip..."


In [18]:
%store -r google_maps_API_Key
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [19]:
def geocode(add):
    g = gmaps_key.geocode(add)
    lat = g[0]["geometry"]["location"]["lat"]
    lng = g[0]["geometry"]["location"]["lng"]
    return (lat, lng)

df['geocoded'] = df['Property Address'].apply(geocode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['geocoded'] = df['Property Address'].apply(geocode)


In [20]:
df['geocoded'] = df['geocoded'].astype(str)
df[['lat', 'lon']] = df['geocoded'].str.strip('()').str.split(', ', expand=True)
df['lat'] = df['lat'].astype(float)
df['lon'] = df['lon'].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['geocoded'] = df['geocoded'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['lat', 'lon']] = df['geocoded'].str.strip('()').str.split(', ', expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['lat', 'lon']] = df['geocoded'].str.strip('()').str.split(', ', expand=True)

In [23]:
df.head(1)

Unnamed: 0,No.,Title/Description of Lease,Property Address,Landlord / Counterparty,Landlord / Counterparty Address,Rejection Effective Date,Abandoned Personal Property,geocoded,lat,lon
1,1.0,Terminated Lease with Signed Termination Agree...,"3000 S Robertson Blvd Los Angeles, CA 90034",3000 S Robertson Property Owner LLC,"4 Park Plaza, Suite 400, Irvine, CA, 92614",11/6/2023,"Miscellaneous Furniture, Fixtures and/or Equip...","(34.0324217, -118.3893975)",34.032422,-118.389398


In [22]:
df.columns

Index(['No.', 'Title/Description of Lease', 'Property Address',
       'Landlord / Counterparty', 'Landlord / Counterparty Address',
       'Rejection Effective Date', 'Abandoned Personal Property', 'geocoded',
       'lat', 'lon'],
      dtype='object')

In [24]:
def popup_html(row):
    lease_desc = row['Title/Description of Lease']
    prop_address = row['Property Address']
    landlord_or_counterparty = row['Landlord / Counterparty']
    counterparty_address = row['Landlord / Counterparty Address']
    rejection_effective_date = row['Rejection Effective Date']
    abandoned_personal_property = row['Abandoned Personal Property']
    
    html = '''<!DOCTYPE html>
    <html>
    <strong>Description: </strong>{}'''.format(lease_desc) + '''<br>
    <strong>Property Address: </strong>{}'''.format(prop_address) + '''<br>
    <strong>Landlord or Counterparty: </strong>{}'''.format(landlord_or_counterparty) + '''<br>
    <strong>Counterparty Address: </strong>{}'''.format(counterparty_address) + '''<br>
    <strong>Rejection Effective Date: </strong>{}'''.format(rejection_effective_date) + '''<br>
    <strong>Abandoned Personal Property: </strong>${}'''.format(abandoned_personal_property) + '''<br>
    </html>
    '''
    return html

In [29]:
# Define the bounding box for NYC [min_lon, min_lat, max_lon, max_lat]
nyc_bounding_box = {
    'min_lat': 40.477399,
    'max_lat': 40.917577,
    'min_lon': -74.25909,
    'max_lon': -73.700009
}

# Filter the DataFrame for locations within the bounding box
df_nyc = df[
    (df['lat'] >= nyc_bounding_box['min_lat']) & 
    (df['lat'] <= nyc_bounding_box['max_lat']) & 
    (df['lon'] >= nyc_bounding_box['min_lon']) & 
    (df['lon'] <= nyc_bounding_box['max_lon'])
]

# Now, df_nyc contains only the locations within New York City


In [33]:
df_nyc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 40 entries, 2 to 7
Data columns (total 10 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   No.                              40 non-null     float64
 1   Title/Description of Lease       40 non-null     object 
 2   Property Address                 40 non-null     object 
 3   Landlord / Counterparty          40 non-null     object 
 4   Landlord / Counterparty Address  40 non-null     object 
 5   Rejection Effective Date         40 non-null     object 
 6   Abandoned Personal Property      40 non-null     object 
 7   geocoded                         40 non-null     object 
 8   lat                              40 non-null     float64
 9   lon                              40 non-null     float64
dtypes: float64(3), object(7)
memory usage: 3.4+ KB


In [34]:
import folium
from folium.plugins import MarkerCluster

m = folium.Map(location=df_nyc[["lat", "lon"]].mean().to_list(), zoom_start=10)

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(f'WeWork Lease Terminations in NYC')


### Create map container ###
m = folium.Map(location=df_nyc[["lat", "lon"]].mean().to_list(),zoom_start=9.5,tiles=None)

for index, row in df_nyc.iterrows():
    marker = folium.Marker(
        location=[row['lat'], row['lon']],
        radius=5,
        fill=True,
        popup=folium.Popup(popup_html(row), max_width=400))
    marker.add_to(m)

folium.TileLayer('OpenStreetMap',control=False).add_to(m)

# Add LayerControl to the map
folium.map.LayerControl(collapsed=False).add_to(m)
m.get_root().html.add_child(folium.Element(title_html))

# Display map
m

In [27]:
m.save('index.html')

In [28]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

cwd = os.getcwd()

cwd = cwd.split('/')

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/NYC_WeWork_lease_cuts_Nov_2023
