## Imports

In [1]:
import os
import re
import time
from textwrap import shorten

import branca
import folium
import numpy as np
import pandas as pd
import requests
import urllib3
from bs4 import BeautifulSoup
from geopy.geocoders import GoogleV3

In [2]:
# Load the sublease sheet exported as CSV. The path has no directory part, so the
# file is looked up in the kernel's current working directory.
df = pd.read_csv('Bay Area companies that have decided to put space on market for sublease, Jan. 1, 2022, to July 5, 2022 - Sheet1.csv')

In [3]:
# Inspect the column names. Several end with a trailing space (e.g. 'Company name '),
# so every later lookup has to spell them exactly as shown here.
df.columns

Index(['Company name ', 'Home city ', 'Category',
       'Amount of space company listed for sublease or plans to list for sublease, in sf ',
       'Address(es) of space company listed for sublease or plans to list for sublease ',
       'Sublease taken?', 'Sublease term', 'Notes on sublease',
       'Date TRD wrote about sublease', 'Link to story'],
      dtype='object')

## Image URL collector

In [4]:
# Drop the last n rows of df, in case the sheet ends with total/summary rows that need to be clipped.
# df.drop(df.tail(n).index, inplace=True)

In [5]:
def _first_jpg_src(url, timeout=10):
    """Return the ``src`` of the first ``<img>`` on the page at ``url`` containing 'jpg'.

    Exactly one string is returned for every URL, so the caller always ends up
    with one entry per story:

    * the image URL when a matching image exists,
    * 'no image found' when the page has no image whose ``src`` contains 'jpg',
    * 'no_image' when the page could not be fetched or parsed.
    """
    try:
        page = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(page.content, 'html.parser')
        for image in soup.find_all('img'):
            # .get() tolerates <img> tags that have no src attribute at all,
            # so one such tag no longer discards a usable image further down.
            src = image.get('src') or ''
            if 'jpg' in src:
                return src
        return 'no image found'
    except Exception:
        # Best effort: a failed download must not abort the whole notebook.
        # Unlike a bare `except:`, this still lets Ctrl-C / kernel interrupt through.
        return 'no_image'


# One entry per row, in row order, so the list always has the same length as df.
links = [_first_jpg_src(url) for url in df['Link to story']]
df['image_links'] = links

## Article link formatter

In [6]:
# Sanity check: number of collected image links (should equal the number of rows in df).
len(links)

4

In [7]:
# Show the raw company names; note the column key itself ends with a space.
df['Company name ']

0                   Slack
1                   Yahoo
2               Autodesk 
3    Nektar Therapeutics 
Name: Company name , dtype: object

In [8]:
def _split_company_name(name, width=20):
    """Split a company name into ``(linked_part, remainder)``.

    ``linked_part`` is the leading words that fit within ``width`` characters
    and becomes the clickable text; ``remainder`` is whatever follows it.

    ``textwrap.shorten`` collapses runs of whitespace, so the name is normalised
    the same way first. Otherwise the shortened text may not occur in the raw
    name and the whole name would be repeated after the link. Slicing by length
    (instead of ``str.replace``) also guarantees that only the leading
    occurrence is removed.
    """
    normalized = ' '.join(name.split())
    linked_part = shorten(normalized, width=width, placeholder="")
    remainder = normalized[len(linked_part):] if normalized.startswith(linked_part) else ''
    return linked_part, remainder


name_parts = df["Company name "].apply(_split_company_name)
df["short_description"] = name_parts.map(lambda parts: parts[0])
df["remaining_desc"] = name_parts.map(lambda parts: parts[1])

# Anchor that opens the story in a new tab, followed by any part of the name
# that did not fit inside the link text.
df["description_link"] = (
    '<a href="' + df["Link to story"] + '" target="_blank" rel="noopener noreferrer">'
    + df["short_description"] + "</a>" + df["remaining_desc"]
)
df['description_link']

0    <a href="https://therealdeal.com/sanfrancisco/...
1    <a href="https://therealdeal.com/sanfrancisco/...
2    <a href="https://therealdeal.com/sanfrancisco/...
3    <a href="https://therealdeal.com/sanfrancisco/...
Name: description_link, dtype: object

In [9]:
# Spot-check the generated anchor for the row labelled 0.
df.description_link[0]

'<a href="https://therealdeal.com/sanfrancisco/2022/02/03/slack-lists-more-than-200000-sf-of-sf-office-space-for-sublease/" target="_blank" rel="noopener noreferrer">Slack</a>'

## Google Maps API Geolocator Setup

In [10]:
# Restore the API key from IPython's %store database (it must have been saved
# beforehand with `%store google_maps_API_Key`); this keeps the key out of the notebook source.
%store -r google_maps_API_Key
# Geocoder client used in the next cell to look up the sublease addresses.
geolocator = GoogleV3(api_key=google_maps_API_Key)

In [11]:
# Append the state abbreviation to each address before geocoding.
df['geo_address'] = df['Address(es) of space company listed for sublease or plans to list for sublease '] + ' CA'
# One geocoding request per row; `timeout` is forwarded to geolocator.geocode.
df['loc'] = df['geo_address'].apply(geolocator.geocode, timeout=10)
df['point'] = df['loc'].apply(lambda loc: tuple(loc.point) if loc else None)

# A row whose address could not be geocoded has None in 'point'. Pad those rows
# with NaNs so every row expands to exactly three values; otherwise the
# three-column assignment below does not receive a (lat, lon, altitude) triple
# for each row.
missing_point = (np.nan, np.nan, np.nan)
df[['lat', 'lon', 'altitude']] = pd.DataFrame(
    [point if isinstance(point, tuple) else missing_point for point in df['point']],
    index=df.index,
    columns=['lat', 'lon', 'altitude'],
)

## Correction section

In [12]:
# Manual correction: overwrite the sublease-size text for the row labelled 1
# (Yahoo in the company listing shown earlier in this notebook).
# NOTE(review): the row is addressed by its index label, so this will silently
# edit a different company if the CSV row order changes — confirm before re-running on new data.
df.at[1,'Amount of space company listed for sublease or plans to list for sublease, in sf ']=('About 361K sf that’s on top of about 297K sf listed for sublease at these addresses in 2021.')

## HTML popup formatter

In [13]:
def popup_html(row):
    """Build the HTML shown in a marker popup for one row of ``df``.

    Parameters
    ----------
    row : int
        Positional (``iloc``) index of the row in the module-level ``df``.

    Returns
    -------
    str
        HTML containing the story image (omitted when no image URL was
        collected for the row), the linked company name, the address and the
        size of the sublease.
    """
    address = df['Address(es) of space company listed for sublease or plans to list for sublease '].iloc[row]
    size = df['Amount of space company listed for sublease or plans to list for sublease, in sf '].iloc[row]
    description = df['description_link'].iloc[row]
    image = df['image_links'].iloc[row]

    # The image collector stores these placeholders instead of a URL when it
    # finds nothing; emitting them as `src` would only render a broken image.
    if image in ('no image found', 'no_image'):
        image_tag = ''
    else:
        # Quote the attribute so URLs containing spaces or '>' stay intact.
        image_tag = f'<img src="{image}" width="256" height="156">'

    # Tags are closed in the order they were opened and </html> comes last.
    return f'''<!DOCTYPE html>
    <html>
    {image_tag}<br>______________________________________<br>
    Company: <strong>{description}</strong><br>
    Address: <em>{address}</em><br>
    Size of sublease: <strong><em>{size}</em></strong><br>
    </html>
    '''


### HTML reservoir

In [14]:
#     Developer(s): <strong><em>{}'''.format(developer) + '''</strong></em><br><br>

## Map Maker

In [15]:
# Re-display the anchor for the row labelled 0 before it is embedded in the map popups.
df.description_link[0]

'<a href="https://therealdeal.com/sanfrancisco/2022/02/03/slack-lists-more-than-200000-sf-of-sf-office-space-for-sublease/" target="_blank" rel="noopener noreferrer">Slack</a>'

In [16]:
# Rows without coordinates (address not geocoded) cannot be plotted or used for bounds.
located = df.dropna(subset=['lat', 'lon'])

# Fixed-size figure; the map is only attached to it if the add_to line near the end is enabled.
f = folium.Figure(width=750, height=750)
m = folium.Map(location=located[['lat', 'lon']].mean().to_list(), zoom_start=9)

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format('Bay Area companies that decided to put space on the market for sublease, January 2022 to July 2022')

for i in range(len(df)):
    lat, lon = df['lat'].iloc[i], df['lon'].iloc[i]
    if pd.isna(lat) or pd.isna(lon):
        # A marker needs real coordinates; skip rows that were not geocoded.
        continue
    popup = folium.Popup(folium.Html(popup_html(i), script=True))
    folium.Marker([lat, lon], popup=popup).add_to(m)

m.get_root().html.add_child(folium.Element(title_html))

# fit_bounds expects two opposite corners, [[south, west], [north, east]],
# not a single [lat, lon] point, so build the bounding box of all markers.
if not located.empty:
    m.fit_bounds(
        bounds=[
            [float(located['lat'].min()), float(located['lon'].min())],
            [float(located['lat'].max()), float(located['lon'].max())],
        ],
        max_zoom=15,
    )
# m.add_to(f)
m

In [17]:
# Write the map to 'index.html' (relative path, so it lands in the current working directory).
m.save('index.html')

## Map URL snagger

Map template URL: `https://trd-digital.github.io/trd-news-interactive-maps/{map-folder-name}`

In [18]:
# Root URL of the published maps; the map folder name is appended to it in the next cell.
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

In [19]:
# The map URL ends with the name of the folder this notebook runs in.
# os.path.basename honours the platform's path separator, whereas splitting
# on '/' yields the whole path as a single piece on Windows.
cwd = os.getcwd()
final_name = base_name + os.path.basename(cwd)
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/bay_area_company_sublease
