## Imports

In [2]:
from geopy.geocoders import GoogleV3
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
import numpy as np
import urllib3
import re
from textwrap import shorten
import os

In [3]:
# Load the expansion tracker export into the working frame used by every later cell.
df = pd.read_csv(
    filepath_or_buffer='Bay Area companies expanding their footprints, Jan. 1, 2022 to July 5, 2022 - Sheet1.csv'
)

In [4]:
# List the column headers as read from the CSV. Some headers (e.g. 'Category ' and
# 'Address(es) company expanded into ') end with a trailing space, so later cells
# must reference them exactly as printed here.
df.columns

Index(['Company name', 'Home city', 'Category ',
       'Address(es) company expanded into ',
       'Net size of expansion, in square feet',
       'Direct lease, purchase or sublease?', 'Sublessor (if sublease)',
       'Date TRD wrote about expansion', 'Link to story'],
      dtype='object')

## Image URL collector

In [63]:
# Drop the last n rows of the DataFrame. Use this when the sheet ends with total/summary rows that need to be clipped.
# df.drop(df.tail(n).index, inplace=True)

In [5]:
def _first_jpg_src(url, timeout=10):
    """Return the ``src`` of the first ``<img>`` on ``url`` whose source contains 'jpg'.

    Exactly one value is returned per URL so the resulting list always lines up
    with the rows of ``df``:

    * the image ``src`` when a jpg is found,
    * ``'no image found'`` when the page loads but has no jpg image,
    * ``'no_image'`` when the page cannot be fetched or parsed.
    """
    try:
        # A timeout keeps one unresponsive article from stalling the whole run.
        page = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(page.content, 'html.parser')
    except Exception:
        # Best effort: a bad/missing URL or a network error must not abort the loop.
        # (Unlike a bare ``except:``, this still lets KeyboardInterrupt through.)
        return 'no_image'

    for image in soup.find_all('img'):
        # ``.get`` tolerates <img> tags without a src attribute instead of
        # discarding the whole page because of one malformed tag.
        src = image.get('src', '')
        if 'jpg' in src:
            return src
    return 'no image found'


# One entry per story link, in row order.
links = [_first_jpg_src(url) for url in df['Link to story']]
df['image_links'] = links

## Article link formatter

In [6]:
# Sanity check: the `df['image_links'] = links` assignment needs one link per row of df.
len(links)

18

In [7]:
# Preview the company names; they are used as the link text when the article links are built.
df['Company name']

0            Procept BioRobotics
1             Applied Materials 
2                  Graphite Bio 
3                Iconiq Capital 
4                  Pure Storage 
5                         Waymo 
6                         Apple 
7                         Amazon
8          InterVenn Biosciences
9          Third Wave Automation
10    Dependable Highway Express
11                        Amazon
12                       Twitter
13                PCC Logistics 
14           Eikon Therapeutics 
15                       TireHub
16                     Logitech 
17                  Google Cloud
Name: Company name, dtype: object

In [8]:
# Normalise whitespace up front. shorten() collapses runs of whitespace internally,
# so comparing its result against the raw name would fail to match whenever the
# sheet contains stray or doubled spaces.
clean_name = df["Company name"].apply(lambda s: " ".join(s.split()))

# The linked part of the name: at most 20 characters, cut at a word boundary.
df["short_description"] = clean_name.apply(lambda s: shorten(s, width=20, placeholder=""))

# With an empty placeholder, shorten() returns a prefix of the normalised name, so
# slicing that prefix off is exact (str.replace would drop every occurrence, and
# nothing at all when the spacing differs).
df["remaining_desc"] = [
    name[len(short):] for name, short in zip(clean_name, df["short_description"])
]

# Anchor tag around the short part, followed by whatever did not fit.
df["description_link"] = (
    '<a href="' + df["Link to story"] + '">' + df["short_description"] + "</a>" + df["remaining_desc"]
)
df["description_link"]

0     <a href="https://therealdeal.com/sanfrancisco/...
1     <a href="https://therealdeal.com/sanfrancisco/...
2     <a href="https://therealdeal.com/sanfrancisco/...
3     <a href="https://therealdeal.com/sanfrancisco/...
4     <a href="https://therealdeal.com/sanfrancisco/...
5     <a href="https://therealdeal.com/sanfrancisco/...
6     <a href="https://therealdeal.com/sanfrancisco/...
7     <a href="https://therealdeal.com/sanfrancisco/...
8     <a href="https://therealdeal.com/sanfrancisco/...
9     <a href="https://therealdeal.com/sanfrancisco/...
10    <a href="https://therealdeal.com/sanfrancisco/...
11    <a href="https://therealdeal.com/sanfrancisco/...
12    <a href="https://therealdeal.com/sanfrancisco/...
13    <a href="https://therealdeal.com/sanfrancisco/...
14    <a href="https://therealdeal.com/sanfrancisco/...
15    <a href="https://therealdeal.com/sanfrancisco/...
16    <a href="https://therealdeal.com/sanfrancisco/...
17    <a href="https://therealdeal.com/sanfranci

In [9]:
# Spot-check the generated anchor tag for the row labelled 0.
df.description_link[0]

'<a href="https://therealdeal.com/sanfrancisco/2022/01/06/redwood-city-robotics-company-leases-158k-sf-for-new-san-jose-hq/">Procept BioRobotics</a>'

## Google Maps API Geolocator Setup

In [10]:
# Restore the API key that was previously persisted with `%store`, so the key
# itself never appears in the notebook source.
%store -r google_maps_API_Key
# Geocoder client for the GoogleV3 service, used below to look up coordinates.
geolocator = GoogleV3(api_key=google_maps_API_Key)

In [11]:
# Append the state so street addresses are resolved within California.
df['geo_address'] = df['Address(es) company expanded into '] + ' CA'

# One geocoder call per row; `timeout` is forwarded to geolocator.geocode.
df['loc'] = df['geo_address'].apply(geolocator.geocode, timeout=10)

# (lat, lon, altitude) tuple, or None when the geocoder returned no match.
df['point'] = df['loc'].apply(lambda loc: tuple(loc.point) if loc else None)

# A single unmatched address leaves None in `point`, which cannot be expanded into
# three columns. Substitute a NaN triple so the remaining rows still get coordinates.
missing_point = (np.nan, np.nan, np.nan)
coords = [
    point if isinstance(point, tuple) else missing_point
    for point in df['point']
]
df[['lat', 'lon', 'altitude']] = pd.DataFrame(coords, index=df.index)

## HTML popup formatter

In [21]:
def popup_html(row, data=None):
    """Build the HTML shown in the map popup for one expansion record.

    Parameters
    ----------
    row : int
        Positional (``iloc``) index of the record.
    data : pandas.DataFrame, optional
        Frame to read the record from. Defaults to the notebook-level ``df``.
        It must provide the columns 'Address(es) company expanded into ',
        'Net size of expansion, in square feet', 'description_link' and
        'image_links'.

    Returns
    -------
    str
        An HTML fragment with the article image (when one was collected), the
        linked company name, the address and the size of the expansion.
    """
    from html import escape

    source = df if data is None else data
    address = source['Address(es) company expanded into '].iloc[row]
    size = source['Net size of expansion, in square feet'].iloc[row]
    # Already HTML (an <a> tag built earlier), so it is inserted as-is.
    description = source['description_link'].iloc[row]
    image = str(source['image_links'].iloc[row])

    # The image collector stores these placeholders when no picture is available;
    # emitting them as a URL would only render a broken-image icon.
    if image in ('no_image', 'no image found'):
        image_tag = ''
    else:
        image_tag = '<img src="{}" width="256" height="156">'.format(
            escape(image, quote=True)
        )

    # Address and size are plain text from the sheet: escape them so characters
    # such as '&' or '<' cannot break the popup markup.
    return '''<!DOCTYPE html>
    <html>
    {image_tag}<br>______________________________________<br>
    Company: <strong>{description}</strong><br>
    Address(es) leased/subleased: <em>{address}</em><br>
    Net size of expansion: <strong><em>{size}</em></strong><br><br>
    </html>
    '''.format(
        image_tag=image_tag,
        description=description,
        address=escape(str(address)),
        size=escape(str(size)),
    )


### HTML reservoir

In [72]:
#     Developer(s): <strong><em>{}'''.format(developer) + '''</strong></em><br><br>

## Map Maker

In [13]:
# Re-check the anchor tag for the row labelled 0 before it is embedded in the map popups.
df.description_link[0]

'<a href="https://therealdeal.com/sanfrancisco/2022/01/06/redwood-city-robotics-company-leases-158k-sf-for-new-san-jose-hq/">Procept BioRobotics</a>'

In [20]:
import folium
import branca

f = folium.Figure(width=750, height=750)
# Start centred on the mean coordinate (NaN rows are ignored by mean()).
m = folium.Map(location=df[["lat", "lon"]].mean().to_list(),zoom_start=9)

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format('Bay Area companies expanding their footprints, January 2022 to July 2022')

# One marker per row. `i` is the positional index that popup_html expects.
for i, (lat, lon) in enumerate(zip(df['lat'], df['lon'])):
    if pd.isna(lat) or pd.isna(lon):
        # No coordinates for this address; a marker cannot be placed.
        continue
    popup = folium.Popup(folium.Html(popup_html(i), script=True))
    folium.Marker([lat, lon], popup=popup).add_to(m)

m.get_root().html.add_child(folium.Element(title_html))

# fit_bounds expects [[south, west], [north, east]]; a single mean point is not a
# bounding box. Use the extent of all located rows instead.
located = df[['lat', 'lon']].dropna()
if not located.empty:
    m.fit_bounds(
        bounds=[located.min().to_list(), located.max().to_list()],
        max_zoom=15,
    )
# m.add_to(f)
m

In [15]:
# Save the map as index.html; the path is relative, so it lands in the current working directory.
m.save('index.html')

## Map URL snagger

Map template URL: `https://trd-digital.github.io/trd-news-interactive-maps/{map-folder-name}`

In [16]:
# Root URL of the published maps site; the map's folder name is appended to it below.
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

In [17]:
# The published URL ends with the name of the folder this notebook runs in.
# Split on the platform's own separator (after normalising the path) so the
# folder name is also found on systems that do not use '/'.
cwd = os.path.normpath(os.getcwd()).split(os.sep)

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/bay_area_company_footprint_expansion
