## Imports

In [2]:
from geopy.geocoders import GoogleV3
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
import numpy as np
import urllib3
import re
from textwrap import shorten
import os

In [3]:
# Load the expansion spreadsheet export; the path is relative, so the CSV must
# sit in the notebook's working directory.
df = pd.read_csv('Bay Area companies expanding their footprints, Jan. 1, 2022 to July 5, 2022 - Sheet1.csv')

In [4]:
# Show the exact column labels. Note that 'Category ' and
# 'Address(es) company expanded into ' carry a trailing space, which later
# cells must reproduce when indexing.
df.columns

Index(['Company name', 'Home city', 'Category ',
       'Address(es) company expanded into ',
       'Net size of expansion, in square feet',
       'Direct lease, purchase or sublease?', 'Sublessor (if sublease)',
       'Date TRD wrote about expansion', 'Link to story'],
      dtype='object')

## Image URL collector

In [5]:
# Drop the last n rows of a df, in case there are sum rows that need to be clipped
# df.drop(df.tail(n).index, inplace=True)

In [None]:
# Collect exactly one image entry per story so that `links` lines up
# row-for-row with `df` (the column assignment below requires equal lengths).
#   - first <img> whose src contains 'jpg'  -> that src
#   - page fetched but no such image        -> 'no image found'
#   - page could not be fetched or parsed   -> 'no_image'
links = []
for url in df['Link to story']:
    link = 'no image found'
    try:
        # A timeout keeps one unresponsive story from stalling the whole loop.
        page = requests.get(url, timeout=10)
        soup = BeautifulSoup(page.content, 'html.parser')
        for image in soup.find_all('img'):
            # .get() tolerates <img> tags that have no src attribute.
            src = image.get('src') or ''
            if 'jpg' in src:
                link = src
                break
    except Exception:
        # Best effort: record the failure and move on. `Exception` (rather than
        # a bare except) still lets the user interrupt the kernel.
        link = 'no_image'
    links.append(link)

df['image_links'] = links

## Article link formatter

In [None]:
# Number of image entries collected by the image URL collector.
len(links)

In [None]:
# Inspect the company names that the link formatter below will split.
df['Company name']

In [None]:
def _leading_text(name):
    """Return the part of `name` that fits in 20 characters, cut at a word boundary."""
    return shorten(name, width=20, placeholder="")


def _trailing_text(record):
    """Return the company name with the shortened part removed."""
    clipped = str(record['short_description'])
    return record['Company name'].replace(clipped, '')


# Only the leading part of the name becomes the clickable link text; the rest
# of the name is appended after the closing tag as plain text.
df["short_description"] = df["Company name"].apply(_leading_text)
df['remaining_desc'] = df.apply(_trailing_text, axis=1)

anchor_open = '<a href="' + df["Link to story"] + '">'
df["description_link"] = anchor_open + df["short_description"] + "</a>" + df["remaining_desc"]
df['description_link']

In [None]:
# Spot-check the generated link markup for the row with index label 0.
df.description_link[0]

## Google Maps API Geolocator Setup

In [None]:
# Restore the API key from IPython's %store so the key itself is never written
# in the notebook. NOTE(review): this cell fails on a machine where the key was
# not stored beforehand with `%store google_maps_API_Key`.
%store -r google_maps_API_Key
# Google Maps geocoder used by the address lookup below.
geolocator = GoogleV3(api_key=google_maps_API_Key)

In [None]:
# Geocode every expansion address; ' CA' is appended to each address string
# before it is sent to the geocoder.
df['geo_address'] = df['Address(es) company expanded into '] + ' CA'
df['loc'] = df['geo_address'].apply(geolocator.geocode, timeout=10)
# Rows whose lookup produced a falsy result keep None here.
df['point'] = df['loc'].apply(lambda loc: tuple(loc.point) if loc else None)

# Expand the (lat, lon, altitude) tuples into columns. Rows without a point are
# filled with NaN so that a single failed lookup does not break the expansion.
missing_point = (np.nan, np.nan, np.nan)
points = [point if isinstance(point, tuple) else missing_point for point in df['point']]
df[['lat','lon','altitude']] = pd.DataFrame(points, index=df.index, columns=['lat','lon','altitude'])

## HTML popup formatter

In [None]:
def popup_html(row, frame=None):
    """Build the HTML shown in the map popup for one expansion record.

    Args:
        row: Positional (``iloc``) index of the record to render.
        frame: DataFrame to read the record from. Defaults to the
            notebook-level ``df``. It must have the columns
            'Address(es) company expanded into ',
            'Net size of expansion, in square feet', 'description_link'
            and 'image_links'.

    Returns:
        str: HTML containing the story image (when one was collected), the
        linked company name, the address and the net size of the expansion.
    """
    # Imported locally under its own name because the map-building cell
    # assigns a global variable called `html`.
    from html import escape

    if frame is None:
        frame = df

    address = frame['Address(es) company expanded into '].iloc[row]
    net_size = frame['Net size of expansion, in square feet'].iloc[row]
    description = frame['description_link'].iloc[row]
    image = frame['image_links'].iloc[row]

    # 'no_image' / 'no image found' are the placeholders written by the image
    # collector; rendering them as an <img> would only show a broken picture.
    image_tag = ''
    if isinstance(image, str) and image not in ('', 'no_image', 'no image found'):
        # Quote and escape the URL so spaces or '&' cannot break the attribute.
        image_tag = '<img src="{}" width="256" height="156">'.format(escape(image, quote=True))

    return '''<!DOCTYPE html>
    <html>
    {image}<br>______________________________________<br>
    Company: <strong>{description}</strong><br>
    Address(es) leased/subleased: <em>{address}</em><br>
    Net size of expansion: <strong><em>{net_size}</em></strong><br>
    </html>
    '''.format(
        image=image_tag,
        description=description,  # already HTML (an <a> link); inserted as-is
        address=escape(str(address)),
        net_size=escape(str(net_size)),
    )


### HTML reservoir

In [None]:
#     Developer(s): <strong><em>{}'''.format(developer) + '''</strong></em><br><br>

## Map Maker

In [None]:
# Re-check the link markup for the row with index label 0 before building the map.
df.description_link[0]

In [None]:
import folium
import branca

# Standalone 750x750 figure; it only takes effect if `m.add_to(f)` below is re-enabled.
f = folium.Figure(width=750, height=750)
# Centre the map on the mean coordinate of all geocoded rows.
m = folium.Map(location=df[["lat", "lon"]].mean().to_list(),zoom_start=9)

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format('Bay Area companies that expanded their footprints, January 2022 to July 2022')

# One marker per record, with the HTML popup built by popup_html().
for i in range(len(df)):
    lat, lon = df['lat'].iloc[i], df['lon'].iloc[i]
    if pd.isna(lat) or pd.isna(lon):
        # No coordinates for this row, so there is nothing to place on the map.
        continue
    html = popup_html(i)
    popup = folium.Popup(folium.Html(html, script=True))
    folium.Marker([lat, lon], popup=popup).add_to(m)

m.get_root().html.add_child(folium.Element(title_html))

# fit_bounds expects a bounding box [[south, west], [north, east]], not a
# single point, so use the extremes of the plotted coordinates.
coords = df[['lat', 'lon']]
m.fit_bounds([coords.min().to_list(), coords.max().to_list()], max_zoom=15)
# m.add_to(f)
m

In [None]:
# Write the map to index.html in the current working directory.
m.save('index.html')

## Map URL snagger

Map template URL: `https://trd-digital.github.io/trd-news-interactive-maps/{map-folder-name}`

In [None]:
# URL prefix of the published maps; the map's folder name is appended to it.
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

In [None]:
# The published URL ends with the name of the folder this notebook runs in.
# os.path.basename honours the platform's path separator, whereas splitting
# on '/' returns the whole path on Windows.
cwd = os.getcwd()
folder_name = os.path.basename(cwd)

final_name = base_name + folder_name
print(final_name)