## Imports

In [60]:
from geopy.geocoders import GoogleV3
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
import numpy as np
import urllib3
import re
from textwrap import shorten
import os

In [61]:
# Load the exported spreadsheet; the file is expected in the notebook's working directory.
df = pd.read_csv(
    filepath_or_buffer='Bay Area companies that decided to put property up for sale, Jan. 1, 2022, to July 5, 2022 - Sheet1.csv'
)

In [62]:
# List the column labels. Several headers carry a trailing space
# (e.g. 'Company name '), so later cells must spell them exactly.
df.columns

Index(['Company name ', 'Home city ', 'Category ',
       'Address(es) company put or plans to put on the market for sale ',
       'Size of the property company put or plans to put on the market for sale, in sf',
       'Asking price ', 'Taker found? ',
       'Date TRD wrote about property coming to market for sale',
       'Link to story'],
      dtype='object')

## Image URL collector

In [63]:
# Drop the last n rows of the df, in case the sheet ends with summary rows (e.g. totals) that need to be clipped.
# df.drop(df.tail(n).index, inplace=True)

In [64]:
def _first_jpg_url(story_url, timeout=10):
    """Return the ``src`` of the first ``<img>`` on a page whose src contains 'jpg'.

    Exactly one value is returned per story URL, so the results always line up
    with the rows of the frame:

    * the image URL when a matching ``<img>`` exists,
    * ``'no image found'`` when the page loads but has no matching image,
    * ``'no_image'`` when the page cannot be fetched at all.
    """
    try:
        page = requests.get(story_url, timeout=timeout)
        page.raise_for_status()
    except requests.RequestException:
        # Best effort: a dead or malformed link must not abort the whole run.
        return 'no_image'

    soup = BeautifulSoup(page.content, 'html.parser')
    for image in soup.find_all('img'):
        # Some <img> tags have no src attribute; skip them instead of failing.
        src = image.get('src') or ''
        if 'jpg' in src:
            return src
    return 'no image found'


# One entry per row, in row order. `links` is kept as a plain list because a
# later cell inspects it directly.
links = [_first_jpg_url(url) for url in df['Link to story']]
df['image_links'] = links

## Article link formatter

In [65]:
# Spot-check the image URL collected for the first story.
links[0]

'https://therealdeal.com/sanfrancisco/wp-content/uploads/2022/05/main-SF-State-Bar-to-sell-250K-sf-HQ-building-in-San-Francisco-705x439.jpg'

In [66]:
# Preview the company names that will be turned into story links below.
df['Company name ']

0    State Bar of California 
1                Wells Fargo 
2                         UBS
3                    Chevron 
Name: Company name , dtype: object

In [84]:
def _split_link_text(name, width=20):
    """Split a company name into ``(anchor_text, remainder)``.

    ``anchor_text`` is the leading part of the name, at most ``width``
    characters, that becomes the clickable link text; ``remainder`` is whatever
    follows it. Whitespace is normalised first, exactly as ``textwrap.shorten``
    does, so the anchor is always a true prefix of the normalised name and the
    remainder can be taken by slicing. A missing name yields two empty strings.
    """
    if pd.isna(name):
        return '', ''
    normalized = ' '.join(str(name).split())
    anchor = shorten(normalized, width=width, placeholder='')
    return anchor, normalized[len(anchor):]


link_parts = df['Company name '].apply(_split_link_text)
df['short_description'] = link_parts.map(lambda parts: parts[0])
df['remaining_desc'] = link_parts.map(lambda parts: parts[1])

# Only the first words of the name are wrapped in the <a> tag; the rest of the
# name follows as plain text.
df['description_link'] = (
    '<a href="' + df['Link to story'] + '">'
    + df['short_description'] + '</a>'
    + df['remaining_desc']
)
df['description_link']

0    <a href="https://therealdeal.com/sanfrancisco/...
1    <a href="https://therealdeal.com/sanfrancisco/...
2    <a href="https://therealdeal.com/sanfrancisco/...
3    <a href="https://therealdeal.com/sanfrancisco/...
Name: description_link, dtype: object

In [68]:
# Spot-check the generated link markup for the first row.
df.description_link[0]

'<a href="https://therealdeal.com/sanfrancisco/2022/05/12/state-bar-to-sell-250k-sf-hq-building-in-san-francisco/">State</a> Bar of California '

## Google Maps API Geolocator Setup

In [85]:
# Restore the API key previously saved with IPython's `%store`, so the key
# itself never appears in the notebook. This fails on a machine where the key
# has not been stored yet.
%store -r google_maps_API_Key
# Google Maps geocoder client used by the geocoding cell below.
geolocator = GoogleV3(api_key=google_maps_API_Key)

In [86]:
# Append the state so the geocoder resolves the addresses within California.
df['geo_address'] = df['Address(es) company put or plans to put on the market for sale '] + ' CA'

# Geocode each address. Rows without an address are skipped rather than sent
# to the API; the geocoder returns None when it finds no match.
df['loc'] = df['geo_address'].apply(
    lambda address: geolocator.geocode(address, timeout=10) if isinstance(address, str) else None
)
df['point'] = df['loc'].apply(lambda loc: tuple(loc.point) if loc else None)

# Expand each (lat, lon, altitude) tuple into columns. Rows that could not be
# geocoded get NaN coordinates so that a single failure does not break the
# expansion for every other row.
coord_columns = ['lat', 'lon', 'altitude']
coord_rows = [
    point if point is not None else (np.nan, np.nan, np.nan)
    for point in df['point']
]
df[coord_columns] = pd.DataFrame(coord_rows, index=df.index, columns=coord_columns)

## HTML popup formatter

In [87]:
def popup_html(row, data=None):
    """Build the HTML shown in a map marker's popup for one property.

    Parameters
    ----------
    row : int
        Positional (``iloc``) index of the property in the frame.
    data : pandas.DataFrame, optional
        Frame to read from. Defaults to the notebook-level ``df``. It must
        contain the address, property-size, ``description_link`` and
        ``image_links`` columns.

    Returns
    -------
    str
        An HTML fragment with the story image, address, property size and the
        linked company name.
    """
    from html import escape

    frame = df if data is None else data
    address = frame['Address(es) company put or plans to put on the market for sale '].iloc[row]
    size = frame['Size of the property company put or plans to put on the market for sale, in sf'].iloc[row]
    description = frame['description_link'].iloc[row]
    image = frame['image_links'].iloc[row]

    template = '''<!DOCTYPE html>
    <html>
    <img src="{image}" width="256" height="156"><br>______________________________________<br>
    Address: <em>{address}</em><br>
    Size of property: <strong><em>{size}</em></strong><br><br>
    Company Name: <strong>{description}</strong></html>
    '''
    return template.format(
        # The src is quoted and escaped so URLs containing spaces or quotes
        # cannot break the tag.
        image=escape(str(image), quote=True),
        # Plain-text fields are escaped so characters like '&' or '<' render
        # literally.
        address=escape(str(address)),
        size=escape(str(size)),
        # description already is HTML (the <a> link), so it is inserted as-is.
        description=description,
    )


### HTML reservoir

In [72]:
#     Developer(s): <strong><em>{}'''.format(developer) + '''</strong></em><br><br>

## Map Maker

In [88]:
# Re-check the first link's markup before it is embedded in the map popups.
df.description_link[0]

'<a href="https://therealdeal.com/sanfrancisco/2022/05/12/state-bar-to-sell-250k-sf-hq-building-in-san-francisco/">State Bar of</a> California '

In [89]:
import folium
import branca

# Fixed-size figure. The map is displayed directly below; it is only rendered
# inside this figure if it is attached with m.add_to(f).
f = folium.Figure(width=750, height=750)

# Only rows with both coordinates can be placed on the map; folium rejects NaN
# locations.
located = df[['lat', 'lon']].notna().all(axis=1)
coords = df.loc[located, ['lat', 'lon']]

# Centre on the mean of the located properties (folium's default centre is
# used if none were located).
center = coords.mean().to_list() if not coords.empty else None
m = folium.Map(location=center, zoom_start=11)

title_html = '''
              <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format('Bay Area companies that decided to put property up for sale, Jan. 1, 2022, to July 5, 2022')

# popup_html takes the positional row index, so iterate by position and skip
# rows without coordinates.
for i in range(len(df)):
    if not located.iloc[i]:
        continue
    popup = folium.Popup(folium.Html(popup_html(i), script=True))
    folium.Marker([df['lat'].iloc[i], df['lon'].iloc[i]],
                  popup=popup).add_to(m)

m.get_root().html.add_child(folium.Element(title_html))

# fit_bounds expects two corner points, [[south, west], [north, east]], not a
# single centre point.
if not coords.empty:
    m.fit_bounds(bounds=[coords.min().to_list(), coords.max().to_list()], max_zoom=15)
m

In [90]:
# Write the map as a standalone HTML page into the current working directory.
m.save('index.html')

## Map URL snagger

Map template URL: `https://trd-digital.github.io/trd-news-interactive-maps/{map-folder-name}`

In [91]:
# Root URL of the published maps; the map's folder name is appended to it below.
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

In [92]:
# The published URL ends with the name of the folder this notebook lives in.
# os.path.basename handles the platform's path separator, so this also works
# where paths are not '/'-separated.
cwd = os.getcwd()
map_folder = os.path.basename(os.path.normpath(cwd))

final_name = base_name + map_folder
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/bay_area_properties_for_sale
