# Imports

In [1]:
import pandas as pd
import geopandas as gpd
import googlemaps
from shapely.geometry import Point
import os
import numpy as np
import re
from textwrap import shorten
from bs4 import BeautifulSoup
import gspread

## Data read-in

In [2]:
# Load the source CSV; later cells read its 'full_address', 'description'
# and 'story_link' columns.
df = pd.read_csv(filepath_or_buffer='Mayweather_Map - Chicago.csv')

In [3]:
# # Drop the 'URL' column if you no longer need it
# df = df.drop(columns=['URL'])

In [4]:
# df['Description'].iloc[0]

## Article Link Formatter

In [3]:
# Build 'description_link': the opening words of each description become the
# clickable link text (pointing at 'story_link'); the rest of the description
# follows the link as plain text.
LINK_TEXT_WIDTH = 20  # maximum number of description characters used as link text


def _split_description(description, width=LINK_TEXT_WIDTH):
    """Split a description into ``(link_text, remainder)``.

    ``link_text`` is ``textwrap.shorten(description, width, placeholder="")``,
    i.e. the leading part of the description that fits in ``width`` characters
    with runs of whitespace collapsed to single spaces.
    ``remainder`` is the original text that follows those leading words.

    Missing descriptions (NaN/None) yield ``("", "")`` instead of raising.
    """
    if pd.isna(description):
        return "", ""
    text = str(description)
    link_text = shorten(text, width=width, placeholder="")
    if not link_text:
        return "", text

    # shorten() collapses whitespace, so link_text is not guaranteed to be a
    # literal substring of the original. Match its words at the start of the
    # text with flexible whitespace, and cut the remainder exactly there so
    # no words are repeated or removed from later in the description.
    prefix_pattern = r"\s*" + r"\s+".join(re.escape(word) for word in link_text.split())
    prefix = re.match(prefix_pattern, text)
    if prefix is None:
        # Fallback: remove only the first occurrence, never later repeats.
        return link_text, text.replace(link_text, "", 1)
    return link_text, text[prefix.end():]


def _link_description(story_link, link_text, remainder, description):
    """Return the description with its opening words wrapped in a link.

    Rows without a story link (or without any link text) keep the original
    description value unchanged.
    """
    if pd.isna(story_link) or not link_text:
        return description
    return (
        f'<a href="{story_link}" target="_blank" rel="noopener noreferrer">{link_text}</a>'
        + remainder
    )


_description_parts = [_split_description(text) for text in df["description"]]
df["short_description"] = [link_text for link_text, _ in _description_parts]
df["remaining_desc"] = [remainder for _, remainder in _description_parts]

df["description_link"] = [
    _link_description(story_link, link_text, remainder, description)
    for story_link, link_text, remainder, description in zip(
        df["story_link"], df["short_description"], df["remaining_desc"], df["description"]
    )
]

df['description_link']

0    <a href="https://therealdeal.com/chicago/2024/...
1    <a href="https://therealdeal.com/chicago/2024/...
Name: description_link, dtype: object

## Geocode

In [4]:
# Restore the API key from IPython's %store (it must have been saved earlier
# with `%store google_maps_API_Key`), so the key is never written in this notebook.
%store -r google_maps_API_Key
# NOTE: despite its name, gmaps_key is the googlemaps.Client instance, not the key string.
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

In [5]:
def geocode(add, client=None):
    """Geocode an address and return ``(lat, lng)`` of the first result.

    Parameters
    ----------
    add : str
        Address to look up.
    client : optional
        Object exposing ``geocode(address)``; defaults to the notebook-level
        ``gmaps_key`` client when omitted.

    Returns
    -------
    tuple
        ``(lat, lng)`` taken from ``results[0]["geometry"]["location"]``.

    Raises
    ------
    IndexError
        If the lookup returns no results; the message names the address so
        the offending row can be found and fixed.
    """
    if client is None:
        client = gmaps_key
    results = client.geocode(add)
    if not results:
        # Same exception type as indexing an empty list, but with context.
        raise IndexError(f"No geocoding results for address: {add!r}")
    location = results[0]["geometry"]["location"]
    return (location["lat"], location["lng"])

# One geocoding lookup per row; each cell of 'geocoded' holds geocode()'s return value.
df['geocoded'] = df['full_address'].map(geocode)

In [6]:
# Preview the geocoded rows without dumping the whole table as the sheet grows.
df.head()

Unnamed: 0,full_address,description,story_link,alt_link,short_description,remaining_desc,description_link,geocoded
0,"200 East Randolph Street, Chicago, IL",Mayweather is an investor in office landlord 6...,https://therealdeal.com/chicago/2024/07/13/urb...,,Mayweather is an,investor in office landlord 601W Companies.,"<a href=""https://therealdeal.com/chicago/2024/...","(41.8853579, -87.6213768)"
1,"433 West Van Buren Street, Chicago, IL",Mayweather is an investor in office landlord 6...,https://therealdeal.com/chicago/2024/11/11/viz...,,Mayweather is an,investor in office landlord 601W Companies.,"<a href=""https://therealdeal.com/chicago/2024/...","(41.8766444, -87.6386958)"


In [7]:
def _to_point(lat_lng):
    """Build a shapely Point from a (lat, lng) pair; Point takes x=lng, y=lat."""
    latitude, longitude = lat_lng[0], lat_lng[1]
    return Point(longitude, latitude)


# Turn every geocoded pair into a point geometry
df['geometry'] = df['geocoded'].map(_to_point)

# Wrap the frame as a GeoDataFrame keyed on that geometry column
gdf = gpd.GeoDataFrame(df, geometry='geometry')

# Coordinates are plain lat/lng, so tag them as WGS84 (EPSG:4326)
gdf.set_crs(epsg=4326, inplace=True)

Unnamed: 0,full_address,description,story_link,alt_link,short_description,remaining_desc,description_link,geocoded,geometry
0,"200 East Randolph Street, Chicago, IL",Mayweather is an investor in office landlord 6...,https://therealdeal.com/chicago/2024/07/13/urb...,,Mayweather is an,investor in office landlord 601W Companies.,"<a href=""https://therealdeal.com/chicago/2024/...","(41.8853579, -87.6213768)",POINT (-87.62138 41.88536)
1,"433 West Van Buren Street, Chicago, IL",Mayweather is an investor in office landlord 6...,https://therealdeal.com/chicago/2024/11/11/viz...,,Mayweather is an,investor in office landlord 601W Companies.,"<a href=""https://therealdeal.com/chicago/2024/...","(41.8766444, -87.6386958)",POINT (-87.63870 41.87664)


In [8]:
# Trim stray whitespace from the column names and from every string cell;
# non-string values pass through unchanged.
gdf.columns = gdf.columns.str.strip()


def _strip_text(value):
    """Return ``value`` stripped if it is a string, otherwise unchanged."""
    return value.strip() if isinstance(value, str) else value


# DataFrame.applymap is deprecated in favor of DataFrame.map (pandas >= 2.1);
# fall back to applymap on older pandas where DataFrame.map does not exist.
if hasattr(pd.DataFrame, "map"):
    gdf = gdf.map(_strip_text)
else:
    gdf = gdf.applymap(_strip_text)

  gdf = gdf.applymap(lambda x: x.strip() if isinstance(x, str) else x)  # Trim whitespace from string values


In [9]:
# Cast every object-dtype column to str; a column that cannot be cast is
# reported and dropped.
# NOTE(review): astype(str) renders missing values as the text 'nan' — confirm
# the map front end expects that.
object_columns = [name for name in gdf.columns if gdf[name].dtype == object]
for name in object_columns:
    try:
        gdf[name] = gdf[name].astype(str)
    except Exception as err:
        print(f"Could not convert column {name}: {err}")
        gdf.drop(columns=[name], inplace=True)


In [10]:
# Write the cleaned GeoDataFrame to disk as GeoJSON.
gdf.to_file(filename='map_data.geojson', driver='GeoJSON')

In [13]:
def page_url(base_url, directory):
    """Return ``base_url`` followed by the last path component of ``directory``.

    Uses ``os.path`` rather than splitting on '/', so the folder name is
    extracted correctly whatever the platform's path separator, and a trailing
    separator does not produce an empty name.
    """
    return base_url + os.path.basename(os.path.normpath(directory))


# Print the URL formed from the site base plus this notebook's folder name.
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

cwd = os.getcwd()

final_name = page_url(base_name, cwd)
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/MayweatherMapChicago


In [12]:
# Quick sanity check: list the columns currently on gdf.
gdf.columns

Index(['full_address', 'description', 'story_link', 'alt_link',
       'short_description', 'remaining_desc', 'description_link', 'geocoded',
       'geometry'],
      dtype='object')

<a href="https://therealdeal.com/chicago/2024/04/03/david-gassman-sells-chicago-apartment-portfolio-for-72m/"> 6954 North Sheridan Road, Chicago</a>