In [12]:
import json
import pandas as pd
import numpy as np
import os
import folium
import googlemaps

In [3]:
# Load the article records from final.json into a DataFrame.
df = pd.read_json("final.json")

In [4]:
# Drop rows whose address or sale price holds a "not found" placeholder sentence.
# str.contains interprets each pattern as a regular expression, so the trailing "..."
# matches any three characters; presumably this is what lets the abbreviated phrases
# match the full placeholder sentences — TODO confirm against final.json.
# na=True treats a missing value like a placeholder (the row is dropped). Without it a
# NaN would end up in the mask and the `~` negation would raise TypeError.
address_missing = (
    df['subject_address'].str.contains('The article does not...', na=True)
    | df['subject_address'].str.contains('The address is not...', na=True)
)
price_missing = df['sale_price_text'].str.contains('The article does not...', na=True)

# .copy() hands later cells an independent frame, so adding columns to it does not
# trigger pandas' SettingWithCopyWarning.
df = df[~(address_missing | price_missing)].copy()

In [5]:
# Row count of df at this point.
len(df)

148

In [9]:
# List the columns available in df.
df.columns

Index(['url', 'title', 'subhead', 'authors', 'pub_date', 'pub_date_dt',
       'content', 'related_links', 'market', 'geographic_market',
       'subject_address', 'sale_price_text', 'sale_price_int'],
      dtype='object')

In [11]:
# Dump every article to a plain-text file, one delimited entry per row.
# Each pair is (label written to the file, DataFrame column it is read from).
header_fields = [
    ("URL", "url"),
    ("Title", "title"),
    ("Subhead", "subhead"),
    ("Authors", "authors"),
    ("Published Date", "pub_date"),
    ("Market", "market"),
    ("Geographic Market", "geographic_market"),
    ("Subject Address", "subject_address"),
    ("Sale Price (text)", "sale_price_text"),
    ("Sale Price (int)", "sale_price_int"),
]
entry_footer = "-----------------END OF ENTRY---------------------------"

with open('news_articles.txt', 'w', encoding='utf-8') as out_file:
    for _, article in df.iterrows():
        entry_lines = ['---']
        entry_lines.extend(f"{label}: {article[column]}" for label, column in header_fields)
        # The leading newline leaves a blank line between the header fields and the body.
        entry_lines.append("\nContent:")
        entry_lines.append(f"{article['content']}")
        entry_lines.append(entry_footer)
        # Two trailing newlines separate this entry from the next one.
        out_file.write("\n".join(entry_lines) + "\n\n")


In [7]:
# Save the current DataFrame to CSV; the index is written as the first column (pandas default).
df.to_csv("Celebrity_Data_Without_Coords.csv")

In [1]:
# Restore the Google Maps API key from IPython's %store, then build the client that
# geocode() uses for its lookups.
# NOTE(review): the output recorded for this cell is a NameError because `googlemaps`
# had not been imported when it ran — run the imports cell before this one.
%store -r google_maps_API_Key
gmaps_key = googlemaps.Client(key=google_maps_API_Key)


NameError: name 'googlemaps' is not defined

In [15]:
# Define the geocode function
def geocode(add):
    """Look up an address with the Google Maps client and return its coordinates.

    Parameters:
        add: Address text to geocode.

    Returns:
        tuple | None: ``(lat, lng)`` taken from the first geocoding result, or ``None``
        when the address is missing/blank or the service returns no results.
    """
    # Missing values (None/NaN) and blank strings cannot be geocoded; skip the API call.
    if not isinstance(add, str) or not add.strip():
        return None

    results = gmaps_key.geocode(add)
    if not results:
        return None

    # Only the first (best-ranked) result is used.
    location = results[0]["geometry"]["location"]
    return (location["lat"], location["lng"])

# Geocode every 'subject_address' value (one lookup per row) and store the resulting
# (lat, lng) tuple, or None when nothing was found, in the 'geocoded' column
df['geocoded'] = df['subject_address'].apply(geocode)

In [17]:
# Count rows with and without a geocoding result (True = no coordinates were returned).
df['geocoded'].isna().value_counts()

geocoded
False    148
Name: count, dtype: int64

In [19]:
# Split the geocoded pairs into numeric 'lat' and 'lon' columns.
# The pairs are rendered as text first, e.g. "(34.05, -118.24)"; a missing result
# becomes the string 'None' and yields empty (NaN) coordinates.
df['geocoded'] = df['geocoded'].astype(str)

coord_parts = df['geocoded'].apply(
    lambda pair_text: pair_text.strip('()').split(', ', 1) if pair_text != 'None' else (None, None)
)
df[['lat', 'lon']] = coord_parts.apply(pd.Series)

for coord_column in ('lat', 'lon'):
    df[coord_column] = df[coord_column].astype(float)

In [26]:
# Replace market slugs with their display names.
market_labels = {
    "la": 'Los Angeles',
    "new-york": 'New York',
    "miami": 'Miami',
    "san-francisco": 'San Francisco',
    "texas": 'Texas',
    "national": 'National',
}

# Match a slug only as a whole token: it must not be preceded or followed by a letter,
# digit, underscore or hyphen. A plain substring replace of "la" would also rewrite the
# "la" inside any other value that happens to contain it (e.g. "dallas").
# Longer slugs are listed first so a short slug can never pre-empt a longer alternative.
slug_pattern = (
    r"(?<![\w-])(?:"
    + "|".join(sorted(market_labels, key=len, reverse=True))
    + r")(?![\w-])"
)

# A single pass with a callable replacement; the display names contain no lowercase
# slug tokens, so re-running this cell leaves already-converted values unchanged.
df['geographic_market'] = df['geographic_market'].str.replace(
    slug_pattern,
    lambda match: market_labels[match.group(0)],
    regex=True,
)

In [28]:
# List the columns again to check which ones df now holds.
df.columns

Index(['url', 'title', 'subhead', 'authors', 'pub_date', 'pub_date_dt',
       'content', 'related_links', 'market', 'geographic_market',
       'subject_address', 'sale_price_text', 'sale_price_int', 'geocoded',
       'lat', 'lon'],
      dtype='object')

In [34]:
import pandas as pd
import folium
from folium.plugins import MarkerCluster, Fullscreen, MiniMap

def _popup_text(value, default: str) -> str:
    """Convert one field value to HTML-escaped display text, using `default` when it is missing."""
    from html import escape  # local import; the name `html` itself is never bound here

    if isinstance(value, (list, tuple)):
        # A field may hold several items (e.g. more than one author); show them
        # comma-separated instead of as a Python list repr.
        value = ", ".join(str(item) for item in value)
    elif value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        # row.get() only falls back when the key is absent; a key that is present but
        # holds None/NaN needs the default applied here.
        return escape(default)
    return escape(str(value))


def create_popup_html(row: pd.Series) -> str:
    """
    Generates an HTML popup for a given DataFrame row.

    Every value is HTML-escaped before it is placed in the markup, so characters such
    as ``<``, ``&`` or quotes in the article text cannot break the popup or inject
    markup. Missing values (absent key, None or NaN) are replaced by a placeholder.

    Parameters:
        row (pd.Series): A row from the DataFrame containing article information.

    Returns:
        str: HTML string for the popup.
    """
    title = _popup_text(row.get('title', 'No Title'), 'No Title')
    subhead = _popup_text(row.get('subhead', 'N/A'), 'N/A')
    authors = _popup_text(row.get('authors', 'Unknown'), 'Unknown')
    pub_date = _popup_text(row.get('pub_date', 'N/A'), 'N/A')
    subject_address = _popup_text(row.get('subject_address', 'N/A'), 'N/A')
    sale_price_text = _popup_text(row.get('sale_price_text', 'N/A'), 'N/A')
    sale_price_int = _popup_text(row.get('sale_price_int', 'N/A'), 'N/A')
    # Escaping also covers double quotes, so the value is safe inside the href attribute.
    url = _popup_text(row.get('url', '#'), '#')

    popup_markup = f"""
    <div class="popup-content">
        <h4 style="margin-bottom:5px;">{title}</h4>
        <p><strong>Subhead:</strong> {subhead}</p>
        <p><strong>Authors:</strong> {authors}</p>
        <p><strong>Published:</strong> {pub_date}</p>
        <p><strong>Address:</strong> {subject_address}</p>
        <p><strong>Sale Price:</strong> {sale_price_text} ({sale_price_int})</p>
        <p><a href="{url}" target="_blank">Read more</a></p>
    </div>
    """
    return popup_markup

# Assuming df is already defined and contains 'lat' and 'lon' columns
# Center map on the geographic center of the contiguous US
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4, scrollWheelZoom=False)

# Mapbox access token for the tile layer. Set MAPBOX_ACCESS_TOKEN in the environment to
# use your own token; otherwise the token this notebook already used is the fallback.
mapbox_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoidHJkZGF0YSIsImEiOiJjamc2bTc2YmUxY2F3MnZxZGh2amR2MTY5In0.QlOWqB-yQNrNlXD0KQ9IvQ',
)
# {z}/{x}/{y} are tile placeholders, so the URL is built by concatenation rather than
# with an f-string.
mapbox_tiles_url = (
    'https://api.mapbox.com/styles/v1/mapbox/streets-v11/tiles/256/{z}/{x}/{y}@2x'
    '?access_token=' + mapbox_token
)

# Add a custom Mapbox tile layer
folium.TileLayer(
    tiles=mapbox_tiles_url,
    attr='Mapbox',
    name='Streets',
    overlay=True,
    control=False,
    show=False,
    min_zoom=1,
    max_zoom=20
).add_to(m)

# Add custom CSS to style the popups
custom_css = """
<style>
    .popup-content {
        min-width: 300px;
        font-size: 14px;
        line-height: 1.4;
        color: #333;
        white-space: normal;
        word-wrap: break-word;
    }
    .leaflet-popup, .leaflet-popup-content-wrapper {
        background-color: #f9f9f9;
        border: 1px solid #bbb;
        border-radius: 5px;
        padding: 8px;
        box-shadow: 0 2px 6px rgba(0,0,0,0.1);
    }
    .leaflet-popup-tip {
        background: #f9f9f9;
    }
</style>
"""
m.get_root().html.add_child(folium.Element(custom_css))

# Add a title to the map
title_html = '''
    <h3 style="text-align:center; font-family:Arial, sans-serif; font-size:18px; color:#333; margin-top:10px;">
        <b>Celebrity Map</b>
    </h3>
'''
m.get_root().html.add_child(folium.Element(title_html))

# Add additional map controls
Fullscreen().add_to(m)
MiniMap(toggle_display=True).add_to(m)

# Create a marker cluster
marker_cluster = MarkerCluster().add_to(m)

# Loop through the DataFrame to add markers.
# Rows without coordinates (address could not be geocoded) are skipped: folium rejects
# a NaN location with a ValueError, which would abort the whole map.
mappable_rows = df.dropna(subset=['lat', 'lon'])
for idx, row in mappable_rows.iterrows():
    popup_html = create_popup_html(row)

    folium.Marker(
        location=[row['lat'], row['lon']],
        popup=folium.Popup(popup_html, max_width=300),
        icon=folium.Icon(color='red', icon='info-sign')
    ).add_to(marker_cluster)

# Display the map
m


In [35]:
# Write the map to index.html in the current working directory.
m.save('index.html')

In [36]:
# Build the public URL of this map: the site root plus the name of the folder the
# notebook is running in.
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

# os.path.basename splits on the platform's own path separator; splitting the path
# on '/' by hand does not work for Windows-style paths.
project_dir = os.path.basename(os.getcwd())

final_name = base_name + project_dir
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/Celebrity_Data_Map_03_20_24
