In [None]:
%matplotlib ipympl

import pyogrio as ogr
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
import geopandas as gpd
from lonboard import Map, PolygonLayer
from lonboard.colormap import apply_categorical_cmap
import contextily as ctx
import pandas as pd
import seaborn as sns
import numpy as np

In [None]:
# Load the photo metadata table. The CSV must provide `longitude` and
# `latitude` columns, which are turned into point geometries below.
photo_locs = ogr.read_dataframe(
    "/Users/sebastian/Locals/CDP/Data/photo_locations.csv"
)
# Declare the CRS explicitly: the coordinates are plain lon/lat and are later
# drawn over a basemap with crs='EPSG:4326'. Without a CRS the GeoDataFrame
# cannot be reprojected and downstream tools have to guess.
photo_locs = gpd.GeoDataFrame(
    photo_locs,
    geometry=gpd.points_from_xy(photo_locs.longitude, photo_locs.latitude),
    crs="EPSG:4326",
)

In [None]:


# Draw every photo location, then slide OpenStreetMap tiles underneath.
osm_tiles = ctx.providers.OpenStreetMap.Mapnik
ax = photo_locs.plot()
ctx.add_basemap(ax, crs='EPSG:4326', source=osm_tiles)
# Hide ticks, spines and axis labels so only the map remains.
ax.axis('off')

In [None]:
# Read the location-history JSON export into a DataFrame (one row per record).
location_history_path = '/Users/sebastian/Locals/CDP/Data/location-history.json'
df_goog = pd.read_json(location_history_path)

In [None]:
# Number of records (rows) loaded into df_goog.
len(df_goog)

In [None]:
def _geo_uri_to_latlon(geo_uri):
    """Parse a 'geo:<lat>,<lon>' string into a (lat, lon) pair of floats.

    Returns (None, None) when the value is missing or not in that form,
    instead of raising IndexError/ValueError on the first bad record.
    """
    try:
        lat_text, lon_text = str(geo_uri).split(':')[1].split(',')[:2]
        return float(lat_text), float(lon_text)
    except (IndexError, ValueError):
        return None, None


def _visit_latlon(visit):
    """Return (lat, lon) of a visit's top candidate place, or (None, None).

    Rows without visit data hold NaN rather than a dict, so anything that is
    not a dict is treated as "no visit".
    """
    if not isinstance(visit, dict):
        return None, None
    top_candidate = visit.get('topCandidate') or {}
    return _geo_uri_to_latlon(top_candidate.get('placeLocation', ''))


# Parse each record once, then split the pairs into two float columns
# (missing coordinates become NaN).
visit_pairs = [_visit_latlon(visit) for visit in df_goog['visit']]
df_goog['visit_lat'] = pd.Series([pair[0] for pair in visit_pairs], index=df_goog.index, dtype='float64')
df_goog['visit_lon'] = pd.Series([pair[1] for pair in visit_pairs], index=df_goog.index, dtype='float64')

# Sample `visit` record:
# {
#     "hierarchyLevel": "0",
#     "topCandidate": {
#         "probability": "0.378811",
#         "semanticType": "Unknown",
#         "placeID": "ChIJpXnRDVTdrIkRIsD45T_bFg0",
#         "placeLocation": "geo:35.936674,-79.024357",
#     },
#     "probability": "0.650000",
# }

In [None]:
# Scatter the parsed visit coordinates over an OpenStreetMap basemap.
fig, ax = plt.subplots()
ax.scatter(df_goog['visit_lon'], df_goog['visit_lat'], alpha=0.5, c='red', edgecolors='none')
ax.set_title('Visit Locations')
# Note: set_axis_off() below hides these axis labels; only the title stays visible.
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
# Finish the figure (basemap + hidden axes) before showing it, so static
# backends do not render it without the basemap.
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, crs='EPSG:4326')
ax.set_axis_off()
plt.show()


In [None]:
# Sample `activity` record:
# {
#     "end": "geo:39.923785,-75.146028",
#     "topCandidate": {"type": "unknown", "probability": "0.000000"},
#     "distanceMeters": "4975.540039",
#     "start": "geo:39.952406,-75.190887",
# }
import matplotlib.pyplot as plt


def _geo_uri_to_latlon(geo_uri):
    """Parse a 'geo:<lat>,<lon>' string into a (lat, lon) pair of floats.

    Returns (None, None) when the value is missing or not in that form,
    instead of raising IndexError/ValueError on the first bad record.
    """
    try:
        lat_text, lon_text = str(geo_uri).split(':')[1].split(',')[:2]
        return float(lat_text), float(lon_text)
    except (IndexError, ValueError):
        return None, None


def _activity_endpoint(activity, key):
    """Return (lat, lon) stored under `key` ('start' or 'end') of an activity dict.

    Rows without activity data hold NaN rather than a dict; those yield
    (None, None).
    """
    if not isinstance(activity, dict):
        return None, None
    return _geo_uri_to_latlon(activity.get(key, ''))


# Extract start and end coordinates from the 'activity' column of df_goog.
# The resulting columns are float64, with NaN where no coordinate is available.
for endpoint in ('start', 'end'):
    endpoint_pairs = [_activity_endpoint(activity, endpoint) for activity in df_goog['activity']]
    df_goog[f'{endpoint}_lat'] = pd.Series([pair[0] for pair in endpoint_pairs], index=df_goog.index, dtype='float64')
    df_goog[f'{endpoint}_lon'] = pd.Series([pair[1] for pair in endpoint_pairs], index=df_goog.index, dtype='float64')

# Plot every route as an arrow from start to end.
# A single vectorised quiver call replaces one call (and one artist) per row.
routes = df_goog.dropna(subset=['start_lat', 'start_lon', 'end_lat', 'end_lon'])
fig, ax = plt.subplots()
if not routes.empty:
    ax.quiver(
        routes['start_lon'].to_numpy(),
        routes['start_lat'].to_numpy(),
        (routes['end_lon'] - routes['start_lon']).to_numpy(),
        (routes['end_lat'] - routes['start_lat']).to_numpy(),
        angles='xy', scale_units='xy', scale=1, color='blue', alpha=0.5,
    )

ax.set_title('Routes from Start to End')
# Note: set_axis_off() below hides these axis labels; only the title stays visible.
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
# Finish the figure (basemap + hidden axes) before showing it, so static
# backends do not render it without the basemap.
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, crs='EPSG:4326')
ax.set_axis_off()
plt.show()




In [None]:

# Keep only the rows that carry a parsed visit coordinate.
df_goog_v = df_goog.dropna(subset=['visit_lat', 'visit_lon'])

# 2D histogram of visit coordinates (50 bins per axis), drawn with seaborn.
fig, ax = plt.subplots()
heatmap = sns.histplot(data=df_goog_v, x='visit_lon', y='visit_lat', bins=50, pmax=0.8, cmap='hot', ax=ax)

# Add colorbar and labels
# cbar = heatmap.collections[0].colorbar
# cbar.set_label('Number of Visits')
ax.set_title('Heatmap of Visit Data')
# Note: set_axis_off() below hides these axis labels; only the title stays visible.
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
# Finish the figure (basemap + hidden axes) before showing it, so static
# backends do not render it without the basemap.
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, crs='EPSG:4326')
ax.set_axis_off()
plt.show()




In [None]:
# Filled kernel-density estimate of the visit coordinates, drawn with seaborn.
fig, ax = plt.subplots()
heatmap = sns.kdeplot(data=df_goog_v, x='visit_lon', y='visit_lat', fill=True, cmap='hot', ax=ax, thresh=0, levels=100)

# Add colorbar and labels
# cbar = heatmap.collections[0].colorbar
# cbar.set_label('Number of Visits')
ax.set_title('Heatmap of Visit Data')
# Note: set_axis_off() below hides these axis labels; only the title stays visible.
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
# Finish the figure (basemap + hidden axes) before showing it, so static
# backends do not render it without the basemap.
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, crs='EPSG:4326')
ax.set_axis_off()
plt.show()

In [None]:
import folium
from folium.plugins import MarkerCluster

# Bounding box for New York City as (min_lon, min_lat, max_lon, max_lat).
nyc_bbox = (-74.25909, 40.477399, -73.700272, 40.917577)

# Keep only the photo locations whose geometry falls inside the NYC bounding box.
photo_locs_nyc = photo_locs.cx[nyc_bbox[0]:nyc_bbox[2], nyc_bbox[1]:nyc_bbox[3]]

# Interactive folium map centred on NYC. No matplotlib figure is needed here;
# creating one only produced a stray empty plot.
m = folium.Map(location=[40.7128, -74.0060], zoom_start=12)

# Add one clustered marker per photo location (folium expects [lat, lon]).
marker_cluster = MarkerCluster().add_to(m)
for lat, lon in zip(photo_locs_nyc['latitude'], photo_locs_nyc['longitude']):
    folium.Marker(location=[lat, lon]).add_to(marker_cluster)

# Display the map inline as the cell output (nothing is written to disk).
m



In [None]:

# Rows that carry activity data. Take an explicit copy so that columns added
# to df_goog_ac later do not trigger SettingWithCopyWarning or depend on
# whether pandas returned a view of df_goog.
df_goog_ac = df_goog.dropna(subset=['activity']).copy()

In [None]:
# Extract the top-candidate activity type of every activity record.
# .get() keeps records without a 'topCandidate' or 'type' entry as None
# instead of raising KeyError.
activity_types = df_goog_ac['activity'].map(
    lambda activity: (activity.get('topCandidate') or {}).get('type')
)
# assign() returns a new frame, so this is safe even if df_goog_ac is a
# slice of df_goog (no SettingWithCopyWarning).
df_goog_ac = df_goog_ac.assign(activity_type=activity_types)
# Display the unique activity types
unique_activity_types = activity_types.unique()
print(unique_activity_types)

In [None]:
import matplotlib.pyplot as plt

# Bar chart of how often each activity type occurs.
type_counts = activity_types.value_counts()
plt.figure(figsize=(10, 6))
type_counts.plot.bar()
# Tilt the category labels so they stay readable.
plt.xticks(rotation=45)
plt.xlabel('Activity Type')
plt.ylabel('Frequency')
plt.title('Histogram of Activity Types')
plt.show()


In [None]:
# NOTE(review): in the visible notebook `df_nyc` is first assigned in the cell
# below, so this cell raises NameError on a fresh-kernel top-to-bottom run.
# The result `x` is not used by any visible cell. The 'activity' column holds
# the raw activity dicts; 'activity_type' is presumably the intended grouping
# key — confirm, or remove this scratch cell.
x=df_nyc.groupby(['hex_id', 'activity'])

In [None]:
import geopandas as gpd
import h3
import numpy as np
import lonboard
from shapely.geometry import Polygon

# Keep only activities that start inside the New York City bounding box,
# given as [min_lon, min_lat, max_lon, max_lat]. Bounds are inclusive.
nyc_bounds = [-74.25909, 40.477399, -73.700272, 40.917577]
starts_in_nyc = (
    df_goog_ac['start_lon'].between(nyc_bounds[0], nyc_bounds[2])
    & df_goog_ac['start_lat'].between(nyc_bounds[1], nyc_bounds[3])
)
# Copy so that adding 'hex_id' below does not write into a slice of df_goog_ac.
df_nyc = df_goog_ac.loc[starts_in_nyc].copy()

# Assign each start point to its H3 cell at the chosen resolution.
# h3-py v4 renamed geo_to_h3 to latlng_to_cell (same argument order), so use
# whichever the installed version provides.
resolution = 8
_latlng_to_cell = getattr(h3, 'latlng_to_cell', None) or h3.geo_to_h3
# A list comprehension (rather than DataFrame.apply(axis=1)) also works when
# the filter leaves no rows.
df_nyc['hex_id'] = [
    _latlng_to_cell(lat, lon, resolution)
    for lat, lon in zip(df_nyc['start_lat'], df_nyc['start_lon'])
]

# Count trips per hexagon and activity type: one row per hex, one column per type.
hex_activity_counts = df_nyc.groupby(['hex_id', 'activity_type']).size().unstack(fill_value=0)

# Add a total trip column summing all the others
hex_activity_counts['total_trips'] = hex_activity_counts.sum(axis=1)

# Turn 'hex_id' from the index back into a regular column.
hex_activity_counts = hex_activity_counts.reset_index()

In [None]:
# Inspect the row index of hex_activity_counts (reset_index() was applied where it is built).
hex_activity_counts.index

In [None]:
# Write geojson file
# Function to get polygon for H3 cell
def h3_to_polygon(h3_cell):
    """Return the boundary of an H3 cell as a shapely Polygon in (lng, lat) order.

    Works with both h3-py v3 (`h3_to_geo_boundary`) and v4 (`cell_to_boundary`),
    since the v3 function name no longer exists in v4.

    Args:
        h3_cell: H3 cell index, as produced when binning points into hexagons.

    Returns:
        shapely.geometry.Polygon whose vertices are (longitude, latitude) pairs.
    """
    if hasattr(h3, 'h3_to_geo_boundary'):
        # v3 API: geo_json=True yields (lng, lat) pairs, i.e. shapely's (x, y) order.
        boundary = h3.h3_to_geo_boundary(h3_cell, geo_json=True)
    else:
        # v4 API: vertices come back as (lat, lng); swap them to (x, y) = (lng, lat).
        boundary = [(lng, lat) for lat, lng in h3.cell_to_boundary(h3_cell)]
    return Polygon(boundary)

# Build a GeoDataFrame with one hexagon polygon per row of hex_activity_counts.
# H3 cell boundaries are longitude/latitude coordinates, so declare the CRS
# explicitly rather than leaving it undefined.
gdf = gpd.GeoDataFrame(
    hex_activity_counts,
    geometry=hex_activity_counts['hex_id'].apply(h3_to_polygon),
    crs="EPSG:4326",
)

# Serialise the GeoDataFrame as a GeoJSON FeatureCollection string.
geojson = gdf.to_json()

# Save GeoJSON to file (written as UTF-8 regardless of the platform default).
with open('h3_trip_data.geojson', 'w', encoding='utf-8') as f:
    f.write(geojson)

print("GeoJSON file created successfully.")

In [None]:
# List the column labels of hex_activity_counts.
hex_activity_counts.columns