In [69]:
import pandas as pd
import pydeck as pdk
import ipywidgets as widgets

In [2]:
# NOTE(review): absolute, machine-specific location; adjust before running elsewhere.
leopard_csv_path = '/Users/carboni/Downloads/Leopard.csv'
df = pd.read_csv(leopard_csv_path)

In [3]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0,Cluster,manifest_url,canvas_number,image_url,City,Country,Title,wkt,Date,Journal Type,Unnamed: 10
0,Bête Seule (Tigre),https://iiif.unige.ch/dhportal/ug796777/manifest,26.0,https://iiif.unige.ch/iiif/2/visualcontagions/...,"Washington, D.C.",United States of America,National Geographic Society,POINT(-77.036666666667 38.895),2019-01-01,Geography,
1,Fourrure / Habit Léopard,https://iiif.unige.ch/dhportal/ug802806/manifest,17.0,https://iiif.unige.ch/iiif/2/visualcontagions/...,"Washington, D.C.",United States of America,National Geographic Society,POINT(-77.036666666667 38.895),2018-01-01,Geography,
2,Fourrure / Habit Léopard,https://iiif.unige.ch/dhportal/ug615338/manifest,74.0,https://iiif.unige.ch/iiif/2/visualcontagions/...,"Washington, D.C.",United States of America,National Geographic Society,POINT(-77.036666666667 38.895),2018-01-01,Geography,
3,Fourrure / Habit Léopard,https://iiif.unige.ch/dhportal/ug771741/manifest,75.0,https://iiif.unige.ch/iiif/2/visualcontagions/...,"Washington, D.C.",United States of America,National Geographic Society,POINT(-77.036666666667 38.895),2018-01-01,Geography,
4,Bête et Corps Léopard,https://digi.ub.uni-heidelberg.de/diglit/iiif/...,54.0,https://digi.ub.uni-heidelberg.de/iiif/2/porta...,Gdańsk,Poland,Porta aurea : rocznik Instytutu Historii Sztuk...,POINT(18.655 54.348055555555554),2018-01-01,Art History,


### Extract longitude and latitude from the `wkt` column

In [4]:
# The `wkt` column holds text such as "POINT(<x> <y>)"; the two captured groups
# are stored as longitude (first) and latitude (second).
wkt_point_pattern = r'POINT\(([^ ]+) ([^ ]+)\)'
df[['longitude', 'latitude']] = df['wkt'].str.extract(wkt_point_pattern, expand=True)

# The captures are strings, so convert both columns to floats.
for coordinate_column in ('latitude', 'longitude'):
    df[coordinate_column] = df[coordinate_column].astype(float)

### time

In [64]:
# Replace the raw Date column with its parsed datetime equivalent.
df['Date'] = df['Date'].pipe(pd.to_datetime)

In [6]:
# Order the records chronologically. A stable sort is requested so that rows
# sharing the same date keep their existing relative order; the segments built
# later connect consecutive rows, so tie order affects the result.
df = df.sort_values(by='Date', kind='mergesort')

### colors

In [65]:
def calculate_color(date, start_year=1800, end_year=2000):
    """Map the year of ``date`` onto a red-to-blue RGBA gradient.

    The year is placed linearly between ``start_year`` (scale 0) and
    ``end_year`` (scale 1). Years outside that range are clamped to the
    nearest end so every channel stays within 0-255.

    Args:
        date: Any object exposing a ``.year`` attribute.
        start_year: Year mapped to the start of the gradient, ``[255, 0, 0, 255]``.
        end_year: Year mapped to the end of the gradient, ``[0, 128, 255, 255]``.

    Returns:
        A list ``[red, green, blue, alpha]`` of integers; alpha is always 255.

    Raises:
        ZeroDivisionError: If ``start_year`` equals ``end_year``.
    """
    year = date.year
    scale = (year - start_year) / (end_year - start_year)
    # Without clamping, a year past end_year yields a negative red channel and
    # a blue channel above 255 (and the reverse for years before start_year).
    scale = min(max(scale, 0.0), 1.0)
    red = int(255 * (1 - scale))
    green = int(128 * scale)
    blue = int(255 * scale)
    alpha = 255
    return [red, green, blue, alpha]

# Compute one RGBA list per row from its Date, using calculate_color's default year range.
df['color'] = df['Date'].apply(calculate_color)

### create lines

In [66]:
# Build one segment per pair of consecutive rows of df (in df's current row order).
# The needed columns are pulled out once as plain lists: indexing
# df.iloc[i][...] inside the loop materialises a full row Series on every access.
longitudes = df['longitude'].tolist()
latitudes = df['latitude'].tolist()
colors = df['color'].tolist()
dates = df['Date'].tolist()

lines = [
    {
        'from': [longitudes[i], latitudes[i]],
        'to': [longitudes[i + 1], latitudes[i + 1]],
        # The segment takes the color of its starting row ...
        'color': colors[i],
        # ... and is stamped with the date of its ending row.
        'time': str(dates[i + 1]),
    }
    for i in range(len(df) - 1)
]

### Scatterplot

In [36]:
# One record per distinct (longitude, latitude) pair found in df.
coordinate_columns = ['longitude', 'latitude']
unique_locations = (
    df[coordinate_columns]
    .drop_duplicates()
    .to_dict(orient='records')
)

# Reshape each record into the {'position': [lon, lat]} form read by the scatter layer.
scatter_data = [
    {'position': [point['longitude'], point['latitude']]}
    for point in unique_locations
]

In [37]:
# Accessor and styling options for the scatter layer.
scatter_options = {
    'get_position': 'position',
    'get_radius': 100,  # Radius of the scatter plot points
    'get_color': [0, 0, 255],  # Blue color for the scatter plot points
    'pickable': True,
}
scatter_layer = pdk.Layer("ScatterplotLayer", data=scatter_data, **scatter_options)

### Line Layer

In [67]:
# Each entry of `lines` supplies its own endpoints ('from' / 'to') and 'color'.
line_options = {
    'get_source_position': 'from',
    'get_target_position': 'to',
    'get_width': 5,
    'get_color': "color",
    'pickable': True,
}
line_layer = pdk.Layer("LineLayer", data=lines, **line_options)


## Visualization

In [68]:
# Start the camera at the mean coordinate of all rows.
# (The original call was missing its closing parenthesis, which made the cell a SyntaxError.)
view_state = pdk.ViewState(
    latitude=df['latitude'].mean(),
    longitude=df['longitude'].mean(),
    zoom=5,
)

# Only line_layer is passed to the deck here; the result is exported as an HTML file.
r = pdk.Deck(layers=[line_layer], initial_view_state=view_state)
r.to_html('line_layer_dynamic_color.html')

## Lines based on content (grouped by cluster)

In [73]:
def generate_color_palette(num_colors):
    """Return ``num_colors`` RGBA colors with evenly spaced hues.

    Hue ``i / num_colors`` is used for the i-th color, at a fixed lightness
    of 0.5 and saturation of 0.9. Each color is a list of four integers
    ``[red, green, blue, 255]``.
    """
    import colorsys

    lightness, saturation = 0.5, 0.9

    def to_rgba(index):
        # Convert one HLS triple to 0-255 integer channels and append full opacity.
        channels = colorsys.hls_to_rgb(index / num_colors, lightness, saturation)
        return [int(channel * 255) for channel in channels] + [255]

    return [to_rgba(index) for index in range(num_colors)]

In [76]:
# Assign each distinct Cluster value its own palette color, in order of first appearance.
unique_clusters = df["Cluster"].unique()
color_palette = generate_color_palette(len(unique_clusters))
cluster_colors = dict(zip(unique_clusters, color_palette))

In [84]:
# Build segments between chronologically consecutive records of the same cluster.
lines = []
for cluster, group_df in df.groupby('Cluster'):
    # Stable sort: records sharing a date keep their existing relative order.
    sorted_df = group_df.sort_values(by='Date', kind='mergesort')
    # The color is the same for every segment of the cluster, so look it up once.
    cluster_color = cluster_colors.get(cluster, [0, 0, 0, 255])
    # Convert the needed columns to plain records once instead of building a
    # full row Series through sorted_df.iloc[i][...] for every field.
    stops = sorted_df[['longitude', 'latitude', 'Date', 'City']].to_dict('records')
    for origin, destination in zip(stops, stops[1:]):
        lines.append({
            'from': [origin['longitude'], origin['latitude']],
            'to': [destination['longitude'], destination['latitude']],
            'color': cluster_color,
            # The segment is stamped with the date of its starting record.
            'time': origin['Date'].strftime('%Y-%m-%d'),
            'cluster': cluster,
            'origin': origin['City'],
            'destination': destination['City'],
        })

In [85]:
# Hover tooltip listing a segment's time, cluster, origin and destination.
# The {placeholders} are named after the keys stored in each `lines` entry.
tooltip_rows = [
    '<b>Time:</b> {time}',
    '<b>Cluster:</b> {cluster}',
    '<b>Origin:</b> {origin}',
    '<b>Destination:</b> {destination}',
]
tooltip = dict(
    html='<br>'.join(tooltip_rows),
    style=dict(color='white'),
)

In [86]:
# Line layer over the per-cluster segments; endpoints and color come from each entry.
cluster_line_options = {
    'get_source_position': 'from',
    'get_target_position': 'to',
    'get_width': 5,
    'get_color': 'color',
    'pickable': True,
}
line_layer = pdk.Layer('LineLayer', data=lines, **cluster_line_options)

# Initial camera: centered on the mean coordinate of all rows.
mean_latitude = df['latitude'].mean()
mean_longitude = df['longitude'].mean()
view_state = pdk.ViewState(latitude=mean_latitude, longitude=mean_longitude, zoom=5)

# Assemble the deck with the tooltip attached and export it as an HTML file.
r = pdk.Deck(
    layers=[line_layer],
    initial_view_state=view_state,
    tooltip=tooltip,
)
r.to_html('line_layer_clusters_with_enhanced_tooltip.html')