In [1]:
import pandas as pd
import geopandas as gpd
from bokeh.plotting import figure, show, output_file, ColumnDataSource
from bokeh.models import LabelSet, HoverTool
from bokeh.palettes import Viridis, Category20

In [None]:
# Load combined dataset
data = pd.read_csv('minard_fixed.csv')
print(data.head())

# Load the rivers data
rivers = gpd.read_file("rivers_europe.geojson")

   lonc  latc       city  lont  temp  days    date  lonp  latp  surviv direc  \
0  24.0  55.0      Kowno  37.6   0.0   6.0  Oct-18  24.0  54.9  340000     A   
1  25.3  54.7      Wilna  36.0   0.0   6.0  Oct-24  24.5  55.0  340000     A   
2  26.4  54.4   Smorgoni  33.2  -9.0  16.0  Nov-09  25.5  54.5  340000     A   
3  26.8  54.3  Molodexno  32.0 -21.0   5.0  Nov-14  26.0  54.7  320000     A   
4  27.7  55.2  Gloubokoe  29.2 -11.0  10.0       -  27.0  54.8  300000     A   

   division  
0         1  
1         1  
2         1  
3         1  
4         1  


In [3]:
# Parse the data into separate dataframes
cities = data[["lonc", "latc", "city"]].dropna()
cities.columns = ['Longitude', 'Latitude', 'City']

temperatures = data[["temp", "date"]].dropna()

troops = data[["lonp", "latp", "surviv", "direc", "division"]].dropna()
troops_attack = troops[troops.direc == 'A']
troops_retreat = troops[troops.direc == 'R']




In [None]:
# Helper function to extract coordinates from a geometry
def extract_coordinates(geometry):
    if geometry.geom_type == 'LineString':
        return [list(geometry.coords.xy[0]), list(geometry.coords.xy[1])]
    elif geometry.geom_type == 'MultiLineString':
        xs, ys = [], []
        for line in geometry.geoms:  
            xs.append(list(line.coords.xy[0]))
            ys.append(list(line.coords.xy[1]))
        return xs, ys
    else:
        return None, None



In [5]:
# Extracting xs and ys from the river geometries
xs, ys = [], []
for geom in rivers.geometry:
    geom_xs, geom_ys = extract_coordinates(geom)
    if geom_xs and geom_ys:
        xs.extend(geom_xs)
        ys.extend(geom_ys)



In [6]:
# Set up the Bokeh plot
output_file("minard_plot.html")
p = figure(title="Napoleon's Russian Campaign", 
           x_range=(23, 38.2), 
           y_range=(53.3, 57), 
           width=1000, 
           height=600)

# Add the rivers
p.multi_line(xs=xs, ys=ys, color='#e5e5e5', line_width=2)

# Add the cities
city_source = ColumnDataSource(cities)
p.scatter(x='Longitude', y='Latitude', size=10, source=city_source, color='#475768', legend_label='Cities', level='overlay')

# Add city labels
city_labels = LabelSet(x='Longitude', y='Latitude', text='City', 
                       source=city_source, text_font_size='10pt',
                       text_align='left', text_baseline='right')
p.add_layout(city_labels)

# Add the temperature plot
temp_source = ColumnDataSource(temperatures)
p.line(x='date', y='temp', color='#EAC260', line_width=3, line_dash='dotted')

# Add temperature labels
temp_labels = LabelSet(x='date', y='temp', text='temp', 
                       source=temp_source, text_font_size='10pt',
                       text_align='center', text_baseline='middle')
p.add_layout(temp_labels)



In [7]:
# Create color palettes for attack and retreat
attack_colors = Viridis[3]
retreat_colors = Viridis[4]

# Use defined colors
attack_colors = ['#F0E68C', '#FFDAB9', '#D2B48C', '#f5f600']
retreat_colors = ['#20B2AA', '#87CEFA', '#6495ED', '#f5f600']

# Add the troops with updated logic for line widths
for div in (1, 2):
    attack_data = troops_attack[troops_attack.division == div]
    retreat_data = troops_retreat[troops_retreat.division == div]

    if attack_data.empty or retreat_data.empty:
        continue
    
    # Ensure there are no index issues
    line_widths = [x / 6000 for x in attack_data.surviv] if len(attack_data.surviv) > 0 else [1]  # Default to width 1 if empty

    p.multi_line(xs=[list(attack_data.lonp)], 
                 ys=[list(attack_data.latp)], 
                 color=attack_colors[div - 1], 
                 line_width=line_widths, 
                 legend_label=f"Div {div} Attack")
    
    line_widths_retreat = [x / 6000 for x in retreat_data.surviv] if len(retreat_data.surviv) > 0 else [1]
    p.multi_line(xs=[list(retreat_data.lonp)], 
                 ys=[list(retreat_data.latp)], 
                 color=retreat_colors[div - 1], 
                 line_width=line_widths_retreat, 
                 legend_label=f"Div {div} Retreat")





In [8]:
# Update Div 1 attack path to extend to Moscou
if not troops_attack.empty and not troops_retreat.empty:
    div1_attack_data = troops_attack[troops_attack.division == 1]
    div1_retreat_data = troops_retreat[troops_retreat.division == 1]

    # Check if the attack path is missing the last point to connect to the retreat path
    if div1_attack_data.iloc[-1]['lonp'] != div1_retreat_data.iloc[0]['lonp'] or \
       div1_attack_data.iloc[-1]['latp'] != div1_retreat_data.iloc[0]['latp']:
        # Add a new row to the attack data to extend the path to the first retreat point
        new_row = {'lonp': div1_retreat_data.iloc[0]['lonp'],
                   'latp': div1_retreat_data.iloc[0]['latp'],
                   'surviv': div1_attack_data.iloc[-1]['surviv'],
                   'direc': 'A',
                   'division': 1}
        div1_attack_data = pd.concat([div1_attack_data, pd.DataFrame([new_row])], ignore_index=True)

    p.multi_line(xs=[list(div1_attack_data.lonp)], 
                 ys=[list(div1_attack_data.latp)], 
                 color=attack_colors[0], 
                 line_width=[x / 6000 for x in div1_attack_data.surviv], 
                 legend_label="Div 1 Attack")
    
    p.multi_line(xs=[list(div1_retreat_data.lonp)], 
                 ys=[list(div1_retreat_data.latp)], 
                 color=retreat_colors[0], 
                 line_width=[x / 6000 for x in div1_retreat_data.surviv], 
                 legend_label="Div 1 Retreat")
    




In [None]:
# Update Div 2 attack path to extend to Polotzk
if not troops_attack.empty and not troops_retreat.empty:
    div2_attack_data = troops_attack[troops_attack.division == 2]
    div2_retreat_data = troops_retreat[troops_retreat.division == 2]

    # Check if the attack path is missing the last point to connect to the retreat path
    if div2_attack_data.iloc[-1]['lonp'] != div2_retreat_data.iloc[0]['lonp'] or \
       div2_attack_data.iloc[-1]['latp'] != div2_retreat_data.iloc[0]['latp']:
        # Add a new row to the attack data to extend the path to the first retreat point
        new_row = {'lonp': div2_retreat_data.iloc[0]['lonp'],
                   'latp': div2_retreat_data.iloc[0]['latp'],
                   'surviv': div2_attack_data.iloc[-1]['surviv'],  
                   'direc': 'A',
                   'division': 2}
        div2_attack_data = pd.concat([div2_attack_data, pd.DataFrame([new_row])], ignore_index=True)
    
    # Calculate line widths dynamically based on survivors
    div2_attack_line_widths = [x / 6000 for x in div2_attack_data.surviv]
    div2_retreat_line_widths = [x / 6000 for x in div2_retreat_data.surviv]
    
    p.multi_line(xs=[list(div2_attack_data.lonp)], 
                 ys=[list(div2_attack_data.latp)], 
                 color=attack_colors[1],  
                 line_width=div2_attack_line_widths, 
                 legend_label="Div 2 Attack")
    
    p.multi_line(xs=[list(div2_retreat_data.lonp)], 
                 ys=[list(div2_retreat_data.latp)], 
                 color=retreat_colors[1],  
                 line_width=div2_retreat_line_widths, 
                 legend_label="Div 2 Retreat")
    





In [None]:
p.xaxis.axis_label = "Longitude"
p.yaxis.axis_label = "Latitude"

p.add_tools(HoverTool(tooltips=[("Longitude", "$x"), ("Latitude", "$y")]))
p.legend.click_policy = "hide"
p.legend.location = "top_left"
p.legend.label_height = 10
p.legend.label_width = 10
p.legend.spacing = 15

show(p)

ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : x='date' [no close matches], y='temp' [no close matches] {renderer: GlyphRenderer(id='p1074', ...)}
