# Finalizing the update of the pre-Columbian map
## New version: Murdock polygons at the ends of America 

In [26]:
import os
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.colors as mcolors
import mapclassify
from matplotlib.patches import Patch
from pathlib import Path
import matplotlib.patches as mpatches

In [27]:
# Project root. The default is the original Dropbox location; set the
# MEASURES_WORK_DIR environment variable to point the notebook at another
# copy of the project without editing this cell.
base_path = Path(
    os.environ.get(
        "MEASURES_WORK_DIR",
        "C:/Users/juami/Dropbox/RAships/2-Folklore-Nathan-Project/EA-Maps-Nathan-project/Measures_work",
    )
)

# Input shapefiles (both live under maps/interim)
maps_interim = base_path / "maps" / "interim"
precol_path = maps_interim / "pre_columbian_update" / "world_thiessen_UPDATE" / "EAESW_XY_World_Thiessen_Update.shp"
basemap_path = maps_interim / "World" / "world_dissolve_nolakes.shp"

# Input CSV tables
EA_csv_path = base_path / "data" / "raw" / "ethnographic_atlas" / "ethnographic_atlas_east_siberia_wes_vfinal_input_ethnclusters.csv"
precol_csv_path = base_path / "data" / "interim" / "Motifs_EA_WESEE_humanvsnature_all.csv"

# Delivery folder for the presentation maps
export_path = base_path / "deliveries" / "maps_presentation_12052025"

## Importing shapes and data 

In [28]:
# Geometry layers: the pre-Columbian polygon layer and the world basemap
precol_shape, basemap_shape = (
    gpd.read_file(layer_path) for layer_path in (precol_path, basemap_path)
)

# Attribute tables: the EA CSV and the pre-Columbian CSV
EA_df, precol_df = (
    pd.read_csv(table_path) for table_path in (EA_csv_path, precol_csv_path)
)

In [29]:

# Attach the EA_df columns to precol_df, matching rows on the shared `v107`
# key. The left join keeps every row of precol_df.
# NOTE(review): columns that exist in both tables come back suffixed _x / _y
# (e.g. v114_x and v114_y in the preview), and because the result is written
# back to `precol_df`, re-running this cell merges on top of the merged frame.
precol_df = precol_df.merge(EA_df, on='v107', how='left')

# Preview the merged table
precol_df.head()

Unnamed: 0,v91_x,v92_x,v93_x,v107,v114_x,atlas,group_berezkin,n_motifs,n_triplets_scl,n_triplets_excl_scl,...,v98,v99,v100,v102,v104,v106,v112,v113,v114_y,v114_order
0,A,a,5.0,MBUTI . .,1.0,MBUTI,Mbuti & Efe Pigmies,29.0,116.0,102.0,...,3,2,0,1930.0,2,28,8.0,2.0,1.0,1.0
1,A,a,1.0,KUNG. . .,2.0,KUNG,Bushmen,60.0,201.0,187.0,...,1,0,0,1950.0,-20,21,1.0,2.0,2.0,1.0
2,A,a,7.0,NARON . .,2.0,NARON,Bushmen,60.0,201.0,187.0,...,1,0,0,1910.0,-20,24,0.0,0.0,2.0,1.0
3,A,a,8.0,XAM . . .,2.0,XAM,Bushmen,60.0,201.0,187.0,...,1,0,0,1850.0,-30,23,0.0,0.0,2.0,1.0
4,A,a,4.0,BERGDAMA.,3.0,BERGDAMA,Khoikhoi,70.0,274.0,255.0,...,1,0,0,1920.0,-22,16,1.0,0.0,3.0,1.0


## Creating a single unique identifier (`v107_master`) from the per-world-region v107 columns

In [30]:

# Build v107_master by taking EAW_XY_Ame first and, wherever the value is
# still missing, falling back to the next column in the order listed below.
fallback_columns = ['EAW_XY_Isl', 'EAESW_XY_E', 'EAW_XY_Afr', 'v107']

master_id = precol_shape['EAW_XY_Ame']
for fallback_name in fallback_columns:
    master_id = master_id.fillna(precol_shape[fallback_name])

precol_shape['v107_master'] = master_id

# Show the rows that still have NO identifier after all fallbacks
# (v107_master is null)
precol_shape.loc[precol_shape['v107_master'].isna()]

Unnamed: 0,EAW_XY_Ame,ethnicity,matched_EA,distance_t,final_ethn,v107,EA_v107_cl,EAW_XY_Isl,EAESW_XY_E,EAW_XY_Afr,Shape_Leng,Shape_Area,geometry,v107_master
287,,Algonkin,,3.486645,,,,,,,26.144084,31.907416,"POLYGON ((-75.09148 47.98179, -74.85682 47.932...",
288,,Antena,,0.946706,,,,,,,11.747592,6.784199,"POLYGON ((-144.03291 61.18249, -144.58041 61.0...",
294,,Beothuk,,9.053301,,,,,,,99.310478,13.277803,"POLYGON ((-55.89334 51.62297, -55.89134 51.616...",
296,,Bo,,4.505095,,,,,,,21.976111,4.914605,"POLYGON ((-82.27888 64.738, -82.25465 64.72163...",
301,,Comok,,0.164759,,,,,,,1.721294,0.02581,"MULTIPOLYGON (((-122.824 49.58358, -122.58263 ...",
308,,Hare,,10.535413,,,,,,,24.444795,18.840526,"MULTIPOLYGON (((-124.61987 66.21565, -124.6193...",
317,,Lake,,0.524181,,,,,,,9.799024,5.224354,"POLYGON ((-117.86534 51.67423, -117.84588 51.4...",
320,,Malecite,,0.107364,,,,,,,0.627659,9.1e-05,"POLYGON ((-69.93548 47.57414, -69.93584 47.574...",
321,,Meckenzie Eskimo,,8.607264,,,,,,,115.055372,37.959786,"MULTIPOLYGON (((-117.69488 68.97889, -117.6189...",
324,,Mountain,,9.205445,,,,,,,16.652563,13.191665,"POLYGON ((-124.79178 65.9327, -124.20238 65.70...",


In [31]:
# Columns to discard now that v107_master exists: the identifier source
# columns plus the other listed attribute columns.
columns_to_discard = [
    'EAW_XY_Ame', 'EAW_XY_Isl', 'EAESW_XY_E', 'EAW_XY_Afr', 'v107',
    'distance_t', 'final_ethn', 'EA_v107_cl', 'matched_EA',
]

# errors='ignore' lets the drop succeed even if some columns are already gone;
# `ethnicity` is renamed to `ethnicity_murdock` in the same pass.
precol_shape = (
    precol_shape
    .drop(columns=columns_to_discard, errors='ignore')
    .rename(columns={'ethnicity': 'ethnicity_murdock'})
)
precol_shape.head()

Unnamed: 0,ethnicity_murdock,Shape_Leng,Shape_Area,geometry,v107_master
0,,14.354424,11.523335,"POLYGON ((-100.5 20.5, -100 19.5, -100 17, -10...",TARASCO .
1,,18.556068,13.586026,"POLYGON ((-102.85135 26.98649, -106.96064 23.9...",TEPEHUAN.
2,,14.434656,11.755602,"POLYGON ((-103.61111 29.38889, -103.07143 27.5...",TARAHUMAR
3,,10.220881,2.938494,"MULTIPOLYGON (((-114.94139 27.83792, -115.0046...",KILIWA. .
4,,21.987951,6.508624,"MULTIPOLYGON (((-111.80491 26.89018, -111.8450...",SERI. . .


## Merging shape with the EA information data 

In [32]:

# Attach the attribute table to the polygons: the shape's `v107_master` is
# matched against `v107` in precol_df. The left join keeps every polygon,
# including those with no matching row in the table.
precol_shape_merged = precol_shape.merge(
    precol_df,
    how='left',
    left_on='v107_master',
    right_on='v107',
)

# Preview the merged layer
precol_shape_merged.head()

Unnamed: 0,ethnicity_murdock,Shape_Leng,Shape_Area,geometry,v107_master,v91_x,v92_x,v93_x,v107,v114_x,...,v98,v99,v100,v102,v104,v106,v112,v113,v114_y,v114_order
0,,14.354424,11.523335,"POLYGON ((-100.5 20.5, -100 19.5, -100 17, -10...",TARASCO .,N,j,8.0,TARASCO .,339.0,...,99.0,0.0,0.0,1600.0,19.0,-101.0,1.0,0.0,339.0,1.0
1,,18.556068,13.586026,"POLYGON ((-102.85135 26.98649, -106.96064 23.9...",TEPEHUAN.,N,i,9.0,TEPEHUAN.,333.0,...,0.0,0.0,0.0,1920.0,27.0,-107.0,0.0,0.0,333.0,1.0
2,,14.434656,11.755602,"POLYGON ((-103.61111 29.38889, -103.07143 27.5...",TARAHUMAR,N,i,1.0,TARAHUMAR,336.0,...,46.0,0.0,0.0,1930.0,28.0,-107.0,1.0,0.0,336.0,1.0
3,,10.220881,2.938494,"MULTIPOLYGON (((-114.94139 27.83792, -115.0046...",KILIWA. .,N,c,34.0,KILIWA. .,302.0,...,43.0,53.0,0.0,1880.0,31.0,-115.0,0.0,0.0,302.0,1.0
4,,21.987951,6.508624,"MULTIPOLYGON (((-111.80491 26.89018, -111.8450...",SERI. . .,N,i,4.0,SERI. . .,334.0,...,99.0,0.0,0.0,1900.0,29.0,-112.0,0.0,0.0,334.0,1.0


## Exporting the final shape (as GPKG to avoid problems in ArcGIS)

In [33]:
# Write the merged GeoDataFrame as a GeoPackage (GPKG driver, not a shapefile),
# the format chosen for ArcGIS compatibility. The file is saved next to the
# input layer in the world_thiessen_UPDATE folder.
merged_gpkg_path = (
    base_path / "maps" / "interim" / "pre_columbian_update" / "world_thiessen_UPDATE"
    / "EAESW_XY_World_Thiessen_MurdockUpdate_EAinfo.gpkg"
)
precol_shape_merged.to_file(merged_gpkg_path, driver="GPKG")