In [1]:
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import Jitter
from bokeh.io import output_notebook, output_file, show
from bokeh.tile_providers import STAMEN_TERRAIN
from bokeh.palettes import magma, inferno, plasma, viridis

import os

import pandas as pd
import numpy as np

import pyproj
import geocoder

output_notebook()

In [2]:
fn = "Cucurbitae_Ethanol_collections_PBARC_SELECTIONS_Feb2016_SIM_selections_ms.xlsx"

In [3]:
s1 = pd.read_excel(fn, sheetname="Sheet1")
s2 = pd.read_excel(fn, sheetname="Sheet2")

In [4]:
fns = {fn.strip().split(".csv")[0]:fn for fn in os.listdir(".") if fn in ["assign.csv", "eig.csv", "ind.coord.csv", "posterior.csv", "grp.csv"]}
dfs = {n:pd.read_csv(fn) for n,fn in fns.items()}

for n,df in dfs.items():
    df.rename(columns={df.columns[0]: 'key'}, inplace=True)

posterior = dfs["posterior"].rename(columns=lambda x: x.split(".")[-1] if "posterior." in x else x)
posterior = posterior.join(dfs["assign"]["assign"])
posterior["posterior_assign"] = posterior.apply(lambda row: row[row["assign"]], axis=1)
posterior = posterior.join(dfs["grp"]["grp"])
posterior["posterior_grp"] = posterior.apply(lambda row: row[row["grp"]], axis=1)

df = dfs["ind.coord"].rename(columns=lambda x: x.split(".")[-1] if "ind.coord." in x else x)
df = df.join([dfs["assign"]["assign"], dfs["grp"]["grp"],
              posterior["posterior_assign"], posterior["posterior_grp"]])

In [5]:
df["MS"] = df["key"].apply(lambda x: int(x.split("_")[-1]) if x.split("_")[-1].isdigit() else -1)

In [6]:
def code(ms):
    for index, row in s2.iterrows():
        if row["MS_start"] <= ms <= row["MS_end"]:
            return int(row["Code"])
    return -1

In [7]:
df["Code"] = df["MS"].apply(code)

In [8]:
df = df.sort_values(by=["Code", "key"])

In [9]:
df = df.merge(s2[["Code", "Country/Territory", "Island/Province/State"]], on="Code", how="left")
df = df.merge(s1[["Code", "Locality", "Decimal Latitude", "Decimal Longitude", "Elevation", "Date collected", "Collector", "Attractant", "Sex", "Comments", "Comments 2"]], on="Code", how="left")

In [10]:
g_df = df.loc[(pd.isnull(df["Decimal Latitude"])) & (pd.notnull(df["Country/Territory"]))]
g = {name:geocoder.google(name).latlng for name in g_df["Country/Territory"].unique()}

In [11]:
df["Decimal Latitude"] = df.apply(lambda x: g[x["Country/Territory"]][0]
                                  if (pd.isnull(x["Decimal Latitude"]) & pd.notnull(x["Country/Territory"]))
                                  else x["Decimal Latitude"], axis=1)

df["Decimal Longitude"] = df.apply(lambda x: g[x["Country/Territory"]][1]
                                   if (pd.isnull(x["Decimal Longitude"]) & pd.notnull(x["Country/Territory"]))
                                   else x["Decimal Longitude"], axis=1)

In [17]:
# color and size pallets
SIZES = list(range(6, 22, 3))
COLORS = plasma(max(len(df["grp"].unique()), len(df["assign"].unique())))

# size column
groups = pd.qcut(df["posterior_assign"].values, len(SIZES))
sz = [SIZES[xx] for xx in groups.codes]

# color column
values = sorted(df["assign"].unique(), key=lambda x: int(x))
codes = dict(zip(values, range(len(values))))
groups = [codes[val] for val in df["assign"].values]
c = [COLORS[xx] for xx in groups]

# transform coords to map projection
wgs84 = pyproj.Proj(init="epsg:4326")
webMer = pyproj.Proj(init="epsg:3857")
df["easting"] = np.nan
df["northing"] = np.nan
df.loc[pd.notnull(df["Decimal Longitude"]), "easting"], df.loc[pd.notnull(df["Decimal Latitude"]), "northing"] = zip(
    *df.loc[pd.notnull(df["Decimal Longitude"])].apply(
        lambda x: pyproj.transform(wgs84, webMer, x["Decimal Longitude"], x["Decimal Latitude"]), axis=1))

# load data to a ColumnDataSource
cds_df = pd.DataFrame({"xs":df["easting"], "ys":df["northing"], "c":c, "sz":sz})
cds = ColumnDataSource(cds_df.loc[pd.notnull(cds_df["xs"])])

bound = 20000000 # meters
fig = figure(tools='pan, wheel_zoom', x_range=(-bound, bound), y_range=(-bound, bound))
fig.axis.visible = False
fig.add_tile(STAMEN_TERRAIN)
fig.circle(x={'field': "xs", 'transform': Jitter(width=}, y={'field': "ys", 'transform': Jitter(width=)}, color="c", size="sz", source=cds)
output_file("map.html")
show(fig)

TypeError: float() argument must be a string or a number, not 'Range1d'