In [8]:
import pandas as pd
import plotnine as pn

In [9]:
# Constants for paper
params = [
    0, 1, 2, 4, 8, 16,
]

num_trials = 6

In [10]:
# Filepaths

#############################################################################
# Analysis files
#############################################################################

# Pareto frontier
curve_fn = f"/Users/nathanielimel/lps/projects/rdsg/outputs/states=10/signals=10/distortion=squared_dist/curve_points.csv"

# Explored points
sampled_fn = f"/Users/nathanielimel/lps/projects/rdsg/outputs/states=10/signals=10/distortion=squared_dist/sampled_points.csv"

# Counterpart points
counterpart_fn = f"/Users/nathanielimel/lps/projects/rdsg/outputs/states=10/signals=10/distortion=squared_dist/counterpart_points.csv"

# Counterpart trajectories
counterpart_traj_fn = f"/Users/nathanielimel/lps/projects/rdsg/outputs/states=10/signals=10/distortion=squared_dist/counterpart_trajectories.csv"

#############################################################################
# Simulation points
#############################################################################

# Reinforcement learning path
rl_fns = {
    f"{float(param)}":f"/Users/nathanielimel/lps/projects/rdsg/multirun/states=10/signals=10/distortion=squared_dist/similarity=nosofsky_normed/dynamics=reinforcement_learning/num_trials={num_trials}/sim_param={param}/simulation_points.csv"
    for param in params
}

rl_traj_fns = {
    key: val.replace("simulation_points", "mean_points") for key, val in rl_fns.items()
}

# spillover generalization learning
gl_fns = {
    key: value.replace("reinforcement_learning", "spillover_learning") for key, value in rl_fns.items()
}

gl_traj_fns = {
    key: val.replace("simulation_points", "mean_points") for key, val in gl_fns.items()
}

# Replicator dynamic path
rep_fns = {
    key: value.replace("reinforcement_learning", "replicator_dynamic") for key, value in rl_fns.items()
}

rep_traj_fns = {
    key: val.replace("simulation_points", "mean_points") for key, val in rep_fns.items()
}

# Noisy replicator dynamic path
nrd_fns = {
    key: value.replace("replicator_dynamic", "noisy_replicator_dynamic") for key, value in rep_fns.items()
}

nrd_traj_fns = {
    key: val.replace("simulation_points", "mean_points") for key, val in nrd_fns.items()
}

In [11]:
# load dataframes
curve_data = pd.read_csv(curve_fn)
sampled_data = pd.read_csv(sampled_fn)
counterpart_final_data = pd.read_csv(counterpart_fn)
counterpart_traj_data = pd.read_csv(counterpart_traj_fn)

rl_sim_dfs = {k: pd.read_csv(rl_fns[k]) for k in rl_fns}
rl_trajs_dfs = {k: pd.read_csv(rl_traj_fns[k]) for k in rl_traj_fns}

gl_sim_dfs = {k: pd.read_csv(gl_fns[k]) for k in gl_fns}
gl_trajs_dfs = {k: pd.read_csv(gl_traj_fns[k]) for k in gl_traj_fns}

rep_sim_dfs = {k: pd.read_csv(rep_fns[k]) for k in rep_fns}
rep_trajs_dfs = {k: pd.read_csv(rep_traj_fns[k]) for k in rep_traj_fns}

nrd_sim_dfs = {k: pd.read_csv(nrd_fns[k]) for k in nrd_fns}
nrd_trajs_dfs = {k: pd.read_csv(nrd_traj_fns[k]) for k in nrd_traj_fns}

In [12]:
rl = "reinforcement_learning"
gl = "spillover_learning"
rep = "replicator_dynamic"
nrd = "noisy_replicator_dynamic"

sim = "single_trial"
traj = "trajectory"

ba = "blahut_arimoto"

# label each df to be concatenated with a columns
def concat_with_columns(dfs: dict[str, pd.DataFrame], point_type: str, dynamic: str) -> pd.DataFrame:
    # add alpha for each dataframe
    for key in dfs:
        df = dfs[key]
        df["alpha"] = key # use string for category

    df = pd.concat(dfs.values())

    # annotate simulation-level info
    df["point_type"] = point_type
    df["dynamic"] = dynamic
    return df


# handle counterpart differently

counterpart_final_data["point_type"] = sim
counterpart_traj_data["point_type"] = traj
counterpart_final_data["dynamic"] = ba
counterpart_traj_data["dynamic"] = ba

rl_sim_data = concat_with_columns(rl_sim_dfs, sim, rl)
rl_traj_data = concat_with_columns(rl_trajs_dfs, traj, rl)

gl_sim_data = concat_with_columns(gl_sim_dfs, sim, gl)
gl_traj_data = concat_with_columns(gl_trajs_dfs, traj, gl)

rep_sim_data = concat_with_columns(rep_sim_dfs, sim, rep)
rep_traj_data = concat_with_columns(rep_trajs_dfs, traj, rep)

nrd_sim_data = concat_with_columns(nrd_sim_dfs, sim, nrd)
nrd_traj_data = concat_with_columns(nrd_trajs_dfs, traj, nrd)

# annotate sampled data
sampled_data["dynamic"] = "sampling"

In [13]:
# concatenate all simulation-related (e.g. NOT sampled) data into one dataframe
data = pd.concat([
    counterpart_final_data,
    counterpart_traj_data,
    rl_sim_data, 
    rl_traj_data,
    gl_sim_data,
    gl_traj_data,
    rep_sim_data, 
    rep_traj_data,
    nrd_sim_data, 
    nrd_traj_data,
]).reset_index()
data

Unnamed: 0,index,rate,distortion,beta,alpha,point_type,dynamic,round
0,0,3.321928e+00,0.000000,1000.000000,0.0,single_trial,blahut_arimoto,
1,1,1.861867e+00,0.485263,1.000000,1.0,single_trial,blahut_arimoto,
2,2,9.631310e-01,1.822883,0.250000,2.0,single_trial,blahut_arimoto,
3,3,5.753089e-02,7.616357,0.062500,4.0,single_trial,blahut_arimoto,
4,4,1.559873e-03,8.380513,0.015625,8.0,single_trial,blahut_arimoto,
...,...,...,...,...,...,...,...,...
1202876,1,2.468148e-09,16.307320,,16.0,trajectory,noisy_replicator_dynamic,1.0
1202877,2,4.692543e-14,16.308037,,16.0,trajectory,noisy_replicator_dynamic,2.0
1202878,3,4.440892e-16,16.308040,,16.0,trajectory,noisy_replicator_dynamic,3.0
1202879,4,0.000000e+00,16.308040,,16.0,trajectory,noisy_replicator_dynamic,4.0


## Generate plot for points and trajectories, for each dynamic

In [14]:
# let alpha just be integer valued (and then string valued for categorical var)
data["alpha"] = data["alpha"].astype(float)
data["alpha"] = data["alpha"].astype(int)
data["alpha"] = data["alpha"].astype(str)

data = data.assign(
    alpha=pd.Categorical(
        data["alpha"],
        categories=[str(param) for param in params]
    )
)
data # N.B.: nans are expected for 'round' column

Unnamed: 0,index,rate,distortion,beta,alpha,point_type,dynamic,round
0,0,3.321928e+00,0.000000,1000.000000,0,single_trial,blahut_arimoto,
1,1,1.861867e+00,0.485263,1.000000,1,single_trial,blahut_arimoto,
2,2,9.631310e-01,1.822883,0.250000,2,single_trial,blahut_arimoto,
3,3,5.753089e-02,7.616357,0.062500,4,single_trial,blahut_arimoto,
4,4,1.559873e-03,8.380513,0.015625,8,single_trial,blahut_arimoto,
...,...,...,...,...,...,...,...,...
1202876,1,2.468148e-09,16.307320,,16,trajectory,noisy_replicator_dynamic,1.0
1202877,2,4.692543e-14,16.308037,,16,trajectory,noisy_replicator_dynamic,2.0
1202878,3,4.440892e-16,16.308040,,16,trajectory,noisy_replicator_dynamic,3.0
1202879,4,0.000000e+00,16.308040,,16,trajectory,noisy_replicator_dynamic,4.0


In [15]:
# now concat sampled data 
data = pd.concat([data, sampled_data])
data

Unnamed: 0,index,rate,distortion,beta,alpha,point_type,dynamic,round
0,0.0,3.321928,0.000000,1000.000000,0,single_trial,blahut_arimoto,
1,1.0,1.861867,0.485263,1.000000,1,single_trial,blahut_arimoto,
2,2.0,0.963131,1.822883,0.250000,2,single_trial,blahut_arimoto,
3,3.0,0.057531,7.616357,0.062500,4,single_trial,blahut_arimoto,
4,4.0,0.001560,8.380513,0.015625,8,single_trial,blahut_arimoto,
...,...,...,...,...,...,...,...,...
994,,0.204579,15.741024,,,,sampling,
995,,0.219027,15.459583,,,,sampling,
996,,0.697477,16.150000,,,,sampling,
997,,0.136390,15.709083,,,,sampling,


In [16]:
rl_points = data[data["dynamic"] == rl]
rl_emergent = rl_points[rl_points["point_type"] == sim]

gl_points = data[data["dynamic"] == gl]
gl_emergent = gl_points[gl_points["point_type"] == sim]

rep_points = data[data["dynamic"] == rep]
rep_emergent = rep_points[rep_points["point_type"] == sim]

nrd_points = data[data["dynamic"] == nrd]
nrd_emergent = nrd_points[nrd_points["point_type"] == sim]

rl_emergent["language"] = "learned"
gl_emergent["language"] = "noisy_learned"
rep_emergent["language"] = "evolved"
nrd_emergent["language"] = "noisy_evolved"

sampled_points = data[data["dynamic"] == "sampling"]
sampled_points["language"] = "hypothetical"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [17]:
# extract the trajectory data
rl_trajs = rl_points[rl_points["point_type"] == traj]
gl_trajs = gl_points[gl_points["point_type"] == traj]
rep_trajs = rep_points[rep_points["point_type"] == traj]
nrd_trajs = nrd_points[nrd_points["point_type"] == traj]

# including 'counterparts' from B-A optimization
counterpart_data = data[data["dynamic"] == "blahut_arimoto"]
# set the language column for counterparts
counterpart_data["language"] = "optimal \ncounterpart"
counterpart_points = counterpart_data[counterpart_data["point_type"] == sim]
counterpart_trajs = counterpart_data[counterpart_data["point_type"] == traj]

# set the final rounds
rl_final_round_data = rl_trajs[rl_trajs["round"] == rl_trajs["round"].max()]
gl_final_round_data = gl_trajs[gl_trajs["round"] == gl_trajs["round"].max()]
rep_final_round_data = rep_trajs[rep_trajs["round"] == rep_trajs["round"].max()]
nrd_final_round_data = nrd_trajs[nrd_trajs["round"] == nrd_trajs["round"].max()]
counterpart_final_round_data = counterpart_trajs[counterpart_trajs["round"] == counterpart_trajs["round"].max()]

rl_final_round_data["language"] = "learned"
gl_final_round_data["language"] = "noisy_learned"
rep_final_round_data["language"] = "evolved"
nrd_final_round_data["language"] = "noisy_evolved"

display(rl_trajs)
display(rep_final_round_data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Unnamed: 0,index,rate,distortion,beta,alpha,point_type,dynamic,round
1242,0.0,0.008551,16.635750,,0,trajectory,reinforcement_learning,0.0
1243,1.0,0.008551,16.635750,,0,trajectory,reinforcement_learning,1.0
1244,2.0,0.008551,16.635750,,0,trajectory,reinforcement_learning,2.0
1245,3.0,0.008551,16.635750,,0,trajectory,reinforcement_learning,3.0
1246,4.0,0.008654,16.603641,,0,trajectory,reinforcement_learning,4.0
...,...,...,...,...,...,...,...,...
601237,99995.0,0.022911,14.879220,,16,trajectory,reinforcement_learning,99995.0
601238,99996.0,0.022911,14.879195,,16,trajectory,reinforcement_learning,99996.0
601239,99997.0,0.022910,14.879220,,16,trajectory,reinforcement_learning,99997.0
601240,99998.0,0.022910,14.879237,,16,trajectory,reinforcement_learning,99998.0


Unnamed: 0,index,rate,distortion,beta,alpha,point_type,dynamic,round,language
1201572,199.0,1.835807,1.000006,,2,trajectory,replicator_dynamic,199.0,evolved
1201772,199.0,1.16362,3.026176,,4,trajectory,replicator_dynamic,199.0,evolved
1201972,199.0,0.57776,6.030484,,8,trajectory,replicator_dynamic,199.0,evolved
1202172,199.0,1.6e-05,14.499798,,16,trajectory,replicator_dynamic,199.0,evolved


In [None]:
# Define a float-valued column in data to get color scale, called "imprecision"

def add_imprecision_column(df):
    df["imprecision"] = df["alpha"]

[
    add_imprecision_column(df) for df in 
    [rl_emergent, rl_trajs, rl_final_round_data] + 
    [gl_emergent, gl_trajs, gl_final_round_data] +
    [rep_emergent, rep_trajs, rep_final_round_data] + 
    [nrd_emergent, nrd_trajs, nrd_final_round_data]
]


# add_imprecision_column(rl_emergent)
# add_imprecision_column(rl_trajs)
# add_imprecision_column(rl_final_round_data)

# add_imprecision_column(gl_emergent)
# add_imprecision_column(gl_trajs)
# add_imprecision_column(gl_final_round_data)

# add_imprecision_column(rep_emergent)
# add_imprecision_column(rep_trajs)
# add_imprecision_column(rep_final_round_data)


add_imprecision_column(counterpart_points)

# rl_emergent["imprecision"] = rl_emergent["alpha"]
# rep_emergent["imprecision"] = rep_emergent["alpha"]
# rl_trajs["imprecision"] = rl_trajs["alpha"]
# rep_trajs["imprecision"] = rep_trajs["alpha"]
# rl_final_round_data["imprecision"] = rl_final_round_data["alpha"]
# rep_final_round_data["imprecision"] = rep_final_round_data["alpha"]
# counterpart_points["imprecision"] = counterpart_points["alpha"]

### Discrete-Time Replicator Dynamic

#### Points

In [None]:
plot_rep = (
    # Set data and the axes
    pn.ggplot(
        data=curve_data, mapping=pn.aes(x="rate", y="distortion")
    )  
    + pn.geom_point(  # hypothetical langs
        data=sampled_points,
        mapping=pn.aes(shape="language"),
        color="gray",
        size=3,
        alpha=0.1,
    )
    + pn.geom_line(size=1) # pareto data                   
    + pn.geom_jitter( # emergent langs
        data=rep_emergent,
        mapping=pn.aes(
            fill="imprecision",
            shape="language",
            ),
        alpha=0.3,
        size=5,
    )
    + pn.geom_point( # theoretical bound langs last
        data=counterpart_points,
        mapping=pn.aes(
            fill="imprecision",
            shape="language",
            ),
        size=5,
    )    
    + pn.xlab("Complexity $I(S;\hat{S})$")
    + pn.ylab("Communicative Cost $D[S, \hat{S}]$")
    # + pn.ggtitle("Discrete-time Replicator Dynamic")
    + pn.theme_classic()
)
print(plot_rep)

#### Trajectories

In [None]:
plot_rep_traj = (
    # Set data and the axes
    pn.ggplot(
        data=curve_data, mapping=pn.aes(x="rate", y="distortion")
    )  
    + pn.geom_point(
        data=sampled_points,
        mapping=pn.aes(shape="language"),
        color="gray",
        size=3,
        alpha=0.1,
    )
    + pn.geom_line(  # simulation langs
        data=rep_trajs,
        mapping=pn.aes(color="imprecision"),
        alpha=1.0,
        size=1,
    )
    + pn.geom_line(size=1) # pareto     
    + pn.geom_point(  # final langs
        data=rep_final_round_data,
        mapping=pn.aes(fill="imprecision", shape="language"),
        # shape="X",
        size=5,
    ) 
    + pn.geom_point( # theoretical bound langs
        data=counterpart_points,
        mapping=pn.aes(
            fill="imprecision",
            shape="language",
            ),
        size=5,
    )               
    + pn.xlab("Complexity $I(S;\hat{S})$")
    + pn.ylab("Communicative Cost $D[S, \hat{S}]$")
    + pn.theme_classic()
)
print(plot_rep_traj)

### Roth-Erev learning

#### Points

In [None]:
plot_rl = (
    # Set data and the axes
    pn.ggplot(
        data=curve_data, mapping=pn.aes(x="rate", y="distortion")
    )  
    + pn.geom_point(  # hypothetical langs
        data=sampled_points,
        mapping=pn.aes(shape="language"),
        color="gray",
        size=3,
        alpha=0.1,
    )
    + pn.geom_line(size=1) # pareto data                   
    + pn.geom_jitter( # emergent
        data=rl_emergent,
        mapping=pn.aes(
            fill="imprecision",
            shape="language", 
            ),
        alpha=0.3,
        size=5,
    )
    + pn.geom_point( # theoretical bound langs last
        data=counterpart_points,
        mapping=pn.aes(
            fill="imprecision",
            shape="language",
            ),
        size=5,
    )    
    + pn.xlab("Complexity $I(S;\hat{S})$")
    + pn.ylab("Communicative Cost $D[S, \hat{S}]$")    
    + pn.theme_classic()    
)
print(plot_rl)

#### Trajectories

In [None]:
plot_rl_traj = (
    # Set data and the axes
    pn.ggplot(
        data=curve_data, mapping=pn.aes(x="rate", y="distortion")
    )  
    + pn.geom_point( 
        data=sampled_points,
        mapping=pn.aes(shape="language"),
        color="gray",
        size=3,
        alpha=0.1,
    )
    + pn.geom_line(  # simulation langs
        data=rl_trajs,
        mapping=pn.aes(color="imprecision"),
        alpha=1.0,
        size=1,
    )
    + pn.geom_line(size=1) # pareto
    + pn.geom_point(  # final langs
        data=rl_final_round_data,
        mapping=pn.aes(fill="imprecision", shape="language"),
        size=5,
    )
    + pn.geom_point( # theoretical bound langs
        data=counterpart_points,
        mapping=pn.aes(
            fill="imprecision",
            shape="language",
            ),
        size=5,
    )
    + pn.xlab("Complexity $I(S;\hat{S})$")
    + pn.ylab("Communicative Cost $D[S, \hat{S}]$")        
    + pn.theme_classic()
)
print(plot_rl_traj)

### Spillover Learning

#### Points

In [None]:
plot_gl = (
    # Set data and the axes
    pn.ggplot(
        data=curve_data, mapping=pn.aes(x="rate", y="distortion")
    )  
    + pn.geom_point(  # hypothetical langs
        data=sampled_points,
        mapping=pn.aes(shape="language"),
        color="gray",
        size=3,
        alpha=0.1,
    )
    + pn.geom_line(size=1) # pareto data                   
    + pn.geom_jitter( # emergent
        data=gl_emergent,
        mapping=pn.aes(
            fill="imprecision",
            shape="language", 
            ),
        alpha=0.3,
        size=5,
    )
    + pn.geom_point( # theoretical bound langs last
        data=counterpart_points,
        mapping=pn.aes(
            fill="imprecision",
            shape="language",
            ),
        size=5,
    )    
    + pn.xlab("Complexity $I(S;\hat{S})$")
    + pn.ylab("Communicative Cost $D[S, \hat{S}]$")    
    + pn.theme_classic()    
)
print(plot_gl)

#### Trajectories

In [None]:
plot_gl_traj = (
    # Set data and the axes
    pn.ggplot(
        data=curve_data, mapping=pn.aes(x="rate", y="distortion")
    )  
    + pn.geom_point( 
        data=sampled_points,
        mapping=pn.aes(shape="language"),
        color="gray",
        size=3,
        alpha=0.1,
    )
    + pn.geom_line(  # simulation langs
        data=gl_trajs,
        mapping=pn.aes(color="imprecision"),
        alpha=1.0,
        size=1,
    )
    + pn.geom_line(size=1) # pareto
    + pn.geom_point(  # final langs
        data=gl_final_round_data,
        mapping=pn.aes(fill="imprecision", shape="language"),
        size=5,
    )
    + pn.geom_point( # theoretical bound langs
        data=counterpart_points,
        mapping=pn.aes(
            fill="imprecision",
            shape="language",
            ),
        size=5,
    )
    + pn.xlab("Complexity $I(S;\hat{S})$")
    + pn.ylab("Communicative Cost $D[S, \hat{S}]$")        
    + pn.theme_classic()
)
print(plot_gl_traj)

#### Save plots

In [None]:
# def save_plot(fn: str, plot: pn.ggplot, width=10, height=10, dpi=300) -> None:
#     """Save a plot with some default settings."""
#     plot.save(fn, width=10, height=10, dpi=300)

# save_plot("rep_main", plot_rep)
# save_plot("rep_traj", plot_rep_traj)
# save_plot("rl_main", plot_rl)
# save_plot("rl_traj", plot_rl_traj)