# SRS model

This notebook provides analysis for the SRS model using results produced by Mesa simulations.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm_notebook

from model.model import SrsModel
from model.constants import HYPOTHESIS_TRUTH

## Set up plotting environment

Before getting into the model, we set up plot styles for seaborn.

In [None]:
# Apply seaborn's "whitegrid" style to all subsequent plots
sns.set(style="whitegrid")
# Then switch the colour palette to "colorblind"
sns.set_palette("colorblind")

## Parameters

First we need to set up the following model parameters:

| parameter | description | permissible values |
| --- | --- | --- |
| $a$ | number of agents | integral, $a > 0$ |
| $d$ | number of agents sampled for Expansion and Retirement | integral, $d > 0$ |
| $b$ | probability for novel hypotheses being true | $0 \leq b \leq 1$ |
| $j_{0, N+}$ | base probability for positive novel results being published | $0 \leq j_{0, N+} \leq 1$ |
| $j_{0, N-}$ | base probability for negative novel results being published | $0 \leq j_{0, N-} \leq 1$ |
| $j_{0, R+}$ | base probability for positive replication results being published | $0 \leq j_{0, R+} \leq 1$ |
| $j_{0, R-}$ | base probability for negative replication results being published | $0 \leq j_{0, R-} \leq 1$ |
| $V_{N+}$ | payoff for publishing a novel hypothesis with a positive result | $V_{N+} \geq 0$ |
| $V_{N-}$ | payoff for publishing a novel hypothesis with a negative result | $V_{N-} \geq 0$ |
| $V_{0, R+}$ | base payoff for publishing a replication with a positive result | $V_{0, R+} \geq 0$ |
| $V_{0, R-}$ | base payoff for publishing a replication with a negative result | $V_{0, R-} \geq 0$ |
| $V_{RS}$ | payoff for having an original hypothesis successfully replicated | $V_{RS} \geq 0$ |
| $V_{RF}$ | payoff for having an original hypothesis unsuccessfully replicated | $V_{RF} \leq 0$ |
| $\eta_{s}$ | influence of rigour on Productivity Checks | $\eta_s > 0$ |
| $\eta_{r}$ | influence of tallies on replication payoffs | $\eta_r > 0$ |
| $\eta_{j}$ | influence of reputation on publication probabilities | $\eta_j > 0$ |
| $\sigma_t$ | standard deviation for targeted tally replication | $\sigma_{t} > 0$ |
| $\sigma_\gamma$ | standard deviation for $\gamma$ mutation magnitude | $\sigma_{\gamma} > 0$ |
| $\sigma_\tau$ | standard deviation for $\tau$ mutation magnitude | $\sigma_{\tau} > 0$ |
| $\sigma_r$ | standard deviation for $r$ mutation magnitude | $\sigma_{r} > 0$ |
| $\sigma_{c_{N+}}$ | standard deviation for $c_{N+}$ mutation magnitude | $\sigma_{c_{N+}} > 0$ |
| $\sigma_{c_{N-}}$ | standard deviation for $c_{N-}$ mutation magnitude | $\sigma_{c_{N-}} > 0$ |
| $\sigma_{c_{R+}}$ | standard deviation for $c_{R+}$ mutation magnitude | $\sigma_{c_{R+}} > 0$ |
| $\sigma_{c_{R-}}$ | standard deviation for $c_{R-}$ mutation magnitude | $\sigma_{c_{R-}} > 0$ |

In [None]:
# Model-level parameters; see the parameter table above for permissible values.
a = 100  # number of agents
d = 10  # number of agents sampled for Expansion and Retirement
b = 0.1  # probability for novel hypotheses being true

# Base probabilities of a result being published
j_0_N_pos = 0.9  # positive novel result
j_0_N_neg = 0.3  # negative novel result
j_0_R_pos = 0.2  # positive replication result
j_0_R_neg = 0.1  # negative replication result

# Payoffs
v_N_pos = 1.0  # publishing a novel hypothesis with a positive result
v_N_neg = 1.0  # publishing a novel hypothesis with a negative result
v_0_R_pos = 0.5  # base payoff, replication with a positive result
v_0_R_neg = 0.5  # base payoff, replication with a negative result
v_RS = 0.1  # original hypothesis successfully replicated
v_RF = -100  # original hypothesis unsuccessfully replicated

# Influence factors
eta_s = 0.1  # influence of rigour on Productivity Checks
eta_r = 0.2  # influence of tallies on replication payoffs
eta_j = 0.00005  # influence of reputation on publication probabilities

# Standard deviations
sigma_t = 2.0  # targeted tally replication
sigma_gamma = 0.01  # gamma mutation magnitude
sigma_tau = 1.0  # tau mutation magnitude
sigma_r = 0.01  # r mutation magnitude
sigma_c_N_pos = 0.02  # c_{N+} mutation magnitude
sigma_c_N_neg = 0.02  # c_{N-} mutation magnitude
sigma_c_R_pos = 0.02  # c_{R+} mutation magnitude
sigma_c_R_neg = 0.02  # c_{R-} mutation magnitude

## Initializing the SRS model simulation

Next, we initialize an SRS model instance with the parameters we've set.

In [None]:
# Create the model instance. Every argument is passed positionally, in the
# same order as the rows of the parameter table above.
model = SrsModel(
    a,
    d,
    b,
    j_0_N_pos,
    j_0_N_neg,
    j_0_R_pos,
    j_0_R_neg,
    v_N_pos,
    v_N_neg,
    v_0_R_pos,
    v_0_R_neg,
    v_RS,
    v_RF,
    eta_s,
    eta_r,
    eta_j,
    sigma_t,
    sigma_gamma,
    sigma_tau,
    sigma_r,
    sigma_c_N_pos,
    sigma_c_N_neg,
    sigma_c_R_pos,
    sigma_c_R_neg,
)


## Initializing agents

Now we need to initialize a set of agents in our model. To keep things simple, we start all agents with the same set of parameters for their research strategies. These parameters are as follows:

| parameter | description | permissible values |
| --- | --- | --- |
| $\gamma$ | the power of the agents | $0 \leq \gamma \leq 1$ |
| $\tau$ | the rigour of the agents | $\tau > 0$ |
| $r$ | probability that an agent chooses to replicate a published hypothesis | $0 \leq r \leq 1$ |
| $c_{N+}$ | probability that an agent chooses to publish a positive novel result | $0 \leq c_{N+} \leq 1$ |
| $c_{N-}$ | probability that an agent chooses to publish a negative novel result | $0 \leq c_{N-} \leq 1$ |
| $c_{R+}$ | probability that an agent chooses to publish a positive replication result | $0 \leq c_{R+} \leq 1$ |
| $c_{R-}$ | probability that an agent chooses to publish a negative replication result | $0 \leq c_{R-} \leq 1$ |

In [None]:
# Initial research strategy shared by all agents; see the table above.
gamma = 0.8  # power
tau = 75  # rigour
r = 0.2  # probability of choosing to replicate a published hypothesis
c_N_pos = 1  # probability of choosing to publish a positive novel result
c_N_neg = 1  # probability of choosing to publish a negative novel result
c_R_pos = 1  # probability of choosing to publish a positive replication result
c_R_neg = 1  # probability of choosing to publish a negative replication result

In [None]:
# Initialize the agents with the strategy values chosen in the previous cell
model.initialize_agents(gamma, tau, r, c_N_pos, c_N_neg, c_R_pos, c_R_neg)

## Running the model

Here we run the model, while keeping track of the mean replication rate ($\bar{r}$), mean power ($\bar{\gamma}$), and mean type I error ($\bar{\alpha}$) across agents at every time step.

In [None]:
num_time_steps = 10000

# One entry per time step: population means of r, gamma and alpha
mean_rs, mean_gammas, mean_alphas = [], [], []

for _ in tqdm_notebook(range(num_time_steps), leave=False):
    model.step()

    # Snapshot the agents once per step, then record the mean of each
    # research strategy parameter
    population = list(model.agent_map.values())
    mean_rs.append(np.mean([member.r for member in population]))
    mean_gammas.append(np.mean([member.gamma for member in population]))
    mean_alphas.append(np.mean([member.alpha for member in population]))

## Plotting evolution of research strategy parameters

In [None]:
# Put data into a dataframe
# Long-format data for the plot: one row per (time step, parameter) pair.
# The dict is deliberately NOT called `d`: that name holds the model
# parameter for the number of sampled agents, and overwriting it would break
# any later re-run of the model construction cell.
param_data = {
    "val": mean_rs + mean_gammas + mean_alphas,
    "param": ["r"] * num_time_steps
    + ["gamma"] * num_time_steps
    + ["alpha"] * num_time_steps,
    "t": list(range(1, num_time_steps + 1)) * 3,
}

df = pd.DataFrame(data=param_data)

# Generate the plot
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x="t", y="val", hue="param", data=df, ax=ax)

# Adjust legend: replace the raw parameter names with math-style labels
ax.legend(labels=[r"$\bar{r}$", r"$\bar{\gamma}$", r"$\bar{\alpha}$"])

# Adjust axes: every plotted quantity is shown on a 0-1 scale
ax.set_ylim(0, 1)
ax.set_yticks(np.arange(0, 1 + 0.1, 0.1))
ax.set_ylabel("")
ax.set_xlim(0, num_time_steps)
ax.set_xlabel("time step")

# Save the plot
# fig.savefig('param_plt.png', bbox_inches='tight')

## Functions for precision, sensitivity, and specificity

Here, as in the Model of Scientific Discovery, we will be interested in precision, sensitivity, and specificity. We provide functions for calculating these quantities. The variable names here reflect the names used when analysing the Model of Scientific Discovery.

In [None]:
# Max and min tallies to consider
max_tally = 100
min_tally = -100

# This is a function to convert tally numbers to indices in the above
# array
tally_idx = lambda x: x - min_tally


def calculate_ns():
    """Calculate n_Ts and n_Fs for any given time step.

    This should be run *after* a time step has finished.

    Returns a tuple ``(n_Ts, n_Fs)`` of two lists with one entry per tally
    from ``min_tally`` to ``max_tally`` inclusive. ``n_Ts`` counts the
    hypotheses at that tally whose truth flag is set and ``n_Fs`` counts the
    remaining ones. Tallies outside the considered range are ignored.
    """
    # Initialize arrays
    num_tallies = max_tally - min_tally + 1
    n_Ts = [0] * num_tallies
    n_Fs = [0] * num_tallies

    # Get a map with tallies as keys and lists of hypothesis indices as
    # values
    hyp_map = model.hypothesis_manager.hypothesis_map

    for s, idxs in hyp_map.items():
        # A tally below min_tally would give a negative index and silently
        # wrap around to the wrong slot; one above max_tally would raise an
        # IndexError. Skip anything outside the range we consider.
        if not min_tally <= s <= max_tally:
            continue

        n_T = 0
        n_F = 0

        for idx in idxs:
            if model.hypothesis_manager.hypotheses[idx][HYPOTHESIS_TRUTH]:
                n_T += 1
            else:
                n_F += 1

        n_Ts[tally_idx(s)] = n_T
        n_Fs[tally_idx(s)] = n_F

    return (n_Ts, n_Fs)


def calculate_precision(n_Ts, n_Fs):
    """Calculate precision array Fs.

    Each entry is the share of true hypotheses among all hypotheses at the
    same position, or 0 when there are no hypotheses at that position.
    """
    precisions = []
    for position, true_count in enumerate(n_Ts):
        total = true_count + n_Fs[position]
        if total == 0:
            precisions.append(0)
        else:
            precisions.append(true_count / total)
    return precisions


def calculate_sensitivity(n_Ts):
    """Calculate sensitivity array Gs.

    Each entry is that position's count divided by the sum of all counts in
    ``n_Ts``. When the sum is 0, every entry is 0 instead of raising a
    ZeroDivisionError, matching the zero handling in calculate_precision.
    """
    sum_n_Ts = sum(n_Ts)
    if sum_n_Ts == 0:
        return [0 for _ in n_Ts]
    return [x / sum_n_Ts for x in n_Ts]


def calculate_specificity(n_Fs):
    """Calculate specificity array Hs.

    Each entry is that position's count divided by the sum of all counts in
    ``n_Fs``. When the sum is 0, every entry is 0 instead of raising a
    ZeroDivisionError, matching the zero handling in calculate_precision.
    """
    sum_n_Fs = sum(n_Fs)
    if sum_n_Fs == 0:
        return [0 for _ in n_Fs]
    return [x / sum_n_Fs for x in n_Fs]

## Plotting precision, sensitivity, and specificity

In [None]:
# Calculate F, G, H
# Per-tally counts of true and false hypotheses in the current model state
n_Ts, n_Fs = calculate_ns()

# Precision (F), sensitivity (G) and specificity (H) for every tally
Fs, Gs, Hs = (
    calculate_precision(n_Ts, n_Fs),
    calculate_sensitivity(n_Ts),
    calculate_specificity(n_Fs),
)

In [None]:
# First determine the range of tallies we want to plot
# First determine the range of tallies we want to plot
min_tally_plt = -5
max_tally_plt = 5
tally_plt_vals = list(range(min_tally_plt, max_tally_plt + 1))

# Get the corresponding indices
min_tally_idx = tally_idx(min_tally_plt)
max_tally_idx = tally_idx(max_tally_plt)

# Long-format data for the plot: three rows (F, G, H) per tally.
# The dict is deliberately NOT called `d`: that name holds the model
# parameter for the number of sampled agents, and overwriting it would break
# any later re-run of the model construction cell.
tally_data = {"proportion": [], "property": [], "s": []}

for s, f, g, h in zip(
    tally_plt_vals,
    Fs[min_tally_idx : max_tally_idx + 1],
    Gs[min_tally_idx : max_tally_idx + 1],
    Hs[min_tally_idx : max_tally_idx + 1],
):
    tally_data["s"] += [s] * 3
    tally_data["property"] += ["F", "G", "H"]
    tally_data["proportion"] += [f, g, h]

df = pd.DataFrame(data=tally_data)

# Generate the plot
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x="s", y="proportion", hue="property", data=df, ax=ax)

# Adjust legend: keep the bar handles but spell out the property names
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=["precision", "sensitivity", "specificity"])

# Adjust axes
ax.set_yticks(np.arange(0, 1 + 0.1, 0.1))
ax.set_ylabel("")
ax.set_xlabel("tally")

# Save the plot
#fig.savefig('tally_plt.png', bbox_inches='tight')