# Data Analysis and Sweep Parameter File Automation

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import os

In [None]:
# setting directories and globals
# All three are relative paths, so they resolve against the current working directory.
runfile_directory = 'Runfiles/'  # design_runfile writes generated runfiles here
results_directory = 'Results/'  # get_results reads the *timeresults.txt / *endresults.txt files from here
template_file = 'template.txt'  # template that design_runfile copies line by line

# TWEAKS MADE 4/28/24
HEY I MADE SOME TWEAKS TO THE ORIGINAL PARAMETER VALUES
Mean_tolerance dropped from 1.0 -> 0.5; the 1.0 setting is too high for variation. 0.5 is more indicative of realistic thresholds for expectation of effort. Std deviations were also dropped to be half of the mean.

# Thinking through an issue 5/28
Why don't agents ever rejoin a group? Simply put, an agent will never rejoin because the bounds for their tolerance essentially overlap with their bounds for effort.

How can I adjust the model to include the benefit of working in a team? The data show that students retain information better in a group; I think that we should increase the variation/stdev for the individual agent effort output normal curve.

Should also tweak the standard mean_tolerance to be lower, so that it matches agent_std_value, with the intuition that the average person is going to be forgiving within 1 std dev of someone's normal output

In [None]:
# variables of interest, and file name specifications

# new parameter dict with new model
# Keys are the first token of a template line; each value is a list of strings
# that design_runfile joins onto that line (several entries = several values).
parameter_dict = {
    # output location and run shape
    '*fname': ['Python/Results/'],
    'steps': ['100'],
    'reps': ['1'],
    'letter_grades': ['true', 'false'],
    'prefix': ["new_model_test"],
    # grading
    'grading_error_alpha': ['2.5'],
    'grading_error_beta': ['2.5'],
    # group dynamics
    'divorce_constant': ['1000'],
    'max_strikes': ['3'],
    # agent population
    'num_agents': ['60'],
    'agent_tolerance_alpha': ['0.1'],
    'agent_tolerance_beta': ['0.1'],
    'agent_effort_alpha': ['0.1'],
    'agent_effort_beta': ['0.1'],
    'agent_std_effort': ['0.5'],
    # group sizes
    'min_agents_per_group': ['3'],
    'max_agents_per_group': ['4'],
}

In [None]:
# function to modify the sweep file based on the parameters in parameter_dict

def design_runfile(new_fname, parameter_dict=parameter_dict):
    """Write a runfile into ``runfile_directory`` based on ``template_file``.

    Every template line whose first whitespace-separated token is a key of
    ``parameter_dict`` keeps its first two tokens and gets the key's values
    appended in place of the rest; all other lines are copied unchanged.
    The ``'*fname'`` value written to the runfile is the first ``'*fname'``
    entry followed by ``new_fname`` without a trailing ``.txt``.

    Args:
        new_fname: Name of the runfile to create inside ``runfile_directory``.
        parameter_dict: Mapping of parameter name to a list of string values.
            It is no longer modified by this function.
    """
    with open(template_file) as template:
        template_lines = template.readlines()

    # str.strip('.txt') removes any of the characters '.', 't', 'x' from both
    # ends ('new_model_test.txt' became 'new_model_tes'); drop only the suffix.
    suffix = '.txt'
    base_name = new_fname[:-len(suffix)] if new_fname.endswith(suffix) else new_fname

    # Work on a shallow copy so repeated calls do not keep extending the
    # caller's (or the module-level default) '*fname' entry.
    overrides = dict(parameter_dict)
    overrides['*fname'] = [overrides['*fname'][0] + base_name]

    with open(runfile_directory + new_fname, 'w') as new_file:
        for line in template_lines:
            tokens = line.split()
            # Blank lines have no parameter token; copy them through instead
            # of failing on tokens[0].
            if not tokens or tokens[0] not in overrides:
                new_file.write(line)
                continue
            new_line = ' '.join(tokens[:2] + list(overrides[tokens[0]])) + '\n'
            new_file.write(new_line)

In [None]:
# Build runfile_directory + 'new_model_test.txt' from the template using parameter_dict.
design_runfile('new_model_test.txt', parameter_dict)

# Actually Analyzing the Files after Sweeping

In [None]:
# function for pulling from results. returns specific dfs for each level of granularity
def get_results(result_prefix):
    """Read the per-timestep and end-of-run result tables for one prefix.

    Both files live in ``results_directory`` and are named
    ``<result_prefix>timeresults.txt`` and ``<result_prefix>endresults.txt``;
    the first six lines of each are skipped before parsing as CSV.

    Returns:
        A ``(time_df, end_df)`` tuple of DataFrames.
    """
    base_path = results_directory + result_prefix
    time_df, end_df = (
        pd.read_csv(base_path + suffix, skiprows=6)
        for suffix in ('timeresults.txt', 'endresults.txt')
    )
    return time_df, end_df

In [None]:
# example
# Load both result tables written under the "new_model_test" prefix.
result_prefix = "new_model_test"

time_df, end_df = get_results(result_prefix)

In [None]:
# Column names of time_df used by the sweep plots below.
interested_independent_variable = 'deviant_mean_tolerance'  # value shown in each plot title
interested_dependent_variable = 'avg_deviant_payoff'  # first plotted series
# Second plotted series; the plotting functions read this name as a global, not as an argument.
interested_dependent_variable_two = 'avg_standard_payoff'

def sort_by_sweep(time_df, interested_independent_variable, interested_dependent_variable, steps):
    """Plot two payoff columns against ``Timestep`` for each block of ``steps`` rows.

    ``time_df`` is cut into consecutive slices of ``steps`` rows. For every
    slice, ``interested_dependent_variable`` and the module-level
    ``interested_dependent_variable_two`` columns are drawn, titled with the
    slice's first ``interested_independent_variable`` value, and shown.
    """
    chunk_starts = range(0, len(time_df), steps)

    plt.figure()

    for start in chunk_starts:
        chunk = time_df[start:start + steps]
        timesteps = chunk["Timestep"]
        first_series = chunk[interested_dependent_variable]
        second_series = chunk[interested_dependent_variable_two]
        sweep_value = chunk[interested_independent_variable].iloc[0]

        for name, values in (
            (interested_dependent_variable, first_series),
            (interested_dependent_variable_two, second_series),
        ):
            plt.plot(timesteps, values, linestyle='-', label=name)

        # Title and axis labels for this sweep setting
        plt.title(f"{interested_independent_variable}={sweep_value}")
        plt.xlabel("Timesteps")
        plt.ylabel("Average payoffs")

        plt.legend()
        plt.show()

def sort_by_sweep_but_relational(time_df, interested_independent_variable, interested_dependent_variable, steps):
    """Plot step-to-step changes of two payoff columns for each block of ``steps`` rows.

    The first row of every block is dropped. The first plotted value is the
    remaining first entry minus 0; each later value is the difference from
    the entry before it. The y-axis is fixed to the range 0.10-0.4.
    """
    def step_changes(column):
        # Values after the first row, paired with [0, v0, v1, ...] as "previous".
        tail = column.copy()[1:]
        previous = [0] + list(tail[:-1])
        return [now - before for now, before in zip(tail, previous)]

    chunk_starts = range(0, len(time_df), steps)

    plt.figure()

    for start in chunk_starts:
        chunk = time_df[start:start + steps]
        timesteps = chunk["Timestep"].copy()[1:]
        first_changes = step_changes(chunk[interested_dependent_variable])
        second_changes = step_changes(chunk[interested_dependent_variable_two])
        sweep_value = chunk[interested_independent_variable].copy().iloc[0]

        plt.plot(timesteps, first_changes, linestyle='-', label=interested_dependent_variable)
        plt.plot(timesteps, second_changes, linestyle='-', label=interested_dependent_variable_two)

        # Title and axis labels for this sweep setting
        plt.title(f"Relative payoffs when {interested_independent_variable}={sweep_value}")
        plt.xlabel("Timesteps")
        plt.ylabel("Average relative payoff")

        plt.ylim(0.10, 0.4)

        plt.legend()
        plt.show()

In [None]:
# Plot absolute payoffs, treating every 101 rows of time_df as one sweep setting
# (presumably timesteps 0-100 of a 100-step run — confirm against the results file).
sort_by_sweep(time_df,interested_independent_variable, interested_dependent_variable, 101)

In [None]:
# Same 101-row blocks, but plotting the change in payoff from one row to the next.
sort_by_sweep_but_relational(time_df,interested_independent_variable, interested_dependent_variable, 101)

# Analyzing Agent Files
I've hacked together some semblance of agent data collection with raw file writing. As of 4/29, I've also hacked together group data collection.

In [None]:
# parameter variables
result_prefix = "new_model_test"
agent_directory = "Agent_data/"  # directory scanned by get_agent_results
# Evaluates to "new_model_testnew_model_test_Agent-".
# NOTE(review): the run prefix appears twice — confirm this matches the on-disk file names.
agent_file_prefix = result_prefix + "new_model_test_Agent-"
group_directory = "Group_data/"  # directory scanned by get_group_results
# Evaluates to "new_model_testnew_model_test_Group-" (same doubled prefix).
group_file_prefix = result_prefix + "new_model_test_Group-"

In [None]:
def get_agent_results(agent_file_prefix, time_steps):
    """Load every regular file in ``agent_directory`` as a list of DataFrames.

    A file is treated as consecutive sections of ``time_steps + 1`` lines.
    Section ``i`` is parsed with ``pd.read_csv`` using
    ``skiprows=i * (time_steps + 1)``, ``nrows=time_steps`` and ``comment="#"``.

    Args:
        agent_file_prefix: File-name prefix that precedes the agent id.
        time_steps: Number of data rows read per section.

    Returns:
        Dict mapping the agent id taken from the file name to the list of
        per-section DataFrames, in file order.
    """
    section_length = time_steps + 1
    extension = ".txt"
    agent_dfs = {}
    for filename in os.listdir(agent_directory):
        file_path = os.path.join(agent_directory, filename)
        # Only regular files hold agent data; skip sub-directories.
        if not os.path.isfile(file_path):
            continue

        # Count lines inside a context manager so the handle is always closed.
        with open(file_path, 'r') as handle:
            num_sections = sum(1 for _ in handle) // section_length

        df_set = [
            pd.read_csv(file_path, skiprows=i * section_length, nrows=time_steps, comment="#")
            for i in range(num_sections)
        ]

        stem = filename[:-len(extension)] if filename.endswith(extension) else filename
        if stem.startswith(agent_file_prefix):
            # Remove the prefix as a whole; str.strip() treats its argument as
            # a set of characters and would also eat matching id characters.
            agent_id = stem[len(agent_file_prefix):]
        else:
            # File name does not carry the exact prefix: keep the historical
            # character-stripping result so existing keys do not change.
            agent_id = filename.strip(agent_file_prefix).strip(extension)
        agent_dfs[agent_id] = df_set
    return agent_dfs

def get_group_results(group_file_prefix, time_steps):
    """Load every regular file in ``group_directory`` as a list of DataFrames.

    A file is treated as consecutive sections of ``time_steps + 1`` lines.
    Section ``i`` is parsed with ``pd.read_csv`` using
    ``skiprows=i * (time_steps + 1)``, ``nrows=time_steps`` and ``comment="#"``.

    Args:
        group_file_prefix: File-name prefix that precedes the group id.
        time_steps: Number of data rows read per section.

    Returns:
        Dict mapping the group id taken from the file name to the list of
        per-section DataFrames, in file order.
    """
    section_length = time_steps + 1
    extension = ".txt"
    group_dfs = {}
    for filename in os.listdir(group_directory):
        file_path = os.path.join(group_directory, filename)
        # Only regular files hold group data; skip sub-directories.
        if not os.path.isfile(file_path):
            continue

        # Count lines inside a context manager so the handle is always closed.
        with open(file_path, 'r') as handle:
            num_sections = sum(1 for _ in handle) // section_length

        df_set = [
            pd.read_csv(file_path, skiprows=i * section_length, nrows=time_steps, comment="#")
            for i in range(num_sections)
        ]

        stem = filename[:-len(extension)] if filename.endswith(extension) else filename
        if stem.startswith(group_file_prefix):
            # Remove the prefix as a whole; str.strip() treats its argument as
            # a set of characters and would also eat matching id characters.
            group_id = stem[len(group_file_prefix):]
        else:
            # File name does not carry the exact prefix: keep the historical
            # character-stripping result so existing keys do not change.
            group_id = filename.strip(group_file_prefix).strip(extension)
        group_dfs[group_id] = df_set
    return group_dfs

In [None]:
# Load all agent and group files, reading 100 rows per section (time_steps=100).
agent_dfs = get_agent_results(agent_file_prefix, 100)
group_dfs = get_group_results(group_file_prefix, 100)

In [None]:
# I just need the data for deviant_mean_tolerance = 0.5, actually, feeling a lil brain-dead
# 0 = standard, 1 = deviant

# group_compositions[group_id] = {'group_count': [...], 'num_standards': [...], 'num_deviants': [...]}
# Each list holds one array per DataFrame (section) of that group.
group_compositions = {}

for group, dfset in group_dfs.items():
    # Groups without any section get no entry, as before.
    if not dfset:
        continue
    # Each key reads the column of the same name; the standards/deviants
    # columns were previously stored under each other's key.
    group_compositions[group] = {
        'group_count': [df['group_count'].values for df in dfset],
        'num_standards': [df['num_standards'].values for df in dfset],
        'num_deviants': [df['num_deviants'].values for df in dfset],
    }

In [None]:
# show the graphs of deviant v standard counts over timesteps
def plot_group_comps(group_dfs, figures):
    """Draw the first run of every series of every group onto per-series figures.

    Args:
        group_dfs: Mapping of group id to a dict of ``label -> list of
            per-run sequences``; the ``'group_count'`` entry sets the x-range.
        figures: Mapping of the same labels to matplotlib figures.
    """
    for group, series_by_label in group_dfs.items():
        x_values = range(len(series_by_label['group_count'][0]))

        for label, runs in series_by_label.items():
            first_run_only = runs[:1]
            if len(first_run_only) == 0:
                continue
            counts = first_run_only[0]

            # Switch to the figure that collects this label
            plt.figure(figures[label].number)

            plt.plot(x_values, counts, label=f"{group} {label}")

            # Axis labels, title and legend are refreshed on every draw
            plt.xlabel("Timesteps")
            plt.ylabel("Counts")
            plt.title(f"{label} per Group over Time for First Run")
            plt.legend()

    plt.show()

In [None]:
# One figure per series; plot_group_comps looks the figures up by these keys.
figures = {'group_count': plt.figure(), 'num_deviants': plt.figure(), 'num_standards': plt.figure()}
plot_group_comps(group_compositions, figures)

# grabbing a representative Agent

In [None]:
def grab_high_agent(agent_dfs):
    """Find, for every run, the agent whose last ``accumulated_payoff`` is largest.

    The number of runs is taken from the first agent's list. Ties keep the
    agent seen first. Each replacement of the current leader is printed.

    Returns:
        Dict mapping run index to ``(label, DataFrame copy)``, where the
        label ends with the winning agent's id.
    """
    run_total = len(tuple(agent_dfs.values())[0])
    leaders = {}
    for agent, runs in agent_dfs.items():
        label = f"Agent with highest ending payoff = {agent}"
        for run in range(run_total):
            candidate = runs[run]
            if run not in leaders:
                # First agent seen for this run becomes the initial leader.
                leaders[run] = (label, candidate.copy())
                continue
            leader_final = leaders[run][1].iloc[-1]['accumulated_payoff']
            candidate_final = candidate.iloc[-1]['accumulated_payoff']
            if leader_final < candidate_final:
                leaders[run] = (label, candidate.copy())
                print(f"Updated highest agent during run {run} is {agent} with payoff {candidate_final.copy()}")
    return leaders

def grab_low_agent(agent_dfs):
    """Find, for every run, the agent whose last ``accumulated_payoff`` is smallest.

    The number of runs is taken from the first agent's list. Ties keep the
    agent seen first. Each replacement of the current minimum is printed.

    Returns:
        Dict mapping run index to ``(label, DataFrame copy)``, where the
        label ends with the selected agent's id.
    """
    run_total = len(tuple(agent_dfs.values())[0])
    trailers = {}
    for run in range(run_total):
        for agent, runs in agent_dfs.items():
            candidate = runs[run]
            label = f"Agent with lowest ending payoff = {agent}"
            if run not in trailers:
                # First agent seen for this run becomes the initial minimum.
                trailers[run] = (label, candidate.copy())
                continue
            trailer_final = trailers[run][1].iloc[-1]['accumulated_payoff']
            candidate_final = candidate.iloc[-1]['accumulated_payoff']
            if trailer_final > candidate_final:
                trailers[run] = (label, candidate.copy())
                print(f"Updated lowest agent during run {run} is {agent} with payoff {candidate_final.copy()}")
    return trailers

In [None]:
# Per-run (label, DataFrame) of the agents with the highest and lowest final accumulated_payoff.
max_dicts = grab_high_agent(agent_dfs)
low_dicts = grab_low_agent(agent_dfs)

# plotting the representative agent against the other agents
Need to show the representative agent stats, and how they move around

In [None]:
import numpy as np

def plot_representative(agent_dfs, rep_dict, string_param): # rep_dict = max or low_dict depending on what you want, string_param = what we looking at
    """Plot each run's representative agent against the per-type average payoff.

    For every run in ``rep_dict`` the representative agent's
    ``accumulated_payoff`` is drawn together with the mean
    ``accumulated_payoff`` of all *other* agents of type 0 ("standard") and
    type 1 ("deviant"). A type with no other agents in the run is left out
    of the plot instead of raising.

    Args:
        agent_dfs: Mapping of agent id to a list of per-run DataFrames.
        rep_dict: Mapping of run index to ``(label, DataFrame)``; the agent
            id is the last whitespace-separated token of the label.
        string_param: Word used in the plot title (e.g. "Highest").
    """
    for run, rep_data in rep_dict.items():
        rep_id = rep_data[0].split()[-1]
        rep_frame = agent_dfs[rep_id][run]
        plt.figure()

        totals = {}  # type name -> summed payoff curve
        counts = {}  # type name -> number of agents summed

        for agent, data in agent_dfs.items(): # getting the other agents, and comparing
            run_frame = data[run]
            if agent == rep_id:
                plt.plot(run_frame.index, run_frame['accumulated_payoff'].copy(), label=f"Agent {agent}")
                continue

            agent_type = run_frame.iloc[0]['type']
            if agent_type == 0:
                kind = 'standard'
            elif agent_type == 1:
                kind = 'deviant'
            else:
                continue

            # Sum as float and out of place, so an integer first curve cannot
            # break later additions and the source frame is never modified.
            payoff = run_frame['accumulated_payoff'].to_numpy(dtype=float)
            totals[kind] = totals[kind] + payoff if kind in totals else payoff
            counts[kind] = counts.get(kind, 0) + 1

        # Use the representative's index for the x-axis rather than whichever
        # agent happened to be iterated last.
        for kind in ('deviant', 'standard'):
            if kind in totals:
                plt.plot(rep_frame.index, totals[kind] / counts[kind], label=f"Average {kind} payoff")
        plt.legend()
        plt.xlabel('Timesteps')  # Set x-axis label
        plt.ylabel('Payoffs/Effort')  # Set y-axis label
        plt.title(f"Average Agent Payoffs versus Agent with {string_param} Payoff, Run: {run}")

        # printing run parameters
        # Agent '1' is used as before when present; otherwise fall back to the
        # representative (presumably this column is identical across agents
        # within a run — confirm).
        reference = agent_dfs['1'] if '1' in agent_dfs else agent_dfs[rep_id]
        print(f"Run {run} Parameters:")
        print(f"Deviant Mean Tolerance: {reference[run].iloc[0]['deviant_mean_tolerance']}")
        print("Standard Mean Tolerance: 0.5") # hard-coded but it shouldn't change
        print(f"Global Mean Effort: 1.0") # hard-coded for now, but will update

        plt.show()
        print(f"Outstanding Agent {rep_id} Run {run} Stats:")
        print(f"Agent Type = {rep_frame['type'].iloc[0]}")
        print(f"Mean Effort = {rep_frame['mean_value'].iloc[0]}")
        print(f"Mean Tolerance = {rep_frame['tolerance'].iloc[0]}")
        print()

In [None]:
# Compare each run's highest-payoff agent with the per-type averages.
plot_representative(agent_dfs, max_dicts, "Highest")

In [None]:
# Compare each run's lowest-payoff agent with the per-type averages.
plot_representative(agent_dfs, low_dicts, "Lowest")

In [None]:
# getting number of times an agent left their group, and what time step that occurred
def get_group_change(agent_df): # singular agent df input
    """Return the rows at which an agent's ``group_id`` changes.

    The first row only establishes the starting group and is never reported,
    whatever its index label is.

    Args:
        agent_df: DataFrame for one agent and one run with a ``group_id`` column.

    Returns:
        Dict mapping the index label of each changed row to the new group id.
    """
    group_change_tracker = {}
    last_group = None
    # Iterate the column directly: values keep the column's own dtype and no
    # per-row Series has to be built.
    for position, (step, group_id) in enumerate(agent_df['group_id'].items()):
        if position > 0 and group_id != last_group:
            group_change_tracker[step] = group_id
        last_group = group_id
    return group_change_tracker

Creating a divorce dict, with all divorces that occur during each run

In [None]:
# divorce_dict[run][agent] = {row label: new group_id}, only for agents that changed group
divorce_dict = {}

for agent, runs in agent_dfs.items():
    for run, run_frame in enumerate(runs):
        changes = get_group_change(run_frame)
        if not changes:
            continue
        divorce_dict.setdefault(run, {})[agent] = changes
        print(f"Change in Agent {agent} during Run {run}")
        print(changes)

In [None]:
# Plotting the agent payoffs relative to prior steps
import numpy as np

# also going to add divorce time for the agent of interest

def relative_representative(agent_dfs, rep_dict, string_param, divorce_dict = None): # rep_dict = max or low_dict depending on what you want, string_param = what we looking at
    """Plot step-to-step payoff changes of each run's representative agent.

    For every run in ``rep_dict`` the first difference of the representative
    agent's ``accumulated_payoff`` is drawn together with the first
    difference of the mean ``accumulated_payoff`` of all other agents of
    type 0 ("standard") and type 1 ("deviant"). A type with no other agents
    in the run is left out of the plot instead of raising.

    Args:
        agent_dfs: Mapping of agent id to a list of per-run DataFrames.
        rep_dict: Mapping of run index to ``(label, DataFrame)``; the agent
            id is the last whitespace-separated token of the label.
        string_param: Word used in the plot title (e.g. "Lowest").
        divorce_dict: Optional ``{run: {agent: {step: group_id}}}``. When
            given, the representative's first group change is marked.
    """
    for run, rep_data in rep_dict.items():
        rep_id = rep_data[0].split()[-1]
        rep_frame = agent_dfs[rep_id][run]
        steps = rep_frame.index[:-1]
        plt.figure()

        # Representative agent: payoff[i + 1] - payoff[i] for every step.
        relatives = np.diff(rep_frame['accumulated_payoff'].to_numpy(dtype=float))
        plt.plot(steps, relatives, label=f"Agent {rep_id}")

        #  if we have a divorce dict, then add the point at which the agent divorces
        if divorce_dict:
            # Runs in which nobody changed group have no entry at all.
            rep_divorces = divorce_dict.get(run, {}).get(rep_id)
            if rep_divorces:
                print("hi!")
                print(rep_divorces)
                first_divorce = next(iter(rep_divorces))
                if len(relatives):
                    # The differences are one element shorter than the run, so
                    # a change on the final row uses the last available value.
                    marker_index = min(int(first_divorce), len(relatives) - 1)
                    plt.scatter(first_divorce, relatives[marker_index], s=50, zorder=5, color='black')
                    plt.text(first_divorce, relatives[marker_index], f"Agent {rep_id} divorce", fontsize=6, color='black', ha='right')
            else:
                print("Outstanding agent did not divorce.")

        totals = {}  # type name -> summed payoff curve
        counts = {}  # type name -> number of agents summed
        for agent, data in agent_dfs.items(): # getting the other agents, and comparing
            if agent == rep_id:
                continue
            run_frame = data[run]
            agent_type = run_frame.iloc[0]['type']
            if agent_type == 0:
                kind = 'standard'
            elif agent_type == 1:
                kind = 'deviant'
            else:
                continue
            # Sum as float and out of place, so an integer first curve cannot
            # break later additions and the source frame is never modified.
            payoff = run_frame['accumulated_payoff'].to_numpy(dtype=float)
            totals[kind] = totals[kind] + payoff if kind in totals else payoff
            counts[kind] = counts.get(kind, 0) + 1

        for kind in ('deviant', 'standard'):
            if kind in totals:
                plt.plot(steps, np.diff(totals[kind] / counts[kind]), label=f"Relative Average {kind} payoff")
        plt.legend()
        plt.xlabel('Timesteps')  # Set x-axis label
        plt.ylabel('Relative Payoffs/Effort')  # Set y-axis label
        plt.title(f"Average Agent Payoffs versus Agent with {string_param} Payoff, Relative to Prior Step, Run: {run}")

        # printing run parameters
        # Agent '1' is used as before when present; otherwise fall back to the
        # representative (presumably this column is identical across agents
        # within a run — confirm).
        reference = agent_dfs['1'] if '1' in agent_dfs else agent_dfs[rep_id]
        # parameter_dict may not define these keys; report that instead of
        # raising KeyError after the plot has been built.
        mean_tolerance = parameter_dict.get('mean_tolerance', ['n/a'])[0]
        mean_effort = parameter_dict.get('mean_value', ['n/a'])[0]
        print(f"Run {run} Parameters:")
        print(f"Deviant Mean Tolerance: {reference[run].iloc[0]['deviant_mean_tolerance']}")
        print(f"Standard Mean Tolerance: {mean_tolerance}")
        print(f"Global Mean Effort: {mean_effort}")

        plt.show()
        print(f"Outstanding Agent {rep_id} Run {run} Stats:")
        print(f"Agent Type = {rep_frame['type'].iloc[0]}")
        print(f"Mean Effort = {rep_frame['mean_value'].iloc[0]}")
        print(f"Mean Tolerance = {rep_frame['tolerance'].iloc[0]}")
        print()

In [None]:
# Step-to-step payoff changes for each run's lowest-payoff agent, with its first divorce marked.
relative_representative(agent_dfs, low_dicts, "Lowest", divorce_dict)

In [None]:
# Step-to-step payoff changes for each run's highest-payoff agent, with its first divorce marked.
relative_representative(agent_dfs, max_dicts, "Highest", divorce_dict)

# Low effort agent search
Find the agents that are "social loafers" by checking agent mean_values and picking out those more than 2.5 std dev below the global mean_value

Issue of interpretation here though: are we modeling average effort or social loafing? I.e., if an agent just happens to contribute less, it means that they contribute less in general; it is not specific to social loafing. We would need to describe a social-loafing type of agent, with unique behavior when they are in a group vs. when they are alone.

Not enough agents 2.5 std away, sample size too small, set to 2 std dev away

In [None]:
def find_loafers(agent_dfs, mean_value, std_value, num_std=2):
    """Collect, per run, the agents whose ``mean_value`` is far below the mean.

    An agent counts as a loafer in a run when the first row's ``mean_value``
    is strictly below ``mean_value - num_std * std_value``. The threshold,
    every new running minimum and every hit are printed for inspection.

    Args:
        agent_dfs: Mapping of agent id to a list of per-run DataFrames.
        mean_value: Global mean effort.
        std_value: Global standard deviation of effort.
        num_std: Number of standard deviations below the mean that marks a
            loafer (defaults to 2).

    Returns:
        Dict mapping run index to the list of loafer agent ids in that run.
    """
    lower = mean_value - (num_std * std_value)
    print(lower)
    loafers = {} # loafers [run #] = agent[]

    # Start from +inf so the running minimum is right for any value scale.
    lowest = float('inf')

    for agent, data_dfs in agent_dfs.items():
        for run, data in enumerate(data_dfs):
            agent_mean = data.iloc[0]['mean_value']
            if agent_mean < lowest:
                lowest = agent_mean
                print(lowest)
            if agent_mean < lower:
                print("got one")
                print(agent_mean)
                loafers.setdefault(run, []).append(agent)
    return loafers

In [None]:
# NOTE(review): the parameter_dict defined in this notebook has no 'mean_value' or
# 'std_value' keys, so this line raises KeyError as written — confirm which
# parameters should supply the effort mean and standard deviation.
loafers = find_loafers(agent_dfs, mean_value = float(parameter_dict['mean_value'][0]), std_value = float(parameter_dict['std_value'][0]))

In [None]:
# One block per run: the run index, then its loafer ids, then a blank line.
for run, agents in loafers.items():
    print(run, agents, "", sep="\n")

# Now we have the loafing agents, now what?
Once we've found social loafing agents, what can that tell us about other agents' interactions with them? Check their group, and see what happened to the agents in their group.

Expectation: All other agents should leave, and they should see an increase in payoff rate after divorce. The loafing agent should see a decrease in payoff.

In [None]:
from matplotlib.pyplot import figure

# grabbing the group the loafing agent is in, and printing the graphs for each agent with labels
def loafer_representation(agent_dfs, loafer_id, run, divorce_dict):
    """Plot step-to-step payoff changes of a loafer and its starting group mates.

    Group mates are the other agents whose first-row ``group_id`` in ``run``
    equals the loafer's. For each mate that changed group, the first change
    is marked and a straight line is fitted to every stretch between
    consecutive changes (stretches with fewer than two points are skipped).

    Args:
        agent_dfs: Mapping of agent id to a list of per-run DataFrames.
        loafer_id: Id of the agent to inspect.
        run: Run index to plot.
        divorce_dict: ``{run: {agent: {step: group_id}}}``; runs or agents
            without group changes may be absent.
    """
    loafer_frame = agent_dfs[loafer_id][run]
    loafer_group = loafer_frame.iloc[0]['group_id'] # group they started

    unfortunate_souls = [
        agent_id
        for agent_id, dfs in agent_dfs.items()
        if agent_id != loafer_id and dfs[run].iloc[0]['group_id'] == loafer_group
    ]

    print(f"Prithee tell me, what became of those unfortunate souls with agent {loafer_id} in run {run}?")

    # Size the figure before drawing on it; requesting it after plotting only
    # opened a second, empty figure.
    plt.figure(figsize=(10, 12), dpi=120)

    steps = loafer_frame.index[:-1]

    # Showing the relative results for loafer agent
    loafer_relatives = np.diff(loafer_frame['accumulated_payoff'].to_numpy(dtype=float))
    plt.plot(steps, loafer_relatives, label=f"Agent {loafer_id}")

    # Runs in which nobody changed group have no entry in divorce_dict.
    run_divorces = divorce_dict.get(run, {}) if divorce_dict else {}

    # going through the unfortunate souls and plotting their stuff on the same graph
    for us in unfortunate_souls:
        us_frame = agent_dfs[us][run]
        relatives = np.diff(us_frame['accumulated_payoff'].to_numpy(dtype=float))

        plt.plot(steps, relatives, label=f"Agent {us}")

        if us not in run_divorces:
            continue

        divorce_steps = list(run_divorces[us].keys())
        first_divorce = divorce_steps[0]
        if len(relatives):
            # The differences are one element shorter than the run, so a
            # change on the final row uses the last available value.
            marker_index = min(int(first_divorce), len(relatives) - 1)
            plt.scatter(first_divorce, relatives[marker_index], s=50, zorder=5, color='black')
            plt.text(first_divorce, relatives[marker_index], f"Agent {us} divorce", fontsize=6, color='black', ha='right')

        last_breakpoint = 0
        for i, boundary in enumerate(divorce_steps + [us_frame.index[-1]]):
            # create some lines of best fits bounded by divorce points
            segment_x = us_frame.index[last_breakpoint:boundary]
            segment_y = relatives[last_breakpoint:boundary]
            # A degree-1 fit needs at least two points.
            if len(segment_x) >= 2 and len(segment_y) >= 2:
                poly = np.poly1d(np.polyfit(segment_x, segment_y, 1))
                plt.plot(segment_x, poly(segment_x), label=f"Agent {us}, segment{i}")
            last_breakpoint = boundary

    plt.legend()
    plt.xlabel('Timesteps')  # Set x-axis label
    plt.ylabel('Relative Payoffs/Effort')  # Set y-axis label
    plt.title(f"Monsier ou Madame Loafer {loafer_id} avec sa amis")
    plt.show()

In [None]:
# Agent '35' and its starting group mates in run 11.
loafer_representation(agent_dfs, '35', 11, divorce_dict)

In [None]:
# Agent '35' and its starting group mates in run 6.
loafer_representation(agent_dfs, '35', 6, divorce_dict)

In [None]:
# Agent '35' and its starting group mates in run 1.
loafer_representation(agent_dfs, '35', 1, divorce_dict)