# Analyse several runs

In [None]:
# imports

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
from sklearn.decomposition import PCA
import plotly.express as px

In [None]:
# Named matplotlib colours; the plotting helpers below index this list by the
# position of the series (group, run or variable) they draw.
COLORS = ["blue", "green", "red", "magenta", "olive", "orange", "turquoise"]

In [None]:
# plt files of all runs of experimental set-ups A-D
# (file names without the ".csv" extension; read_CSV() appends it when loading)

# A
plt_A1 = "PLTv16-05-02-24-13-43-28"
plt_A2 = "PLTv16-05-02-24-15-24-08"
plt_A3 = "PLTv16-05-02-24-15-33-15"
plt_A4 = "PLTv16-05-02-24-13-45-27"
plt_A5 = "PLTv16-05-02-24-21-02-40"
plt_A6 = "PLTv16-06-02-24-18-39-02"
plt_A7 = "PLTv16-07-02-24-16-58-40"
all_plt_A = [plt_A1, plt_A2, plt_A3, plt_A4, plt_A5, plt_A6, plt_A7]

# B
plt_B1 = "PLTv16-08-02-24-15-04-12"
plt_B2 = "PLTv16-06-02-24-10-53-51"
plt_B3 = "PLTv16-05-02-24-21-03-46"
plt_B4 = "PLTv16-05-02-24-17-02-00"
plt_B5 = "PLTv16-02-02-24-19-27-33"
all_plt_B = [plt_B1, plt_B2, plt_B3, plt_B4, plt_B5]

# C
plt_C1 = "PLTv17-14-02-24-12-58-39"
plt_C2 = "PLTv17-14-02-24-12-57-28"
plt_C3 = "PLTv17-13-02-24-18-25-53"
all_plt_C = [plt_C1, plt_C2, plt_C3]

# D
plt_D1 = "PLTv17-12-02-24-21-03-02"
plt_D2 = "PLTv17-12-02-24-19-03-20"
plt_D3 = "PLTv17-12-02-24-21-31-10"
all_plt_D = [plt_D1, plt_D2, plt_D3]

# one (initially empty) list of DataFrames per set-up; filled in place by make_df()
all_df_A = []
all_df_B = []
all_df_C = []
all_df_D = []


In [None]:
# create dataframes of all runs, add population intervals

def read_CSV(file):
    """Load ``file`` + ".csv" into a DataFrame and replace missing values with 0."""
    csv_path = file + ".csv"
    frame = pd.read_csv(csv_path)
    return frame.fillna(0)

def make_df(group, plt_group):
    """Load every file named in ``plt_group`` and append the DataFrames to ``group`` in place."""
    group.extend(read_CSV(file_name) for file_name in plt_group)

def make_intervals(all_df, shorten, interval_size=3000, max_interval=35):
    """Add an ``interval`` column to every DataFrame and trim it, in place.

    Each row is assigned to interval ``index // interval_size``. The highest
    interval of a frame is always removed, because the last interval usually
    isn't complete and could distort the data (note that it is removed even
    if it happens to be full).

    Args:
        all_df: iterable of DataFrames; each one is modified in place.
        shorten: if true, additionally drop all rows whose interval is greater
            than ``max_interval`` so that all runs have the same length.
        interval_size: number of consecutive index values per interval.
        max_interval: highest interval kept when ``shorten`` is true.
    """
    for df in all_df:
        df['interval'] = df.index // interval_size

        # last interval isn't complete and could distort data
        drop_mask = df['interval'] == df['interval'].max()
        if shorten:
            # shorten all data to the same length
            drop_mask = drop_mask | (df['interval'] > max_interval)

        # single in-place drop of every row selected above
        df.drop(df.index[drop_mask.to_numpy()], inplace=True)

all_plt_groups = [all_plt_A, all_plt_B, all_plt_C, all_plt_D]
all_groups = [all_df_A, all_df_B, all_df_C, all_df_D]

# Load every run. Each list is emptied in place first, so re-running this cell
# does not append the same runs a second time.
for group, plt_group in zip(all_groups, all_plt_groups):
    group.clear()
    make_df(group, plt_group)

# Applied exactly once per freshly loaded DataFrame: calling make_intervals()
# twice on the same frame would remove one more interval from its end.
for group in all_groups:
    make_intervals(group, shorten=True)


In [None]:
# create average plots of one or more groups

def create_average_plot(variable, y_label, groups, group_names, report_interval=25):
    """Plot the across-run average of ``variable`` per population interval, one line per group.

    For every run the mean of ``variable`` per interval is computed; the runs of
    a group are then averaged interval by interval (a run that does not reach an
    interval is ignored for that interval). A short summary is printed per
    group: the average at ``report_interval`` and the mean of the runs' maximum
    interval means.

    Args:
        variable: name of the column to analyse.
        y_label: y-axis label, also used in the printed summary.
        groups: list of groups; each group is a non-empty list of DataFrames
            that contain an ``interval`` column.
        group_names: name of each group (legend and summary), parallel to ``groups``.
        report_interval: interval whose average is printed; "n/a" is printed
            for a group that has no data for that interval.

    Raises:
        ValueError: if a group contains no runs.
    """
    group_averages = []

    # average the runs of each group and save the max values for the summary
    for i, all_df in enumerate(groups):
        if not all_df:
            raise ValueError(f"group {group_names[i]!r} contains no runs")

        run_means = [df.groupby("interval")[variable].mean() for df in all_df]
        max_values = [means.max() for means in run_means]

        # align the runs on the interval number (one column per run); runs that
        # end earlier contribute NaN, which mean() skips
        average = pd.concat(run_means, axis=1).sort_index().mean(axis=1)
        group_averages.append(average)

        # look the interval up by its number rather than by row position, and
        # do not fail when the data ends before that interval
        if report_interval in average.index:
            at_interval = round(average.loc[report_interval], 2)
        else:
            at_interval = "n/a"
        print(f"{group_names[i]}: \n{y_label} at {report_interval}: {at_interval} \n{y_label} max mean: {round(sum(max_values) / len(max_values), 2)}")

    plt.figure(figsize=(8, 4))
    for i, average in enumerate(group_averages):
        # cycle through the palette so that more groups than colours still plot
        plt.plot(average.index, average.to_numpy(), alpha=0.5, color=COLORS[i % len(COLORS)], label=group_names[i])
    plt.xlabel('population interval')
    plt.ylabel(y_label)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.show()

# groups to compare and the names shown for them
groups = [all_df_A, all_df_B, all_df_C, all_df_D]
group_names = ["A", "B", "C", "D"]

# (column, y-axis label) of every metric that is averaged per group
for metric, metric_label in [
    ('fitness', "average performance"),
    ('survivedTime', "average survived time"),
    ('eatenTrees', "average eaten trees"),
    ('reproducedMutual', "average reproduced"),
    ('reproducedSolo', "average reproduced"),
]:
    create_average_plot(metric, metric_label, groups, group_names)

In [None]:
# plot all runs of a group into one figure to compare DNA values

def create_mean_plot_group(variable, y_label, group):
    """Draw the per-interval mean of ``variable`` for every run in ``group`` into one figure.

    Each run gets its own line, coloured by ``COLORS`` and labelled with its
    position in ``group``.
    """
    plt.figure(figsize=(8, 4))
    for run_number, run_df in enumerate(group):
        interval_means = run_df.groupby("interval")[variable].mean()
        plt.plot(
            interval_means.index,
            interval_means.to_numpy(),
            alpha=0.5,
            color=COLORS[run_number],
            label=run_number,
        )
    plt.xlabel('population interval')
    plt.ylabel(y_label)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.show()

# set-up whose runs are compared
group = all_df_A

# (column, y-axis label) of every value that is plotted for the group
for column, column_label in [
    ('fitness', 'performance'),
    ('litterSize', 'litter size'),
    ('weight', 'weight'),
    ('movementSpeed', 'movement speed'),
    ('carnivore', 'carnivore'),
    ('mutationAmount', 'mutation amount'),
    ('mutationChance', 'mutation chance'),
    ('visualRadius', 'visual radius'),
    ('angleBetweenRaycasts', 'angle between raycast'),
]:
    create_mean_plot_group(column, column_label, group)

# Analyse a single run

In [None]:
# plt file to analyse (name without the ".csv" extension). Make sure the file is
# in the same directory, because it is loaded by relative path.
plt_file = "PLTv16-08-02-24-15-04-12"
single_plt = [plt_file]  # make_df() expects a list of file names
single_df = []  # target for the loaded run

In [None]:
# create dataframe of the single run, add population intervals

# Load into a fresh local list instead of `single_df` itself: `single_df` is
# rebound to a DataFrame below, so re-running this cell would otherwise hand a
# DataFrame to make_df()/make_intervals(), which expect a list.
single_runs = []
make_df(single_runs, single_plt)
make_intervals(single_runs, shorten=False)
single_df = single_runs[0]

In [None]:
# functions to plot the data of one run

# line plot of the per-interval mean of one variable of one run
def create_mean_plot(variable, y_label, df):
    """Draw the mean of ``variable`` per population interval of ``df`` as a single line."""
    plt.figure(figsize=(8, 4))
    interval_means = df.groupby("interval")[variable].mean()
    plt.plot(
        interval_means.index,
        interval_means.to_numpy(),
        alpha=0.5,
        color=COLORS[0],
    )
    plt.xlabel('population interval')
    plt.ylabel(y_label)
    plt.grid(True)
    plt.show()

# line plot of the per-interval means of several variables of one run
def create_comparison_plot(variables, variable_labels, df):
    """Draw one line per entry of ``variables`` (per-interval mean) into a shared figure.

    ``variable_labels[i]`` is the legend label and ``COLORS[i]`` the colour of
    ``variables[i]``.
    """
    plt.figure(figsize=(8, 4))
    for position, column in enumerate(variables):
        interval_means = df.groupby("interval")[column].mean()
        plt.plot(
            interval_means.index,
            interval_means.to_numpy(),
            alpha=0.5,
            color=COLORS[position],
            label=variable_labels[position],
        )
    plt.xlabel('population interval')
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.show()

# violin plot of the distribution of one variable per interval of one run
def create_violin_plot(variable, y_label, df):
    """Draw one violin per population interval showing the distribution of ``variable``.

    Quartile lines are drawn inside each violin.
    """
    plt.figure(figsize=(14, 7))
    intervals = df['interval']
    values = df[variable]
    sns.violinplot(x=intervals, y=values, inner='quartile')
    plt.xlabel('population interval')
    plt.ylabel(y_label)
    plt.show()

# scatter plot of the distinct values of one variable per interval of one run
def create_scatter_plot(variable, y_label, df):
    """Mark every distinct value of ``variable`` that occurs in each population interval.

    The occurrence counts are computed to obtain the distinct
    (interval, value) pairs but are not visualised.
    """
    plt.figure(figsize=(8, 4))
    counts_per_interval = df.groupby("interval")[variable].value_counts()
    distinct_pairs = counts_per_interval.reset_index(name='count')
    plt.scatter(
        distinct_pairs["interval"],
        distinct_pairs[variable],
        alpha=0.5,
        color="red",
    )
    plt.xlabel('population interval')
    plt.ylabel(y_label)
    plt.grid(True)
    plt.show()

# plot of the share of each cause of death per interval of one run
def create_cause_of_death_plot(df):
    """Draw, per population interval, the fraction of rows with each ``causeOfDeath`` code."""
    # (causeOfDeath code, line colour, legend label), in drawing order
    causes = (
        (2, "magenta", "drown"),
        (1, "blue", "starved"),
        (3, "green", "decrepitude"),
        (4, "red", "killed"),
        (5, "orange", "natural disaster"),
    )

    plt.figure(figsize=(8, 4))
    for code, line_color, legend_label in causes:
        # mean of a boolean column == matching rows / all rows of the interval
        share = (df['causeOfDeath'] == code).groupby(df['interval']).mean()
        plt.plot(share.index, share.to_numpy(), alpha=0.5, color=line_color, label=legend_label)
    plt.xlabel('population interval')
    plt.ylabel('cause of death')
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.show()

# creates a plot to see the survived time sorted by cause of death of one run
def create_sorted_survived_time_plot(df):
    """Draw the mean ``survivedTime`` per population interval, one line per cause of death.

    Only the causes listed in ``plotted_causes`` are evaluated and drawn.
    Causes 3 (decrepitude) and 5 (natural disaster) are not plotted; add
    ``(3, "green", "decrepitude")`` or ``(5, "orange", "natural disaster")``
    to the table to show them.
    """
    # (causeOfDeath code, line colour, legend label), in drawing order
    plotted_causes = (
        (2, "magenta", "drown"),
        (1, "blue", "starved"),
        (4, "red", "killed"),
    )

    plt.figure(figsize=(8, 4))
    for code, line_color, legend_label in plotted_causes:
        rows_with_cause = df[df['causeOfDeath'] == code]
        mean_time = rows_with_cause.groupby("interval")['survivedTime'].mean()
        plt.plot(mean_time.index, mean_time.to_numpy(), alpha=0.5, color=line_color, label=legend_label)
    plt.xlabel('population interval')
    plt.ylabel('survived time')
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.show()

# creates a plot which compares reproduced and min. 3 eaten trees
def create_rep_3trees_comparison_plot(df, reproduction_type):
    """Compare the share of rows with at least 3 eaten trees with the mean reproduction value.

    Args:
        df: DataFrame of one run with ``interval``, ``eatenTrees`` and the
            selected reproduction column.
        reproduction_type: "solo" (column ``reproducedSolo``) or "mutual"
            (column ``reproducedMutual``); matched case-insensitively. For any
            other value a warning is issued and only the eaten-trees line is
            drawn.
    """
    import warnings

    reproduction_columns = {"solo": "reproducedSolo", "mutual": "reproducedMutual"}
    reproduction_column = reproduction_columns.get(str(reproduction_type).lower())
    if reproduction_column is None:
        # previously this case silently produced a plot without the reproduction line
        warnings.warn(
            f"unknown reproduction_type {reproduction_type!r}; expected 'solo' or 'mutual' "
            "- only the eaten-trees line is drawn"
        )

    plt.figure(figsize=(8, 4))
    trees_df = df.groupby('interval')['eatenTrees'].apply(lambda x: (x >= 3).sum() / len(x)).reset_index(name='count')
    plt.plot(trees_df["interval"], trees_df['count'], alpha=0.5, label="3 eaten trees", color="blue")
    if reproduction_column is not None:
        reproduced_df = df.groupby('interval')[reproduction_column].mean().reset_index()
        plt.plot(reproduced_df["interval"], reproduced_df[reproduction_column], alpha=0.5, label="reproduced", color="green")
    plt.xlabel('population interval')
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.show()


In [None]:
# Behaviour plots of the single run

# per-interval means; swap in the commented entry for the solo-reproduction column
for column, column_label in [
    ('fitness', 'performance'),
    ('eatenTrees', 'eaten trees'),
    # ('reproducedSolo', 'reproduced'),
    ('reproducedMutual', 'reproduced'),
    ('survivedTime', 'survived time'),
]:
    create_mean_plot(column, column_label, single_df)

# several means in one figure
for columns, column_labels in [
    (['eatenTrees', 'eatenAnimals'], ['eaten trees', 'eaten animals']),
    # (['eatenTrees', 'reproducedSolo'], ['eaten trees', 'reproduced']),
    (['eatenTrees', 'reproducedMutual'], ['eaten trees', 'reproduced']),
]:
    create_comparison_plot(columns, column_labels, single_df)

create_cause_of_death_plot(single_df)
create_sorted_survived_time_plot(single_df)
# create_rep_3trees_comparison_plot(single_df, "Mutual")

# distributions per interval
for column, column_label in [
    ('fitness', 'performance'),
    ('survivedTime', 'survived time'),
]:
    create_violin_plot(column, column_label, single_df)

In [None]:
# DNA violin plots: distribution of every DNA value per interval

for gene, gene_label in [
    ('litterSize', 'litter size'),
    ('weight', 'weight'),
    ('movementSpeed', 'movement speed'),
    ('carnivore', 'carnivore'),
    ('mutationAmount', 'mutation amount'),
    ('mutationChance', 'mutation chance'),
    ('visualRadius', 'visual radius'),
    ('angleBetweenRaycasts', 'angle between raycast'),
]:
    create_violin_plot(gene, gene_label, single_df)

In [None]:
# DNA mean plots: per-interval mean of every DNA value

for gene, gene_label in [
    ('litterSize', 'litter size'),
    ('weight', 'weight'),
    ('movementSpeed', 'movement speed'),
    ('carnivore', 'carnivore'),
    ('mutationAmount', 'mutation amount'),
    ('mutationChance', 'mutation chance'),
    ('visualRadius', 'visual radius'),
    ('angleBetweenRaycasts', 'angle between raycast'),
]:
    create_mean_plot(gene, gene_label, single_df)

In [None]:
# other columns: scatter of the distinct values per interval

for column in ('key', 'color'):
    # the column name doubles as the y-axis label
    create_scatter_plot(column, column, single_df)

In [None]:
# Specification: prepare the DNA data of the single run for the PCA

# load the run the same way as everywhere else (missing values become 0)
df = read_CSV(plt_file)
# only DNA data (columns from position 13 onwards)
df = df.iloc[:, 13:]
# add time dimension: running row number
df = df.assign(Time=range(len(df)))

# normalize data with min max scaler; column names and index are carried over
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df)
scaled_df = pd.DataFrame(
    scaled_data,
    index=df.index,
    columns=df.columns,
)

# dimension reduction with pca
def apply_pca(dataframe):
    """Project all columns except ``Time`` onto their first two principal components.

    Args:
        dataframe: DataFrame with a ``Time`` column (ignored by the PCA) and
            the feature columns to reduce.

    Returns:
        DataFrame with the columns ``PCA1`` and ``PCA2`` and the same index as
        ``dataframe``, so that it can be aligned with other columns of the
        input (e.g. by ``pd.concat(..., axis=1)``) without mismatching rows.
    """
    features = dataframe.drop(columns='Time')
    components = PCA(n_components=2).fit_transform(features)
    # keep the caller's index instead of a fresh 0..n-1 range
    return pd.DataFrame(components, columns=['PCA1', 'PCA2'], index=dataframe.index)

# scatter of the pca reduced data in 2d, with time shown as the point colour
def plot_pca_data_color(pca_dataframe, order_series, c_labels=None, c_center=None):
    """Show an interactive scatter of ``PCA1`` vs ``PCA2`` coloured by ``Time``.

    ``order_series`` is joined column-wise to ``pca_dataframe`` and must
    provide the ``Time`` column. ``c_labels`` is stored in a ``Cluster``
    column that is only used to order the rows. If ``c_center`` is given, its
    rows are drawn as additional cross markers named "Cluster Center".
    """
    plot_df = pd.concat([pca_dataframe, order_series], axis=1)
    plot_df['Cluster'] = c_labels
    plot_df = plot_df.sort_values(by='Cluster')
    plot_df['Cluster'] = plot_df['Cluster'].astype('str')
    fig = px.scatter(
        plot_df,
        x='PCA1',
        y='PCA2',
        color='Time',
        size_max=20,
        hover_data={'Name': plot_df.index},
    )

    if c_center is not None:
        palette = px.colors.qualitative.Plotly
        center_count = len(c_center)
        centers = pd.DataFrame(c_center, columns=['PCA1', 'PCA2'])
        # repeat the palette often enough to give every center a colour
        repeats = (center_count // len(palette)) + 1
        center_colors = (palette * repeats)[:center_count]
        marker_style = {'symbol': 'cross-thin-open', 'size': 10, 'color': center_colors, 'opacity': 1}
        fig.add_scatter(
            x=centers['PCA1'],
            y=centers['PCA2'],
            name="Cluster Center",
            mode='markers',
            marker=marker_style,
        )

    fig.show()

# reduce the scaled data to two dimensions
pca_df = apply_pca(scaled_df)
# take the (unscaled) time values before 'Time' is removed from df
order_series = df['Time']
# drop the last column, which is the 'Time' column added above
df = df.iloc[:, :-1]
# plot the reduced data, coloured by time
plot_pca_data_color(pca_df, order_series)