In [228]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import random
import csv
import math
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import seaborn as sns
from scipy.stats import wilcoxon

In [153]:
# Lookup tables from integer class id to class name (ids follow list order).
label_to_shape = dict(enumerate(["block", "legged", "other"]))
label_to_gait = dict(enumerate(["idle", "gallop", "crawl", "vibrate"]))

In [15]:
def process_static_posture(row):
    """Decode the "shape.static" field of `row` into a 2-D array.

    The field is a "/"-separated string with one chunk per grid row; each
    character of a chunk is parsed with ``int``.

    Parameters
    ----------
    row : mapping-like
        Indexable by "shape.static". The value is either the string itself
        or a wrapper without a ``split`` attribute whose ``.item()`` returns
        the string (e.g. a one-element pandas Series).

    Returns
    -------
    numpy.ndarray
        Float array of shape (number of chunks, length of the first chunk).

    Raises
    ------
    IndexError
        If a later chunk is shorter than the first one.
    ValueError
        If a character cannot be parsed as an integer.
    """
    field = row["shape.static"]
    try:
        chunks = field.split("/")
    except AttributeError:
        # Only the "not a plain string" case is handled here; any other
        # error is a real problem and must not be swallowed.
        chunks = field.item().split("/")
    n_cols = len(chunks[0])
    # The width is fixed by the first chunk; extra characters in longer
    # chunks are ignored.
    return np.array(
        [[int(chunk[c]) for c in range(n_cols)] for chunk in chunks],
        dtype=float,
    ).reshape(len(chunks), n_cols)

In [16]:
def process_static_posture_pretty(row):
    """Draw the static posture of `row` on a 5x5x3 canvas.

    The canvas starts filled with ones; every non-zero cell of the decoded
    static posture is set to (0, 1, 0). Rows are shifted down by
    ``5 - number_of_posture_rows`` so the posture sits on the bottom edge.
    (Presumably the last axis is RGB, i.e. green cells on white - confirm
    against the plotting code.)
    """
    static = process_static_posture(row)
    canvas = np.ones((5, 5, 3))
    filled_rows, filled_cols = np.nonzero(static)
    # Integer-array indexing paints all filled cells in one assignment.
    canvas[filled_rows + (5 - static.shape[0]), filled_cols] = (0, 1, 0)
    return canvas

In [17]:
def process_dynamic_posture(row):
    """Decode the "shape.dynamic" field of `row` into a 16x16 grid of 0/1.

    The field is a "|"-separated string with one chunk per grid row. A cell
    is set to 1 where the character is "█" or "+"; all other cells stay 0.

    Parameters
    ----------
    row : mapping-like
        Indexable by "shape.dynamic". The value is either the string itself
        or a wrapper without a ``split`` attribute whose ``.item()`` returns
        the string (e.g. a one-element pandas Series).

    Returns
    -------
    numpy.ndarray
        Float array of shape (16, 16).

    Raises
    ------
    IndexError
        If a marked cell lies outside the 16x16 grid.
    """
    field = row["shape.dynamic"]
    try:
        lines = field.split("|")
    except AttributeError:
        # Only the "not a plain string" case is handled here; any other
        # error is a real problem and must not be swallowed.
        lines = field.item().split("|")
    out = np.zeros((16, 16))
    for r, line in enumerate(lines):
        for c, ch in enumerate(line):
            if ch in ("█", "+"):
                out[r, c] = 1
    return out

In [18]:
def parse_compressed_spectrum(row):
    """Parse the "compressed.frequency" field of `row` into 100 floats.

    The field is a "-"-separated list of numbers. Because the separator is
    also the minus sign, a value written with a negative exponent (for
    example "1.5E-05") arrives as two tokens ("1.5E" and "05"); such pairs
    are glued back together before conversion. Tokens that carry a complete
    exponent (for example "1.0E7") are converted as they are.

    Parameters
    ----------
    row : mapping-like
        Indexable by "compressed.frequency". The value is either the string
        itself or a wrapper whose ``.item()`` returns the string (e.g. a
        one-element pandas Series).

    Returns
    -------
    numpy.ndarray
        Float array of length 100; positions beyond the parsed values stay 0.

    Raises
    ------
    IndexError
        If the field holds more than 100 values.
    ValueError
        If a token cannot be converted to float.
    """
    field = row["compressed.frequency"]
    try:
        sequence = field.item()
    except AttributeError:
        # Plain strings have no .item(); use the value as it is.
        sequence = field
    out = np.zeros(100)
    pending = ""
    num = 0
    for token in sequence.split("-"):
        if token.endswith("E"):
            # The "-" of a negative exponent was consumed by the split:
            # keep the mantissa and wait for the exponent digits.
            pending += token[:-1] + "e-"
        else:
            pending += token
            out[num] = float(pending)
            pending = ""
            num += 1
    # A dangling "...E" token at the very end is dropped without a value.
    return out

In [13]:
# Hand-labelled examples used to train the shape and behaviour classifiers.
labeled_data = pd.read_csv("./labels.csv", sep=",")
# Combined class id: behaviour id plus 4 times the shape id. Computed column-wise
# rather than with a row-wise apply, which is slower and misbehaves on an empty frame.
labeled_data["all.label"] = labeled_data["behaviour.label"] + labeled_data["shape.label"] * 4

In [20]:
# Shape classifier: random forests trained on the flattened dynamic-posture
# grids, scored with 5-fold cross-validation for 20 forest seeds.
x = np.array([process_dynamic_posture(row).flatten() for _, row in labeled_data.iterrows()])
y = labeled_data["shape.label"].values
# KFold(5) does not shuffle by default, so the folds are identical for every
# seed; only the forest's random_state changes. Compute the split once.
folds = list(KFold(5).split(x))
shape_clf = None
best = float("-inf")
accs = []
for seed in range(20):
    for train, test in folds:
        clf = RandomForestClassifier(random_state=seed, n_jobs=-1)
        clf.fit(x[train], y[train])
        # accuracy_score is documented as (y_true, y_pred).
        acc = accuracy_score(y[test], clf.predict(x[test]))
        # NOTE(review): the kept model is the one with the highest held-out
        # fold score, so "best" is an optimistic estimate, and shape_clf has
        # only seen the training folds of that split.
        if acc > best:
            best = acc
            shape_clf = clf
        accs.append(acc)
print("mean accuracy over 5-fold: ", np.mean(accs))
print("best: ", best)

mean accuracy over 5-fold:  0.6254423076923077
best:  0.7846153846153846


In [21]:
# Behaviour (gait) classifier: random forests trained on the parsed frequency
# spectra, scored with 5-fold cross-validation for 20 forest seeds.
x = np.array([parse_compressed_spectrum(row) for _, row in labeled_data.iterrows()])
y = labeled_data["behaviour.label"].values
# KFold(5) does not shuffle by default, so the folds are identical for every
# seed; only the forest's random_state changes. Compute the split once.
folds = list(KFold(5).split(x))
gait_clf = None
best = float("-inf")
accs = []
for seed in range(20):
    for train, test in folds:
        clf = RandomForestClassifier(random_state=seed, n_jobs=-1)
        clf.fit(x[train], y[train])
        # accuracy_score is documented as (y_true, y_pred).
        acc = accuracy_score(y[test], clf.predict(x[test]))
        # NOTE(review): the kept model is the one with the highest held-out
        # fold score, so "best" is an optimistic estimate, and gait_clf has
        # only seen the training folds of that split.
        if acc > best:
            best = acc
            gait_clf = clf
        accs.append(acc)
print("mean accuracy over 5-fold: ", np.mean(accs))
print("best: ", best)

mean accuracy over 5-fold:  0.7062211538461538
best:  0.7826923076923077


In [128]:
# Columns describing one individual; shared by `bests` and `lasts`.
_individual_cols = [
    "evolver",
    "controller",
    "run",
    "distance",
    "shape.dynamic",
    "compressed.frequency",
    "shape.label",
    "behaviour.label",
    "all.label",
]

# One row per run: the best individual of the run's last iteration.
bests = pd.DataFrame(columns=_individual_cols + ["serialized.genotype"])

# One row per individual of each run's last iteration, plus that iteration's diversity values.
lasts = pd.DataFrame(
    columns=_individual_cols
    + ["shape.diversity", "behaviour.diversity", "all.diversity", "serialized.genotype"]
)

# Per-run, iteration-wide measures, recorded every 1000 births.
df = pd.DataFrame(
    columns=[
        "iteration",
        "evolver",
        "controller",
        "run",
        "shape_median",
        "behaviour_median",
        "all_median",
        "best_distance",
    ]
)

In [None]:
import warnings

# Walk every trajectory file of one evolver, label the individuals with the
# trained classifiers, and append the results to `df`, `bests` and `lasts`.

# NOTE(review): this silences every warning for the rest of the kernel session.
# It is kept so later cells behave as before, but the warnings this cell used
# to trigger itself are now avoided in the code below.
warnings.filterwarnings('ignore')

controller_to_idx = {"homogeneous": 0, "heterogeneous": 1, "position": 2}
# Start numbering after the rows already present so new rows are appended.
best_num = len(bests)
last_num = len(lasts)
df_num = len(df)
evolver = "ga"
path = "./VSREvolution/output/" + evolver
for file in os.listdir(path):
    if file == ".DS_Store":
        continue
    print(file)
    traj = pd.read_csv(os.path.join(path, file), sep=",")
    # Remove header lines repeated inside the file and incomplete rows.
    # Chained, non-inplace calls avoid mutating a filtered slice.
    traj = traj[traj.iterations != "iterations"].dropna()
    traj = traj.rename({"compressed.frequency.y": "compressed.frequency"}, axis=1)
    # File names are dot-separated: field 2 is the controller name, field 1 the run number.
    name_parts = file.split(".")
    controller = controller_to_idx[name_parts[2]]
    run = int(name_parts[1])
    traj["distance"] = traj["distance"].astype(np.float64)
    traj["iterations"] = traj["iterations"].astype(np.int32)
    # Repeated header lines can leave this column as strings; make it numeric
    # like the two columns above so median() below is well defined.
    traj["births"] = pd.to_numeric(traj["births"])
    last_iter = traj["iterations"].max()
    if last_iter <= 290:
        # Runs that did not get past iteration 290 are skipped.
        continue
    births_count = 0  # births accumulated since the last recorded snapshot
    births = 0        # value written to the "iteration" column of df (steps of 1000)
    old_births = 0    # median "births" of the previous iteration
    # Group by the scalar key so `it` is a plain iteration number; a length-1
    # list key makes recent pandas yield tuple keys, which makes the
    # `it == last_iter` comparison fragile.
    for it, t in traj.groupby("iterations"):
        if births_count == 0 or it == last_iter:
            # Work on an independent copy: columns are added to the group below.
            t = t.copy()
            max_dist = t["distance"].max()
            t["shape.label"] = shape_clf.predict(np.array([process_dynamic_posture(row).flatten() for _, row in t.iterrows()]))
            t["behaviour.label"] = gait_clf.predict(np.array([parse_compressed_spectrum(row) for _, row in t.iterrows()]))
            # Combined class id, same encoding as for the labelled data.
            t["all.label"] = t["behaviour.label"] + t["shape.label"] * 4
            shape_counts = t["shape.label"].value_counts()
            behaviour_counts = t["behaviour.label"].value_counts()
            all_counts = t["all.label"].value_counts()
            # Despite the names these are not medians: each is 1 / sum(p_i^2)
            # over the class shares p_i (inverse Simpson index).
            shape_median = 1 / ((shape_counts / len(t)) ** 2).sum()
            behaviour_median = 1 / ((behaviour_counts / len(t)) ** 2).sum()
            all_median = 1 / ((all_counts / len(t)) ** 2).sum()
            df.loc[df_num] = [births, evolver, controller, run, shape_median, behaviour_median, all_median, max_dist]
            births += 1000
            df_num += 1
        current_births = int(t["births"].median())
        births_count += current_births - old_births
        if births_count >= 1000:
            # Enough births since the last snapshot: record the next iteration.
            births_count = 0
        old_births = current_births
    # After the loop `t` is the last (highest) iteration, which is always
    # labelled above, and max_dist / *_median describe that iteration.
    # `last_iter > 250` is already implied by the `<= 290` skip above.
    if last_iter > 250 and len(t) >= 50:
        print(len(t))
        best = t[t["distance"] == max_dist]
        if len(best) > 1:
            # Ties are broken at random (unseeded).
            best = best.sample(1)
        # NOTE(review): apart from "distance", the fields taken from `best` are
        # stored as one-element Series, not scalars; kept as is because later
        # cells may rely on it.
        bests.loc[best_num] = [evolver, controller, run, best["distance"].item(), best["shape.dynamic"], best["compressed.frequency"], best["shape.label"],
                               best["behaviour.label"], best["all.label"], ""]
        best_num += 1
        for _, row in t.iterrows():
            lasts.loc[last_num] = [evolver, controller, run, row["distance"], row["shape.dynamic"], row["compressed.frequency"], row["shape.label"],
                                   row["behaviour.label"], row["all.label"], shape_median,
                                   behaviour_median, all_median, ""]
            last_num += 1