# Assignments for Coalition
This notebook generates parties using the project's Python modules. It also writes output files (the party embeddings and the party file) so that the data can be published on the website.
The cell "Set some parameters" is the one most likely to be changed during usage, as it sets the parameters for generating parties.

In [None]:
import numpy as np
import pandas as pd
from numpy.linalg import norm
from random import randint, seed
from time import time

from data import load_data, load_party_embeddings, save_party_embeddings
from embeddings import calculate_party_embeddings
from experiment import run_trials
from generate_possible_jobs import generate_possible_parties
from select_parties import select_parties_by_embeddings, select_parties_randomly

seed()  # No explicit seed value: the RNG is seeded non-deterministically, so runs are not reproducible

In [None]:
# Set some parameters
# run_style and duplicates are passed to party generation; broken_jobs and
# equip_factor are passed to the embedding calculation.
run_style = "Random"
duplicates = False
equip_factor = 0.5
# True: recompute the embeddings and save them; False: load them from file.
should_calculate_party_embeddings = True
broken_jobs = ["Summoner", "Black Mage", "Chemist"]

# The embeddings file name encodes the run style, whether duplicates are
# allowed, and the equipment factor.
party_embeddings_filename = (
    f"data/embeddings_{run_style.lower()}_duplicates_eq{equip_factor}.csv"
    if duplicates
    else f"data/embeddings_{run_style.lower()}_eq{equip_factor}.csv"
)

In [None]:
# Load data for each job
# load_data yields two values: the job table (df_jobs) and stat_cols
# (presumably the names of the stat columns -- confirm in data.load_data).
job_data_path = "data_jobs/job_data_embeddings.csv"
df_jobs, stat_cols = load_data(job_data_path)

In [None]:
# Generate all possible parties
# The call is timed so that the cost of the generation step is reported.
start = time()
valid_parties = generate_possible_parties(
    run=run_style,
    df_jobs=df_jobs,
    duplicates=duplicates,
)
stop = time()
print(f"Needed {stop-start} seconds.")

In [None]:
# Calculate or load the party embeddings
# Controlled by should_calculate_party_embeddings; both paths are timed.
start = time()
if not should_calculate_party_embeddings:
    # Read the embeddings back from party_embeddings_filename (presumably
    # written by an earlier run that had calculation enabled).
    print("Loading party embeddings...")
    valid_parties_embeddings = load_party_embeddings(party_embeddings_filename)
else:
    # Compute the embeddings for every valid party, then save them under
    # party_embeddings_filename.
    print("Calculating party embeddings...")
    valid_parties_embeddings = calculate_party_embeddings(
        valid_parties,
        df_jobs,
        stat_cols,
        broken_jobs,
        equip_factor,
    )
    save_party_embeddings(party_embeddings_filename, valid_parties_embeddings)
stop = time()
print(f"Needed {stop-start} seconds.")

In [None]:
# Try selecting jobs
# Select five parties with the embedding-based selector and time the call.
# To compare against random selection, swap in the commented-out call below.
start = time()
selected_parties = select_parties_by_embeddings(
    valid_parties_embeddings,
    num_parties=5,
    eps=10.0,
)
# selected_parties = select_parties_randomly(valid_parties_embeddings, num_parties=5, eps=10.0)
stop = time()
print(f"Needed {stop-start} seconds.")

# Show each selection with its position; only the first element of each
# selected tuple is printed.
for idx, party_tuple in enumerate(selected_parties):
    print(f"{idx} {party_tuple[0]}")

In [None]:
# Try some trials
# Run 1000 selection trials of five parties each with the embedding-based selector.
trials = run_trials(
    valid_parties_embeddings,
    num_parties=5,
    num_trials=1000,
    eps=4.0,
    selector=select_parties_by_embeddings,
    verbose=True,
)

# Print the results (nicely)
# Off by default; set the flag to True to dump every trial. Each trial is
# unpacked as a (parties, matrix) pair.
print_trial_details = False
if print_trial_details:
    print("Mean of comparison matrices is")
    print(sum(trial[1] for trial in trials) / len(trials))
    for ps, m in trials:
        for p in ps:
            print(p)
        print(m)

In [None]:
# Write to output file
# Each trial's first element is joined with commas and split on commas again,
# so every output row is one flat list of comma-separated tokens.
output = [",".join(t[0]).split(",") for t in trials]
df_output = pd.DataFrame(output)

# The file name encodes the run style and whether duplicates were allowed.
output_path = (
    f"output/{run_style.lower()}_duplicates.csv"
    if duplicates
    else f"output/{run_style.lower()}.csv"
)
# Written with no index column and no header row.
df_output.to_csv(output_path, index=False, header=False)