## To Do
* Add more modes (Typhoon, Volcano, 750, no750, 375, Classic, Onion)
* Try NN-based embeddings
* Make something available on the web (1st iteration, maybe with Google Forms or Sheets)
* Ping Discord Channel for feedback
* Do a proper analysis. Show means and stats for different styles.
* Give a proper write-up for how it all works

In [1]:
import os
from random import randint, seed
from time import perf_counter, time

import numpy as np
import pandas as pd
from numpy.linalg import norm

from data import load_data, load_party_embeddings, save_party_embeddings
from embeddings import calculate_party_embeddings
from experiment import run_trials
from generate_possible_jobs import generate_possible_parties
from select_parties import select_parties_by_embeddings, select_parties_randomly

# Seed Python's `random` module. With a=None the seed comes from the OS
# randomness source (or the current time as a fallback), so anything drawing
# from `random` differs between runs. This call does not seed NumPy's generator.
# NOTE(review): whether the imported project modules rely on `random` is not
# visible here — confirm before assuming this affects party selection.
seed(a=None)  # Initialize the random seed

In [2]:
# Notebook configuration. Every value below is read by later cells.
broken_jobs = ["Summoner", "Black Mage", "Chemist"]
should_calculate_party_embeddings = True
equip_factor = 0.5
duplicates = False
run_style = "Meteor"

# The embeddings file name encodes the lower-cased run style, an optional
# "_duplicates" marker (only when `duplicates` is set) and `equip_factor`.
party_embeddings_filename = (
    f"data/embeddings_{run_style.lower()}"
    + ("_duplicates" if duplicates else "")
    + f"_eq{equip_factor}.csv"
)

In [3]:
# Load data for each job
# The result of load_data is unpacked into two values, df_jobs and stat_cols;
# both are handed to calculate_party_embeddings further down, and df_jobs is
# also passed to generate_possible_parties.
# NOTE(review): df_jobs is presumably a DataFrame with one row per job and
# stat_cols the names of its stat columns — confirm against data.py.
df_jobs, stat_cols = load_data("data_jobs/job_data_embeddings.csv")

In [4]:
# Generate all possible parties
# Builds `valid_parties` for the configured run style and `duplicates` setting
# and reports how long the generation took.
# The interval is measured with perf_counter(): it is monotonic and
# high-resolution, whereas wall-clock time() can jump if the system clock is
# adjusted while the cell is running.
start = perf_counter()
valid_parties = generate_possible_parties(run=run_style, df_jobs=df_jobs, duplicates=duplicates)
stop = perf_counter()
print(f"Needed {stop-start} seconds.")

Needed 0.4166896343231201 seconds.


In [5]:
# Calculate or load the party embeddings
# When should_calculate_party_embeddings is set, the embeddings are recomputed
# and written to party_embeddings_filename; otherwise they are read back from
# that same file. The recorded run of the calculation branch took roughly
# 1282 seconds, so switch the flag off once the file exists.
# The interval is measured with perf_counter() (monotonic, high resolution)
# rather than wall-clock time(), which matters for a measurement this long.
start = perf_counter()
if should_calculate_party_embeddings:
    print("Calculating party embeddings...")
    valid_parties_embeddings = calculate_party_embeddings(
        valid_parties, df_jobs, stat_cols, broken_jobs, equip_factor
    )
    # Persist the result so a later run can take the loading branch instead.
    save_party_embeddings(party_embeddings_filename, valid_parties_embeddings)
else:
    print("Loading party embeddings...")
    valid_parties_embeddings = load_party_embeddings(party_embeddings_filename)
stop = perf_counter()
print(f"Needed {stop-start} seconds.")

Calculating party embeddings...
On 0 / 175560
On 10000 / 175560
On 20000 / 175560
On 30000 / 175560
On 40000 / 175560
On 50000 / 175560
On 60000 / 175560
On 70000 / 175560
On 80000 / 175560
On 90000 / 175560
On 100000 / 175560
On 110000 / 175560
On 120000 / 175560
On 130000 / 175560
On 140000 / 175560
On 150000 / 175560
On 160000 / 175560
On 170000 / 175560
Needed 1282.0336446762085 seconds.


In [6]:
# Try selecting jobs
# Picks five parties with the chosen selector and reports the elapsed time.
# `select_parties_randomly` is called with the same arguments, so assign it to
# `party_selector` instead to compare against a random selection.
party_selector = select_parties_by_embeddings
# perf_counter() is monotonic, so the interval is unaffected by clock changes.
start = perf_counter()
selected_parties = party_selector(valid_parties_embeddings, num_parties=5, eps=10.0)
stop = perf_counter()
print(f"Needed {stop-start} seconds.")

# Element 0 of each selected entry is the text printed for that party.
for idx, party_tuple in enumerate(selected_parties):
    print(f"{idx} {party_tuple[0]}")

Needed 12.001895904541016 seconds.
0 Geomancer,Freelancer,Blue Mage,Bard
1 Black Mage,Samurai,Berserker,Mime
2 Mystic Knight,Beastmaster,Ninja,Dancer
3 Red Mage,Dancer,Time Mage,Bard
4 Berserker,White Mage,Knight,Thief


In [None]:
# Try some trials
# Runs 1000 selections of 5 parties each with the embedding-based selector.
# NOTE(review): eps is 4.0 here but 10.0 in the single-selection cell —
# confirm the difference is intentional.
trials = run_trials(valid_parties_embeddings, num_parties=5, num_trials=1000, eps=4.0,
                    selector=select_parties_by_embeddings)

# Print the results (nicely)
# Disabled by default because it prints every trial; set the flag to True to
# inspect the individual runs.
show_trial_details = False
if show_trial_details:
    print("Mean of comparison matrices is")
    # Each trial unpacks into (parties, matrix); average the matrices over all trials.
    print(sum(matrix for _, matrix in trials) / len(trials))
    for parties, matrix in trials:
        for party in parties:
            print(party)
        print(matrix)

In [None]:
# Write to output file
# One CSV row per trial. The first element of each trial is comma-joined and
# split again, so every comma-separated item ends up in its own column.
# NOTE(review): those items are presumably the job names of the selected
# parties — confirm against run_trials.
output = [",".join(t[0]).split(",") for t in trials]
df_output = pd.DataFrame(output)

# Create the target directory first: to_csv raises if it is missing, which
# would otherwise discard the results of the trial run above.
os.makedirs("output", exist_ok=True)

# Same file names as before: "_duplicates" is appended only when duplicates is set.
output_suffix = "_duplicates" if duplicates else ""
df_output.to_csv(f"output/{run_style.lower()}{output_suffix}.csv", index=False, header=False)