## To Do
* Compare to randomly generated parties
* Clean up the code
* Add more modes (Normal, Typhoon, Volcano, Duplicates, 750, no750, 375, Classic?, Onion?)
* Try NN based embeddings
* Make something available on the web (1st iteration, maybe with Google Forms or Sheets)
* Ping Discord Channel for feedback

In [1]:
from copy import copy
import numpy as np
import pandas as pd
from numpy.linalg import norm
from random import randint, seed
from time import time

from data import load_data, load_party_embeddings, save_party_embeddings
from embeddings import calculate_party_embeddings
from generate_possible_jobs import generate_possible_jobs

# a=None seeds Python's `random` module from system time / OS entropy, so the
# random party selection below differs between runs. Pass an integer instead
# when a reproducible run is needed.
seed(a=None)  # Initialize the random seed

In [2]:
# Set some parameters
# equip_factor is passed to calculate_party_embeddings further down
# (presumably the weight given to equipment in the embedding -- TODO confirm).
equip_factor = 0.5
# True: recompute the party embeddings and save them to party_embeddings_filename.
# False: load previously saved embeddings from that file instead.
# The recorded run of the calculation took roughly 1015 seconds.
should_calculate_party_embeddings = True
# The file name encodes equip_factor, so each factor gets its own file.
party_embeddings_filename = f"data/embeddings_meteor_duplicates_eq{equip_factor}.csv"

In [3]:
# Load the job table and the list of stat columns through the project's
# load_data helper. The two commented-out lines appear to be the earlier
# inline version of the same step.
#df_jobs = pd.read_csv("job_data_embeddings.csv", index_col="Job")
#stat_cols = ["Strength", "Agility", "Vitality", "Magic"]
df_jobs, stat_cols = load_data("job_data_embeddings.csv")

In [4]:
# Generate all possible parties
# The call is timed with wall-clock time(); the recorded run needed about 10 seconds.
start = time()
valid_parties = generate_possible_jobs(style="Meteor", df_jobs=df_jobs, stat_cols=stat_cols)
stop = time()
print(f"Needed {stop-start} seconds.")

Needed 10.254088401794434 seconds.


In [5]:
# Calculate or load the party embeddings
# Controlled by should_calculate_party_embeddings: when True the embeddings are
# recomputed and written to party_embeddings_filename; otherwise they are read
# back from that file. Either way the elapsed wall-clock time is printed.
start = time()
if should_calculate_party_embeddings:
    print("Calculating party embeddings...")
    valid_parties_embeddings = calculate_party_embeddings(valid_parties, df_jobs, stat_cols, equip_factor)
    save_party_embeddings(party_embeddings_filename, valid_parties_embeddings)  # persist for later runs
else:
    print("Loading party embeddings...")
    valid_parties_embeddings = load_party_embeddings(party_embeddings_filename)
stop = time()
print(f"Needed {stop-start} seconds.")

Calculating party embeddings...
On 0 / 234256
On 10000 / 234256
On 20000 / 234256
On 30000 / 234256
On 40000 / 234256
On 50000 / 234256
On 60000 / 234256
On 70000 / 234256
On 80000 / 234256
On 90000 / 234256
On 100000 / 234256
On 110000 / 234256
On 120000 / 234256
On 130000 / 234256
On 140000 / 234256
On 150000 / 234256
On 160000 / 234256
On 170000 / 234256
On 180000 / 234256
On 190000 / 234256
On 200000 / 234256
On 210000 / 234256
On 220000 / 234256
On 230000 / 234256
Needed 1015.4792678356171 seconds.


In [None]:
# TODO: Continue refactoring from here!

In [None]:
def select_jobs(valid_parties, num_parties=10, eps=1.0):
    """Randomly pick parties whose embeddings are at least ``eps`` apart.

    Each pick is drawn with ``random.randint`` from the parties that are still
    available. After a pick, every remaining party that ``organize_parties``
    reports as closer than ``eps`` to it is set aside. If another pick is
    still needed but nothing is available, ``eps`` is multiplied by 0.8 and
    the set-aside parties are filtered again against all picks made so far;
    this repeats until at least one party is available.

    Args:
        valid_parties: Sequence of ``(party, embedding)`` pairs. It is not
            modified; the function works on a shallow copy.
        num_parties: Number of parties to pick. When larger than
            ``len(valid_parties)`` it is clamped and a notice is printed.
        eps: Initial minimum distance between picked parties.

    Returns:
        list: The picked ``(party, embedding)`` pairs in pick order.
    """
    available_parties = list(valid_parties)  # shallow copy; the caller's list stays untouched
    selected_parties = []
    unavailable_parties = []

    if num_parties > len(available_parties):
        num_parties = len(available_parties)
        print(f"Notice: num_parties was larger than the number of valid parties. Setting num_parties to {num_parties}.")

    for idx_party in range(num_parties):

        # Select a party; overwriting the chosen slot with the last element and
        # popping removes it without shifting the rest of the list.
        chosen_party_idx = randint(0, len(available_parties) - 1)
        chosen_party = available_parties[chosen_party_idx]
        available_parties[chosen_party_idx] = available_parties[-1]
        available_parties.pop()
        selected_parties.append(chosen_party)

        # Organize the available parties by whether they are close to the chosen party or not
        close_parties, available_parties = organize_parties(chosen_party[1], available_parties, eps)
        unavailable_parties += close_parties

        # Nothing more to pick: skip the relaxation below. Running it after the
        # final pick would only re-filter every set-aside party and print
        # notices about a party that is never selected.
        if len(selected_parties) == num_parties:
            break

        # Make sure there are still parties available. If not, decrease eps.
        # NOTE: if a remaining party's embedding is identical to a selected one,
        # this loop only ends once eps has shrunk all the way to 0.
        while len(available_parties) == 0:
            eps *= 0.8
            available_parties = unavailable_parties  # Make all remaining parties available again
            unavailable_parties = []

            print("Notice: Available parties are too close to selected parties.")
            print(f"Trying eps = {eps} for party {idx_party+1}")

            # Make parties unavailable again if they are too close to an already selected party
            for selected_party in selected_parties:
                close_parties, available_parties = organize_parties(selected_party[1], available_parties, eps)
                unavailable_parties += close_parties

    return selected_parties
        

def organize_parties(chosen_party_embedding, available_parties, eps=1.0):
    """Split parties into those near to and those far from a reference embedding.

    Args:
        chosen_party_embedding: Embedding that every party is compared with.
        available_parties: Iterable of ``(party, embedding)`` pairs.
        eps: Distance threshold. A party counts as close when the 2-norm of
            the difference between the two embeddings is strictly below it.

    Returns:
        tuple: ``(close_parties, far_parties)``, two lists of
        ``(party, embedding)`` pairs in their original relative order.
    """
    near, distant = [], []

    for label, vector in available_parties:
        gap = norm(chosen_party_embedding - vector, ord=2)
        bucket = near if gap < eps else distant
        bucket.append((label, vector))

    return near, distant

In [None]:
# Pick 5 parties starting from a minimum embedding distance of eps=10.0 and
# report how long the selection took (wall-clock time).
start = time()
selected_parties = select_jobs(valid_parties_embeddings, num_parties=5, eps=10.0)
stop = time()
print(f"Needed {stop-start} seconds.")

In [None]:
# List the selected parties: running index followed by the first element of
# each tuple (the second element is the embedding used for distance checks).
for idx, party_tuple in enumerate(selected_parties):
    print(f"{idx} {party_tuple[0]}")

In [None]:
def generate_comparison_matrix(selected_parties):
    """Build the matrix of pairwise embedding distances between parties.

    Args:
        selected_parties: Sequence of ``(party, embedding)`` pairs.

    Returns:
        numpy.ndarray: Float array with one row and one column per party.
        Entry ``[r][c]`` is the 2-norm of the difference between the
        embeddings of party ``r`` and party ``c``.
    """
    count = len(selected_parties)
    distances = np.zeros((count, count), dtype=float)

    vectors = [embedding for _, embedding in selected_parties]
    for r in range(count):
        for c in range(count):
            distances[r][c] = norm(vectors[r] - vectors[c], ord=2)

    return distances

In [None]:
def run_trials(valid_parties_embeddings, num_parties, num_trials, eps):
    """Repeat the party selection several times and collect the outcomes.

    Args:
        valid_parties_embeddings: ``(party, embedding)`` pairs handed to
            ``select_jobs`` unchanged on every trial.
        num_parties: Number of parties to select per trial.
        num_trials: Number of trials to run.
        eps: Initial distance threshold passed to ``select_jobs``.

    Returns:
        list: One ``(parties, comparison_matrix)`` tuple per trial, where
        ``parties`` holds the first element of each selected pair and
        ``comparison_matrix`` comes from ``generate_comparison_matrix``.
    """
    outcomes = []

    for trial_number in range(num_trials):
        print(f"Trial {trial_number} #######")

        picks = select_jobs(valid_parties_embeddings, num_parties, eps)
        distances = generate_comparison_matrix(picks)

        labels = [pick[0] for pick in picks]
        outcomes.append((labels, distances))

    return outcomes

In [None]:
# Run 5 independent selections of 5 parties each, every one starting at eps=3.0.
trials = run_trials(valid_parties_embeddings, num_parties=5, num_trials=5, eps=3.0)

In [None]:
# Show the collected results: per trial, the selected parties and their distance matrix.
trials

## Testing

In [None]:
# Bards vs physical
print(norm(valid_jobs[0][1] - valid_jobs[97145][1], ord=2))

# Bards vs almost bards
print(norm(valid_jobs[0][1] - valid_jobs[1][1], ord=2))