## To Do
* Compare to randomly generated parties
* Clean up the code
* Add more modes (Normal, Typhoon, Volcano, Duplicates, 750, no750, 375, Classic?, Onion?)
* Try NN based embeddings
* Make something available on the web (1st iteration, maybe with Google Forms or Sheets)
* Ping Discord Channel for feedback

In [1]:
from copy import copy
import numpy as np
import pandas as pd
from numpy.linalg import norm
from random import randint, seed
from time import time

from embeddings import calculate_job_embedding, calculate_stats_embedding
from generate_possible_jobs import generate_possible_jobs

seed(a=None)  # a=None seeds from OS randomness / current time, so party selection differs on every run

In [2]:
# Load the job table, indexed by its "Job" column.
df_raw = pd.read_csv("job_data_embeddings.csv", index_col="Job")
#print(df_raw)

# Stat columns that are passed to the party generator and embedding helpers.
# NOTE: the per-column normalization below is currently disabled (commented
# out), so df_jobs is an unmodified copy of df_raw.
stat_cols = ["Strength", "Agility", "Vitality", "Magic"]
df_jobs = df_raw.copy()
#for col in stat_cols:
#    df_jobs[col] = df_raw[col] / abs(df_raw[col]).max()
#print(df_jobs)

In [3]:
# Generate all possible parties
# Generate all possible parties for the "Meteor" style and report the
# wall-clock time the generation took.
start = time()
valid_parties = generate_possible_jobs(style="Meteor", df_jobs=df_jobs, stat_cols=stat_cols)
stop = time()
print(f"Needed {stop-start} seconds.")

Needed 14.025873899459839 seconds.


In [4]:
def calculate_embeddings(valid_parties, df_jobs, stat_cols, equip_factor=1.0):
    """Build one combined embedding vector for every valid party.

    Args:
        valid_parties: Sequence of 2-tuples whose first item is a
            comma-separated party string; the second item is ignored here.
        df_jobs: Job table handed through to the embedding helpers.
        stat_cols: Names of the stat columns in ``df_jobs``.
        equip_factor: Forwarded unchanged to ``calculate_job_embedding``.

    Returns:
        A list of ``(party_str, embedding)`` tuples. Each embedding is the
        stats embedding followed by the style/equipment embedding.
    """
    # Scale used by the stats embedding: four times the largest absolute value
    # of each stat column.
    # NOTE(review): presumably the factor 4 keeps the combined stats of a
    # four-member party within [-1, 1] -- confirm in calculate_stats_embedding.
    stat_scale = abs(df_jobs[stat_cols]).max() * 4.0

    results = []
    for position, (party_str, _) in enumerate(valid_parties):
        # Progress report every 10000 parties (including the very first one).
        if position % 10000 == 0:
            print(f"On {position} / {len(valid_parties)}")

        members = party_str.split(",")

        # Style/equipment part first, then the stats part.
        style_equip_part = calculate_job_embedding(members, df_jobs, stat_cols, equip_factor)
        stats_part = calculate_stats_embedding(members, df_jobs, stat_cols, stat_scale)

        # The stats part goes in front of the style/equipment part.
        combined = np.concatenate([stats_part, style_equip_part])
        results.append((party_str, combined))

    return results

def embeddings_to_dataframe(embeddings):
    """Convert ``(party_str, embedding)`` tuples into a DataFrame.

    A DataFrame is easier to save to and load from disk than a list of tuples.

    Args:
        embeddings: Iterable of tuples whose first item is the party string
            and whose second item is the embedding vector.

    Returns:
        A DataFrame with one row per tuple: the index holds the party strings
        and the columns hold the embedding components.
    """
    # Read from the argument rather than the notebook-global
    # ``valid_parties_embeddings`` so the function works for any input and
    # does not depend on hidden kernel state.
    pairs = list(embeddings)  # materialize once so generators are supported too
    return pd.DataFrame(
        data=[pair[1] for pair in pairs],
        index=[pair[0] for pair in pairs],
    )

def dataframe_to_tuple_array(df_embeddings):
    """Turn an embeddings DataFrame back into a list of ``(index, array)`` tuples.

    Each row becomes one tuple: the row's index label paired with the row's
    values as a NumPy array.
    """
    return [(label, values.to_numpy()) for label, values in df_embeddings.iterrows()]


In [5]:
# Value forwarded to calculate_embeddings as its equip_factor argument; it is
# also part of the output file name below.
equip_factor = 0.5

# Compute the embeddings for every valid party and persist them as CSV.
# This is the expensive step of the notebook (about 1033 s in the recorded run).
start = time()
valid_parties_embeddings = calculate_embeddings(valid_parties, df_jobs, stat_cols, equip_factor)
df_embeddings = embeddings_to_dataframe(valid_parties_embeddings)
df_embeddings.to_csv(f"embeddings_meteor_duplicates_eq{equip_factor}.csv")
stop = time()
print(f"Needed {stop-start} seconds.")

On 0 / 234256
On 10000 / 234256
On 20000 / 234256
On 30000 / 234256
On 40000 / 234256
On 50000 / 234256
On 60000 / 234256
On 70000 / 234256
On 80000 / 234256
On 90000 / 234256
On 100000 / 234256
On 110000 / 234256
On 120000 / 234256
On 130000 / 234256
On 140000 / 234256
On 150000 / 234256
On 160000 / 234256
On 170000 / 234256
On 180000 / 234256
On 190000 / 234256
On 200000 / 234256
On 210000 / 234256
On 220000 / 234256
On 230000 / 234256
Needed 1033.2380809783936 seconds.


In [6]:
# Sanity check: show the first (party string, embedding) tuple.
valid_parties_embeddings[0]

('Bard,Bard,Bard,Bard',
 array([-0.30769231,  0.5       , -0.34615385,  0.33333333,  0.        ,
         0.        ,  0.        ,  0.25      ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.        ,
         0.        ,  0.        ,  0.25      ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.        ,
         0.        ,  0.75      ,  0.        ,  0.5       ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.5       ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  1.   

In [None]:
# Export the embeddings again, this time to a file name without the
# equip_factor suffix (this cell has no execution count in the saved notebook).
df_embeddings = embeddings_to_dataframe(valid_parties_embeddings)
df_embeddings.to_csv("embeddings_meteor_duplicates.csv")

In [None]:
# Reload previously saved embeddings instead of recomputing them. The first
# CSV column is used as the index and becomes the first item of each tuple.
df_embeddings = pd.read_csv("embeddings_meteor_duplicates.csv", index_col=0)
valid_parties_embeddings = dataframe_to_tuple_array(df_embeddings)

In [7]:
# Number of parties that have an embedding.
len(valid_parties_embeddings)

234256

In [8]:
def select_jobs(valid_parties, num_parties=10, eps=1.0):
    """Randomly pick parties while keeping the picks apart in embedding space.

    After each pick, every remaining party whose embedding lies closer than
    ``eps`` (Euclidean distance) to the pick is set aside. When no candidate is
    left, ``eps`` is multiplied by 0.8 and the set-aside parties are re-checked
    against all picks so far, repeating until a candidate is available again.

    Args:
        valid_parties: List of ``(party_str, embedding)`` tuples. It is not
            modified; the function works on a shallow copy.
        num_parties: Number of parties to pick. Clamped (with a notice) to the
            number of valid parties.
        eps: Initial minimum distance between picks; it may shrink during the
            run as described above.

    Returns:
        The list of picked ``(party_str, embedding)`` tuples in pick order.
    """
    pool = copy(valid_parties)  # candidates that may still be picked
    chosen = []
    too_close = []  # candidates set aside for being near an earlier pick

    if num_parties > len(valid_parties):
        num_parties = len(valid_parties)
        print(f"Notice: num_parties was larger than the number of valid parties. Setting num_parties to {num_parties}.")

    for pick_number in range(num_parties):

        # Draw a random candidate and remove it in O(1) by overwriting its
        # slot with the last element and dropping the tail.
        slot = randint(0, len(pool) - 1)
        chosen.append(pool[slot])
        pool[slot] = pool[-1]
        pool.pop()

        # Set aside every candidate that is too close to the newest pick.
        near, pool = organize_parties(chosen[-1][1], pool, eps)
        too_close += near

        # Nothing left to relax once every valid party has been picked.
        if len(chosen) == len(valid_parties):
            continue

        # Make sure there are still candidates available. If not, shrink eps.
        while not pool:
            eps *= 0.8
            pool, too_close = too_close, []  # reconsider everything set aside

            print("Notice: Available parties are too close to selected parties.")
            print(f"Trying eps = {eps} for party {pick_number+1}")

            # Re-apply the smaller eps against every pick made so far.
            for picked in chosen:
                near, pool = organize_parties(picked[1], pool, eps)
                too_close += near

    return chosen
        

def organize_parties(chosen_party_embedding, available_parties, eps=1.0):
    """Split parties by their distance to a reference embedding.

    Args:
        chosen_party_embedding: Embedding every party is compared against.
        available_parties: Iterable of ``(party_id, embedding)`` tuples.
        eps: Distance threshold. A party counts as close when its Euclidean
            distance to the reference is strictly less than ``eps``.

    Returns:
        ``(close_parties, far_parties)``: two lists of ``(party_id, embedding)``
        tuples, each in the original order.
    """
    near, distant = [], []

    for name, vector in available_parties:
        gap = norm(chosen_party_embedding - vector, ord=2)
        bucket = near if gap < eps else distant
        bucket.append((name, vector))

    return near, distant

In [9]:
# Pick 5 parties, starting with a minimum distance of eps=10.0, and report
# the wall-clock time the selection took.
start = time()
selected_parties = select_jobs(valid_parties_embeddings, num_parties=5, eps=10.0)
stop = time()
print(f"Needed {stop-start} seconds.")

Notice: Available parties are too close to selected parties.
Trying eps = 8.0 for party 1
Notice: Available parties are too close to selected parties.
Trying eps = 6.4 for party 1
Notice: Available parties are too close to selected parties.
Trying eps = 5.120000000000001 for party 1
Notice: Available parties are too close to selected parties.
Trying eps = 4.096000000000001 for party 1
Notice: Available parties are too close to selected parties.
Trying eps = 3.276800000000001 for party 2
Notice: Available parties are too close to selected parties.
Trying eps = 2.621440000000001 for party 4
Needed 7.088461399078369 seconds.


In [10]:
# List the selected parties, one per line, prefixed with their position.
for idx, party_tuple in enumerate(selected_parties):
    print(f"{idx} {party_tuple[0]}")

0 Black Mage,Ninja,Thief,Thief
1 Freelancer,Monk,Monk,Berserker
2 Bard,Geomancer,Time Mage,Time Mage
3 Samurai,Monk,Monk,Dragoon
4 Knight,Bard,White Mage,Chemist


In [11]:
def generate_comparison_matrix(selected_parties):
    """Compute all pairwise Euclidean distances between party embeddings.

    Args:
        selected_parties: Sequence of ``(party_str, embedding)`` tuples.

    Returns:
        A square float array where entry ``[i, j]`` is the L2 distance between
        the embeddings of party ``i`` and party ``j``.
    """
    size = len(selected_parties)
    distances = np.zeros((size, size), dtype=float)

    for i, (_, left) in enumerate(selected_parties):
        for j, (_, right) in enumerate(selected_parties):
            distances[i, j] = norm(left - right, ord=2)

    return distances

In [12]:
def run_trials(valid_parties_embeddings, num_parties, num_trials, eps):
    """Run the party selection several times and collect the results.

    Args:
        valid_parties_embeddings: ``(party_str, embedding)`` tuples to pick from.
        num_parties: Number of parties to select in each trial.
        num_trials: Number of independent selections to run.
        eps: Initial minimum distance passed to ``select_jobs``.

    Returns:
        A list with one ``(party_strings, comparison_matrix)`` tuple per trial.
    """
    results = []
    for trial_no in range(num_trials):
        print(f"Trial {trial_no} #######")
        picks = select_jobs(valid_parties_embeddings, num_parties, eps)
        distances = generate_comparison_matrix(picks)
        party_names = [pick[0] for pick in picks]
        results.append((party_names, distances))
    return results

In [17]:
# Repeat the selection 5 times (5 parties each, starting at eps=3.0) and keep
# each trial's party list together with its pairwise distance matrix.
trials = run_trials(valid_parties_embeddings, num_parties=5, num_trials=5, eps=3.0)

Trial 0 #######
Notice: Available parties are too close to selected parties.
Trying eps = 2.4000000000000004 for party 4
Trial 1 #######
Notice: Available parties are too close to selected parties.
Trying eps = 2.4000000000000004 for party 5
Trial 2 #######
Notice: Available parties are too close to selected parties.
Trying eps = 2.4000000000000004 for party 5
Trial 3 #######
Notice: Available parties are too close to selected parties.
Trying eps = 2.4000000000000004 for party 5
Trial 4 #######
Notice: Available parties are too close to selected parties.
Trying eps = 2.4000000000000004 for party 4


In [18]:
# Show each trial's party list alongside its pairwise distance matrix.
trials

[(['Mime,Summoner,Berserker,White Mage',
   'Mime,Dragoon,Samurai,Dragoon',
   'Knight,Ninja,Berserker,Dragoon',
   'White Mage,Thief,Freelancer,Chemist',
   'Geomancer,Bard,Thief,Beastmaster'],
  array([[0.        , 3.04265368, 3.03942328, 3.364575  , 2.82537783],
         [3.04265368, 0.        , 3.24514628, 3.46657724, 2.61328598],
         [3.03942328, 3.24514628, 0.        , 3.06721928, 3.58164933],
         [3.364575  , 3.46657724, 3.06721928, 0.        , 3.65381455],
         [2.82537783, 2.61328598, 3.58164933, 3.65381455, 0.        ]])),
 (['Time Mage,Summoner,Dragoon,Berserker',
   'Beastmaster,Ranger,Ranger,Ranger',
   'Monk,Ranger,Knight,Summoner',
   'White Mage,Freelancer,Freelancer,Dragoon',
   'Ranger,Thief,Freelancer,Ninja'],
  array([[0.        , 3.04824685, 3.01508693, 3.71056897, 3.03144025],
         [3.04824685, 0.        , 3.48379969, 3.63454141, 3.23445306],
         [3.01508693, 3.48379969, 0.        , 3.33467909, 3.37793202],
         [3.71056897, 3.63454141, 

## Testing

In [None]:
# Spot-check embedding distances between hand-picked parties.
# These checks index valid_parties_embeddings, the list of
# (party_str, embedding) tuples; the previously used name `valid_jobs` is not
# defined anywhere in this notebook and raised a NameError on a fresh kernel.

# Bards vs physical
# NOTE(review): index 97145 is presumably a physical-heavy party -- confirm
# against the ordering returned by generate_possible_jobs.
print(norm(valid_parties_embeddings[0][1] - valid_parties_embeddings[97145][1], ord=2))

# Bards vs almost bards
print(norm(valid_parties_embeddings[0][1] - valid_parties_embeddings[1][1], ord=2))