In [1]:
# TODO: account for the main stat of 5-star runes.
# Not a big deal, because false positives (runes flagged as good) are unlikely:
# they are dragged down by their low substats anyway.

In [2]:
import datetime

import numpy as np
import pandas as pd

In [3]:
# Per-substat weights applied to the normalised values further down:
# spd counts double, flat (non-percentage) stats count half.
weights = {
    'sub_acc': 1,
    'sub_res': 1,
    'sub_atkp': 1,
    'sub_atkf': 0.5,
    'sub_defp': 1,
    'sub_deff': 0.5,
    'sub_hpp': 1,
    'sub_hpf': 0.5,
    'sub_spd': 2,
    'sub_crate': 1,
    'sub_cdmg': 1,
}
# Variant that halves res / def / hp relative to `weights`
# (presumably meant for offensive builds -- confirm).
weights_off = {
    **weights,
    'sub_res': 0.5,
    'sub_defp': 0.5,
    'sub_deff': 0.25,
    'sub_hpp': 0.5,
    'sub_hpf': 0.25,
}
# Variant that currently carries exactly the same values as `weights`
# (presumably meant for defensive builds -- confirm). Separate dict object.
weights_def = dict(weights)

In [4]:
# Load the rune table; the input file is semicolon-delimited.
df = pd.read_csv('runes-data.csv', delimiter=';')

In [5]:
# Discard the columns that nothing below reads, then turn every '-' entry
# into NaN so missing values are represented uniformly.
df = (
    df
    .drop(columns=[
        's1_t', 's1_v', 's1_data',
        's2_t', 's2_v', 's2_data',
        's3_t', 's3_v', 's3_data',
        's4_t', 's4_v', 's4_data',
        'DT_RowId', 'id', 'unique_id', 'monster', 'originID',
        'originName', 'efficiency', 'max_efficiency', 'locked',
    ])
    .replace('-', np.nan)
)

In [6]:
# Snapshot of the column labels taken before any derived columns are added;
# the export cell reuses it to select these columns again.
cols_original = df.columns

In [7]:
# Convert each rune's inherent stat to a weighted efficiency value
# (author's note: 0-1): value / per-stat maximum * per-stat weight.
# Kept in its own column because these can't be increased like normal stats.

# Translates the labels found in the 'i_t' column to the substat column names.
inherent_label_to_sub_label = {
    'ACC': 'sub_acc',
    'RES': 'sub_res',
    'ATK%': 'sub_atkp',
    'ATK flat': 'sub_atkf',
    'DEF%': 'sub_defp',
    'DEF flat': 'sub_deff',
    'HP%': 'sub_hpp',
    'HP flat': 'sub_hpf',
    'SPD': 'sub_spd',
    'CRate': 'sub_crate',
    'CDmg': 'sub_cdmg',
}

# .map (rather than .replace) is the lookup operation: a label missing from
# the dict becomes NaN instead of silently staying a string, which would
# otherwise break the arithmetic below.
df['i_t_clean'] = df['i_t'].map(inherent_label_to_sub_label)

# Per-substat divisor used to normalise raw values.
substats_max = {
    'sub_acc': 8,
    'sub_res': 8,
    'sub_atkp': 8,
    'sub_atkf': 20,
    'sub_defp': 8,
    'sub_deff': 20,
    'sub_hpp': 8,
    'sub_hpf': 375,
    'sub_spd': 6,
    'sub_crate': 6,
    'sub_cdmg': 7,
}

# Rows without an inherent stat have NaN in i_t_clean and therefore NaN here.
# pd.to_numeric guards against i_v having been read as text.
df['inh_norm'] = (
    pd.to_numeric(df['i_v'])
    / df['i_t_clean'].map(substats_max)
    * df['i_t_clean'].map(weights)
)

In [8]:
# Convert substats to efficiency values (author's note: 0-5):
# value / per-stat maximum * per-stat weight, stored as '<substat>_norm'.
# `substats_max` is the dict defined in the inherent-stat cell; it is reused
# here rather than re-declared so the two cells cannot drift apart.
for label in substats_max:
    # Substat columns may have been read as text (they contained '-').
    df[label] = pd.to_numeric(df[label])
    df[label + '_norm'] = df[label] / substats_max[label] * weights[label]


In [9]:
# Columns that make up a rune's current score: every normalised substat
# plus the normalised inherent stat.
cols = [f'{label}_norm' for label in substats_max] + ['inh_norm']
# Row-wise total; NaN entries (stats the rune lacks) are skipped by sum().
df['sub_sum_norm'] = df[cols].sum(axis='columns')

In [10]:
# One power-up per 3 levels, capped at 4 in total (per the arithmetic below).
df['num_powerup_left'] = (4 - df['level'] // 3).clip(lower=0)
df['num_powerup_used'] = (df['level'] // 3).clip(upper=4)

# Number of power-ups that go into already-present substats, looked up from
# the rune quality. Qualities missing from the mapping come out as NaN.
df['num_powerup_incsub'] = df['quality'].map({'Unknown': 0, 'Rare': 2, 'Hero': 3, 'Legend': 4})

# Split the remaining power-ups into "raise an existing substat" and the rest.
df['num_powerup_incsub_left'] = (df['num_powerup_incsub'] - df['num_powerup_used']).clip(lower=0)
df['num_powerup_newsub_left'] = df['num_powerup_left'] - df['num_powerup_incsub_left']

In [11]:
# Potential from the rolls still to land on existing substats:
# when the rune has a spd substat, assume every such roll goes to spd
# (counted as 2 each); without one, count each roll as 1
# (i.e. assume it lands on a not-good stat).
# TODO this roll could possibly only go to a bad stat (as per norm), not a good one. account for this
df['sub_inc_max_norm'] = df['num_powerup_incsub_left'].mask(
    df['sub_spd'].notna(),
    df['num_powerup_incsub_left'] * 2,
)

# Potential from rolls that add a new substat: assume they all go to bad
# stats (even if spd is rollable, the chance of hitting it is low and not
# worth counting).
# TODO consider like max vs expected. this straddles the line somewhere
df['sub_new_max_norm'] = df['num_powerup_newsub_left']

In [12]:
# "Max" roll efficiency: current total plus the assumed potential from
# both kinds of remaining rolls.
cols = ['sub_sum_norm', 'sub_inc_max_norm', 'sub_new_max_norm']
df['tot_max_norm'] = df.loc[:, cols].sum(axis='columns')

In [13]:
# Helper columns for the exported table.
# True when the slot number is odd.
df['is_odd'] = (df['slot'] % 2) != 0

# True when the rune belongs to one of the sets listed here.
certain_set = ['Blade', 'Fatal', 'Rage', 'Swift', 'Violent', 'Will']
df['is_certain_set'] = df['set'].isin(certain_set)

# Three decimals are enough for the exported score.
df['tot_max_norm'] = df['tot_max_norm'].round(decimals=3)

In [14]:
# Do not truncate wide frames when they are displayed.
pd.set_option('display.max_columns', None)
df.head(5)
# e.g. to look at a single monster instead: df[df['monster_n']=='Okeanos']

Unnamed: 0,monster_n,ancient,set,slot,grade,level,m_t,m_v,i_t,i_v,sub_res,sub_cdmg,sub_atkf,sub_acc,sub_atkp,sub_defp,sub_deff,sub_hpp,sub_hpf,sub_spd,sub_crate,quality,i_t_clean,inh_norm,sub_acc_norm,sub_res_norm,sub_atkp_norm,sub_atkf_norm,sub_defp_norm,sub_deff_norm,sub_hpp_norm,sub_hpf_norm,sub_spd_norm,sub_crate_norm,sub_cdmg_norm,sub_sum_norm,num_powerup_left,num_powerup_used,num_powerup_incsub,num_powerup_incsub_left,num_powerup_newsub_left,sub_inc_max_norm,sub_new_max_norm,tot_max_norm,is_odd,is_certain_set
0,Dongbaek,True,Energy,1,6,9,ATK flat,94,,0,,13.0,,,,,,17.0,,12.0,,Hero,,,,,,,,,2.125,,4.0,,1.857143,7.982143,1,3,3.0,0.0,1.0,0.0,1.0,8.982,True,False
1,Dongbaek,True,Energy,2,6,15,DEF%,63,,0,,11.0,,,7.0,,,9.0,,17.0,,Hero,,,,,0.875,,,,1.125,,5.666667,,1.571429,9.238095,0,4,3.0,0.0,0.0,0.0,0.0,9.238,False,False
2,Inventory,False,Energy,2,5,15,SPD,39,HP%,4,13.0,5.0,,17.0,11.0,,,,,,,Legend,sub_hpp,0.5,2.125,1.625,1.375,,,,,,,,0.714286,6.339286,0,4,4.0,0.0,0.0,0.0,0.0,6.339,False,False
3,Dongbaek,True,Energy,3,6,12,DEF flat,118,,0,10.0,,,10.0,,28.0,,,,12.0,,Legend,,,1.25,1.25,,,3.5,,,,4.0,,,10.0,0,4,4.0,0.0,0.0,0.0,0.0,10.0,True,False
4,Inventory,False,Energy,3,6,12,DEF flat,118,,0,5.0,,,15.0,,13.0,,,,14.0,,Legend,,,1.875,0.625,,,1.625,,,,4.666667,,,8.791667,0,4,4.0,0.0,0.0,0.0,0.0,8.792,True,False


In [15]:
# Export the columns captured in cols_original followed by the computed
# score and helper columns.
cols_export = cols_original.append(
    pd.Index(['tot_max_norm', 'is_odd', 'is_certain_set', 'num_powerup_incsub_left'])
)
df_export = df.loc[:, cols_export]

In [16]:
# Stamp the output file name with the current Unix time in whole seconds,
# so runs made at different seconds write to different files.
# (`datetime` is imported in the import cell at the top of the notebook.)
ct = int(datetime.datetime.now().timestamp())
# NOTE(review): the file is written tab-separated although it is named
# .csv and the input was ';'-separated -- confirm this is intended.
df_export.to_csv(f'runes-data-out-{ct}.csv', sep='\t')

In [17]:
# TODO custom sort keys??