In [1]:
import os
import re
from functools import reduce

import pandas as pd

import general_rulings_map

pd.set_option('display.max_rows', 500)

In [2]:
master = pd.read_excel('wingspan-card-list.xlsx', sheet_name='Birds')
bonus = pd.read_excel('wingspan-note-list.xlsx', sheet_name='Bonus')
goals = pd.read_excel('wingspan-card-list.xlsx', sheet_name='Goals')
note = pd.read_excel('wingspan-note-list.xlsx', sheet_name='Birds')
parameters = pd.read_excel('wingspan-note-list.xlsx', sheet_name='Parameters', index_col=0)

In [3]:
expansion_order = {
    'originalcore': 0,
    'swiftstart': 0,
    'core': 0,
    'european': 0,
    'oceania': 1,
    'asia': 2,
    'promoAsia': 3,
    'promoCA': 4,
    'promoEurope': 5,
    'promoNZ': 6,
    'promoUK': 7,
    'promoUS': 8
}

def sort_key(x):
    if x.name == 'Set':
        return x.map(expansion_order)
    return x

master.dropna(subset=['Common name'], inplace=True)
master.sort_values(by=['Set', 'Common name'], inplace=True, ignore_index=True, key=sort_key)
master['id'] = master.index + 2
master['Common name'] = master['Common name'].map(lambda s: s.strip())
master['Native name'] = note['Native name']
master['Note'] = note['Note']
master['Nest type'].loc[pd.isna(master['Nest type'])] = 'none'

bonus.sort_values(by=['Set', 'Bonus card'], inplace=True, ignore_index=True, key=sort_key)
bonus['id'] = bonus.index + 1000

goals.sort_values(by=['Set', 'Goal'], inplace=True, ignore_index=True, key=sort_key)
goals['id'] = goals.index + 2000

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  master['Nest type'].loc[pd.isna(master['Nest type'])] = 'none'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  

In [4]:
master_ids = reduce(lambda acc, val: {**acc, val[1]['Common name']: val[1]['id']}, master.iterrows(), {})
bonus_ids = reduce(lambda acc, val: {**acc, val[1]['Bonus card']: val[1]['id']}, bonus.iterrows(), {})
ids = {**master_ids, **bonus_ids}

def transform_links(link: str):
    name = re.findall(r' applink="/card/([^"]+)"', link)[0]
    return f' applink="/card/{ids[name]}"' if name in ids else ''

rulings = pd.read_csv('Wingspan - Rulings.tsv', sep='\t', header=None, names=['id', 'general', 'specific', 'text', 'source'])
rulings['text'] = rulings.text.map(lambda t: re.sub(r'\\textbf\{([^}]+)\}', r'<strong applink="/card/\1">\1</strong>', re.sub(r'\\textit\{([^}]+)\}', r'<i>\1</i>', re.sub(r"``", r'<i>"', re.sub(r"''", r'"</i>', t)))))
rulings['text'] = rulings.text.map(lambda text: reduce(lambda acc, val: acc.replace(val, transform_links(val)), [x.group() for x in re.finditer( r' applink="/card/([^"]+)"', text)], text))

general = rulings[~pd.isna(rulings['general'])].drop(['specific'], axis=1)
general['general'] = general.general.map(lambda t: re.sub(r'\$\\Rightarrow\$', '➔', t))
specific = rulings[~pd.isna(rulings['specific'])].drop(['general'], axis=1)
specific['specific'] = specific['specific'].map(lambda s: s.strip())
grouped = specific.groupby(by='specific').apply(lambda group: list(map(lambda t: {'text': t[0], 'source': t[1]}, zip(group['text'], group['source']))))

  grouped = specific.groupby(by='specific').apply(lambda group: list(map(lambda t: {'text': t[0], 'source': t[1]}, zip(group['text'], group['source']))))


In [5]:
vals =  list(master['Common name'].values) + list(bonus['Bonus card'].values)
list(filter(lambda g: g not in vals, grouped.index))

['Greater Prairie Chicken']

In [6]:
general_dict = reduce(lambda acc, val: {**acc, val: []}, list(master['Common name']) + list(bonus['Bonus card']), {})
rule_counts = {}

for i, rule in general.iterrows():
    if rule['id'] in general_rulings_map.rulings:
        rule_counter = 0
        for j, row in master.iterrows():
            if general_rulings_map.rulings[rule['id']](row):
                rule_counter += 1
                general_dict[master.loc[j, 'Common name']] += [{'id': rule['id'], 'text': rule['text'], 'source': rule['source']}]
        rule_counts[rule['id']] = rule_counter
    else:
        print(f'Rule {rule["id"]} not yet implemented')

for rules in general_dict.values():
    rules.sort(key=lambda key: rule_counts[key['id']])
    for rule in rules:
        del rule['id']

rule_counts

{'02c': 0,
 '02g': 63,
 '03a': 4,
 '20190122': 0,
 '20190205': 0,
 '20190313': 0,
 '20190601': 93,
 '20190908': 8,
 '20191010': 10,
 '20191202': 0,
 '20191203c': 0,
 '20200109a': 0,
 '20200208': 0,
 '2020022b': 4,
 '20200330': 0,
 '20200404': 413,
 '20200511': 8,
 '20200712': 0,
 '20200716a': 3,
 '20200716b': 14,
 '20201003': 0,
 '20201009': 0,
 '20201116a': 0,
 '20201117': 68,
 '20201211': 0,
 '20210101': 0,
 '20210199a': 0,
 '20210199b': 0,
 '20210206': 4,
 '20210318': 3}

In [7]:
general.reset_index(drop=True, inplace=True)
general.drop(['id'], axis=1, inplace=True)
general.columns = ['name', 'text', 'source']
master['rulings'] = master['Common name'].map(lambda name: grouped[name] if name in grouped else [])
master['additionalRulings'] = master['Common name'].map(lambda name: general_dict[name])
bonus['rulings'] = bonus['Bonus card'].map(lambda name: grouped[name] if name in grouped else [])
bonus['%'] = bonus['%'].map(lambda p: int(p) if type(p) == float else p)

In [8]:
master.sort_values(by='Common name', inplace=True)
bonus.sort_values(by='Bonus card', inplace=True)

In [9]:
data_dir = os.path.join((".." if os.getcwd().endswith('scripts') else "."), "src/assets/data")

master.to_json(os.path.join(data_dir, 'master.json'), orient='records', indent=2)
bonus.to_json(os.path.join(data_dir, 'bonus.json'), orient='records', indent=2)
general.to_json(os.path.join(data_dir, 'general.json'), orient='index', indent=2)
goals.to_json(os.path.join(data_dir, 'goals.json'), orient='records', indent=2)
parameters.to_json(os.path.join(data_dir, 'parameters.json'), orient='index', indent=2)