# Scrape all uniques available from the Altered API

Unique cards are scraped from api.altered.gg.<br>
All non-unique cards are also scraped, using the [altered-scripts repo](https://github.com/chardetm/altered-scripts).<br>


In [1]:
import requests
import re
import polars as pl
# from lib.get_cards_data import get_cards_data   # github package
from dataclasses import dataclass
from dataclasses import field
from IPython.display import clear_output
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import math
# from fpdf import FPDF
from PIL import Image
from io import BytesIO
import numpy as np

We use a Polars DataFrame here because the dataset is quite large (280k+ entries).

In [2]:
# Empty, fully typed frame that the scraped cards are appended to.
# Column order matters: frames stacked onto it later must use the same layout.
_dataset_columns = [
    ('id', pl.Utf8),
    ('name', pl.Utf8),
    ('Mcost', pl.Int32),
    ('Rcost', pl.Int32),
    ('Fp', pl.Int32),
    ('Mp', pl.Int32),
    ('Op', pl.Int32),
    ('MAIN_EFFECT', pl.Utf8),
    ('ECHO_EFFECT', pl.Utf8),
    ('raw_value', pl.Float32),
    ('effect_value', pl.Float32),
    ('faction', pl.Utf8),
    ('rarity', pl.Utf8),
    ('type', pl.Utf8),
    ('subtypes', pl.Utf8),
    ('image_path', pl.Utf8),
]
dataset = pl.DataFrame(schema=dict(_dataset_columns))

## 1 loading all non-unique cards

In [None]:
# Scrape the non-unique cards through the altered-scripts helper (takes about 15 seconds).
# NOTE(review): the `get_cards_data` import at the top of the notebook is commented out,
# so this cell presumably raises NameError unless the helper is brought into scope
# some other way — confirm.
nonuniq_scrapped = get_cards_data(languages=['en'], include_ks=False)
# Drop whatever the call printed to the cell output.
clear_output()

In [None]:
def clean_text(input):
    """Normalise a raw card field into an int or a lower-cased string.

    Args:
        input: Raw value taken from a card record (string, int or anything else).

    Returns:
        * the value itself when ``input`` is an ``int``;
        * an ``int`` when ``input`` is a string that, once cleaned, is shorter
          than three characters and parses as an integer (costs and powers);
        * the cleaned, lower-cased string for every other string;
        * ``None`` for any other type.
    """
    if isinstance(input, int):
        return input
    if not isinstance(input, str):
        return None

    text = input
    # Strip the markup characters that wrap symbols and numbers.
    for char in '[]#()':
        text = text.replace(char, '')
    # Normalise typographic spaces to plain spaces. The original replace call
    # rendered as space -> space; presumably its first argument was a
    # non-breaking space, so both common variants are handled explicitly.
    for space in ('\xa0', '\u202f'):
        text = text.replace(space, ' ')

    if len(text) < 3:
        # Short strings are normally numbers, but must not crash when they are
        # not (empty string, "x", "no", ...): fall through to the text path.
        try:
            return int(text)
        except ValueError:
            pass
    return text.lower()

In [None]:
# Collect one plain dict per non-unique card, then build a single typed frame and
# append it to `dataset` once. Stacking a one-row frame and re-filling nulls on
# the whole dataset for every card makes the work grow quadratically.
card_schema = {
    'id': pl.Utf8,
    'name': pl.Utf8,
    'Mcost': pl.Int32,
    'Rcost': pl.Int32,
    'Fp': pl.Int32,
    'Mp': pl.Int32,
    'Op': pl.Int32,
    'MAIN_EFFECT': pl.Utf8,
    'ECHO_EFFECT': pl.Utf8,
    'raw_value': pl.Float32,
    'effect_value': pl.Float32,
    'faction': pl.Utf8,
    'rarity': pl.Utf8,
    'type': pl.Utf8,
    'subtypes': pl.Utf8,
    'image_path': pl.Utf8,
}

nonuniq_list = []
for value in nonuniq_scrapped[0].values():
    if '_A_' in value['id']:
        continue    # skipping all alternative arts

    elements = value['elements']
    nonuniq_list.append({
        'id': value['id'],
        'name': value['name']['en'],
        'Mcost': clean_text(elements['MAIN_COST']),
        'Rcost': clean_text(elements['RECALL_COST']),
        # powers may be absent from a card's elements; default to 0
        'Fp': clean_text(elements.get('FOREST_POWER', 0)),
        'Mp': clean_text(elements.get('MOUNTAIN_POWER', 0)),
        'Op': clean_text(elements.get('OCEAN_POWER', 0)),
        'MAIN_EFFECT': clean_text(elements['MAIN_EFFECT']['en']) if 'MAIN_EFFECT' in elements else None,
        'ECHO_EFFECT': clean_text(elements['ECHO_EFFECT']['en']) if 'ECHO_EFFECT' in elements else None,
        'raw_value': 0.0,  # Placeholder, will be calculated later
        'effect_value': 0.0,  # Placeholder, will be calculated later
        'faction': value['mainFaction'],
        'rarity': value['rarity'],
        'type': value['type'],
        'subtypes': ', '.join(value['subtypes']),
        'image_path': value['imagePath']['en'],
    })

if nonuniq_list:
    dataset = dataset.vstack(pl.DataFrame(nonuniq_list, schema=card_schema))

    # replace None with 0 in Fp, Mp, Op columns because View data shows float
    dataset = dataset.with_columns(
        pl.col('Fp', 'Mp', 'Op').fill_null(0).cast(pl.Int32)
    )


## 2 Loading all unique cards from the Altered API

### option 1 - load an already saved parquet file

In [3]:
# Replace `dataset` with a previously saved frame instead of scraping again
# (the path is resolved relative to the current working directory).
dataset = pl.read_parquet("../dataset_pimped.parquet")

### option 2 - scrape all the uniques available

In [None]:
def append_cards_to_list(entry_list, resp_dict):
    """Append one flat record per card of an API response page to ``entry_list``.

    Args:
        entry_list: List that receives the records; it is modified in place.
        resp_dict: Decoded API response; its ``'hydra:member'`` entry is iterated.

    Returns:
        The same ``entry_list`` object, for convenience.
    """
    for card in resp_dict['hydra:member']:
        elements = card['elements']
        # Effects are optional: a missing key becomes None.
        main_effect = elements.get('MAIN_EFFECT')
        echo_effect = elements.get('ECHO_EFFECT')

        record = {
            'id': card['reference'],
            'name': card['name'],
            'Mcost': clean_text(elements['MAIN_COST']),
            'Rcost': clean_text(elements['RECALL_COST']),
            'Fp': clean_text(elements['FOREST_POWER']),
            'Mp': clean_text(elements['MOUNTAIN_POWER']),
            'Op': clean_text(elements['OCEAN_POWER']),
            'MAIN_EFFECT': clean_text(main_effect),
            'ECHO_EFFECT': clean_text(echo_effect),
            # value columns are placeholders filled in by a later step
            'raw_value': 0.0,
            'effect_value': 0.0,
            'faction': card['mainFaction']['reference'],
            # rarity and type are hard-coded for every record built here
            'rarity': 'UNIQUE',
            'type': 'CHARACTER',
            'subtypes': None,
            'image_path': card['imagePath'],
        }
        entry_list.append(record)
    return entry_list

def scrap(name, faction):
    """Download every UNIQUE card called ``name`` in ``faction`` from api.altered.gg.

    The first page is fetched to learn the page size and the total number of
    items; the remaining pages are then fetched one by one.

    Args:
        name: Card name, sent through the ``translations.name`` query parameter.
        faction: Faction reference, sent through the ``factions[]`` query parameter.

    Returns:
        A ``(cards_df, cards_detail)`` tuple: a Polars DataFrame sorted by
        ``Mcost`` then ``Rcost`` with explicitly typed columns, and the list of
        row dicts it was built from. When the API returns no card, the frame is
        empty but still carries the typed columns.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If a request takes longer than the timeout.
    """
    schema = {
        'id': pl.Utf8,
        'name': pl.Utf8,
        'Mcost': pl.Int32,
        'Rcost': pl.Int32,
        'Fp': pl.Int32,
        'Mp': pl.Int32,
        'Op': pl.Int32,
        'MAIN_EFFECT': pl.Utf8,
        'ECHO_EFFECT': pl.Utf8,
        'raw_value': pl.Float32,
        'effect_value': pl.Float32,
        'faction': pl.Utf8,
        'rarity': pl.Utf8,
        'type': pl.Utf8,
        'subtypes': pl.Utf8,
        'image_path': pl.Utf8,
    }
    headers = {
        # 'Authorization': bearer_token,
        'Accept-Language': 'en-en',
    }

    def fetch_page(page):
        # One page of results, decoded from JSON; fails loudly on HTTP errors.
        url = f'https://api.altered.gg/cards?page={page}&factions%5B%5D={faction}&rarity%5B%5D=UNIQUE&translations.name="{name}"'
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()

    resp_dict = fetch_page(1)
    cards_detail = append_cards_to_list([], resp_dict)

    cards_per_page = len(resp_dict['hydra:member'])
    if cards_per_page == 0:
        # Nothing matched: avoid dividing by zero and sorting a column-less frame.
        return pl.DataFrame(schema=schema), cards_detail

    n_cards_tot = resp_dict['hydra:totalItems']
    # Ceiling division: an exact multiple of the page size needs no extra page.
    n_pages = -(-n_cards_tot // cards_per_page)
    for page in range(2, n_pages + 1):
        cards_detail = append_cards_to_list(cards_detail, fetch_page(page))

    # Create a Polars DataFrame from the extracted details
    cards_df = pl.DataFrame(cards_detail, infer_schema_length=None)
    cards_df = cards_df.sort('Mcost', 'Rcost', descending=[False, False])
    # Force every column to its expected dtype.
    cards_df = cards_df.with_columns(
        [pl.col(column).cast(dtype) for column, dtype in schema.items()]
    )

    return cards_df, cards_detail

Takes about 130 minutes! (About 17 minutes if it has already been run that day.)

In [None]:
# For every faction already present in the dataset, take each distinct character
# name and append the uniques that scrap() returns for it.
is_character = pl.col('type') == 'CHARACTER'
for faction in dataset['faction'].unique().sort():
    print(faction)
    in_faction = pl.col('faction') == faction
    fac_subset = dataset.filter(in_faction & is_character)
    character_names = fac_subset['name'].unique().sort()
    for name in character_names:
        name_df, _ = scrap(name, faction)
        dataset = pl.concat([dataset, name_df])
        print(f'   {name} ({name_df.height} scraped)')

# Write the grown dataset to a Parquet file
dataset.write_parquet("dataset.parquet")

## 3. Insights

In [None]:
def calculate_value(input: pl.DataFrame, hand_reserve='hand'):
    """Return a one-column frame ('value') with the mean power per unit of cost.

    The mean of the three powers (``Fp``, ``Mp``, ``Op``) is divided by a cost:

    * ``hand_reserve='hand'``: divided by ``Mcost``;
    * ``hand_reserve='reserve'``: divided by ``Rcost``;
    * any other value: the average of the two ratios above.
    """
    mean_power = (pl.col('Fp') + pl.col('Mp') + pl.col('Op')) / 3
    hand_ratio = input.select((mean_power / pl.col('Mcost')).alias('value'))
    reserve_ratio = input.select((mean_power / pl.col('Rcost')).alias('value'))

    if hand_reserve == 'hand':
        return hand_ratio
    if hand_reserve == 'reserve':
        return reserve_ratio
    return (hand_ratio + reserve_ratio) / 2

# calculate the raw value (power / cost) and save it
# NOTE(review): calculate_value() defaults to hand_reserve='hand', so 'raw_value'
# and 'raw_Hand_value' hold the same numbers here — confirm whether 'raw_value'
# was meant to be the hand/reserve average instead.
df_raw_val = calculate_value(dataset).rename({"value": "raw_value"})
df_raw_Hval = calculate_value(dataset, hand_reserve='hand').rename({"value": "raw_Hand_value"})
df_raw_Rval = calculate_value(dataset, hand_reserve='reserve').rename({"value": "raw_Reserve_value"})
# Attach the three series, rounded to two decimals, as columns of the dataset.
dataset = dataset.with_columns(
    df_raw_val['raw_value'].round(2),
    df_raw_Hval['raw_Hand_value'].round(2),
    df_raw_Rval['raw_Reserve_value'].round(2)
)
# Save the DataFrame to a Parquet file
dataset.write_parquet("dataset.parquet")

### Best Mana difference Unique vs Rare

Which characters benefit the most from a unique's lower mana or reserve cost compared to the rare version?

In [None]:
# For each faction (except 'NE') and each character name, compare the hand and
# reserve costs of every card with that name against the RARE version, and list
# the ids of the cards that cost at least 2 mana less than it.
for faction in dataset['faction'].unique().sort():
    if faction == 'NE':
        continue
    print(f'{faction}')
    fac_subset = dataset.filter((pl.col('faction') == faction) & (pl.col('type') == 'CHARACTER'))
    for name in fac_subset['name'].unique().sort():
        fac_name_sub = fac_subset.filter(pl.col('name') == name)
        rare = fac_name_sub.filter(pl.col('rarity') == 'RARE')
        if rare.is_empty():
            # no rare version in this faction: nothing to compare against
            continue
        # `.item()` raises unless there is exactly one rare row; when a name has
        # several rare rows, the first one is used as the reference.
        rare_Mc = rare['Mcost'][0]
        rare_Rc = rare['Rcost'][0]
        fac_name_sub = fac_name_sub.with_columns([
            (pl.col('Mcost') - rare_Mc).alias('Mcost_diff'),
            (pl.col('Rcost') - rare_Rc).alias('Rcost_diff')
        ])
        min_Mcost_diff = fac_name_sub['Mcost_diff'].min()
        # min() is None when every cost is null; None cannot be compared to -2
        if min_Mcost_diff is None or min_Mcost_diff > -2:
            continue
        fac_name_sub = fac_name_sub.filter(pl.col('Mcost_diff') <= -2)
        print(f"\t{name} ({rare_Mc}, {rare_Rc}) - Min Mcost_diff: {min_Mcost_diff} mana")
        fac_name_sub = fac_name_sub.sort('raw_value', descending=True)
        # ids of the (up to) five best raw values among the cheaper cards
        print(f"\t\t{list(fac_name_sub['id'][:5])}")
    print('\n')

### Real cost of the effects

First we need to know the mean raw value of cards with no effect (or with only a support effect).

In [None]:
# Reusable predicates for the three baselines below.
without_main = pl.col('MAIN_EFFECT').is_null()
without_echo = pl.col('ECHO_EFFECT').is_null()
with_echo = pl.col('ECHO_EFFECT').is_not_null()
rarity = pl.col('rarity')

# Baseline 1: rares and uniques with neither a main nor an echo effect.
no_main_effect_df = dataset.filter(
    without_main & without_echo & ((rarity == 'RARE') | (rarity == 'UNIQUE'))
)
mean_raw_value = no_main_effect_df['raw_value'].mean()
print(f"No main & support effect ({no_main_effect_df.shape[0]} rare) - mean raw value: {mean_raw_value:.2f}")

# Baseline 2: rares that only have an echo effect.
only_echo_effect_df = dataset.filter(without_main & with_echo & (rarity == 'RARE'))
print(f"Only support effect for Rares ({only_echo_effect_df.shape[0]} rares) - mean raw value: {only_echo_effect_df['raw_value'].mean():.2f}")

# Baseline 3: uniques that only have an echo effect.
only_echo_effect_df_wunique = dataset.filter(without_main & with_echo & (rarity == 'UNIQUE'))
print(f"Only support effect for Uniques ({only_echo_effect_df_wunique.shape[0]} uniques) - mean raw value: {only_echo_effect_df_wunique['raw_value'].mean():.2f}")

In [None]:
# For every trigger/effect pair, compare the mean raw value of the matching
# characters (rares, then uniques) with the no-effect baseline `mean_raw_value`.
triggers = [r'\{j\}', r'\{h\}', r'\{r\}']
effects = ['sabotage.', 'i gain 1 boost']
charracters_df = dataset.filter(pl.col('type') == 'CHARACTER')
for effect in effects:
    for trigger in triggers:
        # the pair is used as a regular expression, not as a literal string
        pattern = trigger + ' ' + effect
        matching_df = charracters_df.filter(pl.col('MAIN_EFFECT').str.contains(pattern, literal=False))

        subrare_df = matching_df.filter(pl.col('rarity') == 'RARE')
        if not subrare_df.is_empty():
            rare_mean = subrare_df['raw_value'].mean()
            print(f"{pattern} ({subrare_df.shape[0]} rares) - mean raw value: {rare_mean:.2f}, real effect cost {mean_raw_value - rare_mean:.2f}")

        subunique_df = matching_df.filter(pl.col('rarity') == 'UNIQUE')
        if not subunique_df.is_empty():
            unique_mean = subunique_df['raw_value'].mean()
            print(f"{pattern} ({subunique_df.shape[0]} uniques) - mean raw value: {unique_mean:.2f}")

        print('\n')

## 4. Filter uniques

add raw value to all_unique_df

In [6]:
# Interactive filter over all characters: the active predicates are combined
# with AND; the commented-out regions are alternative presets to toggle by hand.
charracters_df = dataset.filter(pl.col('type') == 'CHARACTER')
filtered_df = charracters_df.filter(
    # region LYRA
    (pl.col('faction') == 'LY'),
    
    # region - Hydracaena ---------------------------------------
    # pl.col('name').str.contains('Hydracaena', literal=False),
    # ~pl.col('MAIN_EFFECT').str.contains(r'if', literal=False),
    # pl.col('MAIN_EFFECT').str.contains(r'draw', literal=False),
    # pl.col('Mcost') <= 7, pl.col('Rcost') <= 7,
    # endregion -------------------------------------------------

    # region return cards to hand -------------------------------
    # pl.col('MAIN_EFFECT').str.contains(r'\{j\} you may return a card.*from your reserve to your hand', literal=False),
    # ~pl.col('name').str.contains('Ouroboros Inkcaster', literal=False),
    # endregion -------------------------------------------------

    # region Big raw value with support effect -1 mana --------------------------
    # pl.col('ECHO_EFFECT').str.contains(r'the next card you play this afternoon costs \{1\} less', literal=False),
    # pl.col('MAIN_EFFECT').str.contains(r'draw a card', literal=False),
    # endregion - ---------------------------------------------------------------
    
    # region draw  -------------------------------------------------------
    # ((pl.col('Fp') == 0) + (pl.col('Mp') == 0) + (pl.col('Op') == 0)) >= 2,
    # pl.col('MAIN_EFFECT').str.contains(r'if there are three or more base statistics of 0 among characters you control: draw a card', literal=False),
    # pl.col('MAIN_EFFECT').str.contains(r'draw a card', literal=False),
    # pl.col('MAIN_EFFECT').str.contains(r'anchored', literal=False),
    # pl.col('MAIN_EFFECT').str.contains(r'(when i leave the expedition zone|at dusk) — draw a card', literal=False),
    # (~pl.col('name').str.contains('Ouroboros Inkcaster', literal=False)),
    #endregion -------------------------------------------------------------------

    # region kodama that falls asleep and draws at dusk (or anchored?)
    (pl.col('MAIN_EFFECT').str.contains(r'i gain asleep.*at dusk.*draw', literal=False)),
    # (pl.col('MAIN_EFFECT').str.contains(r'i gain anchored.*at dusk.*draw', literal=False)),     # too expensive
    #endregion
    
    #endregion

    # region Axiom
    # pl.col('faction') == 'AX',
    # region - hand -> reserve --> reserve -> hand
    # pl.col('MAIN_EFFECT').str.contains(r'\{j\} you may put a card from your hand in reserve. if you do: you may return a card from your reserve to your hand', literal=False),
    # endregion
    # region - hand -> reserve --> reserve -> hand
    # pl.col('MAIN_EFFECT').str.contains(r'i gain asleep.*at dusk.*draw', literal=False),
    # pl.col('MAIN_EFFECT').str.contains(r'a card goes from your hand or deck to reserve.*draw a card', literal=False),
    # endregion
    # endregion
    
    # region GENERAL
    # pl.col('MAIN_EFFECT').str.contains(r'draw three cards', literal=False),
    # endregion

)

# filtered_df = filtered_df.filter(pl.col('Mcost') <= 1)
# NOTE(review): no visible cell creates a 'raw_HRvalue' column; presumably it is
# already stored in the parquet file loaded earlier — confirm.
filtered_df = filtered_df.sort(['raw_HRvalue', 'Mcost'], descending=[True, False])
# filtered_df = filtered_df.sort(['raw_Hand_value', 'Mcost'], descending=[True, False])
# share of all characters that passed the filter, followed by the absolute count
print(f'{100 * filtered_df.shape[0] / charracters_df.shape[0]:.2f}% ({filtered_df.shape[0]})')

0.01% (29)


## 5. Filter Non uniques

In [3]:
def get_image_by_id(card_id):
    """Download and open the image of the card whose id is ``card_id``.

    The card is looked up in the module-level ``dataset`` frame and its
    ``image_path`` value is fetched over HTTP.

    Args:
        card_id: Value to match against the ``id`` column.

    Returns:
        The opened ``PIL.Image.Image``.

    Raises:
        IndexError: If no row of ``dataset`` has this id.
        requests.HTTPError: If the image request returns an error status.
        requests.Timeout: If the image request takes longer than the timeout.
    """
    matches = dataset.filter(pl.col('id') == card_id)
    if matches.is_empty():
        # same exception type as indexing an empty result, with a clear message
        raise IndexError(f'no card with id {card_id!r} in dataset')
    image_url = matches['image_path'][0]

    response = requests.get(image_url, timeout=30)
    # fail here instead of letting PIL choke on an error page
    response.raise_for_status()
    return Image.open(BytesIO(response.content))

def plot_all_cards(filtered_df, n_col=4):
    """Show the image of every card of ``filtered_df`` in a grid.

    Args:
        filtered_df: Frame with an ``id`` column; one image is drawn per row,
            in row order.
        n_col: Number of columns of the grid.

    Returns:
        None. The figure is displayed with ``plt.show()``. Nothing is drawn
        when ``filtered_df`` has no rows.
    """
    card_ids = filtered_df['id']
    if len(card_ids) == 0:
        # a grid with zero rows cannot be created
        return

    n_row = math.ceil(len(card_ids) / n_col)
    # squeeze=False always returns a 2-D array of Axes, so flatten() also works
    # for a single row, a single column or a 1x1 grid.
    fig, axes = plt.subplots(n_row, n_col, figsize=(n_col * 3, n_row * 4), squeeze=False)
    axes = axes.flatten()

    for ax, card_id in zip(axes, card_ids):
        img = get_image_by_id(card_id)
        img = img.convert('RGB')
        ax.imshow(np.array(img))

    # Hide the frame and ticks of every cell, including the unused ones
    for ax in axes:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [4]:
# Interactive filter over the non-unique cards: the active predicates are
# combined with AND; the commented-out regions are presets to toggle by hand.
nonunique_df = dataset.filter(pl.col('rarity') != 'UNIQUE')
nonunique_df = nonunique_df.with_columns([  # remove all text between parenthesis
    pl.col("MAIN_EFFECT").str.replace_all(r'\(.*?\)', '', literal=False),
    pl.col("ECHO_EFFECT").str.replace_all(r'\(.*?\)', '', literal=False)
])
filtered_df = nonunique_df.filter(
    pl.col('faction') == 'LY',
    
    # region - CONTROL EFFECTS
    # pl.col('MAIN_EFFECT').str.contains(r'sabotage', literal=False) |
    # pl.col('MAIN_EFFECT').str.contains(r'you may exhaust.*?target card', literal=False) |
    # pl.col('MAIN_EFFECT').str.contains(r'send (target)?.*?to reserve', literal=False) |
    # pl.col('MAIN_EFFECT').str.contains(r'discard target', literal=False) |
    # pl.col('MAIN_EFFECT').str.contains(r'player discards', literal=False) |
    # pl.col('MAIN_EFFECT').str.contains(r'switch(s)? expedition(s)?', literal=False) |
    # pl.col('MAIN_EFFECT').str.contains(r'(target|it).*?gain(s)? (asleep|fleeting)', literal=False) |
    # pl.col('MAIN_EFFECT').str.contains(r'return target', literal=False),
    # endregion  - CONTROL EFFECTS

    # region - CARD ADVANTAGE EFFECTS
    # pl.col('MAIN_EFFECT').str.contains(r'draw', literal=False) |
    # pl.col('MAIN_EFFECT').str.contains(r'resupply', literal=False) |    # also work for exhausted resupply
    # pl.col('ECHO_EFFECT').str.contains(r'resupply', literal=False) |    # also work for exhausted resupply
    # pl.col('MAIN_EFFECT').str.contains(r'gain(s)? anchored', literal=False) |
    # pl.col('ECHO_EFFECT').str.contains(r'gain(s)? anchored', literal=False) |
    # pl.col('MAIN_EFFECT').str.contains(r'you may return.*from your reserve to your hand', literal=False),
    # endregion - CARD ADVANTAGE EFFECTS

    # pl.col('MAIN_EFFECT').str.contains(r'roll.*(dice|die)', literal=False)

)

# filtered_df = filtered_df.filter(pl.col('Mcost') <= 1)
# NOTE(review): the output recorded under this cell is "ColumnNotFoundError: raw_value",
# so the loaded dataset presumably lacked that column when the cell was run —
# confirm the raw-value cell has been executed first.
filtered_df = filtered_df.sort(['raw_value', 'Mcost'], descending=[True, False])
# filtered_df = filtered_df.sort(['raw_Hand_value', 'Mcost'], descending=[True, False])
# share of all non-unique cards that passed the filter, followed by the absolute count
print(f'{100 * filtered_df.shape[0] / nonunique_df.shape[0]:.2f} % ({filtered_df.shape[0]})')


ColumnNotFoundError: raw_value

Resolved plan until failure:

	---> FAILED HERE RESOLVING 'sink' <---
DF ["id", "name", "Mcost", "Rcost", ...]; PROJECT */17 COLUMNS

In [None]:
# Display the images of the currently filtered cards, six per row.
plot_all_cards(filtered_df, n_col=6)

In [None]:
# Same call as the previous cell: shows whatever `filtered_df` currently holds.
plot_all_cards(filtered_df, n_col=6)

## 6. Value effects text analyser (Retro engineering of unique text creation)

Automatically value cards using their text.

### 0. separate effects (if more than 1 effect present)

In [None]:
def segregate_effects(text):
    """Split an effect text into its individual effects.

    The text is cut on ``'. '``. A sentence that continues the previous one
    (it contains ``'if you do:'`` or ``"if it's"``, or starts with
    ``'you may'`` or ``'on a'``) is glued back onto the previous effect,
    including everything already glued to it.

    Args:
        text: Full effect text of a card.

    Returns:
        List of effect strings, in their original order.
    """
    continuation_prefixes = ('you may', 'on a')
    out_lst = []
    for sentence in text.split('. '):
        is_continuation = (
            'if you do:' in sentence
            or "if it's" in sentence
            or sentence.startswith(continuation_prefixes)
        )
        # Only merge when there is a previous effect: a continuation sentence in
        # first position used to wrap around to the last sentence and pop from an
        # empty list.
        if is_continuation and out_lst:
            # extend the already merged effect, so chained continuations do not
            # drop the text that precedes them
            out_lst[-1] = out_lst[-1] + '. ' + sentence
        else:
            out_lst.append(sentence)
    return out_lst

# Work on the UNIQUE characters only
characters_df = dataset.filter((pl.col('type') == 'CHARACTER') & (pl.col('rarity') == 'UNIQUE'))
# we take a sample of 10 rows to debug our system
sampled_df = characters_df.sample(n=10, seed=42) # seed=1337 / seed=42

sampled_df = sampled_df.with_columns(
    # we create a new column with the list of effects (sometimes there are multiple effects in the same string)
    pl.col("MAIN_EFFECT").map_elements(segregate_effects, return_dtype=pl.List(pl.Utf8)).alias('effects_list')
)
# last expression of the cell: displays the new column
sampled_df['effects_list']

apply this function on all the dataset

In [None]:
# Same split as above, applied to every UNIQUE character instead of a sample
characters_df = dataset.filter((pl.col('type') == 'CHARACTER') & (pl.col('rarity') == 'UNIQUE'))
characters_df = characters_df.with_columns(
    # we create a new column with the list of effects (sometimes there are multiple effects in the same string)
    pl.col("MAIN_EFFECT").map_elements(segregate_effects, return_dtype=pl.List(pl.Utf8)).alias('effects_list')
)

see another 10 examples

In [None]:
# Print ten sampled uniques: a header line per card, then one line per effect
sampled_df = characters_df.sample(n=10, seed=1234)
for row in sampled_df.to_dicts():
    header = f"{row['name']} ({row['faction']}) {row['Mcost']}/{row['Rcost']} - {row['Fp']}/{row['Mp']}/{row['Op']} [{row['id']}]"
    print(header)
    for effect in row['effects_list']:
        print(f"\t{effect}")

cards with the most text in it

In [None]:
# Add a new column with the length of the MAIN_EFFECT text
characters_df = characters_df.with_columns(
    pl.col("MAIN_EFFECT").str.len_chars().alias("effect_length")
)

# Sort the DataFrame by the length of the MAIN_EFFECT text in descending order and select the top 5
top_5_cards = characters_df.sort("effect_length", descending=False).tail(5)

# Display the top 5 cards
for row in top_5_cards.iter_rows(named=True):
    print(f"{row['name']} ({row['faction']}) {row['Mcost']}/{row['Rcost']} - {row['Fp']}/{row['Mp']}/{row['Op']} ({row['effect_length']}) [{row['id']}]")
    for effect in row['effects_list']:
        print(f"\t{effect}")

### 1. extract all uniques effects and conditions

#### Conditions (when, if)

In [None]:
# Starting from the new column with the list of effects
uniques_df = dataset.filter((pl.col('type') == 'CHARACTER') & (pl.col('rarity') == 'UNIQUE'))
uniques_df = uniques_df.with_columns(
    # we create a new column with the list of effects (up to 3 effects by card)
    pl.col("MAIN_EFFECT").map_elements(segregate_effects, return_dtype=pl.List(pl.Utf8)).alias('effects_list')
)

# Iterate through all rows in uniques_df
all_conditions = []

# for row in uniques_df.sample(n=20, seed=1773).iter_rows(named=True):
for row in uniques_df.iter_rows(named=True):
    effects_list = row['effects_list']
    if effects_list is None:
        continue
    for effect in effects_list:
        # Extract conditions from each effect
        # print('\n', effect)
        found = re.findall(r'(you may|if) (.*?)(?::)', effect)
        if found:
            for f in found:
                txt = f[0] + ' ' + f[1]
                all_conditions.append(txt)
                # print(f"\t{f[0]} {f[1]}")
        
        found = re.findall(r'(when) (.*?)(?:—)', effect)
        if found:
            for f in found:
                txt = f[0] + ' ' + f[1]
                all_conditions.append(txt)
                # print(f"\t{f[0]} {f[1]}")
        
        found = re.findall(r'(unless) (.*?)(?::)', effect)
        if found:
            for f in found:
                txt = f[0] + ' ' + f[1]
                all_conditions.append(txt)
                # print(f"\t{f[0]} {f[1]}")

        found = re.findall(r'— (.*?)(?::)', effect)
        if found:
            for f in found:
                txt = f[0] + ' ' + f[1]
                all_conditions.append(txt)
                # print(f"\t{f[0]} {f[1]}")

# Get unique values from all_conditions list
unique_conditions = sorted(list(set(all_conditions)))
unique_conditions

# add:
# {j} / {h} / {r}
# 'at dusk —', 'at noon —', 'after rest',
# roll a die. on a 4+:

#### Effects

In [None]:
# Starting from the new column with the list of effects
uniques_df = dataset.filter((pl.col('type') == 'CHARACTER') & (pl.col('rarity') == 'UNIQUE'))
uniques_df = uniques_df.with_columns(
    # we create a new column with the list of effects (up to 3 effects by card)
    pl.col("MAIN_EFFECT").map_elements(segregate_effects, return_dtype=pl.List(pl.Utf8)).alias('effects_list')
)

# Iterate through all rows in uniques_df
all_effects = []

# for row in uniques_df.sample(n=20, seed=1773).iter_rows(named=True):
for row in uniques_df.iter_rows(named=True):
    effects_list = row['effects_list']
    if effects_list is None:
        continue
    for effect in effects_list:
        # Extract effects from each effect
        # print('\n', effect)
        found = re.findall(r'(\{|:) (.*?)(?:\.)', effect)
        if found:
            for f in found:
                txt = f[1]
                all_effects.append(txt)
                # print(f"\t{f[1]}")

# Get unique values from all_effects list
unique_effects = sorted(list(set(all_effects)))
unique_effects


In [None]:
# Condition phrases that can introduce an effect: triggers, timing markers,
# 'if' / 'unless' / 'when' clauses and 'you may ... if you do' costs.
# Most 'when ...' entries end with a trailing space and most entries carry no
# final ':'; keep them byte-for-byte if they are matched against card text.
# NOTE(review): presumably curated from the extraction cells above — confirm.
CONDITIONS = [
    '{j}',
    '{h}',
    '{r}',
    'at dusk —',
    'at noon —',
    'after rest',
    'roll a die. on a 4+:',
    'if each of your expeditions is behind or tied',
    'if i have 1 or more boosts',
    'if i have 2 or more boosts',
    'if i have 3 or more boosts',
    "if i'm in {m}",
    "if i'm in {v}",
    "if i'm not fleeting",
    "if i'm the only character in my expedition",
    'if my expedition is behind',
    "if there are no characters in the expedition i'm played in",
    'if there are three or more base statistics of 0 among characters you control',
    'if there are two or more exhausted cards in reserve',
    'if you control a fleeting character',
    'if you control a token',
    'if you control four or more characters',
    'if you control one or more landmarks',
    'if you control three or more characters',
    'if you control two or more boosted characters',
    'if you control two or more landmarks',
    'if you control two or more permanents',
    'if you control two or more plants other than me',
    'if you have ten or more mana orbs',
    'if you have two or more cards in reserve',
    'if your companion expedition is behind',
    'if your hand is empty',
    'if your hero expedition is behind',
    'if your reserve is empty',
    "unless i'm in {o}",
    'unless you control two or more bureaucrats other than me',
    'unless you control two or more landmarks',
    'unless you control two or more plants other than me',
    'unless you have eight or more mana orbs',
    'when a card goes from your hand or deck to reserve ',
    'when a card leaves your reserve during the afternoon ',
    'when a character you control gains 1 or more boosts ',
    'when an opponent draws one or more cards or does resupply ',
    'when another character joins my expedition ',
    'when another character joins your expeditions ',
    'when another character you control gains fleeting ',
    'when another non-token character joins one of your expeditions that is behind ',
    'when another non-token character joins your expeditions ',
    'when another robot joins your expeditions ',
    'when i gain 1 or more boosts ',
    'when i gain asleep ',
    'when i go to reserve from the expedition zone ',
    'when i go to reserve from your hand ',
    'when i leave the expedition zone ',
    'when i leave the expedition zone, if i was fleeting ',
    "when i'm sacrificed ",
    'when my expedition fails to move forward during dusk ',
    'when my expedition moves forward ',
    'when my expedition moves forward due to {v} ',
    'when one of your expeditions moves forward due to {v} ',
    'when you create a token ',
    'when you exhaust a card in reserve ',
    'when you play a permanent ',
    'when you play a permanent with hand cost {3} or more ',
    'when you play a spell ',
    'when you play another character in {v} ',
    'when you play another character with a base statistic of 0 ',
    'when you roll a 1-3 this way ',
    'when you roll one or more dice ',
    'you may discard a card from your reserve. if you do',
    'you may discard a character from your reserve. if you do',
    'you may discard a spell from your reserve. if you do',
    'you may discard one of your mana orbs. if you do',
    'you may have me gain fleeting. if you do',
    'you may pay {1}. if you do',
    "you may put a card from your hand in reserve. if it's a permanent",
    "you may put a card from your hand in reserve. if it's a spell",
    'you may put a card from your hand in reserve. if you do',
    'you may ready an exhausted card in reserve. if you do',
    'you may sacrifice a character or permanent. if you do',
    'you may sacrifice a character. if you do',
    'you may sacrifice a permanent. if you do',
    'you may sacrifice another robot or permanent. if you do',
    'you may sacrifice me. if you do'
]

EFFECTS = [
    'all characters in target expedition gain asleep',
    'any number of target characters in {v} gain 2 boosts',
    'cards other than me cost {1} more to play from reserve',
    "cards your opponents play can't cost less than {2}",
    'characters you control gain 1 boost',
    "characters your opponents play can't cost less than {2}",
    'create a brassbug 2/2/2 robot token in each of your expeditions',
    'create a brassbug 2/2/2 robot token in my expedition',
    'create a brassbug 2/2/2 robot token in target expedition',
    'create a brassbug 2/2/2 robot token in your companion expedition',
    'create a brassbug 2/2/2 robot token in your hero expedition',
    "create a brassbug 2/2/2 robot token in your other expedition the one i'm not in",
    'create a mana moth 2/2/2 illusion token in each of your expeditions',
    'create a mana moth 2/2/2 illusion token in my expedition',
    'create a mana moth 2/2/2 illusion token in target expedition',
    'create a mana moth 2/2/2 illusion token in your companion expedition',
    'create a mana moth 2/2/2 illusion token in your hero expedition',
    "create a mana moth 2/2/2 illusion token in your other expedition the one i'm not in",
    'create an ordis recruit 1/1/1 soldier token in each of your expeditions',
    'create an ordis recruit 1/1/1 soldier token in each of your expeditions, otherwise create one in my expedition',
    'create an ordis recruit 1/1/1 soldier token in my expedition',
    'create an ordis recruit 1/1/1 soldier token in target expedition',
    'create an ordis recruit 1/1/1 soldier token in your companion expedition',
    'create an ordis recruit 1/1/1 soldier token in your hero expedition',
    "create an ordis recruit 1/1/1 soldier token in your other expedition the one i'm not in",
    'create four ordis recruit 1/1/1 soldier tokens, distributed among any expeditions',
    'create two brassbug 2/2/2 robot tokens in target expedition, otherwise create only one',
    'create two ordis recruit 1/1/1 soldier tokens in each of your expeditions',
    'create two ordis recruit 1/1/1 soldier tokens in my expedition',
    "defender characters don't prevent my expedition from moving forward",
    'draw a card',
    'draw a card, otherwise resupply',
    'draw three cards',
    'draw two cards',
    'each character controlled by target player gains fleeting',
    'each character in target expedition gains 1 boost',
    'each character you control other than me gains 1 boost',
    'each character you control other than me is tough 1',
    'each character you control other than me is tough 2',
    'each player discards their hand and their reserve, then draws three cards',
    'each player discards their hand, then draws three cards',
    'each player draws a card',
    'each player may put a card from their hand in reserve',
    'each player may resupply',
    'each player puts the top card of their deck in their mana zone as an exhausted mana orb',
    'each player sacrifices a character',
    'each robot you control other than me gains 1 boost',
    'each token you control is gigantic',
    'exhaust up to two cards in reserve',
    'exhausted resupply',
    'for each character you control other than me, you may activate its {j} abilities',
    'for each permanent you control, you may activate its {j} abilities',
    'i am defender',
    'i am eternal',
    'i am tough 1',
    'i am tough 2',
    'i cost {1} less',
    'i gain 1 boost',
    'i gain 1 boost and fleeting',
    "i gain 1 boost per card in each player's reserve",
    'i gain 1 boost per card in your reserve',
    'i gain 1 boost per expedition in {v}',
    'i gain 1 boost per fleeting character you control',
    'i gain 1 boost per landmark you control',
    'i gain 2 boosts',
    'i gain 2 boosts and fleeting',
    'i gain 2 boosts and lose fleeting',
    'i gain 2 boosts, otherwise i gain 1 boost',
    'i gain 3 boosts',
    'i gain anchored',
    'i gain asleep',
    'i gain fleeting',
    'i lose fleeting',
    'my region is {v} in addition to its other types',
    'plants you control other than me gain 1 boost',
    "put me in my owner's mana zone as an exhausted mana orb",
    'put me in reserve',
    'put the top card of your deck in your mana zone as an exhausted mana orb',
    'put the top two cards of your deck in your mana zone as exhausted mana orbs',
    'ready all cards in your reserve',
    'resupply',
    'resupply, otherwise exhausted resupply',
    'sabotage',
    'sabotage, otherwise you may exhaust target card in reserve',
    'sacrifice me',
    'sacrifice one character',
    'send me to reserve',
    'starting with you, each player may immediately play a card with hand cost {3} or less for free',
    'target character gains 1 boost',
    'target character gains 1 boost and fleeting',
    'target character gains 2 boosts',
    'target character gains 2 boosts and fleeting',
    'target character gains 3 boosts',
    'target character gains anchored',
    "target character in your other expedition the one i'm not in gains 2 boosts",
    'target character other than me gains 1 boost',
    'target character other than me gains fleeting, anchored or asleep',
    'target character switches expedition',
    'target opponent discards a card from their hand',
    'target opponent draws a card',
    'target opponent may exhausted resupply',
    'target opponent may exhausted resupply twice',
    'target player sacrifices a character',
    'the next bureaucrat you play this afternoon costs {1} less',
    'the next card you play this afternoon costs {1} less',
    'the next character you play this afternoon costs {1} less',
    'the next permanent you play this afternoon costs {1} less',
    'the next permanent you play this afternoon costs {2} less',
    'the next permanent you play this afternoon costs {3} less',
    'the next permanent you play this afternoon costs {4} less',
    'the next spell you play this afternoon costs {1} less',
    "the {j}, {h} and {r} abilities of characters facing me can't activate",
    'up to one target character gains asleep',
    'up to two target characters each gain 1 boost',
    'up to two target characters each gain 2 boosts',
    'you may activate the {j} abilities of target permanent you control',
    'you may activate the {j} abilities of up to two target permanents you control',
    'you may discard target character',
    'you may discard target character or permanent',
    'you may discard target character with hand cost {3} or less',
    'you may discard target character with hand cost {4} or more',
    'you may discard target fleeting, anchored or asleep character',
    'you may discard target permanent',
    'you may discard target permanent with hand cost {4} or more',
    'you may exhaust target card in reserve',
    'you may give 2 boosts to target character in {v}',
    'you may give target fleeting character 2 boosts',
    'you may give target plant 2 boosts',
    'you may have target character gain asleep',
    'you may have target character gain fleeting',
    'you may have target character in the expedition facing me gain asleep',
    'you may have target character other than me lose fleeting',
    'you may have target character other than me lose fleeting and gain 1 boost',
    'you may have target character with hand cost {3} or less other than me gain anchored',
    'you may immediately play a character for {3} less',
    'you may play exhausted cards from your reserve',
    'you may put a card from your hand in reserve',
    "you may put me in my owner's mana zone as an exhausted mana orb",
    "you may put target character or permanent in its owner's mana zone as an exhausted mana orb",
    'you may return a card from your reserve to your hand',
    'you may return a card other than me from your reserve to your hand',
    'you may return a spell from your reserve to your hand',
    "you may return target character or permanent to its owner's hand",
    "you may return target character or permanent to the top of its owner's deck",
    "you may return target character or permanent with hand cost {4} or less to its owner's hand",
    "you may return target character or permanent with hand cost {5} or less to its owner's hand",
    'you may send target character in {v} to reserve',
    'you may send target character to reserve',
    'you may send target character to reserve, then exhaust it',
    'you may send to reserve any number of target characters with total {m} of 4 or less',
    'you may send to reserve any number of target characters with total {m} of 5 or less',
    'you may send to reserve target character with hand cost {3} or less',
    'you may send to reserve target character with hand cost {4} or more',
    'you may send to reserve target character with hand cost {x} or less, where x is the number of characters you control',
    'your characters other than me have: "{r} i gain 1 boost',
    "your opponent's expedition facing me moves backwards one region",
    "your other expedition the one i'm not in and the expedition facing it can't move forward"
]

### 2. build system analyser

The text of a unique card appears to be assembled as follows:<br>
Condition 1 (or a pending effect directly) / optional Condition 2 / Effect<br>
The conditions and effects were extracted with the cells just above; the results are stored in [this gSheet](https://docs.google.com/spreadsheets/d/1s0N4AE5BEyj7s5jY2r8u5-8yk20lqbMfvkDhnW3zu9U/edit?gid=0#gid=0)

In [None]:
# Restrict the dataset to unique characters and attach an 'effects_list' column:
# the list of effect texts that segregate_effects returns for each MAIN_EFFECT
# (up to 3 effects per card, per the original note).
uniques_df = (
    dataset
    .filter((pl.col('type') == 'CHARACTER') & (pl.col('rarity') == 'UNIQUE'))
    .with_columns(
        pl.col("MAIN_EFFECT")
        .map_elements(segregate_effects, return_dtype=pl.List(pl.Utf8))
        .alias('effects_list')
    )
)

def _first_prefix(text, candidates):
    """Return the first entry of *candidates* that *text* starts with, or None."""
    for candidate in candidates:
        if text.startswith(candidate):
            return candidate
    return None


def _strip_leading_symbols(text):
    """Trim surrounding whitespace, then drop any leading non-alphanumeric run."""
    return re.sub(r'^[^a-zA-Z0-9]+', '', text.strip())


def analyse_text(text, verbose=False):
    """Split one effect text into condition 1 / condition 2 / effect.

    Each part is found by prefix-matching, in order, against the module-level
    CONDITIONS (twice) and EFFECTS lists. The matches are only reported through
    ``print`` when *verbose* is true; nothing is stored.

    Args:
        text: the effect text to analyse; one leading space is ignored.
        verbose: when true, print every part that was recognised, or the whole
            text as a "pending effect" when nothing was recognised.

    Returns:
        Always 0.
    """
    if text.startswith(' '):
        text = text[1:]

    # First condition: prefix of the full text.
    first = _first_prefix(text, CONDITIONS)
    if verbose and first is not None:
        print(f"\tCondition 1 found: {first}")
    cond_final = first or ''

    # Optional second condition: prefix of what follows the first one.
    after_first = _strip_leading_symbols(text[len(cond_final):])
    second = _first_prefix(after_first, CONDITIONS)
    if verbose and second is not None:
        print(f"\tCondition 2 found: {second}")
    sec_cond_final = second or ''

    # Effect: prefix of what follows the second condition, minus a final dot.
    after_second = _strip_leading_symbols(after_first[len(sec_cond_final):])
    if after_second.endswith('.'):
        after_second = after_second[:-1]
    matched_effect = _first_prefix(after_second, EFFECTS)
    if verbose and matched_effect is not None:
        print(f"\tEffect found: {matched_effect}")
    effect_final = matched_effect or ''

    # Nothing recognised at all: the whole text is reported as a pending effect.
    if verbose and not (cond_final or sec_cond_final or effect_final):
        pending = text[:-1] if text.endswith('.') else text
        print(f"\tPending effect found: {pending}")

    return 0

# Run the condition / condition / effect analysis over every unique character
# and collect, in not_in_list, the texts for which nothing was recognised.
not_in_list = []
# for row in uniques_df.sample(n=20, seed=1773).iter_rows(named=True):
for row in uniques_df.iter_rows(named=True):
    texts_list = row['effects_list']
    if texts_list is None:
        continue
    for text in texts_list:     # up to 3 texts by card
        if text.startswith(' '):    # remove leading space
            text = text[1:]
        print('\n', text)

        # 0. get the first condition
        cond_final = ''
        for condition in CONDITIONS:
            if text.startswith(condition):
                cond_final = condition
                print(f"\tCondition 1 found: {condition}")
                break

        # 1. get the second potential condition
        # Slice by the *matched* condition (cond_final), not by the loop variable:
        # when nothing matched, the loop variable still holds the last entry of
        # CONDITIONS and would cut an arbitrary number of characters off the text.
        remaining_text = text[len(cond_final):].strip()
        remaining_text = re.sub(r'^[^a-zA-Z0-9]+', '', remaining_text)  # clean
        sec_cond_final = ''
        for second_condition in CONDITIONS:
            if remaining_text.startswith(second_condition):
                sec_cond_final = second_condition
                print(f"\tCondition 2 found: {sec_cond_final}")
                break

        # 2. get the effect
        remaining_text2 = remaining_text[len(sec_cond_final):].strip()
        remaining_text2 = re.sub(r'^[^a-zA-Z0-9]+', '', remaining_text2)  # clean
        if remaining_text2.endswith('.'):
            remaining_text2 = remaining_text2[:-1]
        # print(f"\t --- remaining_text2: {remaining_text2}")
        effect_final = ''
        for effect in EFFECTS:
            if remaining_text2.startswith(effect):
                effect_final = effect
                print(f"\tEffect found: {effect}")
                break

        # 3. nothing recognised: keep the whole text (without its final dot)
        if (cond_final == '') and (sec_cond_final == '') and (effect_final == ''):
            if text.endswith('.'):
                text = text[:-1]
            not_in_list.append(text)
            print(f"\tPending effect found: {text}")

# de-duplicate and sort the unrecognised texts
not_in_list = sorted(set(not_in_list))

## Proxy PDF

with card ids for RARES

In [None]:
# Width and height, in millimetres, at which each card image is placed in the PDF
img_w_mm, img_h_mm = 63, 88

def get_image_by_id(card_id):
    """Download the image of the card whose 'id' equals *card_id*.

    The card is looked up in the module-level ``dataset`` and its
    'image_path' value is fetched over HTTP.

    Args:
        card_id: value to match against the 'id' column of ``dataset``.

    Returns:
        A ``(img, card)`` tuple: the opened PIL image and the card row as a dict.

    Raises:
        IndexError: if no row of ``dataset`` has this id.
        requests.HTTPError: if the image request returns an error status.
    """
    matches = dataset.filter(pl.col('id') == card_id).to_dicts()
    if not matches:
        # same exception type as the former bare [0] lookup, with a usable message
        raise IndexError(f"no card with id {card_id!r} in dataset")
    card = matches[0]
    image_url = card['image_path']
    # a timeout keeps a stalled connection from blocking the notebook forever
    response = requests.get(image_url, timeout=30)
    # fail on 4xx/5xx here instead of letting PIL choke on an error page
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    return img, card

def create_pdfs(card_ids, output_filename):
    """Write a PDF sheet containing the images of the given cards.

    Cards are laid out left to right, then top to bottom, each at
    img_w_mm x img_h_mm; a new page is started whenever the current one is
    full. The first row keeps the original placement (x = 5 + width * i,
    y = 10).

    Args:
        card_ids: iterable of card ids, resolved through get_image_by_id.
        output_filename: path of the PDF file to write.
    """
    import os  # local import: only needed to create the temp-image directory

    pdf = FPDF()
    edge = 5    # left offset of the first column
    top = 10    # top offset of the first row
    # Number of cards that fit across / down one page, from the page size
    # reported by FPDF (at least one, so oversized cards are still placed).
    cols = max(1, int((pdf.w - edge) // img_w_mm))
    rows = max(1, int((pdf.h - top) // img_h_mm))
    per_page = cols * rows

    # the temporary PNG files are written under pdf/
    os.makedirs("pdf", exist_ok=True)

    pdf.add_page()
    for i, card_id in enumerate(card_ids):
        if i and i % per_page == 0:
            pdf.add_page()    # current page is full
        row, col = divmod(i % per_page, cols)

        img, _ = get_image_by_id(card_id)

        # Save the image to a unique temporary file
        img_path = f"pdf/temp_image_{i}.png"
        img.save(img_path)

        # Add image to PDF at its grid position
        pdf.image(
            img_path,
            x=edge + img_w_mm * col,
            y=top + img_h_mm * row,
            w=img_w_mm,
            h=img_h_mm,
        )

    # Output the PDF
    pdf.output(output_filename)

# Example usage: build a one-card sheet from a card id.
# Running this downloads the card image and writes files under pdf/.
card_ids = ["ALT_ALIZE_B_AX_45_R1"]
create_pdfs(card_ids, "pdf/cards.pdf")

with images links directly for UNIQUES

In [None]:
# Width and height, in millimetres, at which each card image is placed in the PDF
img_w_mm, img_h_mm = 63, 88

def get_image_by_link(card_link):
    """Download the image found at *card_link* and open it with PIL.

    Args:
        card_link: URL of the image.

    Returns:
        The opened PIL image.

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    # a timeout keeps a stalled connection from blocking the notebook forever
    response = requests.get(card_link, timeout=30)
    # fail on 4xx/5xx here instead of letting PIL choke on an error page
    response.raise_for_status()
    return Image.open(BytesIO(response.content))

def create_pdfs(card_links, output_filename):
    """Write a PDF sheet containing the images found at the given links.

    Cards are laid out left to right, then top to bottom, each at
    img_w_mm x img_h_mm; a new page is started whenever the current one is
    full. The first row keeps the original placement (x = 5 + width * i,
    y = 10).

    Args:
        card_links: iterable of image URLs, fetched through get_image_by_link.
        output_filename: path of the PDF file to write.
    """
    import os  # local import: only needed to create the temp-image directory

    pdf = FPDF()
    edge = 5    # left offset of the first column
    top = 10    # top offset of the first row
    # Number of cards that fit across / down one page, from the page size
    # reported by FPDF (at least one, so oversized cards are still placed).
    cols = max(1, int((pdf.w - edge) // img_w_mm))
    rows = max(1, int((pdf.h - top) // img_h_mm))
    per_page = cols * rows

    # the temporary PNG files are written under pdf/
    os.makedirs("pdf", exist_ok=True)

    pdf.add_page()
    for i, card_link in enumerate(card_links):
        if i and i % per_page == 0:
            pdf.add_page()    # current page is full
        row, col = divmod(i % per_page, cols)

        img = get_image_by_link(card_link)

        # Save the image to a unique temporary file
        img_path = f"pdf/temp_image_{i}.png"
        img.save(img_path)

        # Add image to PDF at its grid position
        pdf.image(
            img_path,
            x=edge + img_w_mm * col,
            y=top + img_h_mm * row,
            w=img_w_mm,
            h=img_h_mm,
        )

    # Output the PDF
    pdf.output(output_filename)

# Example usage: build a one-card sheet straight from an image URL.
# Running this downloads the image and writes files under pdf/.
card_links = [
    "https://www.altered.gg/image-transform/?width=384&format=auto&quality=75&image=https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_LY_10/UNIQUE/JPG/fr_FR/936f67e5143853e2493f05e0a2941533.jpg",
]
create_pdfs(card_links, "pdf/cards.pdf")

## Annexes

### output of the uniques scrape

In [None]:
# Sample request: page 1 of the UNIQUE cards of one faction, filtered by name.
faction = 'ax'
name = 'Ada Lovelace' # 'Heimdall'
# the name is sent wrapped in double quotes inside the query string
url = f'https://api.altered.gg/cards?page=1&factions%5B%5D={faction}&rarity%5B%5D=UNIQUE&translations.name="{name}"'
headers = {
    # the Authorization header is left disabled: the request is sent without it
    # 'Authorization': bearer_token,
    'Accept-Language': 'en-en',
}
response = requests.get(url, headers=headers)
resp_dict = response.json()
# bare expression at the end of the cell, so the notebook shows the parsed JSON
resp_dict

In [None]:
# Sample request: page 1 of the COMMON cards of one faction.
faction = 'ax'
url = f'https://api.altered.gg/cards?page=1&factions%5B%5D={faction}&rarity%5B%5D=COMMON'
headers = {
    # the Authorization header is left disabled: the request is sent without it
    # 'Authorization': bearer_token,
    'Accept-Language': 'en-en',
}
response = requests.get(url, headers=headers)
resp_dict = response.json()
# bare expression at the end of the cell, so the notebook shows the parsed JSON
resp_dict

In [None]:
# new format March 2025
# Sample request for a single card through the /cards/<id> endpoint.
# NOTE(review): card_id is assigned but never used -- the URL below hard-codes a
# different id and its f-string has no placeholder. Confirm which card is meant.
card_id = 'ALT_ALIZE_A_YZ_46_R2'
url = f'https://api.altered.gg/cards/ALT_COREKS_B_OR_05_U_3798'
headers = {
    # the Authorization header is left disabled: the request is sent without it
    # 'Authorization': bearer_token,
    'Accept-Language': 'en-en',
}
response = requests.get(url, headers=headers)
resp_dict = response.json()
# bare expression at the end of the cell, so the notebook shows the parsed JSON
resp_dict

### output of the non-unique cards scraped

In [None]:
# Collect every scraped entry whose English name is 'A Cappella Training',
# then print the ones that are not alternative arts.
a_cappella_training_items = {
    entry_id: entry
    for entry_id, entry in nonuniq_scrapped[0].items()
    if entry['name']['en'] == 'A Cappella Training'
}
for k, v in a_cappella_training_items.items():
    if '_A_' not in k:    # ids containing '_A_' are alternative arts: not shown
        print(k, v)