# Update dataset

In [65]:
import requests
import polars as pl
import os
from datetime import datetime
import shutil

Before March 2025 we could use this url format to scrap uniques:<br>
```
faction = 'ax'
name = 'Ada Lovelace' # 'Heimdall'
url = f'https://api.altered.gg/cards?page=1&factions%5B%5D={faction}&rarity%5B%5D=UNIQUE&translations.name="{name}"'
```
and we had MAIN_EFFECT accessible for their directly.
Since, MAIN_EFFECT disapeared from this request, and we have to use ```https://api.altered.gg/cards/ALT_COREKS_B_OR_05_U_3798``` individually

In [46]:
dataset = pl.read_parquet("dataset.parquet")

In [70]:
FACTIONS = ['AX', 'BR', 'LY', 'MU', 'OR', 'YZ']

In [67]:
def dataset_find_new(faction, name, rarity, dataset):
    """
    scrap cards list from Altered API with:
        faction : 'ax', 'br', 'mu', ...
        name : card name ('Ada Lovelace', 'A Cappella Training')
        rarity : 'COMMON', 'RARE', 'UNIQUE'
    ... and return a list of cards not present in dataset
        dataset : polars dataframe

        output : list of missing cards -> missing_ids / list of all cards in API -> cards_lst
    """
    url = f'https://api.altered.gg/cards?page=1&factions%5B%5D={faction}&rarity%5B%5D={rarity}&translations.name="{name}"'
    headers = {'Accept-Language': 'en-en'}
    response = requests.get(url, headers=headers)
    resp_dict = response.json()
    
    cards_lst = [i['reference'] for i in resp_dict['hydra:member']] # all cards in API
    
    n_cards_tot = resp_dict['hydra:totalItems']
    cards_per_page = len(resp_dict['hydra:member'])
    loop_needed = n_cards_tot // cards_per_page # not +1 because we already have the first page

    for i in range(loop_needed):
        url = f'https://api.altered.gg/cards?page={i+2}&factions%5B%5D={faction}&rarity%5B%5D={rarity}&translations.name="{name}"'
        response = requests.get(url, headers=headers)
        resp_dict = response.json()

        cards_lst.extend([i['reference'] for i in resp_dict['hydra:member']])

    # Filter the dataset by faction, name, and rarity
    filtered_dataset = dataset.filter(
        (pl.col("faction") == faction.upper()) & 
        (pl.col("name") == name) & 
        (pl.col("rarity") == rarity)
    )
    filtered_dataset_ids = set(filtered_dataset['id'].to_list())
    # Find IDs in cards_lst that are not in the filtered dataset
    missing_ids = [id_ for id_ in cards_lst if id_ not in filtered_dataset_ids]

    return missing_ids, cards_lst

def clean_text(input):
    if isinstance(input, str):
        text = input
        text = text.replace('[', '').replace(']', '').replace('#', '').replace('(', '').replace(')', '')
        text = text.replace(' ', ' ')
        if len(text) < 3:
            return int(text)
        return text.lower()
    elif isinstance(input, int):
        return input
    else:
        return None

def dataset_add_new(new_cards_list, dataset):
    """
    Given the list of cards not present in the dataset, add them to the dataset
    """
    df_output = dataset.clone()
    for new_card in new_cards_list:
        url = f'https://api.altered.gg/cards/{new_card}'
        headers = {'Accept-Language': 'en-en'}
        response = requests.get(url, headers=headers)
        resp_dict = response.json()
        
        if 'message' in resp_dict:  # if card not present in API
            print(f"{new_card} not in the API")
            continue
        
        # Create a new row as a DataFrame
        new_row = pl.DataFrame(
            {
                "id": resp_dict["reference"],
                "name": resp_dict["name"],
                "Mcost": clean_text(resp_dict["elements"]["MAIN_COST"]),
                "Rcost":  clean_text(resp_dict["elements"]["RECALL_COST"]),
                "Fp":  clean_text(resp_dict["elements"]["FOREST_POWER"]),
                "Mp":  clean_text(resp_dict["elements"]["MOUNTAIN_POWER"]),
                "Op":  clean_text(resp_dict["elements"]["OCEAN_POWER"]),
                "MAIN_EFFECT":  clean_text(resp_dict["elements"].get("MAIN_EFFECT", None)),
                "ECHO_EFFECT":  clean_text(resp_dict["elements"].get("ECHO_EFFECT", None)),
                "raw_value": 0.0,
                "effect_value": 0.0,
                "faction": resp_dict["mainFaction"]["reference"],
                "rarity": resp_dict["rarity"]["reference"],
                "type": resp_dict["cardType"]["reference"],
                "subtypes": ''.join(*[i["reference"] + ' - ' for i in resp_dict["cardSubTypes"]]),
                "image_path": resp_dict["imagePath"],
                "raw_Hand_value": 0.0,
                "raw_Reserve_value": 0.0,
            },
            schema=
            {
                'id': pl.Utf8,
                'name': pl.Utf8,
                'Mcost': pl.Int32,
                'Rcost': pl.Int32,
                'Fp': pl.Int32,
                'Mp': pl.Int32,
                'Op': pl.Int32,
                'MAIN_EFFECT': pl.Utf8,
                'ECHO_EFFECT': pl.Utf8,
                'raw_value': pl.Float32,
                'effect_value': pl.Float32,
                'faction': pl.Utf8,
                'rarity': pl.Utf8,
                'type': pl.Utf8,
                'subtypes': pl.Utf8,
                'image_path': pl.Utf8,
                'raw_Hand_value': pl.Float32,
                'raw_Reserve_value': pl.Float32,
            }
        )
        

        df_output = df_output.vstack(new_row)
        return df_output

def dataset_update_non_uniques(dataset):
    """
    Update the dataset with the new non-unique cards
    """
    rarities = ['COMMON', 'RARE']
    for faction in FACTIONS:
        url = f'https://api.altered.gg/cards?page=1&factions%5B%5D={faction.lower()}&rarity%5B%5D={rarities[0]}&rarity%5B%5D={rarities[1]}'
        headers = {'Accept-Language': 'en-en'}
        response = requests.get(url, headers=headers)
        resp_dict = response.json()

        cards_lst = [i['name'] for i in resp_dict['hydra:member']] # all cards in API
            
        n_cards_tot = resp_dict['hydra:totalItems']
        cards_per_page = len(resp_dict['hydra:member'])
        loop_needed = n_cards_tot // cards_per_page # not +1 because we already have the first page

        for i in range(loop_needed):
            url = f'https://api.altered.gg/cards?page={i+2}&factions%5B%5D={faction.lower()}&rarity%5B%5D={rarities[0]}&rarity%5B%5D={rarities[1]}'
            response = requests.get(url, headers=headers)
            resp_dict = response.json()

            cards_lst.extend([i['name'] for i in resp_dict['hydra:member']])
        cards_lst = list(set(cards_lst))

        for card in cards_lst:
            for rarity in rarities:
                missing_ids, _ = dataset_find_new(faction, card, rarity, dataset)
                if missing_ids:
                    dataset = dataset_add_new(missing_ids, dataset)

    return dataset

def backup_dataset():
    # Créer le dossier de sauvegarde s'il n'existe pas
    backup_folder = "dataset_backup"
    os.makedirs(backup_folder, exist_ok=True)

    # Ajouter la date du jour au nom du fichier
    today_date = datetime.now().strftime("%Y-%m-%d")
    backup_file_name = f"dataset_{today_date}.parquet"
    backup_file_path = os.path.join(backup_folder, backup_file_name)

    # Copier le fichier
    shutil.copy("dataset.parquet", backup_file_path)

    print(f"Fichier sauvegardé sous : {backup_file_path}")

In [71]:
backup_dataset()
dataset = dataset_update_non_uniques(dataset)
dataset.write_parquet("dataset.parquet")        # Save the DataFrame to a Parquet file

Fichier sauvegardé sous : dataset_backup\dataset_2025-03-19.parquet


ZeroDivisionError: integer division or modulo by zero

In [33]:
new_cards_list, all_cardsAPI_ref = dataset_find_new('ax', 'Ada Lovelace', 'UNIQUE', dataset)
new_cards_list[:3]

['ALT_CORE_B_AX_13_U_1017',
 'ALT_CORE_B_AX_13_U_1029',
 'ALT_CORE_B_AX_13_U_1280']

In [50]:
dataset = dataset_add_new(new_cards_list, dataset)

## Tests

In [3]:
faction = 'ax'
name = 'Ada Lovelace' # 'Heimdall'
url = f'https://api.altered.gg/cards?page=1&factions%5B%5D={faction}&rarity%5B%5D=UNIQUE&translations.name="{name}"'
headers = {'Accept-Language': 'en-en'}
response = requests.get(url, headers=headers, verify=False)
resp_dict = response.json()
resp_dict



{'@context': '/contexts/Card',
 '@id': '/cards',
 '@type': 'hydra:Collection',
 'hydra:totalItems': 1000,
 'hydra:member': [{'id': '2BMA17XSY59FSS0BJ25JZ2Q48B',
   'cardType': {'@id': '/card_types/01H19NWA92A4ERAC4ATMSZNASS',
    '@type': 'CardType',
    'reference': 'CHARACTER',
    'id': '01H19NWA92A4ERAC4ATMSZNASS',
    'name': 'Character'},
   'cardSet': {'@id': '/card_sets/CORE',
    '@type': 'CardSet',
    'id': '01HKAFJN3HG3TWKYV0E014K01G',
    'reference': 'CORE',
    'name': 'Beyond the Gates'},
   'cardSubTypes': [],
   'rarity': {'@id': '/rarities/UNIQUE',
    '@type': 'Rarity',
    'reference': 'UNIQUE',
    'id': '01GE7AC9X35JXEQZJBBD6E4BKW',
    'name': 'Unique'},
   'imagePath': 'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/UNIQUE/JPG/en_US/6e4ffd2ebc58479dfe6040f6516b2226.jpg',
   'assets': {'WEB': ['https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/ALT_CORE_B_AX_13_C_WEB.jpg',
     'https://altered-prod-eu.s3.amazonaws

In [4]:
n_pages_to_proceed = math.ceil(resp_dict['hydra:totalItems'] / len(resp_dict['hydra:member']))
print(f"{name} {resp_dict['hydra:totalItems']} items ({len(resp_dict['hydra:member'])} by page) {n_pages_to_proceed} pages to scrap")

Ada Lovelace 1000 items (36 by page) 28 pages to scrap


In [5]:
id_lst = [i['reference'] for i in resp_dict['hydra:member']]
id_lst[:4]

['ALT_CORE_B_AX_13_U_1000',
 'ALT_CORE_B_AX_13_U_1010',
 'ALT_CORE_B_AX_13_U_1011',
 'ALT_CORE_B_AX_13_U_1015']

In [6]:
# Convert the 'id' column of the dataset to a set for faster lookup
dataset_ids = set(dataset['id'].to_list())

# Find IDs in id_lst that are not in the dataset
missing_ids = [id_ for id_ in id_lst if id_ not in dataset_ids]

# Print the missing IDs
print(f"IDs in id_lst that are not in the dataset: {missing_ids}")

IDs in id_lst that are not in the dataset: ['ALT_CORE_B_AX_13_U_1017', 'ALT_CORE_B_AX_13_U_1029']


In [34]:
# test numbers that are not in the API
for i in range(100,300):
    card_reference = f'ALT_CORE_B_AX_13_U_1{i:02}'  # test one that is not in the list
    url = f'https://api.altered.gg/cards/{card_reference}'
    headers = {'Accept-Language': 'en-en'}
    response = requests.get(url, headers=headers)
    resp_dict = response.json()
    if 'message' not in resp_dict:
        print(i, resp_dict)

100 {'@context': '/contexts/Card', '@id': '/cards/ALT_CORE_B_AX_13_U_1100', '@type': 'Card', 'loreEntries': [], 'cardType': {'@id': '/card_types/01H19NWA92A4ERAC4ATMSZNASS', '@type': 'CardType', 'reference': 'CHARACTER', 'id': '01H19NWA92A4ERAC4ATMSZNASS', 'name': 'Character'}, 'cardSubTypes': [{'@type': 'CardSubType', '@id': '/.well-known/genid/37cbe97d38bb8c7ec693', 'reference': 'ENGINEER', 'id': '01HKAGPA9AS71JN0H9HQZTBNCD', 'name': 'Engineer'}], 'cardSet': {'@id': '/card_sets/CORE', '@type': 'CardSet', 'id': '01HKAFJN3HG3TWKYV0E014K01G', 'reference': 'CORE', 'name': 'Beyond the Gates'}, 'rarity': {'@id': '/rarities/UNIQUE', '@type': 'Rarity', 'reference': 'UNIQUE', 'id': '01GE7AC9X35JXEQZJBBD6E4BKW', 'name': 'Unique'}, 'cardRulings': [], 'imagePath': 'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/UNIQUE/JPG/en_US/07f2d31a3ee5ea145231a5c6ab80554f.jpg', 'assets': {'WEB': ['https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/ALT_CORE_B_A

In [None]:
card_reference = f'ALT_CORE_B_AX_13_U_1140'  # test one that is not in the list
url = f'https://api.altered.gg/cards/{card_reference}'
headers = {'Accept-Language': 'en-en'}
response = requests.get(url, headers=headers)
resp_dict = response.json()

response to the request of one card looks like:
```json
{
  '@context': '/contexts/Card',
  '@id': '/cards/ALT_CORE_B_AX_13_U_1140',
  '@type': 'Card',
  'loreEntries': [],
  'cardType': {
    '@id': '/card_types/01H19NWA92A4ERAC4ATMSZNASS',
    '@type': 'CardType',
    'reference': 'CHARACTER',
    'id': '01H19NWA92A4ERAC4ATMSZNASS',
    'name': 'Character'
  },
  'cardSubTypes': [
    {
      '@type': 'CardSubType',
      '@id': '/.well-known/genid/8ca26a0a3108fe9a5011',
      'reference': 'ENGINEER',
      'id': '01HKAGPA9AS71JN0H9HQZTBNCD',
      'name': 'Engineer'
    }
  ],
  'cardSet': {
    '@id': '/card_sets/CORE',
    '@type': 'CardSet',
    'id': '01HKAFJN3HG3TWKYV0E014K01G',
    'reference': 'CORE',
    'name': 'Beyond the Gates'
  },
  'rarity': {
    '@id': '/rarities/UNIQUE',
    '@type': 'Rarity',
    'reference': 'UNIQUE',
    'id': '01GE7AC9X35JXEQZJBBD6E4BKW',
    'name': 'Unique'
  },
  'cardRulings': [],
  'imagePath': 'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/UNIQUE/JPG/en_US/dd075dc7bf83560c4752f6b718404dc9.jpg',
  'assets': {
    'WEB': [
      'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/ALT_CORE_B_AX_13_C_WEB.jpg',
      'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/ALT_CORE_B_AX_13_R_WEB.jpg',
      'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/ALT_CORE_B_AX_13_U_WEB.jpg'
      ]
  },
  'lowerPrice': 0,
  'qrUrlDetail': 'https://qr.altered.gg/ALT_CORE_B_AX_13_U_1140',
  'isSuspended': False,
  'reference': 'ALT_CORE_B_AX_13_U_1140',
  'id': '1RBWYAJV1N81RVQ6E138QMG3V2',
  'mainFaction': {
    '@id': '/factions/AX',
    '@type': 'Faction',
    'reference': 'AX',
    'color': '#8c432a',
    'id': '01GE7AC9XBG707G19F03A95TH1',
    'name': 'Axiom'
  },
  'allImagePath': {
    'fr-fr': 'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/UNIQUE/JPG/fr_FR/75701dfa44ae3e6a586cea258487b465.jpg',
    'es-es': 'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/UNIQUE/JPG/es_ES/2be993c58967c444cafa8c287f905663.jpg',
    'it-it': 'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/UNIQUE/JPG/it_IT/8464916b9c0ed8b0b49a32563d54fd01.jpg',
    'de-de': 'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/UNIQUE/JPG/de_DE/d66d8befdc1349ef7e992812f081c51d.jpg',
    'en-us': 'https://altered-prod-eu.s3.amazonaws.com/Art/CORE/CARDS/ALT_CORE_B_AX_13/UNIQUE/JPG/en_US/dd075dc7bf83560c4752f6b718404dc9.jpg'
  },
  'name': 'Ada Lovelace',
  'elements': {
    'MAIN_COST': '3',
    'RECALL_COST': '2',
    'MOUNTAIN_POWER': '1',
    'OCEAN_POWER': '1',
    'FOREST_POWER': '4',
    'MAIN_EFFECT': '{J} You may put a card from your hand in Reserve. If you do: I gain 1 boost per card in your Reserve.',
    'ECHO_EFFECT': '{D} : []You may have target Character other than me lose [[Fleeting]].'
  }
}
  ```

In [64]:
faction = 'AX'
rarities = ['COMMON', 'RARE']

url = f'https://api.altered.gg/cards?page=1&factions%5B%5D={faction.lower()}&rarity%5B%5D={rarities[0]}&rarity%5B%5D={rarities[1]}'
headers = {'Accept-Language': 'en-en'}
response = requests.get(url, headers=headers)
resp_dict = response.json()

cards_lst = [i['name'] for i in resp_dict['hydra:member']] # all cards in API
    
n_cards_tot = resp_dict['hydra:totalItems']
cards_per_page = len(resp_dict['hydra:member'])
loop_needed = n_cards_tot // cards_per_page # not +1 because we already have the first page

for i in range(loop_needed):
    url = f'https://api.altered.gg/cards?page={i+2}&factions%5B%5D={faction.lower()}&rarity%5B%5D={rarities[0]}&rarity%5B%5D={rarities[1]}'
    response = requests.get(url, headers=headers)
    resp_dict = response.json()

    cards_lst.extend([i['name'] for i in resp_dict['hydra:member']])

cards_lst = list(set(cards_lst))
cards_lst


['Muna Merchant',
 'Yzmir Stargazer',
 'Ogun',
 'Lord Kelvin',
 'The Nilam, Withered Tree',
 'Lyra Chronicler',
 'Daughter of Yggdrasil',
 'Technical Boots',
 'Bravos Rescuer',
 'Frozen Reprocessor',
 'Treyst & Rossum',
 'Haven Bouncer',
 'The Little Match Girl',
 'Sierra & Oddball',
 'Arcolano Milk',
 'Ada Lovelace',
 'The Foundry, Axiom Bastion',
 "Jeanne d'Arc",
 'Mowgli',
 'Walrus Scout',
 'Kelonic Generator',
 'Rocket Puffin',
 'Ganesha',
 'Vishvakarma',
 'Moth Larva',
 'Tinker Bell',
 "Kraken's Wrath",
 'Kelon Burst',
 'Monolith Rune-Scribe',
 'Flutter of Moths',
 'Daring Porter',
 'Amelia Earhart',
 'Ouroboros Inkcaster',
 'Boom!',
 'Flawed Prototype',
 'Repotter Otter',
 'Axiom Salvager',
 'Martengale',
 'Amahle, Asgarthan Outcast',
 'Foundry Armorer',
 'Avalanche',
 'Frozen Delivery',
 'Baku',
 'Icebound Peak',
 'Mechanical Training',
 'Bravos Tracer',
 'Foundry Mechanic',
 'Kelon Cylinder',
 'Three Little Pigs',
 'Sticky Note Seals',
 'The Frog Prince',
 'Icebound Hollow',
 '

In [49]:
# change type of columns in polars df (all Float64 to Float32)
print(dataset.dtypes)
dataset = dataset.with_columns([pl.col(column).cast(pl.Float32) for column in dataset.columns if dataset.schema[column] == pl.Float64])
print(dataset.dtypes)

[String, String, Int32, Int32, Int32, Int32, Int32, String, String, Float64, Float32, String, String, String, String, String, Float64, Float64]
[String, String, Int32, Int32, Int32, Int32, Int32, String, String, Float32, Float32, String, String, String, String, String, Float32, Float32]
