In [1]:
import time
import re
import argparse
from loguru import logger


# APPLICATION PARAMETERS ----------------------------

# Set up argument parser
parser = argparse.ArgumentParser(description="Process a CK3 save file.")
parser.add_argument(
    "--filename",
    nargs="?",
    default="data/latest/gamestate.ck3",
    help="Name of the readable CK3 save file",
)


# Parse arguments
args = parser.parse_args()

# Use the filename from arguments
filename = args.filename

logger.info(f"Using save file: {filename}")



# FUNCTION ----------------------------

def import_file(filename: str) -> str:
    """Import a CK3 save file."""

    start_time = time.time()

    with open(filename, "r", encoding="utf-8") as myfile:
        data = myfile.read()

        # Calculate file length and number of lines
        file_length = len(data)
        line_count = len(data.split("\n"))

        # Log formatted output with spaces as thousand separators
        logger.info(
            f"File length: {file_length:,}".replace(",", " ")
            + f" characters ({line_count:,}".replace(",", " ")
            + " lines)"
        )

    end_time = time.time()
    reading_time = end_time - start_time

    # Log the reading time
    logger.info(f"Reading time: {reading_time:.2f} seconds")

    return data


# FUNCTION ----------------------------

data = import_file("data/latest/gamestate.ck3")
charachterhistory = re.findall(r'played_character={.+?\n}', data, re.S)

[32m2024-12-23 12:51:28.573[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m25[0m - [1mUsing save file: c:\Users\linog\AppData\Roaming\jupyter\runtime\kernel-v39343dc6b3438257a8e9e7fddfa402444915d64c3.json[0m


[32m2024-12-23 12:51:30.523[0m | [1mINFO    [0m | [36m__main__[0m:[36mimport_file[0m:[36m44[0m - [1mFile length: 340 381 637 characters (20 322 233 lines)[0m
[32m2024-12-23 12:51:30.524[0m | [1mINFO    [0m | [36m__main__[0m:[36mimport_file[0m:[36m54[0m - [1mReading time: 1.95 seconds[0m


In [2]:
import pandas as pd
from src.utils.utils import combine_dicts
from src.structures.character import retrieve_character_info

played_characters = re.findall(r'character=(\d+)', charachterhistory[0])

history_played_characters = [
    retrieve_character_info(id, data) for id in played_characters
]
combined_dict = combine_dicts(history_played_characters)

history_played_characters = pd.DataFrame(combined_dict)

# Reorder columns
first_columns = ['id', 'name', 'nickname', 'birth', 'death_date', 'dynasty', 'faith', 'culture', 'domain_at_death']
other_columns = [col for col in history_played_characters.columns if col not in first_columns]
history_played_characters = history_played_characters.loc[:, first_columns + other_columns]
history_played_characters = history_played_characters.drop_duplicates(subset=['id'])

# convert to datetime some columns
history_played_characters = history_played_characters.sort_values(by='birth')


logger.info(f"Played characters history extracted ({history_played_characters.shape[0]} characters)")

[32m2024-12-23 12:51:38.761[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m24[0m - [1mPlayed characters history extracted (12 characters)[0m


In [3]:
history_played_characters.head(1)

Unnamed: 0,id,name,nickname,birth,death_date,dynasty,faith,culture,domain_at_death,liege_title_at_death,traits,liege_at_death,skills,recessive_traits,death_reason
1,18102,Murchad,l’Ancien,1027-01-01 00:00:00,1099-06-25 00:00:00,5117,Unknown,Unknown,"446, 447, 503, 504, 509, 516, 505, 510, 507, 517",446,"50, 66, 58, 7, 26, 244, 92, 129",18102,"10, 12, 6, 3, 3, 12",,death_old_age


In [12]:
from src.structures.title import get_title_name
titleid = 446

all_titles_history = history_played_characters["domain_at_death"].str.split(", ").explode().unique()

title_table = [get_title_name(titleid, data) for titleid in all_titles_history]
title_table

446
447
503
504
509
516
505
510
507
517
448
454
449
451
520
508
455
456
458
459
464
470
512
460
461
462
465
466
467
471
472
473
513
514
307
290
322
293
308
319
291
323
324
294
309
320
289
296
297
298
1383
288
175
1385
1386
1388
336
151
493
158
176
346
494
496
497
159
160
177
178
347
348
93
2165
2078
94
2327
2169
2360
2378
2368
2218
2244
2201
2213
2369
2370
2219
2245
2202
2246
2204
2214
2155
7186
2297
7161
6793
2379
6794
6982
6990
7163
6801
6978
2380
2382
2383
2384
2385
6795
6796
6800
6983
6991
6984
7164
7165
6802
6979
6980
6805
6792
2209
6810
2210
2212
6811
6813
6791
91
2619
1289
7870
672
614
793
983
11
7101
7116
2627
698
711
7102
7103
7117
7119
2628
2630
699
700
712
716
None


IndexError: list index out of range

In [32]:
re.findall(r'\n%s={.+?\n}' % titleid, data, re.S)

['\n446={\n\tkey=k_ireland\n\tde_facto_liege=93\n\tde_jure_liege=93\n\tde_jure_vassals={ 447 458 475 492 503 }\n\tholder=134298987\n\tname="Irlande"\n\tadj="irlandaise"\n\tpre="Hiberno"\n\tdate=1422.9.27\n\their={ 240930 255826 256739 244264 33775851 240345 253993 17014337 247617 17013801 33769335 84070687 239267 17026288 250238 246535 33719912 16913508 16996098 17027537 }\n\tclaim={ 84070687 33719912 16996098 231863 231661 16992654 16991075 239538 }\n\thistory={ 846.1.1=6987 862.11.22={\n\t\t\ttype=destroyed\n\t\t}\n 980.3.23=13373 1002.1.1=13130 1015.3.23=13373 1022.9.2={\n\t\t\ttype=destroyed\n\t\t}\n 1075.1.16={\n\t\t\ttype=created\n\t\t\tholder=18102\n\t\t}\n 1099.6.25=41521 1135.11.9=33588078 1165.1.1=16801873 1201.5.24=67141529 1207.2.10=50404874 1239.5.18=50426903 1278.10.7=33672894 1317.12.24=165529 1362.10.2=16961161 1381.2.12=33739479 1422.9.27=134298987 }\n\tcapital=493\n\tcoat_of_arms_id=178\n\tdate_defeated_last_ai_raider=1092.2.17\n}',
 '\n446={\n\tname="dynn_Ocra"\n\tpr

In [35]:
regex

'\\n446={{.+?\\n}}'

In [34]:
r'\n%s={.+?\n}' % titleid

'\\n446={.+?\\n}'

In [28]:
from src.utils.utils import trim_string_ck3, convert_ck3_date

dynid = 5117

def extract_dynasty_info(dynasty_id: int, data: str) -> dict:
    """Extract dynasty information given a dynasty ID."""

    regex_number_dynasty = fr'\n{dynasty_id}=' + r'{\n\t.+?\n}'
    house_data = re.findall(regex_number_dynasty, data, re.S)
    raw_data = [line for line in house_data if 'dynasty=' in line][0]

    dynasty_data = extract_characteristics(raw_data)
    return dynasty_data


def extract_characteristics(raw_data: str) -> dict:
    """Extract characteristics from raw dynasty data."""
    pattern = re.compile(r'key=([^\s]+)')
    house = trim_string_ck3(pattern.findall(raw_data)[0])
    try:
        foundation_date = convert_ck3_date(re.findall(r'found_date=(.*?)\n', raw_data, re.S)[0])
    except IndexError:
        foundation_date = "Time immemorial..."

    parent_dynasty = re.findall(r'\tdynasty=(.*?)\n', raw_data, re.S)[0]
    dynasty_head = re.findall(r'head_of_house=(.*?)\n', raw_data, re.S)[0]
    historical_leaders = re.findall(r'historical={(.*?)}', raw_data, re.S)[0].strip()

    return {
        "house": house,
        "foundation_date": foundation_date,
        "parent_dynasty": parent_dynasty,
        "dynasty_head": dynasty_head,
        "historical_leaders": historical_leaders,
    }

dynasty_info = extract_dynasty_info(dynid, data)


{'house': 'British isles briain',
 'foundation_date': datetime.datetime(978, 1, 1, 0, 0),
 'parent_dynasty': '2045',
 'dynasty_head': '134298987',
 'historical_leaders': '13130 14191 14419 15475 18102 41521 33588078 16801873 67141529 50404874 50426903 33672894 165529 16961161 33739479 134298987'}

In [26]:
pattern

re.compile(r'key=([^\s]+)', re.UNICODE)

In [28]:
import pandas as pd
from datetime import datetime

def convert_ck3_date(date_str: str) -> pd.Timestamp:
        # Split the date string into year, month, and day
        parts = date_str.split('.')
        if len(parts) != 3:
            return None
        
        year, month, day = parts
        
        # Pad year, month, and day with leading zeros if necessary
        year = year.zfill(4)
        month = month.zfill(2)
        day = day.zfill(2)
        
        # Construct the new date string
        new_date_str = f"{year}-{month}-{day}"

        # Convert to datetime
        return datetime.strptime(new_date_str, '%Y-%m-%d')

convert_ck3_date("978.1.1")

datetime.datetime(978, 1, 1, 0, 0)

In [None]:

            self.house = 'Lowborn'
        if 'dead_data' in rawData:
            self.dead = True
            self.date = re.findall(r'date=(.*?)\n', rawData, re.S)[0]
            self.reason = gameStringToRead(re.findall(r'reason="(.*?)"\n', rawData, re.S)[0])
            findLiege = re.findall(r'liege=(.*?)\n', rawData, re.S)
            if len(findLiege) > 0:
                liege = findLiege[0]
                if(liege != charid):
                    self.liege = liege
            findGovernment = re.findall(r'government="(.*?)"', rawData, re.S)
            if len(findGovernment) > 0:
                self.government = findGovernment[0]
                findDomain = re.findall(r'domain={(.*?)}', rawData, re.S)
                titleList = findDomain[0].split(' ')[1:-1]
                self.titles = []
                for title in titleList:
                    if title in knownTitles.keys():
                        self.titles.append(knownTitles[title])
                    else:
                        self.titles.append(gTitle(title, allData, env, path))
            else:
                self.government = 'Unlanded'
        else:
            self.dead = False
            #the char isnt dead we need to parse other stuff
            findGold = re.findall(r'gold=(.*?)\n', rawData, re.S)
            self.gold = findGold[0]
            findPiety = re.findall(r'accumulated=(.*?)\n', rawData, re.S)
            self.piety = findPiety[0]
            self.prestige = findPiety[0]
            findKills = re.findall(r'kills={(.*?)}', rawData, re.S)
            if len(findKills) > 0 and limit > 0:
                killList = findKills[0].split(' ')[1:-1]
                self.kills = []
                for dead in killList:
                    if dead in knownChars.keys():
                        self.kills.append(knownChars[dead])
                    else:
                        self.kills.append(gChar(dead, allData, env, path, limit - 1))
            findLanguages = re.findall(r'languages={(.*?)}', rawData, re.S)
            if len(findLanguages) > 0:
                self.languages = []
                for lang in findLanguages:
                    self.languages.append(lang.replace('language_', ''))
            findGovernment = re.findall(r'government="(.*?)"', rawData, re.S)
            if len(findGovernment) > 0:
                self.government = findGovernment[0]
                findDomain = re.findall(r'domain={(.*?)}', rawData, re.S)
                titleList = findDomain[0].split(' ')[1:-1]
                self.titles = []
                for title in titleList:
                    if title in knownTitles.keys():
                        self.titles.append(knownTitles[title])
                    else:
                        self.titles.append(gTitle(title, allData, env, path))
                findVassals = re.findall(r'vassal_contracts={(.*?)}', rawData, re.S)
                if len(findVassals) > 0 and limit > 0:
                    self.vassals = []
                    for vassal in findVassals[0].split(' ')[1:-1]:
                        try:
                            vassalId = findVassal(vassal, allData)
                            if vassalId in knownChars.keys():
                                self.vassals.append(knownChars[vassalId])
                            else:
                                self.vassals.append(gChar(vassalId, allData, env, path, limit - 1))
                        except:
                            pass
                findDread = re.findall(r'dread=(.*?)\n', rawData, re.S)
                if len(findDread):
                    self.dread = findDread[0]
                else:
                    self.dread = 0
                findStrength = re.findall(r'current_strength=(.*?)\n', rawData, re.S)
                if len(findStrength) > 0:
                    self.strength = findStrength[0]
                else:
                    self.strength = 0
            else:
                self.government = 'Unlanded'
            findMemories = re.findall(r'memories={(.*?)}', rawData, re.S)[0].split(' ')[1:-1]
            if len(findMemories) > 0:
                self.memories = []
                for memory in findMemories:
                    self.memories.append(gMem(memory, allData))
        #save to global variable


In [5]:
findDynastyData("0", data)

'\n0={\n\tkey=2\n\tprestige={\n\t\tcurrency=781.9568\n\t\taccumulated=2081.9568\n\t}\n\tcoat_of_arms_id=1324\n\tperk={ glory_legacy_1 glory_legacy_2 }\n}'

In [None]:
import pandas as pd
import re

# Define the data (formatted as a string for demonstration purposes)
# data = """{your_nested_data_here}"""  # Replace with your actual nested data

# Step 1: Preprocess the data
# Convert the data to a Python dictionary-like structure by removing invalid syntax
data_cleaned = re.sub(r"\n|\t|\s", "", data)  # Remove unnecessary whitespace

# Extract dynasty information using regex
matches = re.findall(r"(\d+)={(.*?)name=\"(.*?)\".*?found_date=(\d{1,4}\.\d{1,2}\.\d{1,2}).*?dynasty=(\d+).*?motto=(\{.*?\}|\"\")", data_cleaned)

# Step 2: Transform into a list of dictionaries
rows = []
for match in matches:
    dynasty_id, _, name, found_date, dynasty, motto = match

    # Clean up motto (if it's a dictionary, extract relevant information)
    if motto.startswith("{"):
        motto = re.sub(r"key=\"(.*?)\".*?value=\"(.*?)\".*?", r"\2", motto)

    rows.append({
        "Dynasty ID": int(dynasty_id),
        "Name": name,
        "Found Date": found_date,
        "Dynasty": int(dynasty),
        "Motto": motto.strip("\"{}")
    })

# Step 3: Create the DataFrame
df = pd.DataFrame(rows)

# Display the DataFrame
print(df)

In [20]:
import json
json_data = json.loads(data2, object_pairs_hook=_handle_duplicates)

JSONDecodeError: Expecting ',' delimiter: line 1 column 1781 (char 1780)