In [1]:
import time
import re
import argparse
from loguru import logger


# APPLICATION PARAMETERS ----------------------------

# Set up argument parser
parser = argparse.ArgumentParser(description="Process a CK3 save file.")
parser.add_argument(
    "--filename",
    nargs="?",
    default="data/latest/gamestate.ck3",
    help="Name of the readable CK3 save file",
)


# Parse arguments
args = parser.parse_args()

# Use the filename from arguments
filename = args.filename

logger.info(f"Using save file: {filename}")



# FUNCTION ----------------------------

def import_file(filename: str) -> str:
    """Import a CK3 save file."""

    start_time = time.time()

    with open(filename, "r", encoding="utf-8") as myfile:
        data = myfile.read()

        # Calculate file length and number of lines
        file_length = len(data)
        line_count = len(data.split("\n"))

        # Log formatted output with spaces as thousand separators
        logger.info(
            f"File length: {file_length:,}".replace(",", " ")
            + f" characters ({line_count:,}".replace(",", " ")
            + " lines)"
        )

    end_time = time.time()
    reading_time = end_time - start_time

    # Log the reading time
    logger.info(f"Reading time: {reading_time:.2f} seconds")

    return data


# FUNCTION ----------------------------

data = import_file("data/latest/gamestate.ck3")
charachterhistory = re.findall(r'played_character={.+?\n}', data, re.S)

[32m2024-12-22 11:30:36.539[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m25[0m - [1mUsing save file: c:\Users\linog\AppData\Roaming\jupyter\runtime\kernel-v3e74b944e05d9022d58d25a181c0e890d34531180.json[0m
[32m2024-12-22 11:30:38.391[0m | [1mINFO    [0m | [36m__main__[0m:[36mimport_file[0m:[36m44[0m - [1mFile length: 340 381 637 characters (20 322 233 lines)[0m
[32m2024-12-22 11:30:38.392[0m | [1mINFO    [0m | [36m__main__[0m:[36mimport_file[0m:[36m54[0m - [1mReading time: 1.85 seconds[0m


In [4]:
import pandas as pd
from src.utils.utils import combine_dicts
from src.structures.character import retrieve_character_info

played_characters = re.findall(r'character=(\d+)', charachterhistory[0])

history_played_characters = [
    retrieve_character_info(id, data) for id in played_characters
]
combined_dict = combine_dicts(history_played_characters)

history_played_characters = pd.DataFrame(combined_dict)

# Reorder columns
first_columns = ['id', 'name', 'nickname', 'birth', 'death_date', 'dynasty', 'faith', 'culture', 'domain_at_death']
other_columns = [col for col in history_played_characters.columns if col not in first_columns]
history_played_characters = history_played_characters.loc[:, first_columns + other_columns]
history_played_characters = history_played_characters.drop_duplicates(subset=['id'])

# convert to datetime some columns
history_played_characters = history_played_characters.sort_values(by='birth')


logger.info(f"Played characters history extracted ({history_played_characters.shape[0]} characters)")

[32m2024-12-22 11:31:23.013[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m24[0m - [1mPlayed characters history extracted (12 characters)[0m


In [5]:
history_played_characters

Unnamed: 0,id,name,nickname,birth,death_date,dynasty,faith,culture,domain_at_death,liege_at_death,liege_title_at_death,recessive_traits,traits,skills,death_reason
1,18102,Murchad,l’Ancien,1027-01-01 00:00:00,1099-06-25 00:00:00,5117,Unknown,Unknown,"446, 447, 503, 504, 509, 516, 505, 510, 507, 517",18102.0,446.0,,"50, 66, 58, 7, 26, 244, 92, 129","10, 12, 6, 3, 3, 12",death_old_age
2,41521,Donnchad,,1068-12-24 00:00:00,1135-11-09 00:00:00,5117,Unknown,Unknown,"446, 503, 447, 504, 516, 509, 448, 454, 505, 5...",41521.0,446.0,,"50, 70, 62, 7, 26, 30, 27, 98, 158, 259, 100","10, 4, 5, 5, 4, 6",death_old_age
3,33588078,Airleid,le Juste,1091-06-09 00:00:00,1165-01-01 00:00:00,5117,Unknown,Unknown,"446, 447, 458, 448, 459, 464, 470, 512, 449, 4...",33588078.0,446.0,,"70, 50, 76, 16, 32, 28, 30, 237, 101, 260","11, 8, 5, 9, 10, 5",death_old_age
4,16801873,MA_el-MI_chI_l,le Noble,1130-11-08 00:00:00,1201-05-24 00:00:00,5117,Unknown,Unknown,"446, 447, 307, 448, 290, 322, 293, 308, 319, 4...",16801873.0,446.0,,"70, 50, 78, 6, 44, 30, 35, 101, 125, 260","13, 5, 7, 9, 11, 3",death_consumption
5,67141529,Abner,,1152-09-15 00:00:00,1207-02-10 00:00:00,5117,Unknown,Unknown,"446, 447, 289, 448, 322, 296, 449, 451, 323, 3...",67141529.0,446.0,,"61, 53, 73, 18, 31, 240, 241, 90, 172, 134, 11...","4, 9, 8, 5, 7, 10",death_depressed
6,50404874,Fiachnae,le Pieux,1178-10-11 00:00:00,1239-05-18 00:00:00,5117,Unknown,Unknown,"446, 1383, 288, 447, 175, 448, 1385, 322, 449,...",50404874.0,446.0,,"68, 50, 79, 17, 235, 103, 260","7, 9, 6, 7, 5, 7",death_old_age
7,50426903,Doedgus,le Bon,1218-11-06 00:00:00,1278-10-07 00:00:00,5117,Unknown,Unknown,"446, 288, 336, 447, 151, 448, 493, 158, 176, 3...",50426903.0,446.0,,"72, 65, 70, 1, 243, 203, 91, 260, 100","8, 9, 6, 5, 3, 4",death_natural_causes
8,33672894,I_te,la Pieuse,1256-03-11 00:00:00,1317-12-24 00:00:00,5117,Unknown,Unknown,"93, 2165, 446, 288, 336, 2078, 94, 2327, 2169,...",33672894.0,93.0,153.0,"50, 58, 79, 21, 45, 30, 35, 46, 244, 93, 92, 1...","8, 9, 5, 6, 9, 6",death_old_age
9,165529,Mo-Chonna,le Sage,1297-07-29 00:00:00,1362-10-02 00:00:00,5117,Unknown,Unknown,"93, 2165, 446, 288, 336, 2078, 94, 2327, 2169,...",165529.0,93.0,153.0,"68, 58, 74, 13, 30, 28, 36, 34, 244, 147, 134,...","5, 8, 5, 10, 8, 6",death_apoplexy
10,16961161,Finguine,l’Érudit,1354-11-01 00:00:00,1381-02-12 00:00:00,5117,Unknown,Unknown,"93, 2165, 446, 288, 336, 2078, 94, 2327, 2169,...",16961161.0,93.0,161.0,"60, 72, 62, 11, 30, 46, 88, 129","4, 8, 6, 10, 7, 8",death_smallpox


In [4]:
import linecache

def findDynastyData(dynid:str, data:str) -> str:
    i = 0
    dynData = re.findall(r'\n%s={\n\t.+?\n}' % dynid, data, re.S)
    while 'prestige=' not in dynData[i]: #we have to iterate over all hits because pdx are not nice to me
        i = i + 1
    return dynData[i]

def getTrait(traitId:str) -> str:
    line = linecache.getline('trait_indexes.lookup', traitId + 1)
    return gameStringToRead(line)

def gameStringToRead(string:str) -> str:
    string = string.replace('dynn_', '').replace('_lifestyle', '').replace('_perk', '')
    string = string.replace('nick_', '').replace('death_', '').replace('ethos_', '')
    string = string.replace('heritage_', '').replace('martial_custom_', '').replace('tradition_', '')
    string = string.replace('fp2_', '').replace('fp1_', '').replace('language_', '')
    string = string.replace('_1', '').replace('_2', '').replace('doctrine_', '')
    string = string.replace('special_', '').replace('is_', '')
    #string = string.replace('A_', 'ã').replace('O_', 'õ').replace('E_', 'ẽ')
    string = string.replace('_',' ')
    string = string.lower().capitalize()
    return string


In [None]:

            self.house = 'Lowborn'
        if 'dead_data' in rawData:
            self.dead = True
            self.date = re.findall(r'date=(.*?)\n', rawData, re.S)[0]
            self.reason = gameStringToRead(re.findall(r'reason="(.*?)"\n', rawData, re.S)[0])
            findLiege = re.findall(r'liege=(.*?)\n', rawData, re.S)
            if len(findLiege) > 0:
                liege = findLiege[0]
                if(liege != charid):
                    self.liege = liege
            findGovernment = re.findall(r'government="(.*?)"', rawData, re.S)
            if len(findGovernment) > 0:
                self.government = findGovernment[0]
                findDomain = re.findall(r'domain={(.*?)}', rawData, re.S)
                titleList = findDomain[0].split(' ')[1:-1]
                self.titles = []
                for title in titleList:
                    if title in knownTitles.keys():
                        self.titles.append(knownTitles[title])
                    else:
                        self.titles.append(gTitle(title, allData, env, path))
            else:
                self.government = 'Unlanded'
        else:
            self.dead = False
            #the char isnt dead we need to parse other stuff
            findGold = re.findall(r'gold=(.*?)\n', rawData, re.S)
            self.gold = findGold[0]
            findPiety = re.findall(r'accumulated=(.*?)\n', rawData, re.S)
            self.piety = findPiety[0]
            self.prestige = findPiety[0]
            findKills = re.findall(r'kills={(.*?)}', rawData, re.S)
            if len(findKills) > 0 and limit > 0:
                killList = findKills[0].split(' ')[1:-1]
                self.kills = []
                for dead in killList:
                    if dead in knownChars.keys():
                        self.kills.append(knownChars[dead])
                    else:
                        self.kills.append(gChar(dead, allData, env, path, limit - 1))
            findLanguages = re.findall(r'languages={(.*?)}', rawData, re.S)
            if len(findLanguages) > 0:
                self.languages = []
                for lang in findLanguages:
                    self.languages.append(lang.replace('language_', ''))
            findGovernment = re.findall(r'government="(.*?)"', rawData, re.S)
            if len(findGovernment) > 0:
                self.government = findGovernment[0]
                findDomain = re.findall(r'domain={(.*?)}', rawData, re.S)
                titleList = findDomain[0].split(' ')[1:-1]
                self.titles = []
                for title in titleList:
                    if title in knownTitles.keys():
                        self.titles.append(knownTitles[title])
                    else:
                        self.titles.append(gTitle(title, allData, env, path))
                findVassals = re.findall(r'vassal_contracts={(.*?)}', rawData, re.S)
                if len(findVassals) > 0 and limit > 0:
                    self.vassals = []
                    for vassal in findVassals[0].split(' ')[1:-1]:
                        try:
                            vassalId = findVassal(vassal, allData)
                            if vassalId in knownChars.keys():
                                self.vassals.append(knownChars[vassalId])
                            else:
                                self.vassals.append(gChar(vassalId, allData, env, path, limit - 1))
                        except:
                            pass
                findDread = re.findall(r'dread=(.*?)\n', rawData, re.S)
                if len(findDread):
                    self.dread = findDread[0]
                else:
                    self.dread = 0
                findStrength = re.findall(r'current_strength=(.*?)\n', rawData, re.S)
                if len(findStrength) > 0:
                    self.strength = findStrength[0]
                else:
                    self.strength = 0
            else:
                self.government = 'Unlanded'
            findMemories = re.findall(r'memories={(.*?)}', rawData, re.S)[0].split(' ')[1:-1]
            if len(findMemories) > 0:
                self.memories = []
                for memory in findMemories:
                    self.memories.append(gMem(memory, allData))
        #save to global variable


In [5]:
findDynastyData("0", data)

'\n0={\n\tkey=2\n\tprestige={\n\t\tcurrency=781.9568\n\t\taccumulated=2081.9568\n\t}\n\tcoat_of_arms_id=1324\n\tperk={ glory_legacy_1 glory_legacy_2 }\n}'

In [None]:
import pandas as pd
import re

# Define the data (formatted as a string for demonstration purposes)
# data = """{your_nested_data_here}"""  # Replace with your actual nested data

# Step 1: Preprocess the data
# Convert the data to a Python dictionary-like structure by removing invalid syntax
data_cleaned = re.sub(r"\n|\t|\s", "", data)  # Remove unnecessary whitespace

# Extract dynasty information using regex
matches = re.findall(r"(\d+)={(.*?)name=\"(.*?)\".*?found_date=(\d{1,4}\.\d{1,2}\.\d{1,2}).*?dynasty=(\d+).*?motto=(\{.*?\}|\"\")", data_cleaned)

# Step 2: Transform into a list of dictionaries
rows = []
for match in matches:
    dynasty_id, _, name, found_date, dynasty, motto = match

    # Clean up motto (if it's a dictionary, extract relevant information)
    if motto.startswith("{"):
        motto = re.sub(r"key=\"(.*?)\".*?value=\"(.*?)\".*?", r"\2", motto)

    rows.append({
        "Dynasty ID": int(dynasty_id),
        "Name": name,
        "Found Date": found_date,
        "Dynasty": int(dynasty),
        "Motto": motto.strip("\"{}")
    })

# Step 3: Create the DataFrame
df = pd.DataFrame(rows)

# Display the DataFrame
print(df)

In [20]:
import json
json_data = json.loads(data2, object_pairs_hook=_handle_duplicates)

JSONDecodeError: Expecting ',' delimiter: line 1 column 1781 (char 1780)