In [1]:
from IPython.display import display, HTML
import pandas as pd
from pathlib import Path

For simplicity, I suggest creating a shortcut that leads to the Stellaris directory inside Steam. Alternatively, replace steam_stellaris with the full path. The full path looks like C:\Program Files (x86)\Steam\steamapps\common\Stellaris.

In [2]:
# Root of the Stellaris installation: a shortcut next to this notebook, or a full path.
stellaris_path = Path('steam_stellaris')
# Folder inside the installation that holds the localisation files.
localisation_path = stellaris_path / 'localisation'

This is where the modified files will be saved.

In [3]:
# Output folder for the modified localisation files, relative to the working directory.
replace_dir = Path('localisation') / 'replace'

Grab the anomaly tables from the wiki

In [4]:
# Wiki page whose HTML tables list the anomalies.
anomaly_url = 'https://stellaris.paradoxwikis.com/Anomaly'
# pd.read_html returns one DataFrame per table it can parse from the page (needs network access).
tables = pd.read_html(anomaly_url)
# Shown as the cell output so the table indices used further down can be sanity-checked.
f'Found {len(tables)} tables'

'Found 20 tables'

Unfortunately, not all table definitions are actually useful. Inspect tables to find which ones are useful and modify the indices below if needed.

In [5]:
# Positions of the useful tables in `tables`; adjust them if the wiki page layout changes.
base_anomalies, distant_anomalies, storms_anomalies = tables[0], tables[2], tables[4]

This is a bit of a pain because the events are spread across many files and their identification isn't consistent. Cosmic storms in particular seem to handle the naming quite differently.

In [6]:
# Substrings that mark a localisation line as an anomaly-name entry. The text that
# follows the marker on the line is taken as the anomaly name.
possible_identifiers = [
    '_category:0', '_category_temp:0', '_category: ',
    '_CAT: ', '_CAT:1', '_cat:0', '_CAT:0', 'pc_astral_scar: "'
]

# Cosmic-storm anomalies are recognised by their quoted wiki name instead of a key suffix.
# Lower-cased once here rather than once per line.
storm_markers = [
    f'"{storm_anomaly.lower()}"' for storm_anomaly in storms_anomalies['Anomaly']
]

found_names = []
for file_path in localisation_path.joinpath('english').glob('*.yml'):
    # Read explicitly as UTF-8 (tolerating a leading BOM) instead of relying on the
    # platform default encoding, which is not UTF-8 on a typical Windows install.
    # NOTE(review): Stellaris localisation files are expected to be UTF-8 with BOM - confirm.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            name = ''
            # No break on purpose: when several identifiers match, the last one wins.
            for identifier in possible_identifiers:
                if identifier in line:
                    name = line.split(identifier)[1].strip().strip('"')

            # Storm lines: take everything after the first ':' unless it is a '.name' key.
            lowered = line.lower()
            if '.name' not in lowered and any(marker in lowered for marker in storm_markers):
                name = line.split(':')[1].strip().strip('"')

            # Names containing '$' are skipped.
            if name and '$' not in name:
                found_names.append(name)

# One row per matching line, in file order; duplicates are kept.
event_names = pd.DataFrame({'anomaly': found_names})

Merge the tables under common column names.

In [7]:
# Normalise each wiki table to two columns: the anomaly name and a description made of
# the outcome column ('Reward' when the table has it, otherwise 'Possible outcomes')
# followed by the celestial body.
dfs = [
    pd.DataFrame({
        'anomaly': table['Anomaly'],
        'description': (
            table['Reward' if 'Reward' in table.columns else 'Possible outcomes']
            + ' '
            + table['Celestial body']
        ),
    })
    for table in (base_anomalies, distant_anomalies, storms_anomalies)
]
# Stack the three normalised tables; the original row labels are kept as-is.
wiki_df = pd.concat(dfs)

Handle special cases, except Astral Scars, which is already handled.

In [8]:
# Rename the wiki's "Ice Giant" entry; presumably "Ice Ice Giant" is the spelling used
# in the game's localisation files - verify against the source names.
wiki_df.loc[wiki_df['anomaly'] == 'Ice Giant', 'anomaly'] = 'Ice Ice Giant'

# Collapse every "Debris Field ..." variant into a single "Debris Field" entry whose
# description lists all variants separated by " OR ".
# na=False keeps rows with a missing anomaly name out of the mask; without it the mask
# would contain NaN and boolean indexing would raise.
debris_mask = wiki_df['anomaly'].str.contains('Debris Field', na=False)
if debris_mask.any():  # nothing to collapse if the wiki no longer lists any variant
    wiki_df.loc[debris_mask, 'anomaly'] = 'Debris Field'
    wiki_df.loc[debris_mask, 'description'] = ' OR '.join(
        wiki_df.loc[debris_mask, 'description'].dropna().astype(str)
    )
# The collapsed variants are now identical rows; keep one of each.
wiki_df = wiki_df.drop_duplicates(subset=['anomaly', 'description'])

Merge the anomalies found in source code and anomalies from the wiki. 

- [ ] TODO handle missing anomalies (wiki has some outside the tables, plus it seems to just be missing some anomalies)

In [9]:
# Outer join on the lower-cased names so that unmatched rows from either side are kept.
# Passing Series as join keys makes pandas add a helper 'key_0' column to the result.
merged = event_names.merge(
    wiki_df,
    left_on=event_names['anomaly'].str.lower(),
    right_on=wiki_df['anomaly'].str.lower(),
    suffixes=('_source', '_wiki'),
    how='outer',
)

# Wiki anomalies that were not found in the game files; shown only when there are any.
wiki_only = merged[merged['anomaly_source'].isna()]
if not wiki_only.empty:
    display(HTML(wiki_only.to_html()))

# Report against the rows that really come from the source files. Counting every row of
# the outer merge would also include the wiki-only rows in the "out of" total.
source_rows = merged[merged['anomaly_source'].notna()]
display(
    f"{source_rows['anomaly_wiki'].isna().sum()} anomalies found in the source code "
    f"out of {len(source_rows)} currently not described."
)
# Uncomment to list the anomalies that have no wiki description:
# display(HTML(
#     merged[merged['anomaly_wiki'].isna()].to_html()
# ))

'57 anomalies found in the source code out of 227 currently not described.'

Double unfortunately, not all of the anomalies in the wiki are defined within neat tables.

- [ ] TODO not sure this is worth applying, it only collects a few anomalies and each might need special handling. Keeping it here for now.

In [10]:
# from bs4 import BeautifulSoup
# import requests
# def clean_with_soup(url: str) -> str:
#     r = requests.get(url).text
#     soup = BeautifulSoup(r, "html.parser")
#     return soup.get_text()

# wiki_text = requests.get(anomaly_url).text
# soup = BeautifulSoup(wiki_text, 'html.parser')
# for line in soup.get_text().split('\n'):
#     if line.strip():
#         for anomaly in merged[merged['anomaly_wiki'].isna()]['anomaly_source']:
#             if anomaly.lower() in line.strip().lower():
#                 display(line)

Drop unnecessary columns. Keep anomaly source names as those will be used to create the mod files and need to match source code naming. Handle NaN values.

In [11]:
merged = (
    merged
    # Helper join key and the wiki spelling are no longer needed.
    .drop(['key_0', 'anomaly_wiki'], axis=1, errors='ignore')
    # Rows without a source name cannot be matched to any localisation line. A frame-wide
    # fillna would turn their name into the placeholder text, so they are dropped instead.
    .dropna(subset=['anomaly_source'])
    # Only the description gets the placeholder text.
    .fillna({'description': 'Missing description. Please contribute to the wiki :)'})
)

Get the color definitions from the wiki.

In [12]:
# Wiki page documenting the localisation color codes (needs network access).
color_codes_url = 'https://stellaris.paradoxwikis.com/Localisation_modding#Color_Codes'
# The table at position 3 of the parsed page is used as the color-code table.
color_table = pd.read_html(color_codes_url)[3]
# Starts empty; its columns are filled in from color_table afterwards.
color_df = pd.DataFrame()

In [13]:
# Keep only the text before the first '/' of every cell, without surrounding whitespace.
for column in ('Code', 'Color'):
    color_df[column.lower()] = color_table[column].map(
        lambda cell: str(cell).split('/')[0].strip()
    )

# The color name is the text after the "⬛ " swatch. Cells without a swatch yield NaN
# here and are removed by dropna().
color_names = color_df['color'].str.split('⬛ ').str[1]
color_df['color'] = color_names.str.lower().str.strip()
color_df = color_df.dropna()
color_df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
code,W,T,g,L,P,R,S,H,Y,G,...,E,C,B,M,_,c,v,d,r,l
color,white,light grey,dark grey,brown,light dirty pink,red,dark orange,mango,yellow,green,...,teal,cyan,cyan-blue,purple,magenta,blue-green,faded green,tan,light purple,light green


Pair key words found in the anomaly descriptions with colors. Order matters: the first category with a matching key word becomes the chosen category. Ensure the colors are picked from the table above.

In [14]:
# Color name -> key words that assign an anomaly to that color. Insertion order is the
# priority order: the first color with a key word found in the description is used.
categories = {
    'red': ['scientist dies', 'Paranoid trait'],
    'green': [
        'Any Habitable Planet', 'technology', 'minerals',
        'influence', 'unity', 'corvettes',
    ],
    'yellow': [
        'scaled', 'deposit', 'archaeological', 'L-Gate',
        'Physics Research', 'Society Research', 'Engineering Research',
    ],
}

In [15]:
def _first_category_match(text, category_map=None):
    """Find the first key word of ``category_map`` that occurs in ``text``.

    Categories are tried in mapping order and key words in list order; the comparison
    is case-insensitive.

    Args:
        text: Description to search. Anything that is not a string never matches.
        category_map: Mapping of color name to a list of key words. Defaults to the
            notebook-level ``categories``.

    Returns:
        ``(color, lower-cased key word)`` for the first hit, or ``None`` without a hit.
    """
    if not isinstance(text, str):
        return None
    if category_map is None:
        category_map = categories
    lowered = text.lower()
    for color, key_words in category_map.items():
        for key_word in key_words:
            if key_word.lower() in lowered:
                return color, key_word.lower()
    return None


def key_word_function(x, category_map=None):
    """Return the lower-cased key word that classifies description ``x``, or ``''``."""
    match = _first_category_match(x, category_map)
    return match[1] if match else ''


def group_function(x, category_map=None):
    """Return the color category of description ``x``; ``'white'`` when nothing matches."""
    match = _first_category_match(x, category_map)
    return match[0] if match else 'white'

# Classify every anomaly by the first key word found in its description.
merged['color'] = merged['description'].apply(group_function)
merged['key_words'] = merged['description'].apply(key_word_function)
# Attach the localisation color code that belongs to each color name. A 'code' column
# left over from an earlier run of this cell is dropped first; otherwise the merge
# would produce 'code_x'/'code_y' and later lookups of merged['code'] would fail.
# The default inner join means rows whose color is not in color_df are dropped.
merged = merged.drop(columns='code', errors='ignore').merge(
    color_df,
    on='color',
)

In [16]:
# Number of anomalies assigned to each color category.
merged['color'].value_counts()

color
green     86
white     76
yellow    61
red        4
Name: count, dtype: int64

In [17]:
# Show the anomalies classified as 'red' for a manual check.
red_rows = merged[merged['color'] == 'red']
display(HTML(red_rows.to_html()))

Unnamed: 0,anomaly_source,description,color,key_words,code
35,Atmospheric Storms,+3 Physics Research deposit Choice between +7 Physics Research deposit but the scientist dies or a special project to give the scientist the Expertise: Field Manipulation trait Gas Giant,red,scientist dies,R
89,Heavy Readings,Special project to gain 90-250 Physics Research and 2 levels and the Paranoid trait for the scientist Any Star,red,paranoid trait,R
155,Rainbow in the Dark,+9 Physics Research deposit Scaled Physics Research and science ship scientist gains the Paranoid trait Black Hole,red,paranoid trait,R
156,Rainbow in the Dark,+9 Physics Research deposit Scaled Physics Research and science ship scientist gains the Paranoid trait Black Hole,red,paranoid trait,R


Prefer to read an existing definitions file over using created definitions so that the user may change it to their liking. Recommend backing up any modifications as the code below can overwrite it. 

In [18]:
# Move 'description' to the last column: pop() removes it and the assignment re-appends it.
merged['description'] = merged.pop('description')

definitions_file = Path('definitions.csv')
if definitions_file.is_file():
    # An existing file wins over the generated definitions so user edits are respected.
    # keep_default_na=False keeps empty cells as '' instead of NaN. NaN is truthy, so an
    # empty key_words cell would otherwise pass a truthiness check and render as "nan".
    # dtype=str keeps every column as text.
    merged = pd.read_csv(definitions_file, dtype=str, keep_default_na=False)
else:
    merged.to_csv(definitions_file, index=False)

Create the actual mod files. It reads directly from Stellaris source directory and copies the modified files into the mod directory. It does not alter source files. 

In [19]:
# Substrings that mark a localisation line as an anomaly-name entry.
possible_identifiers = [
    '_category:0', '_category_temp:0', '_category: ',
    '_CAT: ', '_CAT:1', '_cat:0', '_CAT:0', 'pc_astral_scar: "'
]

# Read and write explicitly as UTF-8 with a BOM instead of the platform default encoding.
# NOTE(review): Stellaris is expected to require UTF-8-BOM localisation files - confirm.
localisation_encoding = 'utf-8-sig'

# The output folder may not exist yet on a fresh checkout.
replace_dir.mkdir(parents=True, exist_ok=True)

# Quoted, lower-cased storm anomaly names, computed once instead of once per line.
storm_markers = [
    f'"{storm_anomaly.lower()}"' for storm_anomaly in storms_anomalies['Anomaly']
]

# (name, code, key_words) per definition, shortest name first. The last matching
# definition wins below, so when one name is a substring of another the longer, more
# specific name decides the replacement regardless of row order.
definitions = sorted(
    (
        (name, code, key_words)
        for name, code, key_words in zip(
            merged['anomaly_source'], merged['code'], merged['key_words']
        )
        if isinstance(name, str) and name  # skip missing/empty names
    ),
    key=lambda definition: len(definition[0]),
)

for file_path in localisation_path.joinpath('english').rglob('*.yml'):
    with open(file_path, 'r', encoding=localisation_encoding) as f:
        original_lines = f.readlines()

    is_storm_file = 'cosmic_storms' in str(file_path)
    lines = list(original_lines)
    for i, line in enumerate(original_lines):
        lowered = line.lower()
        has_identifier = any(identifier in line for identifier in possible_identifiers)
        is_storm_line = (
            is_storm_file
            and '.name' not in lowered
            and any(marker in lowered for marker in storm_markers)
        )
        if not (has_identifier or is_storm_line):
            continue  # not an anomaly-name line

        for name, code, key_words in definitions:
            if name in line:
                # Colorize the name and, when present, append the key word in grey.
                new_name_string = f'§{code}{name}§!'
                # key_words can be NaN when read back from a CSV; only use real text.
                if isinstance(key_words, str) and key_words:
                    new_name_string += f' §g({key_words})§!'
                # Always replace in the original line so replacements never stack.
                lines[i] = line.replace(name, new_name_string)

    # Only files that actually changed are written; the game files are never modified.
    if lines != original_lines:
        new_file_name = file_path.with_stem(file_path.stem + "_anomcat").name
        with open(replace_dir.joinpath(new_file_name), 'w', encoding=localisation_encoding) as f:
            f.writelines(lines)
                    