In [1]:
from IPython.display import display, HTML
import pandas as pd
from pathlib import Path

Grab all anomaly names from the game files. For simplicity, I suggest creating a shortcut named steam_stellaris, in the notebook's working directory, that leads to the Stellaris directory inside Steam. Alternatively, replace steam_stellaris in the cell below with the full path, which looks like C:\Program Files (x86)\Steam\steamapps\common\Stellaris.

In [None]:
# Root of the Stellaris install. The path is relative, so it is resolved
# against the notebook's working directory.
stellaris_path = Path('steam_stellaris')
# English localisation files: <root>/localisation/english
localisation_path = stellaris_path / 'localisation' / 'english'

This is a bit of a pain because the events are spread across many files and their identification isn't consistent.

In [111]:
# Key suffixes that mark an anomaly-category entry in the localisation files.
# Several variants are listed because the files do not use one consistent form.
possible_identifiers = [
    '_category:0', '_category_temp:0', '_category: ',
    '_CAT: ', '_CAT:1', '_cat:0', '_CAT:0'
]
event_names = []
# sorted() keeps the order of collected names independent of the order in
# which the file system happens to list the files.
for file_path in sorted(localisation_path.glob('*.yml')):
    # Decode explicitly as UTF-8 (dropping a leading byte-order mark if present)
    # instead of relying on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            name = ''
            for identifier in possible_identifiers:
                if identifier in line:
                    # Whatever follows the identifier is the quoted display name.
                    # If several identifiers match, the last one in the list wins.
                    name = line.split(identifier)[1].strip().strip('"')
            # Skip names containing '$' (presumably unresolved $variable$
            # references rather than real display names -- TODO confirm).
            if name and '$' not in name:
                event_names.append(name)
# One-column frame of anomaly names as they appear in the game files.
event_names = pd.DataFrame({'anomaly': event_names})

Grab the anomaly tables from the wiki

In [39]:
# Fetch the wiki page and parse every HTML table on it into a DataFrame
# (this needs network access).
anomaly_url = 'https://stellaris.paradoxwikis.com/Anomaly'
tables = pd.read_html(anomaly_url)
'Found {} tables'.format(len(tables))

'Found 20 tables'

Unfortunately, not all of the parsed tables are actually useful. Inspect `tables` to see which ones are, and adjust the indices below if needed.

In [106]:
# Positions within `tables` of the wiki tables that get merged further down.
useful_table_indices = [
    0,
    2,
    4,
]

Merge the tables under common column names.

In [107]:
def _anomaly_frame(table):
    """Reduce one wiki table to the shared 'anomaly' / 'description' columns.

    The outcome text is taken from the 'Reward' column when the table has one,
    otherwise from 'Possible outcomes', and is followed by the celestial body
    separated by a single space.
    """
    outcome_column = 'Reward' if 'Reward' in table.columns else 'Possible outcomes'
    combined_text = table[outcome_column] + ' ' + table['Celestial body']
    return pd.DataFrame({
        'anomaly': table['Anomaly'],
        'description': combined_text,
    })


# One normalised frame per selected wiki table, stacked into a single frame.
dfs = [_anomaly_frame(tables[i]) for i in useful_table_indices]
df = pd.concat(dfs)

Handle special cases.

In [108]:
 # NOTE(review): `new_desc` is only assigned in the cell that follows, so this
 # preview fails on a fresh kernel unless that cell has been run first.
 new_desc['description'].iloc[0]

'+5 Minerals deposit Asteroid OR Gain 18x engineering output (potential gain of\xa0350 ~ 100\u202f000) and can choose to start a 42 year countdown Any Habitable Planet'

In [109]:
# Rename 'Ice Giant' to 'Ice Ice Giant' (presumably the name used in the game
# files, so that the later name-based merge lines up -- TODO confirm).
df.loc[df['anomaly'] == 'Ice Giant', 'anomaly'] = 'Ice Ice Giant'

# Fold every anomaly whose name contains 'Debris Field' into one entry.
# na=False treats a missing anomaly name as "no match"; without it a NaN in
# the mask makes the boolean indexing fail.
is_debris_variant = df['anomaly'].str.contains('Debris Field', na=False)
df.loc[is_debris_variant, 'anomaly'] = 'Debris Field'

# Join the descriptions of all Debris Field rows into one ' OR '-separated text.
new_desc = df[df['anomaly'] == 'Debris Field'].groupby(['anomaly']).agg(' OR '.join)
# Only write the joined text back when there is something to join; indexing
# .values[0] on an empty result would raise IndexError.
if not new_desc.empty:
    df.loc[df['anomaly'] == 'Debris Field', 'description'] = new_desc['description'].values[0]

# The Debris Field rows are now identical, so this leaves a single one of them.
df = df.drop_duplicates(subset=['anomaly', 'description'])

In [121]:
# Outer-join the game-file names with the wiki rows, matching on the
# lower-cased anomaly name so that differences in capitalisation do not matter.
game_keys = event_names['anomaly'].str.lower()
wiki_keys = df['anomaly'].str.lower()
merged = event_names.merge(df, left_on=game_keys, right_on=wiki_keys, how='outer')

# Rows with no wiki-side name: anomalies found in the game files that had no
# match among the wiki rows.
missing_from_wiki = merged[merged['anomaly_y'].isna()]
display(HTML(missing_from_wiki.to_html()))


Unnamed: 0,key_0,anomaly_x,anomaly_y,description
3,a lush planet,A Lush Planet,,
4,a planetary machine,A Planetary Machine,,
6,abandoned observation post,Abandoned Observation Post,,
9,abnormal conditions,Abnormal Conditions,,
15,alien site,Alien Site,,
21,ancient manufactory,Ancient Manufactory,,
23,ancient signs of life,Ancient Signs of Life,,
37,backgrounds,Backgrounds,,
38,between land and sea,Between Land and Sea,,
39,billowing sands,Billowing Sands,,


Pair keywords found in the anomaly description with a colour category. Order matters: the categories are checked in the order listed, and the first one with a matching keyword is chosen.

In [16]:
# Keywords per colour category. Insertion order is significant: categories are
# checked in this order and the first one with a matching keyword is used.
categories = {
    'red': ['scientist dies', 'Paranoid trait'],
    'green': [
        'Any Habitable Planet', 'technology', 'minerals',
        'influence', 'unity', 'corvettes',
    ],
    'yellow': [
        'scaled', 'deposit', 'archaeological', 'L-Gate',
        'Physics Research', 'Society Research', 'Engineering Research',
    ],
}

In [8]:
def like_function(x, keyword_map=None, default='white'):
    """Return the first category whose keyword occurs in the description ``x``.

    Matching is a case-insensitive substring test. Categories are tried in the
    mapping's iteration order, so earlier categories take precedence.

    Parameters
    ----------
    x : str
        Description text. Any non-string value (for example a NaN from a
        missing table cell) is given the default category.
    keyword_map : dict of str -> list of str, optional
        Category name mapped to its keywords. When omitted, the notebook-level
        ``categories`` mapping is used.
    default : str, optional
        Category returned when no keyword matches.

    Returns
    -------
    str
        The matching category name, or ``default``.
    """
    if keyword_map is None:
        keyword_map = categories
    # Missing descriptions have no .lower(); treat them as "no keyword found".
    if not isinstance(x, str):
        return default
    text = x.lower()  # lower-case once instead of once per keyword
    for key, values in keyword_map.items():
        if any(value.lower() in text for value in values):
            return key
    return default

# Label every anomaly with the category derived from its description text.
df['category'] = df['description'].map(like_function)

In [9]:
# (rows, columns) of the anomaly table.
df.shape

(154, 3)

In [10]:
# Number of anomalies assigned to each category.
df['category'].value_counts()

category
green     75
yellow    57
white     19
red        3
Name: count, dtype: int64

In [114]:
# Anomalies that matched no keyword and so kept the default 'white' category.
# This needs the 'category' column, which is added by the cell that applies
# like_function, so run that cell first.
uncategorised = df[df['category'] == 'white']
display(HTML(uncategorised.to_html()))

KeyError: 'category'

In [12]:
# Show the categorised anomaly table (the notebook display truncates long frames).
df

Unnamed: 0,anomaly,description,category
0,On Solar Sails,Message in a Bottle archaeological site Any St...,yellow
1,A Strange Resonance,Choice between +4 Physics Research deposit and...,green
2,Improbable Orbit,Choice between 40-100 Influence or a special p...,green
3,Light Phenomenon,Choice between +4 Physics Research deposit or ...,green
4,Distress Signal,Special project to gain scaled Society Researc...,yellow
...,...,...,...
35,Metallic Sands,90-250 Engineering Research Nanosands planeta...,green
36,Supply Ship Wreckage,300 minerals or 100 Influence Any Planet,green
37,Heavy Readings,Special project to gain 90-250 Physics Researc...,red
38,Interference,250-100000 Physics Research and 250-100000 Soc...,yellow
