In [1]:
import collections
import datetime
import itertools

import requests

# Prison Architect custom names

A little script-o-notebook thing to look at some of the worse names that snuck through the censors.

In [2]:
# replicate form request from http://www.introversion.co.uk/prisonarchitect/developer/nameingame/listnameingame.html
url = 'http://www.introversion.co.uk/prisonarchitect/developer/nameingame/listnameingame.php'
data = {
    'submit': 'Submit',
#     'authorised': 'Authorised',
#     'authorised': 'Un-Authorised',
    'authorised': 'ALL',
}
unauthorized = requests.post(url=url, data={'authorised': 'Authorised'})
authorized = requests.post(url=url, data={'authorised': 'Un-Authorised'})

In [3]:
print(authorized.content[:500].decode(authorized.encoding))

# Name in the game list made on 2016/10/04 05:34:27

BEGIN prisoner
  FirstName		"DoNotAuthorize"
  NickName		""
  LastName		"seeID"
  DoB			"1993.12.15"
  Bio			"steamcommunity.com/id/MakaHost/"
  BodyType		"1"
  BodyScale		"1"
  HeadType		"22"
  SkinColour	"0xfdc901ff"
  Id			"0"
  Gender		"Male"
END

BEGIN prisoner
  FirstName		"Oskar"
  NickName		"Croko"
  LastName		"Stangenberg"
  DoB			"1986.12.08"
  Bio			"Started working on computers at a young age and worked its way up to become one of 


In [4]:
def response_to_lines(response):
    """Convert the byte-response to an iterable of strings"""
    for line in response.iter_lines():
        yield line.decode(response.encoding)
        
_response = requests.get('http://httpbin.org/html')
_line = next(iter(response_to_lines(_response)))
assert not isinstance(_line, bytes) and isinstance(_line, str)

In [5]:
def parse_data_line(line):
    key, value = line.strip().split(maxsplit=1)
    return (key, value.strip()[1:-1])
    
assert parse_data_line('  DoB\t\t\t"1993.12.15"\n') == ('DoB', '1993.12.15')
assert parse_data_line('  SkinColour\t"0xfdc901ff"') == ('SkinColour', '0xfdc901ff')

In [6]:
def dob_convert(dob):
    return datetime.datetime.strptime(dob, '%Y.%m.%d')

_dob = dob_convert('1993.12.15')
assert (_dob.year, _dob.month, _dob.day) == (1993, 12, 15)

In [7]:
cleaners = {
    'DoB': dob_convert,
    'BodyScale': float,
    'BodyType': int,
    'Id': int,
    'HeadType': int,
}

def parse_prisoner(istream):
    '''Iterate through the stream until finding the end and return the data'''
    for line in istream:
        if line.startswith('END'):
            break
        key, value = parse_data_line(line)
        
        if key in cleaners:
            value = cleaners[key](value)
        
        yield key, value

assert dict(parse_prisoner(['a "a"', 'END'])) == {'a': 'a'}
assert dict(parse_prisoner(['Id "1234"', 'END'])) == {'Id': 1234}
assert (dict(parse_prisoner(['HelloThere "What\'s Up Doc"', 'END'])) 
        == {'HelloThere': 'What\'s Up Doc'})
assert (dict(parse_prisoner(['BodyType "2"', 'HeadType "3"', 'END']))
        == {'BodyType': 2, 'HeadType': 3})

In [8]:
parsers = {
    'prisoner': parse_prisoner,
}

def generate_records(stream, **extras):
    istream = iter(stream)
    for line in istream:
        line = line.strip()
        
        if not line or line.startswith('#'):
            continue
        
        assert line.startswith('BEGIN')
        
        rectype = line[5:].strip()
        
        yield dict(parsers[rectype](istream), **extras)

In [9]:
authed = list(generate_records(response_to_lines(authorized), Authorized=True))
unauthed = list(generate_records(response_to_lines(unauthorized), Authorized=False))
data = authed + unauthed

print('Authorized:    {:>5d}\n'
      'Un-Authorized: {:>5d}\n'
      'Total:         {:>5d}'
      .format(len(authed), len(unauthed), len(data)))

Authorized:     1356
Un-Authorized: 25234
Total:         26590


In [38]:
# All keys in the records
all_keys = set().union(*(d.keys() for d in data))
all_keys

{'Authorized',
 'Bio',
 'BodyScale',
 'BodyType',
 'DoB',
 'FirstName',
 'Gender',
 'HeadType',
 'Id',
 'LastName',
 'NickName',
 'SkinColour'}

In [11]:
def unique_keys(k):
    return {d[k] for d in data if k in d}

print("Body\n  Types:  {}\n  Scales: {}\n\nHeadTypes: {}".format(
    unique_keys('BodyType'),
    unique_keys('BodyScale'),
    unique_keys('HeadType'),
))

Body
  Types:  {1, 2, 3, 4}
  Scales: {0.85, 1.0, 1.1, 1.05, 0.95, 1.125, 0.8}

HeadTypes: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39}


# The Cancer

In [12]:
[d for d in data if 'inazi' in d['Bio'].lower()]

[{'Authorized': False,
  'Bio': '20 Minutes of Action. 50 Free Shots For Everyone. Professional Airplane Hijacker and pilot. Vegan and feminazi. Vapes All Day Everyday. Supports Communism and Multiculture. Religion is unknown. Never have played 33% of his Steam Games. TheEnd',
  'BodyScale': 1.0,
  'BodyType': 4,
  'DoB': datetime.datetime(1999, 5, 29, 0, 0),
  'FirstName': 'Elmo',
  'Gender': 'Male',
  'HeadType': 11,
  'Id': 258541,
  'LastName': 'Hiiva',
  'NickName': "''Leipuri Derbi''",
  'SkinColour': '0xfdc97bff'},
 {'Authorized': False,
  'Bio': "Young student,hobby playing video games all day. Gets into heated argument online, tells 'f*ck you' to another person on twitter, person is a feminazi, calls the police, accused of harassment, judge rules 'feels over realz' convicted of rape.",
  'BodyScale': 1.0,
  'BodyType': 1,
  'DoB': datetime.datetime(1996, 12, 22, 0, 0),
  'FirstName': 'Francisco',
  'Gender': 'Male',
  'HeadType': 27,
  'Id': 415061,
  'LastName': 'Corral',
  '

In [13]:
# Despite being "not authorized", this one shows up in-game
[d for d in data if 'kills asians' in d['Bio']]

[{'Authorized': False,
  'Bio': 'kills asians for a living',
  'BodyScale': 1.0,
  'BodyType': 4,
  'DoB': datetime.datetime(2000, 2, 8, 0, 0),
  'FirstName': 'Matthew',
  'Gender': 'Male',
  'HeadType': 17,
  'Id': 419969,
  'LastName': 'findlay',
  'NickName': 'asian killer',
  'SkinColour': '0x6d3e1cff'}]

# Dump to more useful file formats

In [51]:
import copy
import json

In [52]:
# Built-in serializers don't like datetype format, just use YYYY-MM-DD
sdata = copy.deepcopy(data)
for d in sdata:
    d['DoB'] = d['DoB'].strftime('%Y-%m-%d')

In [57]:
with open('prison-architect-names.json', 'w') as f:
    json.dump(sdata, f, indent=True)

In [54]:
# import csv #...pandas more easier
import pandas as pd

In [55]:
df = pd.DataFrame(data)
df.set_index('Id', inplace=True)

In [56]:
column_order = [
    'FirstName',
    'NickName',
    'LastName',
    'Gender',
    'DoB',
    'Authorized',
    'BodyType',
    'BodyScale',
    'HeadType',
    'SkinColour',
    'Bio',
]
df.to_csv('prison-architect-names.csv', columns=column_order)