In [1]:
import pandas as pd
import numpy as np

# Two candidate palettes of five hex colours each. Only one is active at a
# time; previously the first list was assigned to `colorPalette` and then
# immediately overwritten, which made it dead code.
COLOR_PALETTE_VIVID = [
    "#F7F052",
    "#D81E5B",
    "#004FFF",
    "#011627",
    "#FFFFFF",
]

COLOR_PALETTE_GREYSCALE = [
    "#6C7172",
    "#ABAEAF",
    "#EAEBEB",
    "#2D3436",
    "#FFFFFF",
]

# Palette actually used by the rest of the notebook (same value as before).
colorPalette = COLOR_PALETTE_GREYSCALE

# Pairs of indices into `colorPalette`. `f_color` picks one pair per decade
# slot and returns the colour at position 0 or 1 of that pair.
# The last pair, (0, 3), is only reached for the 2020s and was marked
# "not used" by the original author.
permutations = [
    (0, 1), (0, 2), (3, 4), (0, 4), (1, 2),
    (1, 3), (1, 4), (2, 3), (2, 4), (0, 3),
]

def f_color(decade, index, palette=None, pairs=None):
    """Return the hex colour assigned to a decade.

    The first three colour pairs each cover two decades (1900-1919,
    1920-1939, 1940-1959); from 1960 onwards every decade has its own pair.

    Parameters
    ----------
    decade : int
        Start year of the decade (e.g. 1950). Any year inside the decade is
        accepted too and resolves to the same pair.
    index : int
        Which member of the pair to return (0 or 1).
    palette : sequence of str, optional
        Colours to choose from. Defaults to the module-level ``colorPalette``.
    pairs : sequence of (int, int), optional
        Palette-index pairs, one per decade slot. Defaults to the
        module-level ``permutations``.

    Returns
    -------
    str
        The selected entry of ``palette``.

    Raises
    ------
    IndexError
        If no pair is defined for the decade (before 1900, or past the end
        of ``pairs``). Previously a pre-1900 decade wrapped around through
        negative indexing and silently returned an unrelated colour.
    """
    if palette is None:
        palette = colorPalette
    if pairs is None:
        pairs = permutations

    year = int(decade)
    if year < 1900:
        raise IndexError("no colour pair defined for decade {}".format(decade))

    if year < 1960:
        # Two decades share one pair: 1900s/1910s -> 0, 1920s/30s -> 1, 1940s/50s -> 2.
        slot = (year - 1900) // 20
    else:
        # One pair per decade from here on: 1960s -> 3, 1970s -> 4, ...
        slot = year // 10 - 193

    if slot >= len(pairs):
        raise IndexError("no colour pair defined for decade {}".format(decade))
    return palette[pairs[slot][index]]

# Integer shape code for each prize category; looked up per laureate to fill
# the `shape_form` column.
MAPPING_CATEGORY_SHAPE_FORM = {
    'physics': 1,
    'medicine': 0,
    'chemistry': 3,
    'literature': 2,
    'peace': 5,
    'economics': 4,
}

In [2]:
# Load the laureates and rename the birth-country code so that it matches the
# key column of countries.csv.
laureate_df = pd.read_csv('laureate.csv').rename(
    columns={"bornCountryCode": "Two_Letter_Country_Code"}
)
print(laureate_df.columns)

# Attach the continent of the birth country (left join on the country code).
countries = pd.read_csv('countries.csv')[['Two_Letter_Country_Code', 'Continent_Name']]
laureate_df = laureate_df.join(
    countries.set_index('Two_Letter_Country_Code'), on='Two_Letter_Country_Code'
)

# Keep rows that have a prize year and whose gender is not 'org'.
# .copy() makes the result an independent frame, so the column assignments
# below never write into a slice of the joined frame.
is_dated_person = laureate_df['year'].notna() & (laureate_df['gender'] != 'org')
laureate_df = laureate_df[is_dated_person].copy()

laureate_df['year'] = laureate_df['year'].astype('int64')
# Vectorized floor to the decade start (1987 -> 1980).
laureate_df['decade'] = laureate_df['year'] // 10 * 10

laureate_df = laureate_df[
    ['id', 'Two_Letter_Country_Code', 'gender', 'category', 'Continent_Name', 'decade']
].copy()
# Member 0 of the decade's colour pair is the tile colour, member 1 its background.
laureate_df['color'] = laureate_df['decade'].apply(lambda x: f_color(x, 0))
laureate_df['color_background'] = laureate_df['decade'].apply(lambda x: f_color(x, 1))
# Direct dict lookup on purpose: an unknown category raises KeyError instead
# of silently producing a missing shape.
laureate_df['shape_form'] = laureate_df['category'].apply(lambda x: MAPPING_CATEGORY_SHAPE_FORM[x])

# reset_index() without drop=True keeps the pre-sort row label as an `index` column.
laureate_df = laureate_df.sort_values(by='Continent_Name', ascending=True)
laureate_df = laureate_df.reset_index()
laureate_df

Index(['id', 'firstname', 'surname', 'born', 'died', 'bornCountry',
       'Two_Letter_Country_Code', 'bornCity', 'diedCountry', 'diedCountryCode',
       'diedCity', 'gender', 'year', 'category', 'overallMotivation', 'share',
       'motivation', 'name', 'city', 'country'],
      dtype='object')


Unnamed: 0,index,id,Two_Letter_Country_Code,gender,category,Continent_Name,decade,color,color_background,shape_form
0,902,876,MA,male,physics,Africa,2010,#EAEBEB,#FFFFFF,1
1,438,417,ZA,male,medicine,Africa,1970,#ABAEAF,#EAEBEB,0
2,671,663,NG,male,literature,Africa,1980,#ABAEAF,#2D3436,2
3,577,557,EG,male,peace,Africa,1990,#ABAEAF,#FFFFFF,5
4,576,556,ZA,male,peace,Africa,1990,#ABAEAF,#FFFFFF,5
...,...,...,...,...,...,...,...,...,...,...
973,628,615,CL,female,literature,South America,1940,#2D3436,#FFFFFF,2
974,655,645,CL,male,literature,South America,1970,#ABAEAF,#EAEBEB,2
975,562,541,AR,male,peace,South America,1980,#ABAEAF,#2D3436,5
976,877,854,PE,male,literature,South America,2010,#EAEBEB,#FFFFFF,2


In [3]:
# Laureates drawn with colour '#2D3436' on a '#FFFFFF' background.
laureate_df.loc[
    lambda df: df['color'].eq('#2D3436') & df['color_background'].eq('#FFFFFF')
]

Unnamed: 0,index,id,Two_Letter_Country_Code,gender,category,Continent_Name,decade,color,color_background,shape_form
22,640,628,DZ,male,literature,Africa,1950,#2D3436,#FFFFFF,2
23,369,352,ZA,male,medicine,Africa,1950,#2D3436,#FFFFFF,0
71,373,355,RU,male,medicine,Asia,1950,#2D3436,#FFFFFF,0
73,372,355,RU,male,medicine,Asia,1950,#2D3436,#FFFFFF,0
80,641,629,RU,male,literature,Asia,1950,#2D3436,#FFFFFF,2
...,...,...,...,...,...,...,...,...,...,...
876,527,505,US,male,peace,North America,1940,#2D3436,#FFFFFF,5
878,388,368,US,male,medicine,North America,1950,#2D3436,#FFFFFF,0
959,357,341,AU,male,medicine,Oceania,1940,#2D3436,#FFFFFF,0
973,628,615,CL,female,literature,South America,1940,#2D3436,#FFFFFF,2


In [4]:
# All laureates whose birth-country code is 'CH'.
laureate_df.loc[laureate_df['Two_Letter_Country_Code'].eq('CH')]

Unnamed: 0,index,id,Two_Letter_Country_Code,gender,category,Continent_Name,decade,color,color_background,shape_form
136,435,414,CH,male,medicine,Europe,1970,#ABAEAF,#EAEBEB,0
157,487,465,CH,male,peace,Europe,1900,#6C7172,#ABAEAF,5
158,486,464,CH,male,peace,Europe,1900,#6C7172,#ABAEAF,5
160,484,462,CH,male,peace,Europe,1900,#6C7172,#ABAEAF,5
165,477,456,CH,male,medicine,Europe,1990,#ABAEAF,#FFFFFF,0
197,383,363,CH,male,medicine,Europe,1950,#2D3436,#FFFFFF,0
258,607,588,CH,male,literature,Europe,1910,#6C7172,#ABAEAF,2
288,768,758,CH,male,chemistry,Europe,2000,#EAEBEB,#2D3436,3
307,767,758,CH,male,chemistry,Europe,2000,#EAEBEB,#2D3436,3
379,363,347,CH,male,medicine,Europe,1940,#2D3436,#FFFFFF,0


In [5]:
# Number of laureate rows per (decade, gender).
laureate_df.groupby(['decade', 'gender'])[['id']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,id
decade,gender,Unnamed: 2_level_1
1900,female,3
1900,male,55
1910,female,1
1910,male,39
1920,female,2
1920,male,52
1930,female,3
1930,male,60
1940,female,3
1940,male,39


In [6]:
# Number of laureate rows per decade.
laureate_df.groupby('decade')[['id']].count()

Unnamed: 0_level_0,id
decade,Unnamed: 1_level_1
1900,58
1910,40
1920,54
1930,63
1940,42
1950,87
1960,80
1970,113
1980,97
1990,105


In [7]:
# Number of laureate rows per category, largest first.
(
    laureate_df
    .groupby('category')[['id']]
    .count()
    .sort_values(by='id', ascending=False)
)

Unnamed: 0_level_0,id
category,Unnamed: 1_level_1
physics,235
medicine,230
chemistry,200
literature,120
peace,106
economics,87


In [8]:
# Laureate count per continent, ordered alphabetically by continent name,
# with the continent as a regular column.
continents_grouped_df = (
    laureate_df
    .groupby('Continent_Name')[['id']]
    .count()
    .sort_index(ascending=True)
    .reset_index()
)
continents_grouped_df

Unnamed: 0,Continent_Name,id
0,Africa,27
1,Asia,107
2,Europe,515
3,North America,304
4,Oceania,14
5,South America,11


In [9]:
# Scale factor applied to grid columns/rows to get tile x/y positions; also
# written to sketch.js as `TILE_SIZE`.
TILE_SIZE = 100
# Number of tiles per row: slot numbers wrap to the next row after this many,
# and the canvas width is derived from it.
NB_TILES_W = 31

In [10]:
# Per continent: the slot numbers still free (0 .. count-1) and the tiles
# already placed.
POSITIONS_LEFT = {}
POSITIONS = {}
for continent, count in zip(continents_grouped_df['Continent_Name'], continents_grouped_df['id']):
    POSITIONS_LEFT[continent] = list(range(count))
    POSITIONS[continent] = []

# Only the first European laureate with shape_form 3 (chemistry in
# MAPPING_CATEGORY_SHAPE_FORM) gets the highlight colours below.
jacques_flag = False

# Generate relative position within continent: every laureate draws one of the
# remaining free slots of its continent at random.
# NOTE(review): no random seed is set in the visible cells, so the layout
# presumably differs on every run - confirm whether that is intended.
laureate_columns = zip(
    laureate_df['Continent_Name'],
    laureate_df['shape_form'],
    laureate_df['color'],
    laureate_df['color_background'],
)
for continent, shape_form, color, color_background in laureate_columns:
    free_slots = POSITIONS_LEFT[continent]
    pick = int(np.random.randint(low=0, high=len(free_slots), size=1)[0])
    slot = free_slots.pop(pick)

    # Plain Python int: the tile dict is later written out with str(), and a
    # numpy scalar would not render as a bare number under NumPy 2.
    shape_form = int(shape_form)

    if continent == 'Europe' and shape_form == 3 and not jacques_flag:
        jacques_flag = True
        color, color_background = '#ffbaba', '#ff5252'

    # Key order matters: the serialized tile has to start with {'x.
    POSITIONS[continent].append({
        'x_pos': slot % NB_TILES_W * TILE_SIZE,
        'y_pos': slot // NB_TILES_W * TILE_SIZE,
        'color': color,
        'color_background': color_background,
        'shape_form': shape_form,
    })

# Generate y_pos offset: stack the continents vertically, leaving a gap
# between the last row of one continent and the first row of the next.
offset = 3 * TILE_SIZE + 10
offset_x = 10
for continent in POSITIONS.keys(): # TODO: put continents in wanted order
    POSITIONS[continent].sort(key=lambda x: (x['y_pos'], x['x_pos']))
    for tile in POSITIONS[continent]:
        tile['y_pos'] += offset
        tile['x_pos'] += offset_x
    # The next continent starts two tile heights below this continent's last row.
    offset = POSITIONS[continent][-1]['y_pos'] + 2 * TILE_SIZE

In [11]:
# Flatten the per-continent tile lists into one list, continent by continent.
tiles = [tile for continent_tiles in POSITIONS.values() for tile in continent_tiles]

In [12]:
# y_pos of the last tile plus one tile size; the same expression plus 10 is
# what gets written to sketch.js as the canvas height `h`.
tiles[-1]['y_pos'] + TILE_SIZE

4210

In [13]:
# Read the current sketch so its tile lines and header can be patched in place.
with open('sketch.js', 'r') as f:
    data = f.readlines()

# Every line that starts like a serialized tile ({'x...) is replaced by the
# next generated tile, in file order.
# NOTE(review): if the file holds fewer such lines than len(tiles), the extra
# tiles are silently left out; if it holds more, tiles[index] raises
# IndexError - confirm the template always matches.
index = 0
for i in range(len(data)):
    if data[i].startswith('{\'x'):
        data[i] = '{},\n'.format(tiles[index])
        index += 1

# Lines at positions 1-3 of the file carry the tile size and canvas dimensions.
canvas_width = TILE_SIZE * NB_TILES_W + 20
canvas_height = tiles[-1]['y_pos'] + TILE_SIZE + 10
data[1] = 'let TILE_SIZE = {};\n'.format(TILE_SIZE)
data[2] = 'let w = {};\n'.format(canvas_width)
data[3] = 'let h = {};\n'.format(canvas_height)

with open('sketch.js', 'w') as file:
    file.writelines(data)

In [14]:
# Sixteen legend tiles on one row at y = 10, placed on every second grid column.
legends = [
    {
        'x_pos': 10 + column * TILE_SIZE,
        'y_pos': 10,
        'color_background': 0,
        'shape_form': 0,
    }
    for column in range(0, 32, 2)
]

# Legend entries 6 onwards take one colour pair each, in `permutations` order.
# The first six keep color_background 0 and get no 'color' key here.
# NOTE(review): here pair[0] is the background and pair[1] the colour, whereas
# the laureate tiles use pair[0] as colour and pair[1] as background - confirm
# that the inversion is intended.
for pair_number, legend in enumerate(legends[6:]):
    background_idx, color_idx = permutations[pair_number]
    legend['color_background'] = colorPalette[background_idx]
    legend['color'] = colorPalette[color_idx]

# The last legend entry is overwritten with the highlight colours.
legends[-1].update({'color_background': '#ff5252', 'color': '#ffbaba'})

In [15]:
# Display the generated legend entries for inspection.
legends

[{'x_pos': 10, 'y_pos': 10, 'color_background': 0, 'shape_form': 0},
 {'x_pos': 210, 'y_pos': 10, 'color_background': 0, 'shape_form': 0},
 {'x_pos': 410, 'y_pos': 10, 'color_background': 0, 'shape_form': 0},
 {'x_pos': 610, 'y_pos': 10, 'color_background': 0, 'shape_form': 0},
 {'x_pos': 810, 'y_pos': 10, 'color_background': 0, 'shape_form': 0},
 {'x_pos': 1010, 'y_pos': 10, 'color_background': 0, 'shape_form': 0},
 {'x_pos': 1210,
  'y_pos': 10,
  'color_background': '#6C7172',
  'shape_form': 0,
  'color': '#ABAEAF'},
 {'x_pos': 1410,
  'y_pos': 10,
  'color_background': '#6C7172',
  'shape_form': 0,
  'color': '#EAEBEB'},
 {'x_pos': 1610,
  'y_pos': 10,
  'color_background': '#2D3436',
  'shape_form': 0,
  'color': '#FFFFFF'},
 {'x_pos': 1810,
  'y_pos': 10,
  'color_background': '#6C7172',
  'shape_form': 0,
  'color': '#FFFFFF'},
 {'x_pos': 2010,
  'y_pos': 10,
  'color_background': '#ABAEAF',
  'shape_form': 0,
  'color': '#EAEBEB'},
 {'x_pos': 2210,
  'y_pos': 10,
  'color_back