In [130]:
import pandas as pd
import numpy as np

# Five-colour palette; f_color picks two entries of it per decade bucket.
colorPalette = [
    "#6C756B",
    "#1C2541",
    "#D2FF28",
    "#54F2F2",
#     "#D81E5B",
    "#FFFFFF",
  ]

# Every unordered pair of palette indices (these are combinations, despite the
# name). Entry k is the (foreground, background) index pair of decade bucket k.
permutations = [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]

def f_color(decade, index):
    """Return one colour of the palette pair assigned to ``decade``.

    Buckets: 1900 and 1910 share pair 0, 1920 and 1930 share pair 1, and each
    decade from 1940 onwards gets its own pair (1940 -> 2, ..., 2010 -> 9).

    Parameters
    ----------
    decade : int
        Decade start year, e.g. 1970.
    index : int
        Position inside the pair: 0 or 1 (the caller uses 0 for ``color`` and
        1 for ``color_background``).

    Returns
    -------
    str
        Hex colour string taken from ``colorPalette``.

    Raises
    ------
    IndexError
        If ``decade`` has no pair assigned. Without this check a decade before
        1940 that is not one of the four listed values would produce a small or
        negative slot and silently pick an unrelated pair, while a decade after
        the last supported one would fail with an uninformative message.
    """
    if decade in [1900, 1910]:
        slot = 0
    elif decade in [1920, 1930]:
        slot = 1
    else:
        # 1940 -> 2, 1950 -> 3, ... one slot per decade.
        slot = decade // 10 - 194 + 2
        if not 2 <= slot < len(permutations):
            last_decade = (len(permutations) - 1 + 192) * 10
            raise IndexError(
                'no colour pair defined for decade {!r}; supported decades are '
                '1900, 1910, ..., {}'.format(decade, last_decade)
            )
    pointer = permutations[slot]
    return colorPalette[pointer[index]]

# def f_color(decade):
#     if decade in [1900, 1910, 1920]:
#         return colorPalette[0]
#     elif decade in [1930, 1940, 1950]:
#         return colorPalette[1]
#     elif decade in [1960, 1970]:
#         return colorPalette[2]
#     elif decade in [1980, 1990]:
#         return colorPalette[3]
#     else:
#         return colorPalette[4]

In [131]:
# Load the laureate table and rename the birth-country code column so it
# matches the key column of countries.csv.
laureate_df = pd.read_csv('laureate.csv')
laureate_df = laureate_df.rename(columns={"bornCountryCode": "Two_Letter_Country_Code"})
print(laureate_df.columns)

# Country code -> continent lookup. Keep a single row per code: a code listed
# more than once would make the left join below emit one laureate row per
# match and inflate every count computed afterwards.
countries = (
    pd.read_csv('countries.csv')[['Two_Letter_Country_Code', 'Continent_Name']]
    .drop_duplicates(subset='Two_Letter_Country_Code', keep='first')
)
laureate_df = laureate_df.join(countries.set_index('Two_Letter_Country_Code'), on='Two_Letter_Country_Code')

# Drop rows without a prize year and rows whose gender is 'org'.
laureate_df = laureate_df[laureate_df['year'].notna() & (laureate_df['gender'] != 'org')]

# Decade start year (e.g. 1987 -> 1980), computed on the whole column at once.
decade = laureate_df['year'].astype('int64') // 10 * 10

# Build the final frame with .assign so no column is written into a filtered
# slice (avoids SettingWithCopyWarning). Column order matches the original.
laureate_df = (
    laureate_df[['id', 'Two_Letter_Country_Code', 'gender', 'category', 'Continent_Name']]
    .assign(
        decade=decade,
        color=decade.apply(lambda d: f_color(d, 0)),
        color_background=decade.apply(lambda d: f_color(d, 1)),
    )
    .sort_values(by='Continent_Name', ascending=True)
    .reset_index()  # keeps the pre-sort row label as an 'index' column
)
laureate_df

Index(['id', 'firstname', 'surname', 'born', 'died', 'bornCountry',
       'Two_Letter_Country_Code', 'bornCity', 'diedCountry', 'diedCountryCode',
       'diedCity', 'gender', 'year', 'category', 'overallMotivation', 'share',
       'motivation', 'name', 'city', 'country'],
      dtype='object')


Unnamed: 0,index,id,Two_Letter_Country_Code,gender,category,Continent_Name,decade,color,color_background
0,902,876,MA,male,physics,Africa,2010,#54F2F2,#FFFFFF
1,438,417,ZA,male,medicine,Africa,1970,#1C2541,#54F2F2
2,671,663,NG,male,literature,Africa,1980,#1C2541,#FFFFFF
3,577,557,EG,male,peace,Africa,1990,#D2FF28,#54F2F2
4,576,556,ZA,male,peace,Africa,1990,#D2FF28,#54F2F2
...,...,...,...,...,...,...,...,...,...
973,628,615,CL,female,literature,South America,1940,#6C756B,#54F2F2
974,655,645,CL,male,literature,South America,1970,#1C2541,#54F2F2
975,562,541,AR,male,peace,South America,1980,#1C2541,#FFFFFF
976,877,854,PE,male,literature,South America,2010,#54F2F2,#FFFFFF


In [132]:
# Rows whose birth-country code is 'CH'.
laureate_df.loc[laureate_df['Two_Letter_Country_Code'].eq('CH')]

Unnamed: 0,index,id,Two_Letter_Country_Code,gender,category,Continent_Name,decade,color,color_background
136,435,414,CH,male,medicine,Europe,1970,#1C2541,#54F2F2
157,487,465,CH,male,peace,Europe,1900,#6C756B,#1C2541
158,486,464,CH,male,peace,Europe,1900,#6C756B,#1C2541
160,484,462,CH,male,peace,Europe,1900,#6C756B,#1C2541
165,477,456,CH,male,medicine,Europe,1990,#D2FF28,#54F2F2
197,383,363,CH,male,medicine,Europe,1950,#6C756B,#FFFFFF
258,607,588,CH,male,literature,Europe,1910,#6C756B,#1C2541
288,768,758,CH,male,chemistry,Europe,2000,#D2FF28,#FFFFFF
307,767,758,CH,male,chemistry,Europe,2000,#D2FF28,#FFFFFF
379,363,347,CH,male,medicine,Europe,1940,#6C756B,#54F2F2


In [133]:
# Number of non-null ids per (decade, gender) pair.
laureate_df.groupby(['decade', 'gender'])[['id']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,id
decade,gender,Unnamed: 2_level_1
1900,female,3
1900,male,55
1910,female,1
1910,male,39
1920,female,2
1920,male,52
1930,female,3
1930,male,60
1940,female,3
1940,male,39


In [134]:
# Number of non-null ids per decade.
laureate_df.groupby('decade')[['id']].count()

Unnamed: 0_level_0,id
decade,Unnamed: 1_level_1
1900,58
1910,40
1920,54
1930,63
1940,42
1950,87
1960,80
1970,113
1980,97
1990,105


In [138]:
# Number of non-null ids per category, largest first.
(
    laureate_df
    .groupby('category')[['id']]
    .count()
    .sort_values(by='id', ascending=False)
)

Unnamed: 0_level_0,id
category,Unnamed: 1_level_1
physics,235
medicine,230
chemistry,200
literature,120
peace,106
economics,87


In [139]:
# One row per continent with its id count, ordered by continent name; the
# continent becomes a regular column again after reset_index().
continents_grouped_df = (
    laureate_df
    .groupby('Continent_Name')[['id']]
    .count()
    .sort_values(by='Continent_Name', ascending=True)
    .reset_index()
)
continents_grouped_df

Unnamed: 0,Continent_Name,id
0,Africa,27
1,Asia,107
2,Europe,515
3,North America,304
4,Oceania,14
5,South America,11


In [140]:
# Grid geometry used when laying out the tiles.
TILE_SIZE = 50   # step between neighbouring tile positions
NB_TILES_W = 30  # number of tiles per row
# PADDING = 2

# NOTE(review): this rebinds the module-level colorPalette to a six-colour
# list. f_color reads colorPalette when it is called, so re-running the cell
# that computes the colour columns after this one would resolve palette index 4
# to "#D81E5B" instead of "#FFFFFF". Confirm whether the rebinding is intended.
colorPalette = ["#6C756B", "#1C2541", "#D2FF28", "#54F2F2", "#D81E5B", "#FFFFFF"]

In [144]:
# POSITIONS_LEFT: continent -> slot numbers not handed out yet.
# POSITIONS:      continent -> tiles created so far.
POSITIONS_LEFT = {}
POSITIONS = {}
for continent_name, laureate_count in zip(continents_grouped_df['Continent_Name'],
                                          continents_grouped_df['id']):
    POSITIONS_LEFT[continent_name] = list(range(laureate_count))
    POSITIONS[continent_name] = []

# Give every laureate a random free slot inside its continent and turn the
# slot number into grid coordinates (NB_TILES_W slots per row).
laureate_rows = zip(laureate_df['Continent_Name'],
                    laureate_df['color'],
                    laureate_df['color_background'])
for continent_name, foreground, background in laureate_rows:
    free_slots = POSITIONS_LEFT[continent_name]
    pick = np.random.randint(low=0, high=len(free_slots), size=1)[0]
    slot = free_slots.pop(int(pick))  # remove it so no slot is used twice
    POSITIONS[continent_name].append({
        'x_pos': slot % NB_TILES_W * TILE_SIZE,
        'y_pos': slot // NB_TILES_W * TILE_SIZE,
        'color': foreground,
        'color_background': background,
    })

# Stack the continents vertically: each one starts two tile steps below the
# last row of the previous one.
offset = 0
for placed_tiles in POSITIONS.values(): # TODO: put continents in wanted order
    placed_tiles.sort(key=lambda t: (t['y_pos'], t['x_pos']))
    for placed in placed_tiles:
        placed['y_pos'] += offset
    offset = placed_tiles[-1]['y_pos'] + 2 * TILE_SIZE

In [145]:
# Flatten the per-continent tile lists into one list, keeping POSITIONS order.
tiles = [tile for continent_tiles in POSITIONS.values() for tile in continent_tiles]

In [146]:
# Rewrite sketch.js in place: refresh the tile literals and the three header
# lines (TILE_SIZE, w, h) stored at line indices 1-3.
with open('sketch.js', 'r') as f:
    data = f.readlines()

tile_lines = [str(tile) + ',\n' for tile in tiles]
# Indices of the lines that currently hold a tile literal.
tile_rows = [i for i, line in enumerate(data) if line.startswith('{\'x')]

# Overwrite the existing tile lines one-for-one, in file order.
for row, text in zip(tile_rows, tile_lines):
    data[row] = text

# The file may hold a different number of tile lines than there are tiles
# (e.g. after the input data changed). Previously a surplus of lines raised
# IndexError and a shortage silently dropped tiles.
reused = min(len(tile_rows), len(tile_lines))
if not tile_rows:
    if tile_lines:
        # Nowhere to anchor the list; keep going so the header is still updated.
        print('warning: sketch.js has no tile line to replace; '
              '{} tiles were not written'.format(len(tile_lines)))
elif len(tile_lines) > reused:
    # More tiles than lines: add the remainder right after the last tile line.
    insert_at = tile_rows[-1] + 1
    data[insert_at:insert_at] = tile_lines[reused:]
else:
    # More lines than tiles: drop the stale ones, back to front so the
    # remaining indices stay valid.
    for row in reversed(tile_rows[reused:]):
        del data[row]

data[1] = 'let TILE_SIZE = {};\n'.format(TILE_SIZE)
data[2] = 'let w = {};\n'.format(TILE_SIZE * (NB_TILES_W+2))
# y_pos is already multiplied by TILE_SIZE, so only the two-tile margin is
# scaled here (scaling y_pos again made h roughly TILE_SIZE times too large).
data[3] = 'let h = {};\n'.format(tiles[-1]['y_pos'] + 2 * TILE_SIZE)
with open('sketch.js', 'w') as file:
    file.writelines(data)