In [39]:
code = """print('Hello World')"""

In [2]:
import pandas as pd
df = pd.read_csv("baseline/with_modifier.csv")

The modifier is based on the key *position*, not the key itself. 

As a result we can number a keyboard based on the following format:
![](keyboard-layout.jpg)

We just then need to map the key to the position:

In [13]:
key2pos = {k:i for i,k in enumerate("qwertyuiopasdfghjkl;zxcvbnm,.")}

In [16]:
df["Location"] = df["Key"].replace(key2pos)

In [17]:
df.head()

Unnamed: 0.1,Unnamed: 0,Key,Average Time (s),Distance,Modifier,Location
0,0,",",0.721701,1,1.443402,27
1,1,.,0.635377,1,1.270754,28
2,2,;,0.583544,0,0.583544,19
3,3,a,0.623853,0,0.623853,10
4,4,b,0.64447,2,1.933409,24


In [18]:
key2mod = dict(zip(df["Location"], df["Modifier"]))

In [27]:
keys = list(df["Key"].unique())

In [28]:
import numpy as np
import random

In [118]:
num_weights = len(keys)
solutions_per_population = 8
num_solutions_mating = 4
def init_population(population_size):
    population = []
    for _ in range(population_size):
        genome = keys[:]
        random.shuffle(genome)
        population.append(genome)
    return population

In [131]:
def mate(board1, board2):
    idx = random.randint(0, 28)
    length = random.randint(0,28)
    child = ["_" for i in range(29)]
    # Add keys from keyboard 1
    for _ in range(length):
        if idx > 28:
            idx = 0
        child[idx] = board1[idx]
        idx += 1
    
    # Add remaining keys from keyboard 2
    child_idx = idx
    while "_" in child:
        if idx > 28:
            idx = 0
        if child_idx > 28:
            child_idx = 0
        char = board2[idx]
        if char in child:
            idx += 1
            continue
        child[child_idx] = board2[idx]
        child_idx += 1
        idx += 1
    
    # 10% chance of random mutation
    if random.random() >= 0.9:
        p1 = random.randint(0,28)
        p2 = random.randint(0,28)
        allele1 = child[p1]
        allele2 = child[p2]
        child[p1] = allele2
        child[p2] = allele1
    return child

In [132]:
def new_generation(population, sorted_evals, p_size):
    new_gen = []
    sorted_pop = []
    for idx in sorted_evals:
        sorted_pop.append(population[idx])
    
    # Best 10% of layouts in this generation
    for i in range(int(p_size*0.1)):
        new_gen.append(sorted_pop[i])
    
    # Combine two from top 50% and create a new generation
    for _ in range(int(p_size * 0.9)):
        pop = sorted_pop[:int(p_size*.5)]
        p1, p2 = random.choices(pop, k=2)
        child = mate(p1,p2)
        new_gen.append(child)
    return new_gen

In [146]:
def calculate_time(s, layout, key2mod):
    total_distance = 0
    for char in s:
        try:
            total_distance += key2mod[layout.index(char)]
        except ValueError:
            continue
    return total_distance

In [143]:
with open("text.txt", "r") as f:
    code = f.read()

In [155]:
key2mod = dict(zip(df["Location"], df["Modifier"]))

In [156]:
# Baseline:
calculate_time(code, "qwertyuiopasdfghjkl;zxcvbnm,.", key2mod)

14763.48642787899

In [158]:
populations = init_population(8)
num_generations = 1000
for generation in range(num_generations):
    distances = [calculate_time(code, population, key2mod) 
             for population in populations]
    sorted_evals = [x for _,x in sorted(zip(distances, range(len(distances) - 1)))]
    if (generation % 100) == 0 or generation == 999:
        print(f"Generation {generation} best:\n\t{list(sorted(list(distances)))[0]}")
    if generation < num_generations - 1:
        populations = new_generation(populations, sorted_evals, 8)

Generation 0 best:
	13678.682420348794
Generation 100 best:
	12241.141532324815
Generation 200 best:
	11752.819438075083
Generation 300 best:
	11348.60708928014
Generation 400 best:
	11132.162210845367
Generation 500 best:
	11014.809437703558
Generation 600 best:
	11826.981188249049
Generation 700 best:
	11770.945840310487
Generation 800 best:
	11620.645924519917
Generation 900 best:
	11348.353831719794
Generation 999 best:
	11072.730636214774


In [159]:
sorted_evals

[2, 0, 1, 3, 4, 5]

In [160]:
populations[2]

['z',
 'w',
 'x',
 'y',
 'h',
 'k',
 'p',
 's',
 'b',
 'u',
 'o',
 'a',
 'r',
 'm',
 'j',
 '.',
 'i',
 't',
 'e',
 'n',
 'f',
 'c',
 ',',
 'q',
 'v',
 'g',
 'l',
 ';',
 'd']

In [113]:
len(population)

7

In [65]:
distances = [calculate_time(code, population) 
             for population in populations]

In [68]:
sorted_evals = [x for _,x in sorted(zip(distances, range(len(distances))))]; sorted_evals

[2, 6, 3, 1, 7, 4, 5, 0]

In [74]:
new_pop = new_generation(population, sorted_evals, 8)

In [76]:
distances2 = [calculate_time(code, population) 
             for population in new_pop]

In [77]:
distances, distances2

([18.802253150939944,
  15.373806476593023,
  12.417484378814697,
  15.088770198822022,
  16.561223316192628,
  16.897821712493897,
  12.629731702804566,
  16.09697675704956],
 [12.087087392807007,
  12.915939807891846,
  16.64198336601257,
  12.915939807891846,
  16.670943403244017,
  15.500857830047606,
  11.628025388717651])

In [53]:
calculate_time(code, population[2])

13.265898180007936