In [1]:
import classifier
from scipy.optimize import curve_fit
import numpy as np

red, green, blue, charcoal = '#ff0040', '#00aa00', '#187bff', "#3b3b3b"

bistroke_times = {}
bistroke_freq = {}
bigrams = set()

with open(f"nstrokes/bistrokes_0.txt") as file:
    for l in file:
        layout, bigram, freq, *times = l.split("\t")

        if bigram not in bigrams:
            bigrams.add(bigram)
            
        bistroke_freq[(layout, bigram)] = int(freq)
        bistroke_times[(layout, bigram)] = [
            list(map(int, t.strip()[1:-1].split(", "))) for t in times
        ]

In [3]:
%matplotlib qt
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

def log_function(x, a, b, c):
    return a * np.log(x + b) + c


for layout in ("qwerty", "dvorak"):
    freqs = []
    times = []
    is_sfb = []
    c = []

    for bg in bigrams:
        if not any([c in '!@#$%^&*()QWERTYUIOP{}|ASDFGHJKL:"ZXCVBNM<>?' for c in bg]):
            if (layout, bg) in bistroke_freq:
                try:
                    time_data = [t[1] for t in bistroke_times[(layout, bg)] if (t[0] > 1 and t[1] < 1000)]
                    x_val = bistroke_freq[(layout, bg)]
                    y_val = sum(time_data) / len(time_data)
                    
                    if x_val != 0:
                        freqs.append(x_val)
                        times.append(y_val)
                        is_sfb.append(int(classifier.same_finger(bg)))
                        c.append(red if is_sfb[-1] else green)
                except:
                    print(layout, bg, bistroke_times[(layout, bg)])

    freqs, times, is_sfb, c = zip(*sorted(zip(freqs, times, is_sfb, c), key = lambda x: x[0]))

    freqs = np.array(freqs)
    times = np.array(times)
    is_sfb = np.array(is_sfb)

    input_data = [freqs, is_sfb]
    popt, pcov = curve_fit(log_function, freqs, times)

    mycmap = LinearSegmentedColormap.from_list('custom_colormap', list(reversed(['#fff829', '#f4f730', '#e8f737', '#ddf63f', '#d1f546', '#c5f34d', '#b9f254', '#acf15b', '#a0ef62', '#92ed68', '#85ec6e', '#76ea74', '#66e77a', '#55e580', '#41e385', '#24e08b', '#00dd90', '#00da95', '#00d799', '#00d49e', '#00d1a2', '#00cda6', '#00caaa', '#00c6ae', '#00c2b1', '#00beb4', '#00bab7', '#00b6ba', '#00b2bc', '#00adbf', '#00a9c0', '#00a4c2', '#00a0c3', '#009bc4', '#0096c5', '#0091c6', '#008dc6', '#0088c6', '#0083c5', '#007ec4', '#0079c3', '#0074c2', '#006fc0', '#0069be', '#0064bc', '#005fb9', '#145ab6', '#2155b3', '#2950b0', '#304bac', '#3547a8', '#3942a4', '#3d3d9f', '#40389b', '#423396', '#442f91', '#462a8b', '#472586', '#482080', '#491c7a', '#491775', '#49116f', '#490c68', '#490562'])))

    plt.figure()

    title = f"Relationship Between English Bigram Frequency and Typing Time on {str(layout).title()}"
    
    plt.title(title)

    new_y = log_function(freqs, *popt)
    scatter = plt.scatter(freqs, times, c=charcoal, s=10)

    plt.plot(freqs, new_y, c=red)
    plt.xlabel("Number of Occurrences in Corpus ")
    plt.ylabel("Average Typing Time (Milliseconds)")
    plt.xscale("log")

    plt.show()

  return a * np.log(x + b) + c


dvorak ,q [[36, 1497]]
dvorak jq [[8, 1818]]
