In [28]:
import numpy as np
from scipy.optimize import curve_fit
from tikzplotlib import save as tikz_save
from classifier import classifier

# Plot palette, as hex RGB strings.
red = '#ff0040'
green = '#00aa00'
blue = '#187bff'
charcoal = '#3b3b3b'

# Samples whose first field is below this value are dropped when bistrokes.tsv
# is loaded (presumably a words-per-minute floor -- TODO confirm the unit).
wpm = 80
# A bistroke must keep at least this many samples to be included at all.
bg_min_samples = 50

In [29]:
# Helper: outlier-trimmed average, used later when aggregating typing times.
def get_iqr_avg(data):
    """Return the mean of ``data`` after discarding IQR outliers.

    A value is kept when it lies inside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``,
    where Q1/Q3 are the 25th/75th percentiles and ``IQR = Q3 - Q1``.

    Args:
        data: Non-empty iterable of real numbers (a generator is fine; it is
            materialised once).

    Returns:
        The arithmetic mean of the values that lie inside the fences.

    Raises:
        ValueError: If ``data`` is empty, or if no value survives the filter
            (this happens when ``data`` contains NaN, because every
            comparison against a NaN fence is false).
    """
    values = list(data)
    if not values:
        raise ValueError("get_iqr_avg() requires at least one sample")

    # One percentile call yields both quartiles.
    q1, q3 = np.percentile(values, [25, 75])
    reach = 1.5 * (q3 - q1)
    lower_bound = q1 - reach
    upper_bound = q3 + reach

    kept = [x for x in values if lower_bound <= x <= upper_bound]
    if not kept:
        raise ValueError("no samples left after IQR filtering (NaN in data?)")

    return sum(kept) / len(kept)

# Hand-rolled parser for tuple-of-int strings, because eval is far too slow here.
def str_to_tuple(s):
    """Parse a string such as ``"(85, 120)"`` into a tuple of ints.

    Surrounding whitespace (including a trailing newline) and any amount of
    whitespace around the commas are tolerated, so ``"(85,120)"`` and
    ``" (85,  120)\\n"`` parse the same as ``"(85, 120)"``.

    Args:
        s: Text holding comma-separated integers, optionally parenthesised.

    Returns:
        A tuple with one ``int`` per comma-separated field.

    Raises:
        ValueError: If any field is not an integer literal (this includes an
            empty field, e.g. from ``"()"`` or a trailing comma).
    """
    # int() ignores whitespace around each field, so splitting on a bare
    # comma accepts every spacing variant of the separator.
    return tuple(map(int, s.strip().strip("()").split(",")))

# Build bistroke_data from bistrokes.tsv. Each line is tab-separated:
#   <key positions> \t <bigram> \t <sample> \t <sample> ...
# Every kept entry has the shape (positions, bigram, *samples).
bistroke_data = []
with open("bistrokes.tsv") as f2:
    for raw_line in f2:
        position_text, bigram_text, *sample_texts = raw_line.strip().split("\t")

        # Drop bigrams containing any of these characters (upper-case letters,
        # a few punctuation marks, and the space).
        if any(ch in "QWERTYUIOPASDFGHJKL:ZXCVBNM<>? " for ch in bigram_text):
            continue

        # NOTE(review): eval() executes whatever text sits in the first column.
        # Only safe while bistrokes.tsv is a trusted, locally produced file;
        # consider ast.literal_eval if that ever changes.
        key_positions = eval(position_text)

        # Keep only the samples whose first field reaches the wpm threshold.
        fast_samples = []
        for sample_text in sample_texts:
            parsed_sample = str_to_tuple(sample_text)
            if parsed_sample[0] >= wpm:
                fast_samples.append(parsed_sample)

        # Entries with too few surviving samples are discarded.
        if len(fast_samples) >= bg_min_samples:
            bistroke_data.append((key_positions, bigram_text, *fast_samples))

# Map each bigram to its integer count. Every line of bigrams.txt must be
# exactly "<bigram>\t<count>"; int() tolerates the newline left on the count.
with open("bigrams.txt") as f:
    bigram_to_freq = {
        bigram_text: int(count_text)
        for bigram_text, count_text in (row.split("\t") for row in f)
    }

In [30]:
qwerty = classifier().keyboards["qwerty"]

# Per-bigram accumulators; a row is appended only if it passes the layout check.
labels = []
kept_times, kept_sfb, kept_freqs = [], [], []
# One colour slot per input row: green unless the kept row at that index is an SFB.
col = [green] * len(bistroke_data)

for bs in bistroke_data:
    ((ax, ay), (bx, by)), bigram, *bistroke_times = bs

    # Skip rows whose recorded key positions differ from where the qwerty
    # layout places the bigram's two characters.
    if ((ax, ay), (bx, by)) != tuple(qwerty.get_pos(ch) for ch in bigram):
        continue

    slot = len(labels)  # index this row occupies in every output series

    # Outlier-trimmed mean of the second field of each sample.
    kept_times.append(get_iqr_avg([sample[1] for sample in bistroke_times]))
    labels.append(bigram)

    # x // abs(x) reduces x to its sign; presumably the sign encodes the hand.
    # NOTE(review): this raises ZeroDivisionError if either x is 0 -- confirm
    # the layout never uses column 0.
    same_sign = (ax // abs(ax)) == (bx // abs(bx))
    same_column = ax == bx
    is_sfb = same_column or (same_sign and abs(ax) in (1, 2) and abs(bx) in (1, 2))
    kept_sfb.append(is_sfb)
    if is_sfb:
        col[slot] = red

    kept_freqs.append(bigram_to_freq[bigram])

# Float arrays with one entry per kept bigram (sfb holds 0.0 / 1.0).
times = np.array(kept_times, dtype=float)
sfb = np.array(kept_sfb, dtype=float)
freqs = np.array(kept_freqs, dtype=float)



In [31]:
def get_time(features, p0, p1, p2, p3):
    """Evaluate the fitted model ``p0 * log(freq + p1) + p2``.

    Args:
        features: Pair ``(freq, sfb)``. Only ``freq`` enters the result; the
            second item is unpacked and ignored.
        p0: Multiplier applied to the logarithm.
        p1: Offset added to ``freq`` inside the logarithm.
        p2: Constant added to the scaled logarithm.
        p3: Unused. It stays in the signature, so curve_fit still treats it
            as a fit parameter; the factor that used it is disabled.

    Returns:
        The model value, scalar or array following ``freq``.
    """
    occurrences, _sfb_flags = features
    log_term = np.log(occurrences + p1)
    return p0 * log_term + p2

# Fit get_time's parameters to the measured times with the "trf" solver.
# No p0 is passed, so curve_fit starts from its default initial guess.
fit_inputs = [freqs, sfb]
bg_popt, bg_pcov = curve_fit(get_time, fit_inputs, times, method="trf", maxfev=750000)

# Model predictions at the fitted parameters, and their errors.
new_y = get_time(fit_inputs, *bg_popt)
residuals = times - new_y

# R^2 = 1 - SS_res / SS_tot, with SS_tot taken about the mean of the data.
sum_of_squares = np.sum((times - np.mean(times)) ** 2)
r2 = 1 - np.sum(residuals ** 2) / sum_of_squares

print("R^2:", r2)
print("MAE:", np.mean(np.abs(residuals)))


R^2: 0.41060216979426034
MAE: 23.30583453936071


In [32]:
# REGULAR GRAPH FREQ
%matplotlib qt

import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))

# plt.title("Typing Time for Non-SFBs and SFBs")

#xx, yy, ll, fit_y, c = zip(*sorted([r for r in zip(freqs, times, bg_labels, new_y) if r[-1] != "blue"], key=lambda x: x[0], reverse=True))
xx, yy, fit_y, cc, ll = zip(*sorted([r for r in zip(freqs, times, new_y, col, labels)], key = lambda x: -x[0]))
# xx = list((range(len(xx))))
scatter = plt.scatter(xx, yy, s=50, c=blue)

plt.plot(xx, fit_y, c="black")
plt.xlabel("Number of Occurrences", fontsize=18)  # Adjust the fontsize as needed
plt.ylabel("Average Typing Time (Milliseconds)", fontsize=18)  # Adjust the fontsize as needed
plt.xscale("log")

# Creating a legend
plt.tick_params(axis='x', labelsize=16)
plt.tick_params(axis='y', labelsize=16)


for i, l in enumerate(ll[:-2]):
    plt.text(xx[i],yy[i], l)

plt.xlim(10**3)  # Adjust max(xx) if needed

plt.savefig('plot.pdf', bbox_inches='tight', format='pdf', transparent=True)  # Export as PDF with transparent background
# plt.show()

In [33]:
# REGULAR GRAPH SFB
%matplotlib qt

import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))

# plt.title("Typing Time for Non-SFBs and SFBs")

#xx, yy, ll, fit_y, c = zip(*sorted([r for r in zip(freqs, times, bg_labels, new_y) if r[-1] != "blue"], key=lambda x: x[0], reverse=True))
xx, yy, fit_y, cc, ll = zip(*sorted([r for r in zip(freqs, times, new_y, col, labels)], key = lambda x: -x[0]))
# xx = list((range(len(xx))))
scatter = plt.scatter(xx, yy, s=50, c=cc)

plt.plot(xx, fit_y, c="black")
plt.xlabel("Number of Occurrences", fontsize=18)  # Adjust the fontsize as needed
plt.ylabel("Average Typing Time (Milliseconds)", fontsize=18)  # Adjust the fontsize as needed
plt.xscale("log")

# Creating a legend
legend_elements = [plt.Line2D([0], [0], marker='o', color='w', label='SFB', markerfacecolor=red, markersize=10),
                   plt.Line2D([0], [0], marker='o', color='w', label='Non-SFB', markerfacecolor=green, markersize=10)]
plt.legend(handles=legend_elements, loc='best', ncol=1, facecolor='none', frameon=False, fontsize=16)
plt.tick_params(axis='x', labelsize=16)
plt.tick_params(axis='y', labelsize=16)

#for i, l in enumerate(ll):
    #if i not in (0,1):
    #plt.text(xx[i],yy[i], l)

plt.savefig('plot.pdf', bbox_inches='tight', format='pdf', transparent=True)  # Export as PDF with transparent background
# plt.show()

In [34]:
# DARKMODE GRAPH
# Disabled: the entire cell body below is wrapped in a triple-quoted string, so
# none of it executes; the cell only evaluates to (and echoes) that string.
# Note that the text inside unpacks four series (no labels), unlike the two
# active plot cells, which unpack five.
"""
%matplotlib qt

import matplotlib.pyplot as plt

plt.rcParams['figure.facecolor'] = 'black'
plt.rcParams['text.color'] = 'white'

plt.figure(figsize=(16, 6))

# plt.title("Typing Time for Non-SFBs and SFBs")

#xx, yy, ll, fit_y, c = zip(*sorted([r for r in zip(freqs, times, bg_labels, new_y) if r[-1] != "blue"], key=lambda x: x[0], reverse=True))
xx, yy, fit_y, cc = zip(*sorted([r for r in zip(freqs, times, new_y, col)], key = lambda x: -x[0]))
# xx = list((range(len(xx))))
scatter = plt.scatter(xx, yy, s=50, c=cc)

plt.plot(xx, fit_y, c="white")
plt.xlabel("Number of Occurrences", fontsize=18, color="white")  # Adjust the fontsize as needed
plt.ylabel("Average Typing Time (Milliseconds)", fontsize=18, color="white")  # Adjust the fontsize as needed
plt.xscale("log")

# Setting the color of the border lines (spines) to white
for spine in plt.gca().spines.values():
    spine.set_color('white')

# Creating a legend
legend_elements = [plt.Line2D([0], [0], marker='o', color='w', label='SFB', markerfacecolor='#ffc0ff', markersize=10),
                   plt.Line2D([0], [0], marker='o', color='w', label='Non-SFB', markerfacecolor='#1DC8FF', markersize=10)]
plt.legend(handles=legend_elements, loc='best', ncol=1, facecolor='none', labelcolor='white', frameon=False, fontsize=16)
plt.tick_params(axis='x', colors='white', labelsize=16)
plt.tick_params(axis='y', colors='white', labelsize=16)

plt.savefig('plot.pdf', bbox_inches='tight', format='pdf', transparent=True)  # Export as PDF with transparent background
# plt.show()
"""

'\n%matplotlib qt\n\nimport matplotlib.pyplot as plt\n\nplt.rcParams[\'figure.facecolor\'] = \'black\'\nplt.rcParams[\'text.color\'] = \'white\'\n\nplt.figure(figsize=(16, 6))\n\n# plt.title("Typing Time for Non-SFBs and SFBs")\n\n#xx, yy, ll, fit_y, c = zip(*sorted([r for r in zip(freqs, times, bg_labels, new_y) if r[-1] != "blue"], key=lambda x: x[0], reverse=True))\nxx, yy, fit_y, cc = zip(*sorted([r for r in zip(freqs, times, new_y, col)], key = lambda x: -x[0]))\n# xx = list((range(len(xx))))\nscatter = plt.scatter(xx, yy, s=50, c=cc)\n\nplt.plot(xx, fit_y, c="white")\nplt.xlabel("Number of Occurrences", fontsize=18, color="white")  # Adjust the fontsize as needed\nplt.ylabel("Average Typing Time (Milliseconds)", fontsize=18, color="white")  # Adjust the fontsize as needed\nplt.xscale("log")\n\n# Setting the color of the border lines (spines) to white\nfor spine in plt.gca().spines.values():\n    spine.set_color(\'white\')\n\n# Creating a legend\nlegend_elements = [plt.Line2D([0],