In [None]:
![Left/Right keyboard](keyboard.jpg "Left/Right keyboard")

In [1]:
import pandas as pd
import re

In [41]:
# Config: the characters that belong to the left and to the right keyboard side.
keys = {
    'left_characters': list('123456' 'qwert' 'asdfg' 'zxcv'),
    'right_characters': list('7890' 'yuiop' 'hjkl' 'bnm'),
}

In [49]:
def clean_word(row):
    """Remove all non-word characters from a word

    Keyword arguments:
    row -- a pandas DataFrame row, index key 'word' is used

    Returns: a pandas Series object containing the clean word ('word_clean')
    and its length ('length')
    """
    word = row['word']
    if not isinstance(word, str):
        # read_csv parses entries such as "nan" or "null" as a float NaN, which
        # re.sub cannot handle; a missing value is treated as an empty word.
        word = '' if pd.isna(word) else str(word)
    # \W matches everything except letters, digits and the underscore, so an
    # underscore survives the cleaning.
    cleaned = re.sub(r'\W+', '', word)
    return pd.Series({'length': len(cleaned), 'word_clean': cleaned})

def characters_per_side(row):
    """Count the number of characters per keyboard side in a word

    Keyword arguments:
    row -- a pandas DataFrame row, index key 'word_clean' is used

    Returns: a pandas Series object containing the number of characters on
    both sides ('l_count', 'r_count') and their share of the word length as a
    rounded percentage ('l_per', 'r_per'). Characters found on neither side
    only count towards the word length. An empty word gives 0 for all values.
    """
    word = row['word_clean'].lower()
    total = len(word)

    left_side = keys['left_characters']
    right_side = keys['right_characters']

    l_count = 0
    r_count = 0
    for char in word:
        if char in left_side:
            l_count += 1
        elif char in right_side:
            r_count += 1

    def percentage(count):
        # An empty word (e.g. a word of only punctuation before cleaning)
        # would otherwise divide by zero.
        return round((count / total) * 100) if total else 0

    return pd.Series({'l_count': l_count,
                      'l_per': percentage(l_count),
                      'r_count': r_count,
                      'r_per': percentage(r_count)})


def sequance_score(row):
    """Count how often two consecutive characters are on the same keyboard side

    Keyword arguments:
    row -- a pandas DataFrame row, index key 'word_clean' is used

    Returns: a pandas Series object with the key 'penalty', the number of
    characters that are on the same side as the character before them
    """
    LEFT, RIGHT = 0, 1
    letters = row['word_clean'].lower()

    previous_side = None
    repeats = 0
    for position, letter in enumerate(letters):
        on_left = letter in keys['left_characters']
        if position == 0:
            # The first character only sets the starting side; it stays
            # unknown when the character is on neither side.
            if on_left:
                previous_side = LEFT
            if letter in keys['right_characters']:
                previous_side = RIGHT
            continue
        # NOTE(review): from the second character on, anything that is not a
        # left character is scored as right, even if it is not listed in
        # 'right_characters' -- confirm this is intended.
        current_side = LEFT if on_left else RIGHT
        if current_side == previous_side:
            repeats += 1
        previous_side = current_side
    return pd.Series({'penalty': repeats})


# Scratch check of an alternation score for a single word: count the
# characters that break a strict left/right alternation, taking the side of
# the first character as the starting side.
#
# Worked example for 'baan':
#   b right
#   a left
#   a left
#   n right
#
#   0 (b) and 2 (a) are even
#   1 (a) and 3 (n) are odd
#
#   left keys  = 2
#   right keys = 2
word = 'baan'

even = word[::2]         # even - start at the beginning and take every second item
odd = word[1::2]         # odd - start at the second item and take every second item

# The config dict only has the keys 'left_characters' and 'right_characters';
# looking up 'l_keys' / 'r_keys' raised a KeyError.
start = 'left_characters'
if even[0] in keys['right_characters']:
    start = 'right_characters'

c = sum(el not in keys[start] for el in even)   # even positions that are not on the starting side
e = sum(el in keys[start] for el in odd)        # odd positions that are on the starting side
print(c + e)

In [4]:
# Load the data: one word per line, kept only if the line is not blank.
# The file is read directly instead of with pd.read_csv(sep="\n"): current
# pandas rejects the line terminator as separator, and read_csv's default NA
# parsing would turn words such as "nan" or "null" into missing values.
if 'df' not in locals():
    with open('data/woordenlijst.txt', encoding='utf-8') as word_file:
        words = [line.rstrip('\n') for line in word_file]
    df = pd.DataFrame({'word': [word for word in words if word.strip()]})
    df[['length','word_clean']] = df.apply(clean_word, axis=1)

In [8]:
whos

Variable        Type        Data/Info
-------------------------------------
clean_word      function    <function clean_word at 0x7fc7903c9488>
cleaning        function    <function cleaning at 0x7fc7903c9268>
sequanceScore   function    <function sequanceScore at 0x7fc79040fea0>
sideLen         function    <function sideLen at 0x7fc79040fd90>


In [6]:
# Add the per-side character counts and percentages as four new columns.
df[['l_count', 'l_per', 'r_count', 'r_per']] = df.apply(characters_per_side, axis=1)

In [7]:
# Add the same-side penalty of every word as a new column.
df['penalty'] = df.apply(sequance_score, axis=1)

In [9]:
# Preview the first 100 rows of the frame.
df.head(100)

Unnamed: 0,word,length,word_clean,l_count,l_per,r_count,r_per,penalty
0,'s anderendaags,13,sanderendaags,11,85,2,15,8
1,'s avonds,7,savonds,5,71,2,29,4
2,'s middags,8,smiddags,6,75,2,25,5
3,'s nachts,7,snachts,5,71,2,29,2
4,'s ochtends,9,sochtends,6,67,3,33,2
5,'s-Gravenhage,11,sGravenhage,9,82,2,18,8
6,'s-Hertogenbosch,14,sHertogenbosch,8,57,6,43,6
7,06-nummer,8,06nummer,3,38,5,62,4
8,1 aprilgek,9,1aprilgek,5,56,4,44,3
9,1 aprilgrap,10,1aprilgrap,6,60,4,40,4


In [10]:
# Write the frame to 'list.csv'; with the default settings the index is
# written as well, as the first (unnamed) column.
df.to_csv('list.csv')

In [None]:
![Power BI dashboard](powerBi.PNG "Dashboard")

In [50]:
whos function

Variable              Type        Data/Info
-------------------------------------------
characters_per_side   function    <function characters_per_side at 0x7fc7902012f0>
clean_word            function    <function clean_word at 0x7fc790201268>
cleaning              function    <function cleaning at 0x7fc7903c9268>
sequanceScore         function    <function sequanceScore at 0x7fc790238378>
sequance_score        function    <function sequance_score at 0x7fc790201158>
sideLen               function    <function sideLen at 0x7fc79040fd90>
test_function         function    <function test_function at 0x7fc79028df28>


In [52]:
characters_per_side?