In [None]:
# Import
import pandas as pd

In [None]:
# Test: scaling a list element in place changes the list itself
l = [1, 2]
l[1] = l[1] * 4
print(l)

In [None]:
# NOTE(review): measure_word_pleasantness is only defined in a later cell of
# this notebook, so on a fresh kernel this cell raises NameError when the
# cells are run top to bottom. Run it after the definitions (or move this
# cell below them).
measure_word_pleasantness('ヴァイオリン')

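# Flick

A flick is a dict holding the `origin` of the touched key as `[column, row]` and the `direction` of the flick (`'none'` when the key is tapped without flicking):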

```python
{
    'origin': [0, 2],
    'direction': 'left'
}
```

In [None]:
# Data

# Flick directions; the position of a name here is also the position of the
# matching katakana inside each key of key_map.
directions = 'none left up right down'.split()

# [x, y] displacement of the finger for each entry of `directions`
direction_movements = [
    [0, 0],   # none
    [-2, 0],  # left
    [0, -2],  # up
    [2, 0],   # right
    [0, 2],   # down
]

# Flick-keyboard layout: one inner list per key.
# Within a key, the katakanas are ordered like `directions`
# (none, left, up, right, down); '' marks a direction with no character.
# katakanas_to_flicks() converts a key's index in this list into the origin
# [index % 3, index // 3], i.e. the keys are listed row by row on a grid
# that is three keys wide.
key_map = [
    ['ア', 'イ', 'ウ', 'エ', 'オ'],
    ['カ', 'キ', 'ク', 'ケ', 'コ'],
    ['サ', 'シ', 'ス', 'セ', 'ソ'],
    ['タ', 'チ', 'ツ', 'テ', 'ト'],
    ['ナ', 'ニ', 'ヌ', 'ネ', 'ノ'],
    ['ハ', 'ヒ', 'フ', 'ヘ', 'ホ'],
    ['マ', 'ミ', 'ム', 'メ', 'モ'],
    ['ヤ',   '', 'ユ',   '', 'ヨ'],
    ['ラ', 'リ', 'ル', 'レ', 'ロ'],
    ['゛',   '',   '', '゜',   ''],
    ['ワ', 'ヲ', 'ン', 'ー',   ''],
    ['、', '。', '？', '！',   '']
]

# Voicing
# The two lists are parallel: voiced_katakanas[i] is the voiced form of
# voiceless_katakanas[i].
voiced_katakanas = (
    ['ヴ']
    + list('ガギグゲゴ')
    + list('ザジズゼゾ')
    + list('ダヂヅデド')
    + list('バビブベボ')
)
voiceless_katakanas = (
    ['ウ']
    + list('カキクケコ')
    + list('サシスセソ')
    + list('タチツテト')
    + list('ハヒフヘホ')
)

# P
# Parallel lists: p_katakanas[i] is non_p_katakanas[i] carrying the '゜' mark.
p_katakanas = list('パピプペポ')
non_p_katakanas = list('ハヒフヘホ')

# Small
# Parallel lists: small_katakanas[i] is the small form of non_small_katakanas[i].
small_katakanas = list('ァィゥェォ') + ['ッ'] + list('ャュョ')
non_small_katakanas = list('アイウエオ') + ['ツ'] + list('ヤユヨ')

In [None]:
# Utility

def add_positions(position1, position2):
    """Return a new [x, y] list holding the component-wise sum of two positions."""
    x = position1[0] + position2[0]
    y = position1[1] + position2[1]
    return [x, y]

def subtract_positions(position1, position2):
    """Return a new [x, y] list holding position1 minus position2, component-wise."""
    x = position1[0] - position2[0]
    y = position1[1] - position2[1]
    return [x, y]

def direction_to_axis(direction):
    """Return the axis a flick direction moves along.

    'left' and 'right' give 'x', 'up' and 'down' give 'y'; any other value
    (including 'none') gives None.
    """
    if direction in ('left', 'right'):
        return 'x'
    if direction in ('up', 'down'):
        return 'y'
    return None

def is_same_axis(direction1, direction2):
    """Return True when both directions map to the same axis.

    Two directions that have no axis (direction_to_axis returns None for
    both) also compare as equal.
    """
    axis1 = direction_to_axis(direction1)
    axis2 = direction_to_axis(direction2)
    return axis1 == axis2

In [None]:
# Convert a string of katakanas to flicks

# 'ガ' -> 'カ゛' etc.
def decompose_katakanas(string):
    """Rewrite composed katakanas as a base katakana followed by its mark(s).

    Three passes are applied in order: voiced katakanas, P-katakanas, then
    small katakanas. Characters that belong to none of the tables are left
    untouched. Returns the rewritten string.
    """
    # Voiced katakanas take one '゛', except 'ヴ', which takes two.
    for voiced, voiceless in zip(voiced_katakanas, voiceless_katakanas):
        marks = '゛゛' if voiced == 'ヴ' else '゛'
        string = string.replace(voiced, voiceless + marks)

    # P-katakanas become the plain katakana followed by '゜'.
    for p_katakana, non_p_katakana in zip(p_katakanas, non_p_katakanas):
        string = string.replace(p_katakana, non_p_katakana + '゜')

    # Small katakanas become the full-size katakana followed by '゛'.
    for small, non_small in zip(small_katakanas, non_small_katakanas):
        string = string.replace(small, non_small + '゛')

    return string

# 'ア' -> [{'origin': [0, 0], 'direction': 'none'}] etc.
def katakanas_to_flicks(decomposed_katakana_string):
    """Translate each character of a decomposed katakana string into a flick.

    Every character is looked up in key_map; the first key containing it
    yields a dict with the key's 'origin' ([index % 3, index // 3]) and the
    'direction' matching the character's slot within that key. Characters
    found in no key are skipped. Returns the list of flick dicts in input
    order.
    """
    flicks = []

    for character in decomposed_katakana_string:
        # First key that holds this character, or None when there is none.
        match = next(
            ((index, key) for index, key in enumerate(key_map) if character in key),
            None
        )
        if match is None:
            continue

        index, key = match
        row, column = divmod(index, 3)
        flicks.append({
            'origin': [column, row],
            'direction': directions[key.index(character)]
        })

    return flicks

In [None]:
# Measure pleasantness

def measure_shift_pleasantness(previous_position, next_position, previous_axis):
    """Score the move from previous_position to next_position.

    The displacement component along previous_axis ('x' or 'y') is scaled by
    0.4 before measuring; the x component is additionally weighted by 1.2.
    The result is the negated weighted distance divided by 7, so it is never
    positive.
    """
    dx, dy = subtract_positions(next_position, previous_position)

    # Movement along the axis of the previous flick counts for less.
    if previous_axis == 'x':
        dx *= 0.4
    if previous_axis == 'y':
        dy *= 0.4

    weighted_distance = ((dx * 1.2) ** 2 + dy ** 2) ** (1 / 2)
    return -weighted_distance / 7

# previous_flick is None if the current_flick is the first flick
def measure_single_input_pleasantness(previous_flick, current_flick):
    """Score one flick in the context of the flick entered just before it.

    The score is the sum of two parts:

    * shift pleasantness: the (never positive) score of moving from where
      the previous flick ended to the origin of the current flick; 0 for the
      first flick.
    * flick pleasantness: a fixed value picked from the current direction
      and its relation to the previous direction.

    Args:
        previous_flick: flick dict ('origin', 'direction') entered before
            current_flick, or None when current_flick is the first flick.
        current_flick: flick dict ('origin', 'direction') to score.

    Returns:
        The sum of shift pleasantness and flick pleasantness.
    """
    # Identity check: None is a singleton, so `is` is the correct comparison.
    if previous_flick is None:
        shift_pleasantness = 0
        flick_pleasantness = -0.2 if current_flick['direction'] == 'none' else 0.5
        return shift_pleasantness + flick_pleasantness

    # The finger travels from the end point of the previous flick
    # (its origin plus the movement of its direction) to the next key.
    previous_end_position = add_positions(
        previous_flick['origin'],
        direction_movements[directions.index(previous_flick['direction'])]
    )
    shift_pleasantness = measure_shift_pleasantness(
        previous_end_position,
        current_flick['origin'],
        direction_to_axis(previous_flick['direction'])
    )

    if current_flick['direction'] == 'none':
        # A tap without a flick.
        flick_pleasantness = -0.2
    elif not is_same_axis(previous_flick['direction'], current_flick['direction']):
        # The flick changes axis (or follows a tap, which has no axis).
        flick_pleasantness = -0.4
    elif previous_flick['direction'] == current_flick['direction']:
        # Same direction twice in a row.
        flick_pleasantness = 0.8
    else:
        # Same axis, opposite direction.
        flick_pleasantness = 1.0

    return shift_pleasantness + flick_pleasantness

def measure_flicks_pleasantness(flicks):
    """Return the summed pleasantness of a sequence of flicks.

    Each flick is scored together with the flick before it; the first flick
    is scored with None as its predecessor. An empty sequence scores 0.
    """
    total_pleasantness = 0
    previous_flick = None
    for current_flick in flicks:
        total_pleasantness += measure_single_input_pleasantness(previous_flick, current_flick)
        previous_flick = current_flick
    return total_pleasantness

def measure_word_pleasantness(word):
    """Return the pleasantness score of typing a katakana word.

    The word is decomposed into base katakanas and marks, converted to
    flicks, and the flicks are scored as a sequence.
    """
    decomposed_word = decompose_katakanas(word)
    flicks = katakanas_to_flicks(decomposed_word)
    return measure_flicks_pleasantness(flicks)

In [None]:
# Measure the pleasantness of the words in the dictionary

# Name of the dictionary CSV (without extension) inside the dic/ directory
FILENAME = 'ipadic'
#FILENAME = 'hatena'

# The CSV has no header row, so columns are labelled 0, 1, 2, ...
df = pd.read_csv('dic/' + FILENAME + '.csv', encoding='utf-8', header=None)

# Replace the word in column 1 with its pleasantness score.
# Rows yielded by DataFrame.iterrows() may be copies rather than views, so
# assigning to them is not guaranteed to change `df`; the scores are
# collected first and written to the column in a single assignment.
pleasantness_scores = []
for i, word in enumerate(df[1]):
    if i % 10000 == 0:
        print(i)  # progress indicator
    pleasantness_scores.append(measure_word_pleasantness(word))
df[1] = pleasantness_scores

# Most pleasant words first; written without header row or index column.
sorted_df = df.sort_values(1, ascending=False)
sorted_df.to_csv('dic/' + FILENAME + '_pleasantness.csv', encoding='utf-8', header=False, index=False)