## ウ音便 

* 子音が落ちる
    * katta ではなく、今度は kauta が最良候補になれば面白い。欲を言えば koota も出てほしい。
* 調音結合
* 子音が落ちるか母音が落ちるか。
* ウ音便だけ再現できない。
    * さらにauがoとなるような変化が必要。
    * 産出時の調音結合。
    * 2dを見るとaとuの中間のとき、最も近いのはoになる。
    * これは考察行きでいい。完璧は無理
    
母音を削るか、子音を削るか、話者の知識なのか、ノイズなのか。

In [1]:
import sys
# Add the parent directory to the module search path.
# Presumably this is what lets the `src` package imported in later cells
# resolve — confirm against the repository layout.
sys.path.append('..')

In [2]:
%load_ext autoreload
%autoreload 2

from hydra.experimental import initialize, compose
from src.agent import Agent
# https://hydra.cc/docs/next/experimental/compose_api

# Compose the default configuration (config.yml under ../hyparam, no overrides).
# `config` stays bound at notebook level and is reused later as the listener
# configuration in the "update" condition of the simulation loop.
with initialize(config_path="../hyparam"):
    config = compose(config_name="config.yml")
# Build one agent from the default configuration and display its
# `poisson_params` attribute as the cell output.
agent_sample = Agent(config)
agent_sample.poisson_params

array([9, 9, 9, 3, 3, 3, 9], dtype=object)

In [3]:
from src.agent import Agent
from hydra.experimental import initialize, compose
from itertools import product
import numpy as np
from collections import Counter
from joblib import Parallel, delayed


# Indices of the independent trials (outermost loop of the simulation).
trial = list(range(20))
# Values substituted into the `w_duration` config override for the speaker.
w_durations = [0, 1, 2, 3]
# Listener conditions; each selects how the listener agent is constructed.
intrinsic = ["production", "update"]
# One produce-and-perceive run is dispatched per element (20 per condition).
n_iter = list(range(20))

def perception(prod, perc, symbol):
    """Have ``prod`` produce ``symbol`` and return the form ``perc`` recognizes.

    Args:
        prod: Speaker. Must provide ``production(symbol)`` returning a
            ``(phoneme, obs, states)`` triple.
        perc: Listener. Must provide ``perception(obs)`` returning a
            ``(phoneme_hat, obs, states_hat)`` triple.
        symbol: The form handed to the speaker's ``production`` method.
            (Previously this argument was ignored and "kawuta" was always
            produced; the visible caller passes "kawuta", so results are
            unchanged.)

    Returns:
        The recognized form, with "kota" reported as "koota".
    """
    _, obs, _ = prod.production(symbol)
    # The observation sequence is converted to a float64 array before it is
    # handed to the listener.
    obs = np.array(obs).astype('double')
    phoneme_hat, _, _ = perc.perception(obs)
    # TODO: the long-vowel form cannot be identified and comes back as "kota",
    # so it is patched here. This should be fixed in the perception method itself.
    if phoneme_hat == "kota":
        return "koota"
    return phoneme_hat

# Run the sweep: for every trial and every (w_duration, listener condition)
# pair, a speaker produces "kawuta" once per element of n_iter and the
# recognized forms are tallied into one result row.
results_list = []
for t in trial:
    # Progress marker every fifth trial.
    if t % 5 == 0:
        print(f"trial: {t}")

    for w_duration, switch in product(w_durations, intrinsic):
        # Speaker: default configuration with only w_duration overridden.
        with initialize(config_path="../hyparam"):
            config_prod = compose(config_name="config.yml", overrides=[f"w_duration={w_duration}"])
        a = Agent(config_prod)

        # Listener: built according to the condition.
        # NOTE(review): both branches appear to use the un-overridden
        # config.yml ("update" reuses the notebook-level `config`), so the
        # two conditions may be constructing equivalent listeners — confirm
        # whether "update" was meant to use config_prod.
        if switch not in ("production", "update"):
            raise ValueError
        if switch == "production":
            with initialize(config_path="../hyparam"):
                config_perc = compose(config_name="config.yml")
            b = Agent(config_perc)
        else:
            b = Agent(config)

        # One produce-and-perceive run per element of n_iter, spread over
        # all available workers.
        perceptions = Parallel(n_jobs=-1)(
            delayed(perception)(prod=a, perc=b, symbol="kawuta") for _ in n_iter
        )

        # Tally once; a Counter yields 0 for forms that never occurred.
        tally = Counter(perceptions)
        row = {
            "trial": t,
            "w_duration": w_duration,  # TODO: "same as above" in the original note; referent not stated
            "intrinsic": switch,
            "n_kawuta": tally["kawuta"],
            "n_kawta": tally["kawta"],
            "n_kaQta": tally["kaQta"],
            "n_kauta": tally["kauta"],
            "n_koota": tally["koota"],
            "counter": tally,
        }
        results_list.append(row)
        print(tally)

trial: 0
Counter({'kauta': 19, 'tauta': 1})
Counter({'kauta': 20})
Counter({'kawuta': 11, 'kauta': 9})
Counter({'kauta': 15, 'kawuta': 5})
Counter({'kawuta': 17, 'kauta': 3})
Counter({'kawuta': 18, 'kauta': 1, 'tawuta': 1})
Counter({'kawuta': 18, 'kawutua': 1, 'kauta': 1})
Counter({'kawuta': 19, 'kawutau': 1})
Counter({'kauta': 17, 'tauta': 2, 'kautau': 1})
Counter({'kauta': 17, 'kautau': 1, 'kawuta': 1, 'kauka': 1})
Counter({'kauta': 12, 'kawuta': 8})
Counter({'kauta': 11, 'kawuta': 7, 'kawuka': 1, 'tawuata': 1})
Counter({'kawuta': 19, 'kauta': 1})
Counter({'kawuta': 17, 'kauta': 2, 'kawutao': 1})
Counter({'kawuta': 20})
Counter({'kawuta': 18, 'kauta': 1, 'kawuka': 1})
Counter({'kauta': 20})
Counter({'kauta': 17, 'kauka': 1, 'kawuta': 1, 'kautao': 1})
Counter({'kawuta': 10, 'kauta': 9, 'kauka': 1})
Counter({'kauta': 15, 'kawuta': 4, 'kawuka': 1})
Counter({'kawuta': 18, 'kauta': 2})
Counter({'kawuta': 18, 'kauta': 2})
Counter({'kawuta': 19, 'tawuta': 1})
Counter({'kawuta': 18, 'kauta':

In [4]:
import pandas as pd
# Tabulate the collected rows: one row per (trial, w_duration, condition)
# combination appended to results_list by the simulation loop.
results = pd.DataFrame(results_list)

In [5]:
results

Unnamed: 0,trial,w_duration,intrinsic,n_kawuta,n_kawta,n_kaQta,n_kauta,n_koota,counter
0,0,0,production,0,0,0,19,0,"{'kauta': 19, 'tauta': 1}"
1,0,0,update,0,0,0,20,0,{'kauta': 20}
2,0,1,production,11,0,0,9,0,"{'kauta': 9, 'kawuta': 11}"
3,0,1,update,5,0,0,15,0,"{'kawuta': 5, 'kauta': 15}"
4,0,2,production,17,0,0,3,0,"{'kawuta': 17, 'kauta': 3}"
...,...,...,...,...,...,...,...,...,...
155,19,1,update,9,0,0,10,0,"{'kauta': 10, 'kawuta': 9, 'kautao': 1}"
156,19,2,production,15,0,0,5,0,"{'kauta': 5, 'kawuta': 15}"
157,19,2,update,18,0,0,2,0,"{'kawuta': 18, 'kauta': 2}"
158,19,3,production,20,0,0,0,0,{'kawuta': 20}


In [6]:
# Analysis: see results.md (the original note here was left unfinished).
# Keep only the scalar columns (the per-row `counter` object is dropped) and
# write them to CSV without the index, then preview the first rows.
# NOTE(review): this rebinds `intrinsic`, which earlier in the notebook held
# the list of listener condition names.
intrinsic = results[["trial", "w_duration", "intrinsic", "n_kawuta", "n_kawta","n_kaQta", "n_kauta", "n_koota"]]
intrinsic.to_csv('../data/w_intrinsic.csv', index=False)
intrinsic.head()

Unnamed: 0,trial,w_duration,intrinsic,n_kawuta,n_kawta,n_kaQta,n_kauta,n_koota
0,0,0,production,0,0,0,19,0
1,0,0,update,0,0,0,20,0
2,0,1,production,11,0,0,9,0
3,0,1,update,5,0,0,15,0
4,0,2,production,17,0,0,3,0


In [7]:
def flatten_dict(count_i):
    """Expand an item -> count mapping into a flat list with repetitions.

    Each key appears in the result as many times as its count, in the
    mapping's iteration order. Keys whose count is zero contribute nothing.
    """
    return [
        item
        for item, times in dict(count_i).items()
        for _ in range(times)
    ]

In [8]:
# Pool every recognized form from all result rows into one flat list.
recognized = [
    form
    for counts in results.counter.to_numpy()
    for form in flatten_dict(counts)
]

# Expected total: 20 trials x 4 w_durations x 2 conditions x 20 runs = 3200.
len(recognized)

3200

In [9]:
# Build a two-column table (candidate form, total count) over all pooled
# recognitions and write it to CSV without the index.
# Counter -> DataFrame recipe:
# https://stackoverflow.com/questions/31111032/transform-a-counter-object-into-a-pandas-dataframe
d = dict(Counter(recognized))
# orient='index' puts the forms in the index and the counts in column 0;
# reset_index() then moves the forms into a regular column named 'index'.
df = pd.DataFrame.from_dict(d, orient='index').reset_index()
candidate = df.rename(columns={'index': 'candidate', 0:'count'})
candidate.to_csv('../data/w_candidate.csv', index=False)