# メンバーのデータの移行
### postgresqlへの接続

In [None]:
import psycopg2
# Connection to the legacy PostgreSQL database that the data is read from.
# The libpq-style DSN is kept in one place so it is easy to spot and change.
_PG_DSN = 'user=honomara dbname=honomara password=honomara'
conn = psycopg2.connect(_PG_DSN)

def get_data(sql):
    """Execute ``sql`` on the PostgreSQL connection and return all rows.

    Args:
        sql: A complete SQL statement; it is passed to the driver as-is.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the statement.
    """
    cursor = conn.cursor()
    with cursor:
        cursor.execute(sql)
        return cursor.fetchall()

def get_colname(table):
    """Return the column names of ``table`` in the PostgreSQL database.

    The names are read from the cursor description of a one-row SELECT.

    NOTE: ``table`` is interpolated into the SQL text without quoting, so it
    must be a trusted identifier.

    Args:
        table: Name of the table to inspect.

    Returns:
        A list with the ``name`` of each entry in ``cursor.description``.
    """
    query = 'SELECT * FROM {} LIMIT 1;'.format(table)
    with conn.cursor() as cursor:
        cursor.execute(query)
        names = []
        for column in cursor.description:
            names.append(column.name)
    return names

def exec_transaction(sqls):
    """Execute several statements on the PostgreSQL connection atomically.

    All statements run on one cursor and are committed together. If any
    statement (or the commit) fails, the transaction is rolled back so the
    connection is usable again, and the original exception is re-raised.

    Args:
        sqls: Iterable of complete SQL statements, executed in order.

    Raises:
        Exception: Whatever the driver raised; propagated after the rollback.
    """
    try:
        with conn.cursor() as cur:
            for sql in sqls:
                cur.execute(sql)
        conn.commit()
    except Exception:
        # Without this the connection stays in an aborted transaction and
        # every later query on `conn` fails until someone rolls back.
        conn.rollback()
        raise

### mysqlへの接続

In [None]:
import mysql.connector
# Connection settings for the destination MySQL database.
mysql_config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'honomara',
    'password': 'honomara',
    'database': 'honomara',
}
conn2 = mysql.connector.connect(**mysql_config)

# dictionary=True makes fetched rows dicts keyed by column name; the rest of
# this file indexes rows by name (e.g. row['kana'], row['Field']).
cur2 = conn2.cursor(dictionary=True)

### member テーブルの作成

In [None]:
# DDL for the destination `member` table in MySQL.
# `IF NOT EXISTS` makes re-running this cell a no-op once the table exists.
# `kana` holds the full reading (family + first name joined); later cells in
# this file split it into `family_kana` / `first_kana` and then drop it.
create_member_table = '''
CREATE TABLE IF NOT EXISTS member (
    `id`          INT          PRIMARY KEY AUTO_INCREMENT,
    `family_name` VARCHAR(30)  NOT NULL,
    `family_kana` VARCHAR(30),
    `first_name`  VARCHAR(30)  NOT NULL,
    `first_kana`  VARCHAR(30),
    `show_name`   VARCHAR(30)  NOT NULL,
    `kana`        VARCHAR(60)  NOT NULL, -- for index
    `year`        INT          NOT NULL,
    `sex`         INT          NOT NULL DEFAULT 0, -- 0=man, 1=woman
    `visible`     BOOL         NOT NULL DEFAULT true
);
'''

# Create the table on the MySQL connection and commit.
cur2.execute(create_member_table)
conn2.commit()

### postgresqlからmysqlへのデータの移行

In [None]:
import re
import MeCab

def name_len(name):
    """Return the length of ``name`` without counting parentheses.

    Both half-width ``()`` and full-width ``（）`` parentheses are ignored, so
    that an annotation such as ``山田(旧姓)`` is not penalised for its brackets
    when two candidate names are compared by length.

    Args:
        name: The string to measure.

    Returns:
        ``len(name)`` minus the number of parenthesis characters in it.
    """
    parens = ('(', ')', '（', '）')
    return len(name) - sum(name.count(p) for p in parens)
# Column names of the legacy `person` table, used to turn each fetched row
# tuple into a dict keyed by column name.
colname = get_colname('person')
mecab = MeCab.Tagger("-Ochasen")
# Matches runs of katakana (including the long-vowel mark); used to pull the
# readings out of the MeCab output.
regkana = re.compile(r'[ァ-ヶー]+')

for fields in get_data('SELECT * FROM person ORDER BY class;'):
    item = dict(zip(colname, fields))

    # Choose between `name` and `fullname`: use whichever is present, and if
    # both are, the longer one (parentheses not counted). Ties go to `name`.
    if not item['name']:
        name = item['fullname']
    elif not item['fullname']:
        name = item['name']
    elif name_len(item['fullname']) > name_len(item['name']):
        name = item['fullname']
    else:
        name = item['name']

    # Family name: the leading part of `after_name` up to the first
    # parenthesis (half- or full-width).
    fam_n = re.match("^[^（()）]+", item['after_name']).group()

    # First name: what is left of `name` once the family name is removed,
    # accepted only if it contains no parenthesis or full-width space.
    fir_n = re.match("[^（()）　]*$", name.replace(fam_n, ''))
    if not fir_n or len(fir_n.group()) == 0:
        # No usable first name: store the placeholder '不明' ("unknown") and
        # derive the reading from the family name alone.
        fir_n = '不明'
        parse_name = fam_n
    else:
        fir_n = fir_n.group()
        fir_n = fir_n.replace(' ', '')
        fir_n = fir_n.replace('　', '')
        parse_name = fam_n + fir_n

    # Display name: keep the chosen name when it carries a parenthesised
    # annotation, otherwise fall back to `after_name`.
    if re.match(".*[（()）].*", name):
        show_name = name
    else:
        show_name = item['after_name']

    # Get the reading (furigana) from MeCab.
    parse_result = mecab.parse(parse_name)
    # Collect every katakana run found in the parse output (list).
    kana_list = regkana.findall(parse_result.replace('\n', ' '))
    kana = ''.join(kana_list)

    # NOTE(review): maps the legacy `class` number to a calendar year
    # (class 9 -> 2000); presumably the entry year - confirm.
    year = item['class'] + 2000 - 9

    # `sex` is inserted as the integer code from the source row (the table
    # comment says 0=man, 1=woman); no text label is needed here.
    cur2.execute('''
    INSERT INTO member (id,family_name,first_name,show_name,kana,year,sex,visible)
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s);''', (item['person_id'], fam_n, fir_n, show_name, kana, year, item['sex'], item['visible']))
conn2.commit()

### カナ情報の修正

In [None]:
import csv


# Update the csv file before running this cell!
csv_path = "/vagrant/notebooks/member.csv"
# In most cases this path is /vagrant/notebooks/member.csv or similar.

# Overwrite the machine-generated `kana` of each member with the corrected
# value from the CSV (columns used: `person_id`, `kana`).
with open(csv_path, "r") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Parameterized query: the CSV text is passed as data, so a quote
        # character in a reading can neither break nor alter the statement.
        cur2.execute(
            'UPDATE member SET kana = %s WHERE id = %s;',
            (row['kana'], int(row['person_id'])),
        )
conn2.commit()

#### カナ情報の分割
レガシーな問題で、カナを姓と名をつなげた形で生成していたため、姓のカナと名のカナに分割して修正する。

In [None]:
import re
import MeCab

mecab = MeCab.Tagger("-Ochasen")
# Matches runs of katakana (including the long-vowel mark).
regkana = re.compile(r'[ァ-ヶー]+')

csv_path = "/vagrant/notebooks/member.csv"
# In most cases this path is /vagrant/notebooks/member.csv or similar.

# Split each member's joined `kana` into `family_kana` and `first_kana`.
# For every CSV row the split is decided in this order:
#   1. the stored kana starts with the first katakana run MeCab produced
#      -> that run is the family kana, the rest is the first-name kana;
#   2. the stored kana ends with the second run
#      -> that run is the first-name kana, the rest is the family kana;
#   3. otherwise use the CSV's own `family_kana` / `first_kana` if given;
#   4. otherwise leave the member untouched.
with open(csv_path, "r") as f:
    reader = csv.DictReader(f)
    for row in reader:
        person_id = int(row['person_id'])
        cur2.execute(
            'SELECT member.kana, member.family_name, member.first_name '
            'FROM member WHERE id = %s;',
            (person_id,),
        )
        d = cur2.fetchall()
        if not d:
            # The CSV refers to an id that was never migrated; report and skip
            # instead of failing with an IndexError.
            print("[NOT FOUND]", row)
            continue

        member = d[0]
        kana = member['kana']
        parse_result = mecab.parse(
            "{} {}".format(member['family_name'], member['first_name']))
        kana_list = regkana.findall(parse_result.replace('\n', ' '))
        if not kana_list:
            print("[PARSE ERROR]", row['kana'], d, row)
            continue

        if kana.startswith(kana_list[0]):
            family_kana = kana_list[0]
            first_kana = kana[len(family_kana):]
        elif len(kana_list) > 1 and kana.endswith(kana_list[1]):
            first_kana = kana_list[1]
            family_kana = kana[:-len(first_kana)]
        elif row['family_kana']:
            family_kana = row['family_kana']
            first_kana = row['first_kana']
        else:
            continue

        # Persist the split for every branch that produced one. Previously the
        # UPDATE sat inside the prefix-match branch only, so results from the
        # other two branches were computed and then discarded.
        cur2.execute(
            'UPDATE member SET family_kana = %s, first_kana = %s WHERE id = %s;',
            (family_kana, first_kana, person_id),
        )
conn2.commit()

In [None]:
# The joined `kana` column is no longer used once it has been split into
# `family_kana` / `first_kana`, so remove it from the table.
drop_kana_column = '''
ALTER TABLE member DROP COLUMN kana;
'''
cur2.execute(drop_kana_column)

conn2.commit()

## miscellaneous info

In [None]:
# Sanity check: fetch the ten members with the largest `year`.
cur2.execute('SELECT * FROM member ORDER BY year DESC LIMIT 10;')
# Left as a bare expression (presumably so the notebook cell displays the rows).
cur2.fetchall()

In [None]:
# Print the name and type of every column of the `member` table.
cur2.execute('SHOW COLUMNS FROM member;')
data = cur2.fetchall()
for column in data:
    print(column['Field'], column['Type'])