In [1]:
import numpy as np
import re
import pandas as pd
import pickle
from collections import Counter
from openpyxl import Workbook
from openpyxl import load_workbook

# Excelのdata
- A_W_171116
  - excelの行番号1～299行目までのデータを使用
- A_W_171128
  - excelの行番号300～末尾行目までのデータを使用
- S_H_171116
  - A_W_YYMMDDとは別の人たちが作成したexcelデータ
- 斉藤_2015
  - S_H_171116の破損しているreply列（およびgid列）のデータを修復するためのファイル

In [2]:
def _load_first_sheet(directory, workbook_name):
    """Open an Excel workbook and parse its first sheet.

    Args:
        directory: Path prefix (including the trailing separator) of the
            folder that holds the workbook.
        workbook_name: File name of the workbook.

    Returns:
        A ``(excel_file, dataframe)`` tuple: the opened ``pd.ExcelFile`` and
        the DataFrame parsed from its first sheet.
    """
    excel_file = pd.ExcelFile(directory + workbook_name)
    return excel_file, excel_file.parse(excel_file.sheet_names[0])


dirpath = "../data/excel/"

# Two snapshots of the A/W coding workbook
A_W_171116_excel, A_W_171116_df = _load_first_sheet(dirpath, "A_W_171116.xlsx")
A_W_171128_excel, A_W_171128_df = _load_first_sheet(dirpath, "A_W_171128.xlsx")

# S/H coding workbook
S_H_171116_excel, S_H_171116_df = _load_first_sheet(dirpath, "171116.xlsx")

# 2015 workbook used to repair columns of the S/H workbook
S_2015_excel, S_2015_df = _load_first_sheet(dirpath, "2015斉藤.xlsx")

## 必要なデータ
- id
    - A_W_YYMMDDのNo. => (0列目)
    - SH_171116のid => (0列目)
    - 斉藤2015の発言番号 => (0列目)
- reply
    - A_W_YYMMDDの返信元 => (1列目)
    - SH_171116のreply => (3列目)
    - 斉藤2015の返信元 => (1列目)
- group
    - A_W_YYMMDDのgid => (6列目)
    - SH_171116のgid => (5列目)
    - 斉藤2015のグループ番号 => (6列目)
- who
    - A_W_YYMMDDのニックネーム => (8列目)
    - SH_171116のcname => (6列目)
    - 斉藤2015のニックネーム => (8列目)
- body
    - A_W_YYMMDDの発言内容 => (11列目)
    - SH_171116のbody => (8列目)
    - 斉藤2015の発言内容 => (11列目)
- argumentation_a
    - A_W_YYMMDDのArgumentation(A) => (22列目)
    - SH_171116のArgumentation(H) => (17列目)
- argumentation_b
    - A_W_YYMMDDのWatanabe.2 => (23列目)
    - SH_171116のArgumentation(S) => (18列目)
- epistemic_a
    - A_W_YYMMDDのEpistemic(A) => (12列目)
    - SH_171116のEpistemic(H) => (9列目)
- epistemic_b
    - A_W_YYMMDDのWatanabe => (13列目)
    - SH_171116のEpistemic(S) => (10列目)
- social_table_a
    - A_W_YYMMDDのSocial(A) => (27列目)
    - SH_171116のSocial(H) => (21列目)
- social_table_b
    - A_W_YYMMDDのWatanabe.3 => (28列目)
    - SH_171116のSocial(S) => (22列目)
- coordination_a
    - A_W_YYMMDDのCoordination(A) => (17列目)
    - SH_171116のCoordination(H) => (13列目)
- coordination_b
    - A_W_YYMMDDのWatanabe.1 => (18列目)
    - SH_171116のCoordination(S) => (14列目)
    
    
## 各dfで必要な列
- A_W_YYMMDD[0, 1, 6, 8, 11, 22, 23, 12, 13, 27, 28, 17, 18]
- SH_171116[0, 3, 5, 6, 8, 17, 18, 9, 10, 21, 22, 13, 14]
- 斉藤2015[0, 1, 6, 8, 11]

In [3]:
# 0-based positions of the columns to keep from each workbook.
# Each list is written in the logical order:
#   id, reply, group, who, body,
#   argumentation a/b, epistemic a/b, social a/b, coordination a/b
A_W_YYMMDD_use = [
    0, 1, 6, 8, 11,
    22, 23,
    12, 13,
    27, 28,
    17, 18,
]
S_H_171116_use = [
    0, 3, 5, 6, 8,
    17, 18,
    9, 10,
    21, 22,
    13, 14,
]
# The 2015 workbook only supplies id, reply, group, who and body.
S_2015_use = [0, 1, 6, 8, 11]

## 使用しない列の削除

In [4]:
def _keep_columns_by_position(df, keep_positions):
    """Return ``df`` without the columns whose position is not listed.

    Columns are removed by label, so the surviving columns keep their
    original left-to-right order regardless of the order of
    ``keep_positions``.

    Args:
        df: DataFrame to prune.
        keep_positions: Iterable of 0-based column positions to keep.

    Returns:
        A new DataFrame holding only the requested columns.
    """
    wanted = set(keep_positions)
    unused = [label for pos, label in enumerate(df.columns) if pos not in wanted]
    return df.drop(unused, axis=1)


# Give both A_W workbooks the same column labels
A_W_171116_df.columns = A_W_171128_df.columns

A_W_171116_df = _keep_columns_by_position(A_W_171116_df, A_W_YYMMDD_use)
A_W_171128_df = _keep_columns_by_position(A_W_171128_df, A_W_YYMMDD_use)
S_H_171116_df = _keep_columns_by_position(S_H_171116_df, S_H_171116_use)
S_2015_df = _keep_columns_by_position(S_2015_df, S_2015_use)

In [5]:
# Preview the first three rows of the pruned 171116 A/W sheet
A_W_171116_df.head(n=3)

Unnamed: 0,No.,返信元,gid,ニックネーム,発言内容,Epistemic(A),Watanabe,Coordination(A),Watanabe.1,Argumentation(A),Watanabe.2,Social(A),Watanabe.3
0,1,\N,1,まこぴす,よろしくお願いします！,Off task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
1,31,\N,1,哲,よろしくお願いします,Off task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
2,70,\N,1,仙波,名前なのが恥ずかしいです…\n\nよろしくお願いします！,Off task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,


In [6]:
# Preview the first three rows of the pruned 171128 A/W sheet
A_W_171128_df.head(n=3)

Unnamed: 0,No.,返信元,gid,ニックネーム,発言内容,Epistemic(A),Watanabe,Coordination(A),Watanabe.1,Argumentation(A),Watanabe.2,Social(A),Watanabe.3
0,1.0,\N,1.0,まこぴす,よろしくお願いします！,Off task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
1,31.0,\N,1.0,哲,よろしくお願いします,Off task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
2,70.0,\N,1.0,仙波,名前なのが恥ずかしいです…\n\nよろしくお願いします！,Off task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,


In [7]:
# Preview the first three rows of the pruned S/H sheet
S_H_171116_df.head(n=3)

Unnamed: 0,id,reply,gid,cname,body,Epistemic(H),Epistemic(S),Coordination(H),Coordination(S),Argumentation(H),Argumentation(S),Social(H),Social(S)
0,1593,2,2,世界のわたべ,よろしくお願いします。,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
1,1598,2,3,ざきさん,よろしくです,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
2,1606,2,56,あ,よろしくです,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,


In [8]:
# Preview the first three rows of the pruned 2015 sheet
S_2015_df.head(n=3)

Unnamed: 0,発言番号,返信元,グループ番号,ニックネーム,発言内容
0,1593,\N,1,世界のわたべ,よろしくお願いします。
1,1598,\N,1,ざきさん,よろしくです
2,1606,\N,1,あ,よろしくです


## データの一致の確認

In [9]:
def _same_values(left, right):
    """Return True when two columns hold identical values in the same order.

    Args:
        left: First pandas Series.
        right: Second pandas Series.

    Returns:
        ``True`` only if both columns have the same length and every pair of
        elements compares equal; ``False`` otherwise.
    """
    return np.array_equal(left.values, right.values)


# The previous form `not False in a == b` is a chained comparison,
# i.e. `not ((False in a) and (a == b))`, so it never actually compared
# the two columns. Compare them explicitly instead.
print(_same_values(S_H_171116_df['id'], S_2015_df['発言番号']))
print(_same_values(S_H_171116_df['cname'], S_2015_df['ニックネーム']))
print(_same_values(S_H_171116_df['body'], S_2015_df['発言内容']))

# Row/column counts of every frame
print(A_W_171116_df.shape)
print(A_W_171128_df.shape)
print(S_H_171116_df.shape)
print(S_2015_df.shape)

True
True
True
(4943, 13)
(4945, 13)
(5017, 13)
(5017, 5)


## 欠損データの置換とA_W_YYMMDDの結合

In [10]:
# Overwrite the reply and gid columns of the S/H sheet with the values
# from the 2015 sheet (assignment is aligned on the row index).
for target_col, source_col in (("reply", '返信元'), ("gid", 'グループ番号')):
    S_H_171116_df[target_col] = S_2015_df[source_col]

# Rows before the border come from the 171116 snapshot,
# rows from the border onwards come from the 171128 snapshot.
concat_border = 299
A_W_171116_df = A_W_171116_df.iloc[:concat_border]
A_W_171128_df = A_W_171128_df.iloc[concat_border:]
A_W_df = pd.concat([A_W_171116_df, A_W_171128_df])

for frame in (A_W_df, S_H_171116_df):
    print(frame.shape)

(4945, 13)
(5017, 13)


## 列名の統一

In [11]:
# Common column names, in the positional order both frames currently share
col = [
    # identifiers and text
    'say_id', 'reply_id', 'group_id', 'name', 'body',
    # one (a, b) label pair per coding dimension
    'epistemic_a', 'epistemic_b',
    'coordination_a', 'coordination_b',
    'argument_a', 'argument_b',
    'social_a', 'social_b',
]

# Rebuild each frame from its raw values so that it gets the shared column
# names and a fresh default row index.
A_W_df, S_H_171116_df = (
    pd.DataFrame(frame.values, columns=col)
    for frame in (A_W_df, S_H_171116_df)
)

In [12]:
# Show only the first rows instead of dumping the whole frame
A_W_df.head(10)

Unnamed: 0,say_id,reply_id,group_id,name,body,epistemic_a,epistemic_b,coordination_a,coordination_b,argument_a,argument_b,social_a,social_b
0,1,\N,1,まこぴす,よろしくお願いします！,Off task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
1,31,\N,1,哲,よろしくお願いします,Off task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
2,70,\N,1,仙波,名前なのが恥ずかしいです…\n\nよろしくお願いします！,Off task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
3,119,\N,1,まこぴす,早速課題やっちゃいましょう！,Off task,Off Task,Proceedings,Proceedings,Non-argumentative moves,Non-argumentative moves,,
4,163,\N,1,仙波,やっちゃいましょう\n\nmoodleはゴミです！,On task,On Task,,,Simple claim,Simple Claim,Externalization,
5,194,\N,1,まこぴす,使いにくいです(笑),On task,On Task,,,Simple claim,Simple Claim,Externalization,
6,302,\N,1,哲,同意です,On task,On Task,,,Simple claim,Simple Claim,Quick consensus building,Quick consensus building
7,309,\N,1,仙波,以前インタラクティブアート受講していたのですが、その時に課題が不具合で出せなくなっていた時期...,On task,On Task,,,Grounded claim,Grounded Claim,Externalization,Externalization
8,385,\N,1,まこぴす,学習環境として必要最低限の機能は備えていると思うが、操作性の面ではPCの最低限の知識があるこ...,On task,On Task,,,Grounded and Qualified claim,Grounded and Qualified claim,Externalization,Externalization
9,426,\N,1,まこぴす,自分の意見です！(笑),Off task,On Task,,,Non-argumentative moves,Simple Claim,,Externalization


In [13]:
# Show only the first rows instead of dumping the whole frame
S_H_171116_df.head(10)

Unnamed: 0,say_id,reply_id,group_id,name,body,epistemic_a,epistemic_b,coordination_a,coordination_b,argument_a,argument_b,social_a,social_b
0,1593,\N,1,世界のわたべ,よろしくお願いします。,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
1,1598,\N,1,ざきさん,よろしくです,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
2,1606,\N,1,あ,よろしくです,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
3,1659,\N,1,ざきさん,みなさんファイルをアップしましたか？,Off Task,Off Task,Technical coordination,Technical coordination,Non-argumentative moves,Non-argumentative moves,,
4,1683,\N,1,あ,今アップしました！,Off Task,Off Task,,Technical coordination,Non-argumentative moves,Non-argumentative moves,,
5,1692,\N,1,世界のわたべ,今アップしました！,Off Task,No Sense,,Technical coordination,Non-argumentative moves,,,
6,1704,\N,1,ざきさん,名前バレｗｗ,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
7,1794,\N,1,ざきさん,質問特にないです,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
8,1821,\N,1,あ,同じく特に無いです,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,
9,1914,\N,1,世界のわたべ,いますよー,Off Task,Off Task,,,Non-argumentative moves,Non-argumentative moves,,


In [14]:
# Put the columns into the final order:
# identifiers/text first, then argument, epistemic, social, coordination
col_1 = [
    'say_id', 'reply_id', 'group_id', 'name', 'body',
    'argument_a', 'argument_b',
    'epistemic_a', 'epistemic_b',
    'social_a', 'social_b',
    'coordination_a', 'coordination_b',
]
A_W_df = A_W_df.loc[:, col_1]
S_H_171116_df = S_H_171116_df.loc[:, col_1]

In [15]:
# Show only the first rows instead of dumping the whole frame
A_W_df.head(10)

Unnamed: 0,say_id,reply_id,group_id,name,body,argument_a,argument_b,epistemic_a,epistemic_b,social_a,social_b,coordination_a,coordination_b
0,1,\N,1,まこぴす,よろしくお願いします！,Non-argumentative moves,Non-argumentative moves,Off task,Off Task,,,,
1,31,\N,1,哲,よろしくお願いします,Non-argumentative moves,Non-argumentative moves,Off task,Off Task,,,,
2,70,\N,1,仙波,名前なのが恥ずかしいです…\n\nよろしくお願いします！,Non-argumentative moves,Non-argumentative moves,Off task,Off Task,,,,
3,119,\N,1,まこぴす,早速課題やっちゃいましょう！,Non-argumentative moves,Non-argumentative moves,Off task,Off Task,,,Proceedings,Proceedings
4,163,\N,1,仙波,やっちゃいましょう\n\nmoodleはゴミです！,Simple claim,Simple Claim,On task,On Task,Externalization,,,
5,194,\N,1,まこぴす,使いにくいです(笑),Simple claim,Simple Claim,On task,On Task,Externalization,,,
6,302,\N,1,哲,同意です,Simple claim,Simple Claim,On task,On Task,Quick consensus building,Quick consensus building,,
7,309,\N,1,仙波,以前インタラクティブアート受講していたのですが、その時に課題が不具合で出せなくなっていた時期...,Grounded claim,Grounded Claim,On task,On Task,Externalization,Externalization,,
8,385,\N,1,まこぴす,学習環境として必要最低限の機能は備えていると思うが、操作性の面ではPCの最低限の知識があるこ...,Grounded and Qualified claim,Grounded and Qualified claim,On task,On Task,Externalization,Externalization,,
9,426,\N,1,まこぴす,自分の意見です！(笑),Non-argumentative moves,Simple Claim,Off task,On Task,,Externalization,,


In [16]:
# Show only the first rows instead of dumping the whole frame
S_H_171116_df.head(10)

Unnamed: 0,say_id,reply_id,group_id,name,body,argument_a,argument_b,epistemic_a,epistemic_b,social_a,social_b,coordination_a,coordination_b
0,1593,\N,1,世界のわたべ,よろしくお願いします。,Non-argumentative moves,Non-argumentative moves,Off Task,Off Task,,,,
1,1598,\N,1,ざきさん,よろしくです,Non-argumentative moves,Non-argumentative moves,Off Task,Off Task,,,,
2,1606,\N,1,あ,よろしくです,Non-argumentative moves,Non-argumentative moves,Off Task,Off Task,,,,
3,1659,\N,1,ざきさん,みなさんファイルをアップしましたか？,Non-argumentative moves,Non-argumentative moves,Off Task,Off Task,,,Technical coordination,Technical coordination
4,1683,\N,1,あ,今アップしました！,Non-argumentative moves,Non-argumentative moves,Off Task,Off Task,,,,Technical coordination
5,1692,\N,1,世界のわたべ,今アップしました！,Non-argumentative moves,,Off Task,No Sense,,,,Technical coordination
6,1704,\N,1,ざきさん,名前バレｗｗ,Non-argumentative moves,Non-argumentative moves,Off Task,Off Task,,,,
7,1794,\N,1,ざきさん,質問特にないです,Non-argumentative moves,Non-argumentative moves,Off Task,Off Task,,,,
8,1821,\N,1,あ,同じく特に無いです,Non-argumentative moves,Non-argumentative moves,Off Task,Off Task,,,,
9,1914,\N,1,世界のわたべ,いますよー,Non-argumentative moves,Non-argumentative moves,Off Task,Off Task,,,,


## A_WとS_Hの結合

In [17]:
# Stack both coded data sets into one frame with a fresh 0..n-1 row index
All_df = pd.concat([A_W_df, S_H_171116_df], ignore_index=True)
# Show only the first rows instead of dumping the whole frame
All_df.head(10)

Unnamed: 0,say_id,reply_id,group_id,name,body,argument_a,argument_b,epistemic_a,epistemic_b,social_a,social_b,coordination_a,coordination_b
0,1,\N,1,まこぴす,よろしくお願いします！,Non-argumentative moves,Non-argumentative moves,Off task,Off Task,,,,
1,31,\N,1,哲,よろしくお願いします,Non-argumentative moves,Non-argumentative moves,Off task,Off Task,,,,
2,70,\N,1,仙波,名前なのが恥ずかしいです…\n\nよろしくお願いします！,Non-argumentative moves,Non-argumentative moves,Off task,Off Task,,,,
3,119,\N,1,まこぴす,早速課題やっちゃいましょう！,Non-argumentative moves,Non-argumentative moves,Off task,Off Task,,,Proceedings,Proceedings
4,163,\N,1,仙波,やっちゃいましょう\n\nmoodleはゴミです！,Simple claim,Simple Claim,On task,On Task,Externalization,,,
5,194,\N,1,まこぴす,使いにくいです(笑),Simple claim,Simple Claim,On task,On Task,Externalization,,,
6,302,\N,1,哲,同意です,Simple claim,Simple Claim,On task,On Task,Quick consensus building,Quick consensus building,,
7,309,\N,1,仙波,以前インタラクティブアート受講していたのですが、その時に課題が不具合で出せなくなっていた時期...,Grounded claim,Grounded Claim,On task,On Task,Externalization,Externalization,,
8,385,\N,1,まこぴす,学習環境として必要最低限の機能は備えていると思うが、操作性の面ではPCの最低限の知識があるこ...,Grounded and Qualified claim,Grounded and Qualified claim,On task,On Task,Externalization,Externalization,,
9,426,\N,1,まこぴす,自分の意見です！(笑),Non-argumentative moves,Simple Claim,Off task,On Task,,Externalization,,


# ラベル列のNaNを取り除いて、小文字に置換する

In [18]:
# Label columns (one a/b pair per coding dimension)
col_p = [
    'argument_a', 'argument_b', 'epistemic_a', 'epistemic_b', 'social_a',
    'social_b', 'coordination_a', 'coordination_b'
]

# social_b contains the number 2723 where a label is expected; treat it as
# missing. NOTE(review): presumably a data-entry slip - confirm against the
# source workbook.
All_df['social_b'] = All_df['social_b'].replace(2723, 'blank')

# Mark every missing cell (in all columns, not only the labels) as 'blank'
All_df = All_df.fillna('blank')

# Normalise label capitalisation
for label_col in col_p:
    All_df[label_col] = All_df[label_col].str.lower()

# Show only the first rows instead of dumping the whole frame
All_df.head(10)

Unnamed: 0,say_id,reply_id,group_id,name,body,argument_a,argument_b,epistemic_a,epistemic_b,social_a,social_b,coordination_a,coordination_b
0,1,\N,1,まこぴす,よろしくお願いします！,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,blank,blank
1,31,\N,1,哲,よろしくお願いします,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,blank,blank
2,70,\N,1,仙波,名前なのが恥ずかしいです…\n\nよろしくお願いします！,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,blank,blank
3,119,\N,1,まこぴす,早速課題やっちゃいましょう！,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,proceedings,proceedings
4,163,\N,1,仙波,やっちゃいましょう\n\nmoodleはゴミです！,simple claim,simple claim,on task,on task,externalization,blank,blank,blank
5,194,\N,1,まこぴす,使いにくいです(笑),simple claim,simple claim,on task,on task,externalization,blank,blank,blank
6,302,\N,1,哲,同意です,simple claim,simple claim,on task,on task,quick consensus building,quick consensus building,blank,blank
7,309,\N,1,仙波,以前インタラクティブアート受講していたのですが、その時に課題が不具合で出せなくなっていた時期...,grounded claim,grounded claim,on task,on task,externalization,externalization,blank,blank
8,385,\N,1,まこぴす,学習環境として必要最低限の機能は備えていると思うが、操作性の面ではPCの最低限の知識があるこ...,grounded and qualified claim,grounded and qualified claim,on task,on task,externalization,externalization,blank,blank
9,426,\N,1,まこぴす,自分の意見です！(笑),non-argumentative moves,simple claim,off task,on task,blank,externalization,blank,blank


# スペルミスに対する処理
socialのスペルミス: 「integra**r**ion-oriented consensus building」 ＝＞ 「integration-oriented consensus building」に修正する

In [19]:
# Misspelled label -> correct label
dict_mis = {
    'integrarion-oriented consensus building':
    'integration-oriented consensus building'
}

# Label counts before the correction
print(Counter(All_df['social_a']))
print(Counter(All_df['social_b']), "\n")

# Apply the same correction to both social columns in one pass
All_df = All_df.replace(
    {social_col: dict_mis for social_col in ('social_a', 'social_b')})

# Label counts after the correction
print(Counter(All_df['social_a']))
print(Counter(All_df['social_b']))

Counter({'blank': 5713, 'externalization': 1917, 'quick consensus building': 1251, 'elicitation': 514, 'integrarion-oriented consensus building': 221, 'integration-oriented consensus building': 142, 'conflict-oriented consensus building': 122, 'summary': 82})
Counter({'blank': 5572, 'externalization': 2061, 'quick consensus building': 1250, 'elicitation': 474, 'integrarion-oriented consensus building': 384, 'conflict-oriented consensus building': 170, 'summary': 51}) 

Counter({'blank': 5713, 'externalization': 1917, 'quick consensus building': 1251, 'elicitation': 514, 'integration-oriented consensus building': 363, 'conflict-oriented consensus building': 122, 'summary': 82})
Counter({'blank': 5572, 'externalization': 2061, 'quick consensus building': 1250, 'elicitation': 474, 'integration-oriented consensus building': 384, 'conflict-oriented consensus building': 170, 'summary': 51})


# 空白行を削除する

In [20]:
# A row whose say_id is 'blank' was empty in the source sheet
blank_rows = All_df['say_id'].values == 'blank'

# Report where the blank rows are and what they contain
print(np.where(blank_rows)[0])
print(All_df[blank_rows].values)

# Keep everything else
All_df = All_df[~blank_rows]

# Confirm that no blank row is left
print(np.where(All_df['say_id'].values == 'blank')[0])

[3498 3499 4944]
[['blank' 'blank' 'blank' 'blank' 'blank' 'blank' 'blank' 'blank' 'blank'
  'blank' 'blank' 'blank' 'blank']
 ['blank' 'blank' 'blank' 'blank' 'blank' 'blank' 'blank' 'blank' 'blank'
  'blank' 'blank' 'blank' 'blank']
 ['blank' 'blank' 'blank' 'blank' 'blank' 'blank' 'blank' 'blank' 'blank'
  'blank' 'blank' 'blank' 'blank']]
[]


# reply_idの\Nを-1に置換する

In [21]:
# "\N" in reply_id means the message replies to nothing; encode it as -1
All_df = All_df.replace({'reply_id': {"\\N": -1}})
# Show only the first rows instead of dumping the whole frame
All_df.head(10)

Unnamed: 0,say_id,reply_id,group_id,name,body,argument_a,argument_b,epistemic_a,epistemic_b,social_a,social_b,coordination_a,coordination_b
0,1,-1,1,まこぴす,よろしくお願いします！,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,blank,blank
1,31,-1,1,哲,よろしくお願いします,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,blank,blank
2,70,-1,1,仙波,名前なのが恥ずかしいです…\n\nよろしくお願いします！,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,blank,blank
3,119,-1,1,まこぴす,早速課題やっちゃいましょう！,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,proceedings,proceedings
4,163,-1,1,仙波,やっちゃいましょう\n\nmoodleはゴミです！,simple claim,simple claim,on task,on task,externalization,blank,blank,blank
5,194,-1,1,まこぴす,使いにくいです(笑),simple claim,simple claim,on task,on task,externalization,blank,blank,blank
6,302,-1,1,哲,同意です,simple claim,simple claim,on task,on task,quick consensus building,quick consensus building,blank,blank
7,309,-1,1,仙波,以前インタラクティブアート受講していたのですが、その時に課題が不具合で出せなくなっていた時期...,grounded claim,grounded claim,on task,on task,externalization,externalization,blank,blank
8,385,-1,1,まこぴす,学習環境として必要最低限の機能は備えていると思うが、操作性の面ではPCの最低限の知識があるこ...,grounded and qualified claim,grounded and qualified claim,on task,on task,externalization,externalization,blank,blank
9,426,-1,1,まこぴす,自分の意見です！(笑),non-argumentative moves,simple claim,off task,on task,blank,externalization,blank,blank


# say_id、reply_id、group_id、name、bodyの型をstr型に統一する

In [22]:
# Element types before the conversion
print(type(All_df['say_id'][0]))
print(type(All_df['reply_id'][0]))
print(type(All_df['group_id'][0]), "\n")

# Ids go through int first so that a float such as 1.0 becomes '1', not '1.0'
for id_col in ('say_id', 'reply_id', 'group_id'):
    All_df[id_col] = All_df[id_col].astype(int).astype(str)

# Free-text columns are simply stringified
for text_col in ('name', 'body'):
    All_df[text_col] = All_df[text_col].astype(str)

# Element types after the conversion
print(type(All_df['say_id'][0]))
print(type(All_df['reply_id'][0]))
print(type(All_df['group_id'][0]))

# Show only the first rows instead of dumping the whole frame
All_df.head(10)

<class 'float'>
<class 'numpy.int64'>
<class 'float'> 

<class 'str'>
<class 'str'>
<class 'str'>


Unnamed: 0,say_id,reply_id,group_id,name,body,argument_a,argument_b,epistemic_a,epistemic_b,social_a,social_b,coordination_a,coordination_b
0,1,-1,1,まこぴす,よろしくお願いします！,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,blank,blank
1,31,-1,1,哲,よろしくお願いします,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,blank,blank
2,70,-1,1,仙波,名前なのが恥ずかしいです…\n\nよろしくお願いします！,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,blank,blank
3,119,-1,1,まこぴす,早速課題やっちゃいましょう！,non-argumentative moves,non-argumentative moves,off task,off task,blank,blank,proceedings,proceedings
4,163,-1,1,仙波,やっちゃいましょう\n\nmoodleはゴミです！,simple claim,simple claim,on task,on task,externalization,blank,blank,blank
5,194,-1,1,まこぴす,使いにくいです(笑),simple claim,simple claim,on task,on task,externalization,blank,blank,blank
6,302,-1,1,哲,同意です,simple claim,simple claim,on task,on task,quick consensus building,quick consensus building,blank,blank
7,309,-1,1,仙波,以前インタラクティブアート受講していたのですが、その時に課題が不具合で出せなくなっていた時期...,grounded claim,grounded claim,on task,on task,externalization,externalization,blank,blank
8,385,-1,1,まこぴす,学習環境として必要最低限の機能は備えていると思うが、操作性の面ではPCの最低限の知識があるこ...,grounded and qualified claim,grounded and qualified claim,on task,on task,externalization,externalization,blank,blank
9,426,-1,1,まこぴす,自分の意見です！(笑),non-argumentative moves,simple claim,off task,on task,blank,externalization,blank,blank


In [23]:
# Special handling (needed for merging): one nickname starts with the
# control character \x08; swap it for a plain-text stand-in.
raw_name, escaped_name = '\x08SU', '_x0008_SU'

print(All_df.values[8266])  # affected row before the replacement
All_df = All_df.replace(to_replace=raw_name, value=escaped_name)
print(All_df.values[8266])  # same row after the replacement

['4916' '4915' '1' '\x08SU' '> 英語わかんね＾ｑ＾\nおなじく＾ｑ＾'
 'non-argumentative moves' 'non-argumentative moves' 'off task' 'off task'
 'blank' 'blank' 'blank' 'blank']
['4916' '4915' '1' '_x0008_SU' '> 英語わかんね＾ｑ＾\nおなじく＾ｑ＾'
 'non-argumentative moves' 'non-argumentative moves' 'off task' 'off task'
 'blank' 'blank' 'blank' 'blank']


In [24]:
# Print the label distribution of every label column.
# A plain loop is used because a list comprehension run only for its side
# effects also echoes a useless [None, ...] list as the cell output.
for label_col in col_p:
    print(label_col, ":", Counter(All_df[label_col]), '\n')

argument_a : Counter({'non-argumentative moves': 5804, 'simple claim': 3204, 'grounded claim': 509, 'blank': 299, 'qualified claim': 120, 'grounded and qualified claim': 23}) 

argument_b : Counter({'non-argumentative moves': 5567, 'simple claim': 3501, 'blank': 434, 'grounded claim': 382, 'qualified claim': 61, 'grounded and qualified claim': 14}) 

epistemic_a : Counter({'off task': 5244, 'on task': 4415, 'no sense': 299, 'blank': 1}) 

epistemic_b : Counter({'off task': 5223, 'on task': 4409, 'no sense': 323, 'blank': 4}) 

social_a : Counter({'blank': 5710, 'externalization': 1917, 'quick consensus building': 1251, 'elicitation': 514, 'integration-oriented consensus building': 363, 'conflict-oriented consensus building': 122, 'summary': 82}) 

social_b : Counter({'blank': 5569, 'externalization': 2061, 'quick consensus building': 1250, 'elicitation': 474, 'integration-oriented consensus building': 384, 'conflict-oriented consensus building': 170, 'summary': 51}) 

coordination_a : 

[None, None, None, None, None, None, None, None]

# indexを連番に振り直す

In [25]:
# Copy the frame and renumber its rows 0..n-1 (rows were removed earlier,
# so the old index has gaps)
All_df_r = All_df.copy()
All_df_r.index = pd.RangeIndex(len(All_df_r))

# ファイルに保存する

In [26]:
# Write the merged data to Excel. The context manager saves and closes the
# workbook even if to_excel() raises, replacing the explicit save() call.
# strings_to_urls=False is an xlsxwriter workbook option that keeps URL-like
# text as plain strings.
# NOTE(review): newer pandas versions expect this as
# engine_kwargs={'options': {...}} - adjust when upgrading pandas.
with pd.ExcelWriter(
        "../data/5dimensions.xlsx",
        engine='xlsxwriter',
        options={'strings_to_urls': False}) as excel_writer:
    All_df_r.to_excel(excel_writer, sheet_name="sheet1")

# Also keep a pickle copy of the same frame
All_df_r.to_pickle("../data/5dimensions.pickle")