### Load libraries

In [1]:
import numpy as np
import pandas as pd

### Parse html into csv for easier manual work

In [2]:
SUFFIX = (
    " rated on a five point scale (1-5; -1 if missed). "
    '1 was labeled as "strongly disagree", '
    '2 was labeled as "disagree", '
    '3 was labeled as "neither agree nor disagree", '
    '4 was labeled as "agree" '
    'and 5 was labeled as "strongly agree".'
)

In [3]:
df = pd.read_html("codebook.html")[0]
df = df[df.Field.str.startswith("Q")]

assert df.Description.str.contains(SUFFIX, regex=False).all()
df["question_raw"] = df.Description.str.split(SUFFIX, regex=False, expand=True)[0]
assert (df.question_raw.str.get(0) == '"').all()
assert (df.question_raw.str.get(-2) == ".").all()
assert (df.question_raw.str.get(-1) == '"').all()
df.question_raw = df.question_raw.str.slice(1, -2)

df = df.drop(columns=["Description", "Format"])
df = df.rename(columns={"Field": "Q_id"})  # type: ignore

assert len(df) == 36
df.tail()

Unnamed: 0,Q_id,question_raw
31,Q32,I get frustrated if romantic partners are not ...
32,Q33,It helps to turn to my romantic partner in tim...
33,Q34,"When romantic partners disapprove of me, I fee..."
34,Q35,"I turn to my partner for many things, includin..."
35,Q36,I resent it when my partner spends time away f...


In [4]:
# First person questions
df["question_1p"] = "Do " + df.question_raw + "?"
print(df.question_1p.iloc[0])
print(df.question_1p.iloc[-1])
df.tail()

Do I prefer not to show a partner how I feel deep down?
Do I resent it when my partner spends time away from me?


Unnamed: 0,Q_id,question_raw,question_1p
31,Q32,I get frustrated if romantic partners are not ...,Do I get frustrated if romantic partners are n...
32,Q33,It helps to turn to my romantic partner in tim...,Do It helps to turn to my romantic partner in ...
33,Q34,"When romantic partners disapprove of me, I fee...","Do When romantic partners disapprove of me, I ..."
34,Q35,"I turn to my partner for many things, includin...","Do I turn to my partner for many things, inclu..."
35,Q36,I resent it when my partner spends time away f...,Do I resent it when my partner spends time awa...


In [5]:
# Second person questions
df["question_2p"] = (
    df.question_1p.str.replace(pat=" I | me ", repl=" you ", regex=True)
    .str.replace(pat=" me,", repl=" you.", regex=False)
    .str.replace(pat=" me.", repl=" you.", regex=False)
    .str.replace(pat=" me?", repl=" you?", regex=False)
    .str.replace(pat=" my ", repl=" your ", regex=False)
    .str.replace(pat=" my,", repl=" your,", regex=False)
    .str.replace(pat=" my.", repl=" your.", regex=False)
    .str.replace(pat=" my?", repl=" your?", regex=False)
)
print(df.question_2p.iloc[0])
print(df.question_2p.iloc[-1])
df.tail()

Do you prefer not to show a partner how you feel deep down?
Do you resent it when your partner spends time away from you?


Unnamed: 0,Q_id,question_raw,question_1p,question_2p
31,Q32,I get frustrated if romantic partners are not ...,Do I get frustrated if romantic partners are n...,Do you get frustrated if romantic partners are...
32,Q33,It helps to turn to my romantic partner in tim...,Do It helps to turn to my romantic partner in ...,Do It helps to turn to your romantic partner i...
33,Q34,"When romantic partners disapprove of me, I fee...","Do When romantic partners disapprove of me, I ...",Do When romantic partners disapprove of you. y...
34,Q35,"I turn to my partner for many things, includin...","Do I turn to my partner for many things, inclu...","Do you turn to your partner for many things, i..."
35,Q36,I resent it when my partner spends time away f...,Do I resent it when my partner spends time awa...,Do you resent it when your partner spends time...


In [6]:
df_0p = df[["Q_id", "question_raw"]].rename(columns={"question_raw": "prompt"})
df_0p.prompt = df_0p.prompt + ":"
df_0p["pov"] = 1
df_0p["editted"] = 0

df_1p = df[["Q_id", "question_1p"]].rename(columns={"question_1p": "prompt"})
df_1p["pov"] = 1
df_1p["editted"] = 1

df_2p = df[["Q_id", "question_2p"]].rename(columns={"question_2p": "prompt"})
df_2p["pov"] = 2
df_2p["editted"] = 1

df_intermediate = pd.concat((df_0p, df_1p, df_2p)).reset_index(drop=True)
df_intermediate = df_intermediate[["Q_id", "pov", "editted", "prompt"]]
df_intermediate

Unnamed: 0,Q_id,pov,editted,prompt
0,Q1,1,0,I prefer not to show a partner how I feel deep...
1,Q2,1,0,I worry about being abandoned:
2,Q3,1,0,I am very comfortable being close to romantic ...
3,Q4,1,0,I worry a lot about my relationships:
4,Q5,1,0,Just when my partner starts to get close to me...
...,...,...,...,...
103,Q32,2,1,Do you get frustrated if romantic partners are...
104,Q33,2,1,Do It helps to turn to your romantic partner i...
105,Q34,2,1,Do When romantic partners disapprove of you. y...
106,Q35,2,1,"Do you turn to your partner for many things, i..."


In [7]:
df_intermediate.to_csv("intermediate.csv")

### Load manually adjusted data

In [8]:
df2 = pd.read_csv("intermediate_mod.csv").drop(columns="Unnamed: 0")

df2.answer_index = np.concatenate((df2.answer_index[:36], df2.answer_index[:36], df2.answer_index[:36]))
df2.answer_index = df2.answer_index.astype(int)

df2.head()

Unnamed: 0,Q_id,pov,editted,answer_index,prompt
0,Q1,1,0,0,I prefer not to show a partner how I feel deep...
1,Q2,1,0,0,I worry about being abandoned:
2,Q3,1,0,1,I am very comfortable being close to romantic ...
3,Q4,1,0,0,I worry a lot about my relationships:
4,Q5,1,0,0,Just when my partner starts to get close to me...


In [9]:
df_final = df2.copy()
df_final["classes"] = "[' No', ' Yes']"
df_final["source_dataset"] = "ECR"
df_final["binarized"] = True
df_final["no_prompt"] = True
df_final["front"] = False
df_final["body"] = False

df_final

Unnamed: 0,Q_id,pov,editted,answer_index,prompt,classes,source_dataset,binarized,no_prompt,front,body
0,Q1,1,0,0,I prefer not to show a partner how I feel deep...,"[' No', ' Yes']",ECR,True,True,False,False
1,Q2,1,0,0,I worry about being abandoned:,"[' No', ' Yes']",ECR,True,True,False,False
2,Q3,1,0,1,I am very comfortable being close to romantic ...,"[' No', ' Yes']",ECR,True,True,False,False
3,Q4,1,0,0,I worry a lot about my relationships:,"[' No', ' Yes']",ECR,True,True,False,False
4,Q5,1,0,0,Just when my partner starts to get close to me...,"[' No', ' Yes']",ECR,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...
103,Q32,2,1,0,Do you get frustrated if romantic partners are...,"[' No', ' Yes']",ECR,True,True,False,False
104,Q33,2,1,1,Does it helps to turn to your romantic partner...,"[' No', ' Yes']",ECR,True,True,False,False
105,Q34,2,1,0,"When romantic partners disapprove of you, do y...","[' No', ' Yes']",ECR,True,True,False,False
106,Q35,2,1,1,"Do you turn to your partner for many things, i...","[' No', ' Yes']",ECR,True,True,False,False


In [10]:
df_final.to_csv("final.csv")