In [21]:
# imports
import pandas as pd

In [22]:
# make df, loading 2deep_with_all_columns.csv from the data/ directory
df = pd.read_csv('data/2deep_with_all_columns.csv')

In [23]:
# check the unique values in the EmotionRegulation1 column
df['EmotionRegulation1'].unique()

array(['REST', 'DEPRECIATION', 'AVOIDANCE', 'STABILIZE_SELF',
       'ATTACK_OTHER', 'WITHDRAWAL', 'ATTACK_SELF'], dtype=object)

In [24]:
# Columns whose content is folded into the 'User' text column and that are
# dropped from the frame once that column has been built.
columns_to_merge = [
    # nonverbal behavior
    'Eyes', 'Gaze', 'Head', 'HeadTilt', 'Smile', 'Speech', 'UpperBody',
    'SmileControl', 'EkmanExpression',
    # introspection (post-interaction interview)
    'ExperiencedEmotion1', 'ExperiencedEmotion2', 'InternalEmotion',
    'ShameAwarenessSituation', 'ShameAwarenessInterview', 'DisplayRule',
    'RelationshipIntention',
    # personal information
    'MindednessMean', 'Gender',
    # situation description and dialogue text
    'Situation', 'transcript', 'conversation_history',
]

# Column groups that format_row folds into the prompt text. They are defined
# once here so they are not rebuilt for every row passed through df.apply.
_NONVERBAL_BEHAVIOR_COLS = ('Eyes', 'Gaze', 'Head', 'HeadTilt', 'Smile', 'Speech',
                            'UpperBody', 'SmileControl', 'EkmanExpression')
_INTROSPECTION_COLS = ('ExperiencedEmotion1', 'ExperiencedEmotion2', 'InternalEmotion',
                       'ShameAwarenessSituation', 'ShameAwarenessInterview',
                       'DisplayRule', 'RelationshipIntention')
_PERSONAL_INFO_COLS = ('MindednessMean', 'Gender')


def _join_values(row, cols):
    """Return str(row[col]) for every col in cols, separated by single spaces."""
    return ' '.join(str(row[col]) for col in cols)


def format_row(row):
    """Build the prompt text for a single row.

    Parameters
    ----------
    row : mapping indexable by column name (e.g. a pandas Series from
        ``df.apply(..., axis=1)``). Must provide 'Situation',
        'conversation_history', 'transcript' and every column listed in the
        three column groups above.

    Returns
    -------
    str
        The situation description, the conversation history, the current
        utterance, and the space-joined nonverbal, introspection and personal
        information values, each section on its own line(s) and each
        terminated by a newline.

    Raises
    ------
    KeyError
        If a required column is missing from ``row``.
    """
    # Every section must end with a line break. The history is the only
    # section that was emitted without one, so a history that does not already
    # end in "\n" used to run straight into the next header. Add the break only
    # when it is missing, so histories that already end in "\n" (and empty
    # histories) produce exactly the same text as before.
    history = str(row['conversation_history'])
    if history and not history.endswith('\n'):
        history += '\n'

    # NOTE(review): the last header says "interviewer"; the columns it
    # introduces (MindednessMean, Gender) may describe the interviewee.
    # Confirm before changing the prompt wording.
    return (
        f"{row['Situation']}\n"
        "The conversation history up to the current point is:\n"
        f"{history}"
        "The current utterance is:\n"
        f"{row['transcript']}\n"
        "The interviewee shows the following nonverbal behavior at the current moment:\n"
        f"{_join_values(row, _NONVERBAL_BEHAVIOR_COLS)}\n"
        "The following information was gathered from the qualitative interview after the interaction:\n"
        f"{_join_values(row, _INTROSPECTION_COLS)}\n"
        "The following additional personal information was collected from the interviewer:\n"
        f"{_join_values(row, _PERSONAL_INFO_COLS)}\n"
    )

# Build the prompt text row by row and store it in a new 'User' column.
user_prompts = df.apply(format_row, axis=1)
df['User'] = user_prompts

# The source columns are now folded into 'User', so remove them from the frame.
df.drop(columns=columns_to_merge, inplace=True)

In [25]:
# Preview the first rows of the frame after the merge into the 'User' column
df.head()

Unnamed: 0,EmotionRegulation1,session,context,User
0,REST,VPN01,The prompt you will receive is from a job inte...,We are concerned with a moment in time in the ...
1,REST,VPN01,The prompt you will receive is from a job inte...,We are concerned with a moment in time in the ...
2,REST,VPN01,The prompt you will receive is from a job inte...,We are concerned with a moment in time in the ...
3,REST,VPN01,The prompt you will receive is from a job inte...,We are concerned with a moment in time in the ...
4,REST,VPN01,The prompt you will receive is from a job inte...,We are concerned with a moment in time in the ...


In [26]:
# Print the value of the User column for the row with index label 200
print(df['User'][200])

We are concerned with a moment in time in the first shame induction situation. The agent tries to induce shame by attacking the interviewee’s personal attractiveness: “Before we start, one short question: Where did you get this outfit? Somehow it doesn’t really suit you.”
The conversation history up to the current point is:
[Interviewee] Uh, that's..
[Avatar] Somehow that doesn't fit you at all.
[Interviewee] Okay.
[Interviewee] I'm sorry.
The current utterance is:
[The interviewee and interviewer are silent.]
The interviewee shows the following nonverbal behavior at the current moment:
The interviewee blinks more than once in a row. The interviewee looks to the right. The interviewee holds their head straight.   The interviewee shows a Duchenne smile, i.e. a smile that reaches the eyes. The interviewee laughs. The interviewee's upper body stays straight.   The interviewee shows the Ekman expression joy.
The following information was gathered from the qualitative interview after the in

In [27]:
# check the unique values in the EmotionRegulation1 column
print(df['EmotionRegulation1'].unique())

['REST' 'DEPRECIATION' 'AVOIDANCE' 'STABILIZE_SELF' 'ATTACK_OTHER'
 'WITHDRAWAL' 'ATTACK_SELF']


In [28]:
# (disabled) wrap the content of the User and EmotionRegulation1 columns in double quotes
# df['User'] = '"' + df['User'] + '"'
# df['EmotionRegulation1'] = '"' + df['EmotionRegulation1'] + '"'


In [29]:
# Leave-one-session-out split: for each distinct session value, the test frame
# holds the rows of that session and the train frame holds all other rows.
sessions = df['session'].unique()

# Both dictionaries are keyed by the held-out session value.
train_dfs = {held_out: df[df['session'] != held_out] for held_out in sessions}
test_dfs = {held_out: df[df['session'] == held_out] for held_out in sessions}

In [30]:
# Write one train CSV and one test CSV per held-out session into data/.
# The loop variables are deliberately not named `df`: reusing that name would
# rebind the notebook-level `df` to the last subset written, so re-running any
# earlier cell would then operate on a single session's rows only.
for session, train_df in train_dfs.items():
    # File is named after the session that was held out of this training set
    train_df.to_csv(f'data/train_{session}.csv', index=False)

for session, test_df in test_dfs.items():
    # File is named after the session whose rows make up this test set
    test_df.to_csv(f'data/test_{session}.csv', index=False)