# Connect to Google Drive

In [None]:
from google.colab import drive 
# Mount Google Drive into the Colab file system at /content/drive.
drive.mount('/content/drive')
# IPython magic: switch the working directory to the dataset folder, so the relative
# file names used for reading and writing below refer to files in that folder.
%cd /content/drive/MyDrive/BachelorThesis/datasets/cookversational_search

Mounted at /content/drive
/content/drive/MyDrive/BachelorThesis/datasets/cookversational_search


# Load Data

In [None]:
import pandas as pd
# Never truncate the column list when a frame is displayed.
pd.options.display.max_columns = None
# Read the raw dataset; the relative file name is resolved against the current working directory.
data = pd.read_csv(filepath_or_buffer="cookversational_search_dataset.csv")
# Preview the first ten rows.
data.head(n=10)

# Prepare Data

Drop columns that are not needed and rename relevant ones.

In [None]:
# Keep only the columns used below and give the English utterance a condition-specific name.
# The rename runs first: DataFrame.rename ignores labels that are not present, so re-running
# this cell on the already-prepared frame is a no-op instead of a KeyError on 'utterance_english'.
relevant_columns = ['id', 'level_1', 'utterance_no_context', 'tp', 'seq']
data = (
    data
    .rename(columns={'utterance_english': 'utterance_no_context'})
    .loc[:, relevant_columns]
    .copy()  # independent frame, so adding columns later is not a write to a slice
)
data.head(10)

## Add Data for Condition "1 Previous Turn"
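Each row of the CookversationalSearch dataset contains only the single utterance that should be classified. Because preceding utterances can help to understand the context, the previous utterance is prepended to each utterance; the first utterance of a test person has no predecessor, so no previous utterance is prepended there.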

In [None]:
# Build the text for the "1 previous turn" condition: every utterance is prefixed with the
# utterance of the preceding row, separated by a single space.
utterances_1_prev_turn = []
prev_utterance = ""
# Only 'seq' and 'utterance_no_context' are needed, so iterate over these two columns
# directly instead of materialising every row with iterrows().
for seq, utterance in zip(data['seq'], data['utterance_no_context']):
  if seq == 1: # first utterance of test person --> no context available yet
    prev_utterance = ""
  # Prepend only when a previous utterance exists; otherwise the text would start with a stray space.
  utt = prev_utterance + " " + utterance if prev_utterance else utterance
  utterances_1_prev_turn.append(utt)
  prev_utterance = utterance # remember the current utterance as the predecessor of the next one

data['utterance_1_prev_turn'] = utterances_1_prev_turn # append the utterances with context as column to the dataframe
data.head(5)

# Derive one frame per condition. Both share the layout id / label / text, so they can be
# used interchangeably; only the content of 'text' differs.

# "no context" condition: the utterance on its own
data_no_context = data[['id', 'level_1', 'utterance_no_context']]
data_no_context = data_no_context.rename(columns={'level_1': 'label', 'utterance_no_context': 'text'})

# "1 previous utterance" condition: the utterance prefixed with its predecessor
data_prev_utterance = data[['id', 'level_1', 'utterance_1_prev_turn']]
data_prev_utterance = data_prev_utterance.rename(columns={'level_1': 'label', 'utterance_1_prev_turn': 'text'})

# Preview both conditions. The second heading used to read "no one previous utterance",
# which mislabelled the frame that does contain the previous utterance.
print("data with no context: ")
print(data_no_context.head(10))
print("\ndata with one previous utterance as context: ")
print(data_prev_utterance.head(10))

Save the dataset for each condition to its own CSV file:

In [None]:
# Write one CSV file per condition into the current working directory.
# to_csv is called with its defaults, so the row index is written as well.
for frame, filename in (
    (data_no_context, "cookversational_search_no_context.csv"),
    (data_prev_utterance, "cookversational_search_1_prev_utterance.csv"),
):
    frame.to_csv(filename)