In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats as ss

In [2]:
# Widen the display limits: show up to 300 columns and never truncate rows.
for option_name, option_value in (('display.max_columns', 300),
                                  ('display.max_rows', None)):
    pd.set_option(option_name, option_value)

In [3]:
# Show floats with three fixed decimals instead of scientific notation.
pd.set_option('display.float_format', lambda value: '%.3f' % value)

In [4]:
# failed sound check 1957441

# Fish

In [7]:
# Load the raw task export (comma-separated) into `data`.
data = pd.read_csv('data_exp_26806-v3_task-3ggj.csv', delimiter=",")


In [8]:
# Distinct task labels present in the export.
data["task"].unique()

array([nan, 'fishing_pos', 'mood_rating_control'], dtype=object)

In [9]:
# Treat the literal strings 'null' and 'nan' as missing values.
data = data.replace(['null', 'nan'], np.nan)


In [10]:
# Cast start_time to float and round it to the nearest whole number.
data['start_time'] = data['start_time'].astype(float).round()

In [11]:
# Keep only the columns used by the rest of the notebook.
data = data.loc[:, [
    'Participant Private ID',
    'UTC Timestamp',
    'key pressed',
    'price_displayed',
    'environment',
    'task',
    'function id',
    'builtin rt',
    'end_time',
    'start_time',
    'response',
    'round_instance',
]]


In [12]:
# Use the shorter name "Subject" for the participant identifier column.
data = data.rename(columns={"Participant Private ID": "Subject"})


In [13]:
# Order rows by participant, then by start_time.
data = data.sort_values(['Subject', 'start_time'])

In [14]:
# Build "<environment> <round_instance>" labels (e.g. "pre_mood 1.0").
# Column-wise string concatenation replaces the row-by-row apply/join, which
# ran a Python-level function once per row to produce the same strings.
data['trial_label'] = (
    data['environment'].astype(str) + ' ' + data['round_instance'].astype(str)
)


In [15]:
# exclude
# data = data[data.Subject != 1972447]

In [16]:
# 0 for 'pre_mood' rows, 1 for every other row (including missing environments).
data["environment_binary"] = (data["environment"] != 'pre_mood').astype(int)

In [17]:
# Derive a mood label from the mood-rating task name; any task not listed
# below maps to NaN.
# NOTE(review): 'mood_rating_control' is not mapped here although it occurs in
# this export -- confirm whether it should map to "control".
data["mood"] = data["task"].map(dict(mood_rating_pos="pos", mood_rating_neg="neg"))

In [18]:
# Re-order rows by participant and wall-clock (UTC) timestamp.
data = data.sort_values(["Subject", "UTC Timestamp"])

In [19]:
# Replace the index with a fresh 0..n-1 range; the previous index is kept as
# an "index" column.
data = data.reset_index()

In [20]:
# Number of distinct participants.
data["Subject"].nunique()

27

# Latency

In [21]:
# Keep only the fishing-task rows (positive or negative variant).
fish = data[data["task"].isin(["fishing_pos", "fishing_neg"])]


In [22]:
# Discard fishing rows that have no start_time.
fish = fish.dropna(subset=['start_time'])

In [23]:
# start_time holds millisecond epoch values (~1.6e12), which do not fit in
# 32 bits. Plain `int` is platform-dependent (32-bit on Windows with
# NumPy < 2), so request int64 explicitly. `assign` returns a new frame rather
# than writing into a filtered slice of `data`.
fish = fish.assign(start_time=fish['start_time'].astype('int64'))

In [24]:
# Distinct environment labels among the fishing rows.
fish["environment"].unique()

array(['pre_mood', 'post_mood'], dtype=object)

In [25]:
# Latency of a row = start_time of the NEXT row in the same
# (Subject, environment_binary, round_instance) group minus its own start_time.
# The shift is done per group, so the last row of each group gets NaN and
# values can never leak across groups, even if groups are not contiguous.
# The result keeps fish's index, so `d['latency']` aligns back onto `fish`.
next_start_time = fish.groupby(
    ['Subject', 'environment_binary', 'round_instance']
)['start_time'].shift(-1)
d = (next_start_time - fish['start_time']).to_frame('latency')


In [26]:
# Attach the latency column (aligned on the index), then order rows by
# participant and start_time.
fish = (
    fish
    .assign(latency=d['latency'])
    .sort_index()
    .sort_values(['Subject', 'start_time'])
)

In [27]:
def mask_first(x):
    """
    Build a 0, 1, 1, 1, ... mask used to get rid of the first spike.

    Parameters
    ----------
    x : array-like
        Values of one group; only its length and dtype are used.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``x`` holding 0 in the first position and 1
        everywhere else. An empty input yields an empty array.
    """
    result = np.ones_like(x)
    # Slice assignment (instead of result[0]) is a no-op on empty input,
    # where indexing position 0 would raise IndexError.
    result[:1] = 0
    return result

In [28]:
# Boolean mask that is False for the first row of every Subject/environment
# group and True for all remaining rows.
first_row_flags = fish.groupby(['Subject', 'environment'])['round_instance'].transform(mask_first)
mask = first_row_flags.astype(bool)


In [29]:
# Keep only the rows selected by the boolean mask.
fish = fish[mask]

In [30]:
# Label each fishing row with its mood condition, derived from the task name.
# (The stray `fish.head()` was dropped: it was not the last expression of the
# cell, so its result was discarded.) `assign` returns a new frame instead of
# writing into a filtered slice.
fish = fish.assign(
    mood=fish["task"].map({
        'fishing_pos': "pos",
        'fishing_neg': 'neg',
        "mood_rating_control": "control",
    })
)


In [31]:
# Number of distinct participants per mood condition.
fish.groupby(by="mood")["Subject"].nunique()

mood
pos    27
Name: Subject, dtype: int64

In [32]:
# Distinct mood labels among the fishing rows.
pd.unique(fish['mood'])

array(['pos'], dtype=object)

In [33]:
# Remove rows with no latency value.
fish = fish.dropna(subset=['latency'])

In [34]:
# (rows, columns) of the fishing data before the MAD-based outlier filter
fish.shape

(34548, 17)

In [35]:
# Outlier removal: keep latencies within 3 scaled MADs of the median
# (bounds inclusive).
# The MAD is computed with NumPy because scipy.stats.median_absolute_deviation
# was deprecated in SciPy 1.5 and later removed. The 1.4826 factor is the
# default scale that function applied, so the bounds are unchanged.
latency_values = fish["latency"].to_numpy()
median = np.median(latency_values)
mad = 1.4826 * np.median(np.abs(latency_values - median))
low = median - 3 * mad
above = median + 3 * mad
fish = fish[(fish["latency"] >= low) & (fish["latency"] <= above)]

In [36]:
# (rows, columns) of the fishing data after the MAD-based outlier filter
fish.shape

(32903, 17)

# Rating

In [37]:
# Mood-rating rows (positive, negative or control); these carry the response.
rating = data[data["task"].isin(["mood_rating_pos", "mood_rating_neg", "mood_rating_control"])]


In [38]:
# Discard rating rows without a recorded response.
rating = rating.dropna(subset=['response'])

In [39]:
# (rows, columns) of the rating rows that have a response
rating.shape

(159, 16)

# Concat 

In [40]:
# Rating rows have no latency; add the column as NaN so both frames share the
# same columns when concatenated. (`np.nan*len(rating)` is just a scalar NaN,
# so it is written as such.) `assign` returns a new frame instead of writing
# into a filtered slice of `data`.
rating = rating.assign(latency=np.nan)

In [41]:
# Stack the rating rows on top of the fishing rows.
data_complete = pd.concat(objs=[rating, fish], axis=0)


In [42]:
# Order the combined rows by the "index" column.
data_complete = data_complete.sort_values("index")

# Backfilling

In [43]:
# Back-fill the response so every row carries the next available rating.
# Assign the result back explicitly: calling bfill(inplace=True) on a selected
# column is chained assignment and does not update the frame under pandas
# Copy-on-Write.
# NOTE(review): the fill runs over the whole frame, not per Subject, so
# trailing rows of one participant could pick up the next participant's
# rating -- confirm this is intended.
data_complete["response"] = data_complete["response"].bfill()


In [48]:
# Rounds 1-12 fall into three consecutive blocks of four rounds each
# (1-4 -> 1, 5-8 -> 2, 9-12 -> 3); any other value maps to NaN.
round_to_block = {
    round_number: (round_number - 1) // 4 + 1
    for round_number in range(1, 13)
}
data_complete["block"] = data_complete["round_instance"].map(round_to_block)


In [49]:
# Preview a handful of rows; with display.max_rows unset, head(800) would
# render all 800 rows into the notebook output.
data_complete.head(10)

Unnamed: 0,index,Subject,UTC Timestamp,key pressed,price_displayed,environment,task,function id,builtin rt,end_time,start_time,response,round_instance,trial_label,environment_binary,mood,latency,block
4317,5,2016669.0,1601310230418.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,45.875,1601310227565.0,1601310227519.0,84.0,1.0,pre_mood 1.0,0,pos,148.0,1
4318,6,2016669.0,1601310230559.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,32.125,1601310227699.0,1601310227667.0,84.0,1.0,pre_mood 1.0,0,pos,137.0,1
4319,7,2016669.0,1601310230668.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,17.14,1601310227822.0,1601310227804.0,84.0,1.0,pre_mood 1.0,0,pos,120.0,1
4320,8,2016669.0,1601310230808.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,34.485,1601310227959.0,1601310227924.0,84.0,1.0,pre_mood 1.0,0,pos,139.0,1
4321,9,2016669.0,1601310230949.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,36.3,1601310228100.0,1601310228063.0,84.0,1.0,pre_mood 1.0,0,pos,139.0,1
4322,10,2016669.0,1601310231090.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,35.305,1601310228237.0,1601310228202.0,84.0,1.0,pre_mood 1.0,0,pos,140.0,1
4323,11,2016669.0,1601310231214.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,2.705,1601310228345.0,1601310228342.0,84.0,1.0,pre_mood 1.0,0,pos,106.0,1
4324,12,2016669.0,1601310231340.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,35.345,1601310228484.0,1601310228448.0,84.0,1.0,pre_mood 1.0,0,pos,139.0,1
4325,13,2016669.0,1601310231464.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,31.035,1601310228618.0,1601310228587.0,84.0,1.0,pre_mood 1.0,0,pos,133.0,1
4326,14,2016669.0,1601310231607.0,40.0,1.0,pre_mood,fishing_pos,fishing_pos,26.5,1601310228747.0,1601310228720.0,84.0,1.0,pre_mood 1.0,0,pos,132.0,1


In [50]:
# Write the combined table (including its index) to CSV.
data_complete.to_csv(path_or_buf="data_3_no_wof_latency_backfilled_happiness.csv")
