In [1]:
import pandas as pd
import re
import os
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)


def _memoize(func):
    return _decorate_(func, _call_)

def _get_valid_attributes(object_name, parent_object_names):
    return _call_(_func_, object_name, parent_object_names)

def _memoize(func):
    return _decorate_(func, _call_)

def _get_role(object_name, attribute, value_type, parent_object_names):
    return _call_(_func_, object_name, attribute, value_type, parent_object_names)



## Create a class that reads, stores and plots marker information

In [29]:
class MarkerFile():

    def __init__(self, file):

        self.subject = re.search('\d+', file).group()
        
        df = pd.read_csv(file, skiprows=11, header=None)
        df.columns = ['Marker_number_type', 'Description', 'Position', 'Size', 'Channel', 'X']

        df['Marker_ID'] = df.Description.str.extract('([0-9]+)')
        df['Marker_phase'] = df.Marker_ID

        df.Marker_phase = df.Marker_phase.replace('1', 'choice')
        df.Marker_phase = df.Marker_phase.replace('3', 'correct')
        df.Marker_phase = df.Marker_phase.replace('8', 'reward')
        df.Marker_phase = df.Marker_phase.replace('64', 'confidence_rating')
        df.Marker_phase = df.Marker_phase.replace('5', 'incorrect')
        df.Marker_phase = df.Marker_phase.replace('16', 'loss')

        counts = df.Marker_phase.value_counts()

        counts = pd.DataFrame(counts).T.reset_index()
        counts['Subject'] = self.subject
        counts = counts.iloc[:, 1:]

        self.marker_df = df
        self.marker_counts = counts
        
        self.outcomes = self.marker_df.Marker_phase[(self.marker_df.Marker_phase == 'reward') | 
                                                    (self.marker_df.Marker_phase == 'loss')]
        self.outcomes = self.outcomes.replace('loss', 0)
        self.outcomes = self.outcomes.replace('reward', 1)
        self.outcomes = self.outcomes.values

    def plot_markers(self):

        traces = []
        
        unique_ids = self.marker_df.Marker_ID.unique()
        
        for i in unique_ids[1:]:
            if i != 'nan':
                traces.append(go.Scatter(
                            x=self.marker_df.Position[self.marker_df.Marker_ID == i],
                            y=self.marker_df.Marker_ID[self.marker_df.Marker_ID == i],
                            mode='markers',
                            name=self.marker_df.Marker_phase[self.marker_df.Marker_ID == i].values[0],
                            text=range(0, len(self.marker_df.Marker_ID == i))
                        ))
                

        iplot(traces, filename='marker_plot_{0}'.format(self.subject))
        

## Load the data

In [30]:
data_dir = 'C:\Users\Toby\Downloads\learning_marker_files'

marker_dict = {}

for f in os.listdir(data_dir):

    marker_dict[re.search('\d+', f).group()] = MarkerFile(os.path.join(data_dir, f))



currently extract(expand=None) means expand=False (return Index/Series/DataFrame) but in a future version of pandas this will be changed to expand=True (return DataFrame)



### Plot 

In [13]:
marker_dict['035'].plot_markers()

## Load behavioural data

In [35]:
import numpy as np

data_dir = 'C:\Users\Toby\Google Drive\PhD\Things\Laura\learning_task\data_jan18'

data_files = [f for f in os.listdir(data_dir) if re.match('learning_data.+\d{4}\.csv', f)]

behavioural = {}

for f in data_files:
    subject = re.search('\d{2}(?=_)', f).group()
    subject = '0' + str(subject)
    print subject
    data = pd.read_csv(os.path.join(data_dir, f))
    if len(data) == 200:
        data['Response_binary'] = (data.Response == 'a').astype(int)  # convert keypresses to zeros and ones
        data.Response_binary[data.Response == 'Invalid'] = np.nan
        behavioural[subject] = data[['id', 'Response_binary', 'A_reward', 'Confidence', 'trial_number', 'Rewarded']]  
    else:
        print "Subject did not complete all trials, skipping"

011
002
003
004
005
007
008
009
010
011
Subject did not complete all trials, skipping
012
014
015
016
018
019
020
021
022
023
024
027
029
030
033
034
035
036
026




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [53]:
print marker_dict['035'].marker_counts

   choice  correct  reward  loss  incorrect  confidence_rating Subject
0     202      129     126    73         72                 37     035
