In [5]:
import ast

import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import iplot


# Plotting functions {-}

In [6]:
# basic dot plot functions 

def delta(x, y):
    """Return 1 when x and y match and neither is the blank marker 'nan', else 0."""
    if not (x == y):
        return 0
    if x == 'nan' or y == 'nan':
        # the string 'nan' stands for a blank entry and never counts as a match
        return 0
    return 1

def M(seq1, seq2, i, j, k):
    """Count position-wise matches between two length-k windows.

    The windows are seq1[i:i+k] and seq2[j:j+k]; each aligned pair is scored
    with delta(), and the scores are summed.
    """
    window1 = seq1[i:i + k]
    window2 = seq2[j:j + k]
    matches = 0
    for left, right in zip(window1, window2):
        matches += delta(left, right)
    return matches

def makeMatrix(seq1, seq2, k):
    """Build the dot-plot matrix for two sequences using windows of length k.

    Entry [i][j] is M(seq1, seq2, i, j, k). Rows run over the window start
    positions in seq1 and columns over those in seq2, so the result has
    len(seq1)-k+1 rows and len(seq2)-k+1 columns (as nested lists).
    """
    row_count = len(seq1) - k + 1
    col_count = len(seq2) - k + 1
    matrix = []
    for i in range(row_count):
        row = []
        for j in range(col_count):
            row.append(M(seq1, seq2, i, j, k))
        matrix.append(row)
    return matrix

In [7]:
def _index_positions(items):
    """Map each distinct item to the ascending list of indices where it occurs."""
    positions = {}
    for idx, item in enumerate(items):
        positions.setdefault(item, []).append(idx)
    return positions


def _repeat_marker_coords(items_x, items_y):
    """Return parallel (x, y) coordinate lists for the 'repeated item' markers.

    An item counts as repeated when it occurs more than once within one list.
    Every repeated occurrence is paired with each position of the same item in
    the other list; when the other list lacks the item, the marker is placed
    on the margin instead (row len(items_y) for repeats in items_x, column -1
    for repeats in items_y).
    """
    positions_x = _index_positions(items_x)
    positions_y = _index_positions(items_y)
    x_coords, y_coords = [], []

    for idx, item in enumerate(items_x):
        if len(positions_x[item]) > 1:
            partners = positions_y.get(item) or [len(items_y)]
            x_coords.extend([idx] * len(partners))
            y_coords.extend(partners)

    for idx, item in enumerate(items_y):
        if len(positions_y[item]) > 1:
            partners = positions_x.get(item) or [-1]
            x_coords.extend(partners)
            y_coords.extend([idx] * len(partners))

    return x_coords, y_coords


def indiv_dotplot_new_plotly(subjID,
                             seqx, diag_x, listnum_x,
                             seqy, diag_y, listnum_y,
                             valid_items_list, filename_str):
    """Display an interactive plotly dot plot comparing two item lists.

    The heatmap has one column per item of ``seqx`` (L1, x-axis) and one row
    per item of ``seqy`` (L2, y-axis, drawn top to bottom); a cell is shaded
    where the two items are equal. Extra markers are drawn on the margins
    (row ``len(L2)`` for L1 items, column ``-1`` for L2 items):

    * orange squares: items present in one list but not the other,
    * green triangles: items repeated within a list,
    * red crosses: items not found in ``valid_items_list``; their tick labels
      also get a trailing ``*``.

    Parameters
    ----------
    subjID : shown in the figure title.
    seqx, seqy : sequences of hashable items (strings in the examples).
        Entries equal to the string ``'nan'`` are treated as blanks and dropped.
        The input sequences themselves are not modified.
    diag_x, diag_y : group / diagnosis text used in the axis titles.
    listnum_x, listnum_y : list number / date text used in the axis titles.
    valid_items_list : iterable of allowed words. The comparison is
        case-insensitive on both sides, so a capitalised response that is in
        the list is not reported as an intrusion.
    filename_str : currently unused; kept so existing callers keep working
        (see the commented-out export hints at the end of the function).

    Returns
    -------
    None. The figure is rendered with ``fig.show()``.
    """
    noblanks_x = [item for item in seqx if item != 'nan']
    noblanks_y = [item for item in seqy if item != 'nan']
    margin_row = len(noblanks_y)  # one row past the last L2 item
    margin_col = -1               # one column before the first L1 item

    positions_x = _index_positions(noblanks_x)
    positions_y = _index_positions(noblanks_y)

    fig = go.Figure()

    def add_markers(xs, ys, name, color, symbol, line_width=None, **trace_kwargs):
        # All marker traces share the same outline colour; only the
        # 'repeated item' trace sets an explicit outline width.
        outline = dict(color='DarkSlateGrey')
        if line_width is not None:
            outline['width'] = line_width
        fig.add_trace(
            go.Scatter(
                x=xs,
                y=ys,
                name=name,
                mode='markers',
                marker=dict(color=color, symbol=symbol, line=outline),
                **trace_kwargs))

    # dotplot: rows follow L2 (y), columns follow L1 (x)
    fig.add_trace(go.Heatmap(
        z=np.array(makeMatrix(noblanks_y, noblanks_x, 1)),
        name="dotplot",
        colorscale='Blues',
        showscale=False))

    # items missing from the other list, plotted on the margins
    x_axis_points = [i for i, item in enumerate(noblanks_x)
                     if item not in positions_y]
    add_markers(x_axis_points, [margin_row] * len(x_axis_points),
                'in L1, not L2', 'Orange', 'square')

    y_axis_points = [i for i, item in enumerate(noblanks_y)
                     if item not in positions_x]
    add_markers([margin_col] * len(y_axis_points), y_axis_points,
                'in L2, not L1', 'Orange', 'square')

    # marking repeats
    x_coords_reps, y_coords_reps = _repeat_marker_coords(noblanks_x, noblanks_y)
    add_markers(x_coords_reps, y_coords_reps,
                'repeated item', 'lightgreen', 'triangle-right', line_width=.5)

    # showing invalid entries (intrusions); both sides are lower-cased so the
    # check does not depend on capitalisation
    valid_lowercase = {str(item).lower() for item in valid_items_list}

    def is_intrusion(item):
        return str(item).lower() not in valid_lowercase

    x_tick_invalids = [i for i, item in enumerate(noblanks_x) if is_intrusion(item)]
    y_tick_invalids = [i for i, item in enumerate(noblanks_y) if is_intrusion(item)]

    # tick labels are copies so starring them never alters the item lists
    x_labels = list(noblanks_x)
    y_labels = list(noblanks_y)
    for idx in x_tick_invalids:
        x_labels[idx] = x_labels[idx] + '*'
    for idx in y_tick_invalids:
        y_labels[idx] = y_labels[idx] + '*'

    # The two 'invalid item' traces share a legend group and only one of them
    # contributes a legend entry, so the legend has no duplicate rows.
    if x_tick_invalids:
        add_markers(x_tick_invalids, [margin_row] * len(x_tick_invalids),
                    'invalid item', 'Red', 'x',
                    legendgroup='invalid item')
    if y_tick_invalids:
        add_markers([margin_col] * len(y_tick_invalids), y_tick_invalids,
                    'invalid item', 'Red', 'x',
                    legendgroup='invalid item',
                    showlegend=not x_tick_invalids)

    # figure layout
    axis_title_font = dict(family="Helvetica", size=18, color="#7f7f7f")
    fig.update_layout(
        title=go.layout.Title(
            text='SubjID:  {0}'.format(subjID),
            xref="paper",
            x=0),
        width=500,
        height=500,
        xaxis=go.layout.XAxis(
            title=go.layout.xaxis.Title(
                text='L1: {0}, List {1}'.format(diag_x, listnum_x),
                font=dict(axis_title_font))),
        yaxis=go.layout.YAxis(
            title=go.layout.yaxis.Title(
                text='L2: {0}, List {1}'.format(diag_y, listnum_y),
                font=dict(axis_title_font))),
        legend=dict(x=1, y=1))

    fig.update_xaxes(showline=True, linewidth=1, linecolor='grey',
                     tickangle=270,
                     ticktext=x_labels,
                     tickvals=list(range(len(x_labels))),
                     ticks="inside",
                     showgrid=True,
                     gridcolor='LightGrey')
    fig.update_yaxes(showline=True, linewidth=1, linecolor='grey',
                     ticktext=y_labels,
                     tickvals=list(range(len(y_labels))),
                     showgrid=True,
                     gridcolor='LightGrey',
                     autorange="reversed")  # first L2 item at the top

    #plotly.offline.plot(fig, filename=filename_str)  #uncomment this line to open plot in new window
    #fig.write_html(filename_str)  #uncomment this line to save the plot as an HTML file

    fig.show()


# Import data {-}

## fluency data {-}

The sample data provided here were downloaded from https://osf.io/j6qea/ (Zemla & Austerweil, 2019) and then cleaned to remove typos.

In [21]:
# The 'item' column holds each word list as text that has to be parsed back
# into a Python list. ast.literal_eval only accepts literals, so unlike eval
# it cannot execute code embedded in the CSV file.
data = pd.read_csv("ucsd_fluency_cleaned.csv",
    converters={'item': ast.literal_eval})
data.head()

Unnamed: 0,id,listnum,group,SEX,EDUC,DRS,MMSE,category,rank,item
0,5,4,NC,2,16,144,30,animals,1.0,"[armadillo, horse, cow, goat, pig, rabbit, mou..."
1,6,4,NC,1,19,144,30,animals,1.0,"[mouse, rat, raccoon, dog, cat, elephant, cow,..."
2,93,1,NC,2,14,143,30,animals,1.0,"[dog, cat, ape, elephant, monkey, chimpanzee, ..."
3,93,2,NC,2,14,138,26,animals,1.0,"[owl, rhinoceros, buffalo, horse, dog, cat, el..."
4,93,3,NC,2,14,143,30,animals,1.0,"[dog, rat, camel, horse, cat, ferret, hamster,..."


## list of valid words {-}

List of words allowed for each category, used to flag when a word is an out-of-category intrusion. The list provided here is for the "animals" category.


In [11]:
# One valid word per line. The file is read directly instead of through
# np.loadtxt: the np.str alias was removed in NumPy 1.24, and recent NumPy
# versions reject a newline as the loadtxt delimiter.
with open("animals_list.txt", encoding="utf-8") as valid_items_file:
    valid_items = [line.strip() for line in valid_items_file if line.strip()]

# Example plots {-}


In [14]:
# Example plot 1: two hand-written lists

list1 = [
    'camel', 'elephant', 'hippopotamus', 'monkey', 'gorilla', 'seal',
    'parakeet', 'cockatiel', 'cockatoo', 'crow', 'hawk', 'donkey',
    'lion', 'tiger', 'stork', 'duck', 'goose', 'panda',
]
list2 = [
    'lion', 'tiger', 'monkey', 'elephant', 'ball', 'leash', 'dog', 'cat',
    'camel', 'snake', 'mouse', 'dog', 'monkey', 'hippopotamus', 'fish',
    'cow', 'horse', 'sheep', 'goat',
]

subjID = '0000'  # subject identifier
group1, group2 = '[group]', '[group]'  # group info or diagnosis, e.g. MCI or AD
list1num, list2num = '#1, MM/DD/YY', '#2, MM/DD/YY'  # x-/y-axis list number and/or date
valid_items_list = valid_items
filename = 'test_plotly.html'

indiv_dotplot_new_plotly(
    subjID=subjID,
    seqx=list1, diag_x=group1, listnum_x=list1num,
    seqy=list2, diag_y=group2, listnum_y=list2num,
    valid_items_list=valid_items_list,
    filename_str=filename,
)


In [16]:
# Example plot 2: rows 2 and 3 of the imported data

row1, row2 = data.iloc[2], data.iloc[3]
list1, list2 = row1['item'], row2['item']

subjID = str(row1['id'])  # subject identifier
group1, group2 = str(row1['group']), str(row2['group'])  # group info or diagnosis, e.g. MCI or AD
list1num, list2num = str(row1['listnum']), str(row2['listnum'])  # x-/y-axis list number and/or date
valid_items_list = valid_items
filename = 'test_plotly.html'

indiv_dotplot_new_plotly(
    subjID=subjID,
    seqx=list1, diag_x=group1, listnum_x=list1num,
    seqy=list2, diag_y=group2, listnum_y=list2num,
    valid_items_list=valid_items_list,
    filename_str=filename,
)


In [19]:
# Example plot 3: rows 1155 and 1156 of the imported data

row1, row2 = data.iloc[1155], data.iloc[1156]
list1, list2 = row1['item'], row2['item']

subjID = str(row1['id'])  # subject identifier
group1, group2 = str(row1['group']), str(row2['group'])  # group info or diagnosis, e.g. MCI or AD
list1num, list2num = str(row1['listnum']), str(row2['listnum'])  # x-/y-axis list number and/or date
valid_items_list = valid_items
filename = 'test_plotly.html'

indiv_dotplot_new_plotly(
    subjID=subjID,
    seqx=list1, diag_x=group1, listnum_x=list1num,
    seqy=list2, diag_y=group2, listnum_y=list2num,
    valid_items_list=valid_items_list,
    filename_str=filename,
)


In [18]:
# Example plot 4: rows 1147 and 1148 of the imported data

row1, row2 = data.iloc[1147], data.iloc[1148]
list1, list2 = row1['item'], row2['item']

subjID = str(row1['id'])  # subject identifier
group1, group2 = str(row1['group']), str(row2['group'])  # group info or diagnosis, e.g. MCI or AD
list1num, list2num = str(row1['listnum']), str(row2['listnum'])  # x-/y-axis list number and/or date
valid_items_list = valid_items
filename = 'test_plotly.html'

indiv_dotplot_new_plotly(
    subjID=subjID,
    seqx=list1, diag_x=group1, listnum_x=list1num,
    seqy=list2, diag_y=group2, listnum_y=list2num,
    valid_items_list=valid_items_list,
    filename_str=filename,
)
