In [1]:
%cd "../fiberphotopy"

C:\Users\mceau\Documents\fiberphotopy\fiberphotopy


In [2]:
import pandas as pd               # data management as "dataframe", ie basically table form
import numpy as np                # math operation module based mostly on arrays (vectors conatining any type of data)
import matplotlib.pyplot as plt   # plotting library
import os                         # module to interact with the system (ie browsing files on the computer and getting paths)
import datetime                   # time

# I. Reading .dat files
Pandas 'read_csv' function. Skip the 12 first rows that don't contain tabular data. Separator (sep=) is ',' by default, so you need to change it to '\t' (tab).

For clarity, it's best to change the header names. You can find them in the Imetronic manuals.

+ 'TIME': timestamps
+ 'F' and 'ID' are the two numbers that identify the type of information (e.g. 2 1 is Lever 1 (L1)).
+ '_P','_V','_L','_R','_T','_W','_X','_Y','_Z' are the variables that give different information depending on the category of information.

In [None]:
# the .dat file has no header row, so the column names are given explicitly
dat_columns = ['TIME','F','ID','_P','_V','_L','_R','_T','_W','_X','_Y','_Z']
df = pd.read_csv(
    '../../DATA/lever/J43sac/bsa01042022s1c01_01.dat',
    sep='\t',       # tab-separated values
    skiprows=12,    # the 12 first rows don't contain tabular data
    header=None,
    names=dat_columns,
)
df

# II. Extracting relevant information
Here we are mainly interested in 4 types of information:

1. saccharine administration, coded as injections (6,1)
2. nosepokes (3,1)
3. levers: (2,1) whether they are in or out, when there are lever presses (and what is the status of the lever in when pressed)
4. licks: (5,1) start and end of licking periods and duration

As you'll see, apart from a few necessary modifications, the main idea behind extracting is the same across all types of data, which means you should be able to expand and adapt these pieces of code to anything related to Imetronic .dat file extraction and visualization (+/- analysis on which I will not expand).

*NB: Some words about nomenclature. There will be a lot of created variables so I'll stay consistent with the names. Every dataframe or dataframe slice will be named* **df_{something}**, *every list of events will be named* **{lower_case_description}** *whereas intervals will be named* **{DESCRIPTION_IN_ALL_CAPS}**.

## II.1 Administration
We will first look at a "slice" of the dataframe, by selecting only the rows where (F,ID) = (6,1), *i.e.* "injections".

In [None]:
# boolean mask of the rows whose (F, ID) pair is (6, 1)
is_administration = (df['F'] == 6) & (df['ID'] == 1)
df_adm = df.loc[is_administration]
df_adm

In [None]:
# raw TIME values divided by 1000 (ms -> s)
administration_ms = df_adm['TIME'].values
administrations = administration_ms/1000

We'll use our first plotting function **eventplot**, to visualize the events:

In [None]:
# one vertical tick per administration, on a wide figure
fig, ax = plt.subplots(figsize=(20, 5))
ax.eventplot(administrations)

## II.2 Nosepokes

In [None]:
# same procedure: keep the rows whose (F, ID) pair is (3, 1)
is_nosepoke = (df['F'] == 3) & (df['ID'] == 1)
df_nosepokes = df.loc[is_nosepoke]
df_nosepokes

In [None]:
# raw TIME values divided by 1000 (ms -> s)
nosepoke_ms = df_nosepokes['TIME'].values
nosepokes = nosepoke_ms/1000

In [None]:
# one vertical tick per nosepoke, on a wide figure
fig, ax = plt.subplots(figsize=(20, 5))
ax.eventplot(nosepokes)

## II.3 Levers

In [None]:
# keep the rows whose (F, ID) pair is (2, 1)
is_lever = (df['F'] == 2) & (df['ID'] == 1)
df_levers = df.loc[is_lever]

The information about whether the lever is in or out is coded by the '_P' column, 1 if out, 0 if in.

### II.3.a lever status
Now we are interested in knowing when the lever is retracted vs when it is out. There isn't a clear coding for this very information. However, every 'lever message' contains information about the lever's status. Thus, by using a function that accounts for the possible redundancy of the information, we can determine the status of the lever.

Additionally, we will extract the end of the session (last timestamp of the .dat file), which will be useful for intervals and for plotting.

In [None]:
# the timestamps are extracted and converted from ms to s
lever_is_out   = df_levers['_P'] == 1
lever_is_in    = df_levers['_P'] == 0
lever_out_info = df_levers.loc[lever_is_out, 'TIME'].values/1000
lever_in_info  = df_levers.loc[lever_is_in, 'TIME'].values/1000
# last timestamp of the whole .dat file, also in seconds
session_end    = df['TIME'].tolist()[-1]/1000

In [None]:
# the following function takes a list of 'on' messages and a list of 'off' messages
# and outputs the resulting intervals, taking into account multiple consecutive identical commands
def _interval(on,off,end):
    on  = list(set([i for i in on if i not in off]))
    off = list(set([i for i in off if i not in on]))
    on_series = pd.Series([1]*len(set(on)),index=on,dtype='float64')
    off_series = pd.Series([0]*len(set(off)),index=off,dtype='float64')
    s = pd.concat((on_series,off_series)).sort_index()
    status, intervals, current = 0, [], [None,None]
    for n in s.index:
        if status == 0 and s[n] == 1:
            status = 1
            current[0] = n
        if status == 1 and s[n] == 0:
            status = 0
            current[1] = n
            intervals.append(current)
            current = [None,None]
    if current != [None,None]:
        current[1] = end
        intervals.append(current)
    return [tuple(i) for i in intervals if i[0]-i[1] != 0]

# toy example: the last 'on' command (65) has no matching 'off',
# so its interval is closed at end_time
on_commands, off_commands, end_time = [10,30,50,65], [15,35,55], 100
_interval(on=on_commands, off=off_commands, end=end_time)

# intervals built from the lever messages with _P == 1 ('on') and _P == 0 ('off')
LEVER_OUT = _interval(on=lever_out_info, off=lever_in_info, end=session_end)
LEVER_OUT

We'll now use **axvspan**, the second plotting function we will need.

In [None]:
fig, ax = plt.subplots(figsize=(20, 5))
# raw messages: _P == 1 on the bottom row, _P == 0 in red on the row above
ax.eventplot(lever_out_info, lineoffsets=0, linewidths=0.5)
ax.eventplot(lever_in_info, lineoffsets=1, colors='r', linewidths=0.5)
# shade every LEVER_OUT interval
for start, stop in LEVER_OUT:
    ax.axvspan(start, stop, color='gray', alpha=0.5)
ax.set_xlim((0, session_end))

### II.3.b lever presses
Lever presses are contained in the same section of the dataframe. We have to make a more precise selection: '_V' must be equal to 1, signifying that the lever is being 'visited'. Additionally, '_L', the number of presses since the start of the current line of the exercise, must be different from 0 (the logic behind it is mysterious, but we tested and confirmed this with Jeff by comparing the results from the Imetronic extraction to my extraction with this final condition).

In [None]:
# a press is a 'visit' (_V == 1) with a non-zero '_L'
is_visit       = df_levers['_V'] == 1
has_presses    = df_levers['_L'] != 0
lever_flag_out = df_levers['_P'] == 1
lever_flag_in  = df_levers['_P'] == 0
# visits / lever out
lever_press_out = df_levers.loc[is_visit & lever_flag_out & has_presses, 'TIME'].values/1000
# visits / lever in
lever_press_in = df_levers.loc[is_visit & lever_flag_in & has_presses, 'TIME'].values/1000

## II.4 Licks

In [None]:
# same as before: dataframe with only the 'lick-related' rows, (F, ID) == (5, 1)
is_lick          = (df['F'] == 5) & (df['ID'] == 1)
df_licks         = df.loc[is_lick]
lick_start       = df_licks.loc[df_licks['_V'] == 1, 'TIME'].values/1000
lick_end         = df_licks.loc[df_licks['_V'] == 0, 'TIME'].values/1000
# same interval function as for the levers
LICKING          = _interval(on=lick_start, off=lick_end, end=session_end)
licking_duration = np.array([stop - start for start, stop in LICKING])

fig, ax = plt.subplots(figsize=(20, 5))
# raw messages: starts on the bottom row, ends in red on the row above
ax.eventplot(lick_start, lineoffsets=0, linewidths=0.5)
ax.eventplot(lick_end, lineoffsets=1, colors='r', linewidths=0.5)
# shade every licking interval
for start, stop in LICKING:
    ax.axvspan(start, stop, color='gray', alpha=0.5)
ax.set_xlim((0, session_end))

The third and final plotting function **bar**, simply makes bar plots.

In [None]:
# licking_duration holds one value per LICKING interval, whereas lick_start holds one
# value per raw 'start' message; _interval merges redundant messages, so the two can
# differ in length. Taking the x positions from the intervals keeps them aligned.
licking_onsets = np.array([start for start, stop in LICKING])
plt.bar(licking_onsets, licking_duration)

# III. Putting everything in a function (or two)

Now that you understand all the steps, you need to find a way to automate them by including them all in a function.

This way, with any .dat filename, you can immediately obtain the graph we want.

In [None]:
def _plot(filename,ax,title):
    """Draw the graphical summary of one .dat session file on a Matplotlib axis.

    The file is read as tab-separated values (12 leading rows skipped). Lever
    intervals, licking bars, licks, lever presses, administrations and
    nosepokes are then drawn on `ax`. Requires the `_interval` function.

    Parameters
    ----------
    filename : str
        Path of the .dat file to read.
    ax : matplotlib Axes
        Axis receiving the plot.
    title : str
        Title of the axis.

    Returns
    -------
    None
    """
    # READING THE DAT FILE
    df             = pd.read_csv(filename,sep='\t',skiprows=12,header=None,
                                 names=['TIME','F','ID','_P','_V','_L','_R','_T','_W','_X','_Y','_Z'])
    session_end    = df['TIME'].iloc[-1]/1000   # last timestamp of the file, ms -> s

    def _seconds(rows):
        """Timestamps of the selected rows, converted from ms to s."""
        return rows['TIME'].values/1000

    # EXTRACTING ALL THE DATA
    #administrations
    administrations  = _seconds(df[(df['F']==6) & (df['ID']==1)])
    #nosepokes
    nosepokes        = _seconds(df[(df['F']==3) & (df['ID']==1)])
    #levers
    df_levers        = df[(df['F']==2) & (df['ID']==1)]
    lever_is_out     = df_levers['_P'] == 1
    lever_is_in      = df_levers['_P'] == 0
    is_press         = (df_levers['_V'] == 1) & (df_levers['_L'] != 0)
    lever_out_info   = _seconds(df_levers[lever_is_out])
    lever_in_info    = _seconds(df_levers[lever_is_in])
    lever_press_out  = _seconds(df_levers[is_press & lever_is_out])
    lever_press_in   = _seconds(df_levers[is_press & lever_is_in])
    LEVER_OUT        = _interval(lever_out_info,lever_in_info,session_end) #remember you need the _interval function
    #licks
    df_licks         = df[(df['F']==5) & (df['ID']==1)]
    lick_start       = _seconds(df_licks[df_licks['_V']==1])
    lick_end         = _seconds(df_licks[df_licks['_V']==0])
    LICKING          = _interval(lick_start,lick_end,session_end)
    # Bars are positioned from the intervals themselves: _interval merges redundant
    # messages, so LICKING may hold fewer entries than lick_start.
    licking_onsets   = np.array([a for a,b in LICKING])
    licking_duration = np.array([b-a for a,b in LICKING])

    # PLOTTING
    # lever intervals
    for n,(a,b) in enumerate(LEVER_OUT):
        # the leading underscore(s) keep every span but the first out of the legend
        ax.axvspan(a,b,alpha=0.5,color='gray',label='_'*n+'Lever Out')
    # licks
    ax.bar(licking_onsets, licking_duration,label='Licking Time',width=1,color='mediumblue')
    ax.eventplot(lick_start,lineoffsets=-0.3,linelengths=0.5,linewidths=0.6,color='b',label='Licks')
    # levers
    ax.eventplot(lever_press_in,  lineoffsets=-1.2, linelengths=1, linewidths=0.5, color='purple',    label='Lever Press (in)')
    ax.eventplot(lever_press_out, lineoffsets=-1.2, linelengths=1, linewidths=0.5, color='darkgreen', label='Lever Press (out)')
    # administration
    ax.eventplot(administrations, lineoffsets=-2.2, linelengths=1, linewidths=0.5, color='k',         label='Administration')
    # nosepokes
    ax.eventplot(nosepokes,       lineoffsets=-3.2, linelengths=1, linewidths=0.5, color='r',         label='Nose Pokes')

    # a session without any lick has no maximum duration: fall back to a 1-second axis
    y_top = licking_duration.max() if licking_duration.size else 1
    ax.set_ylim((-3.7,y_top))
    ax.set_ylabel('(seconds)')
    ax.set_xlabel('(seconds)')
    ax.set_title(title)
    ax.legend(loc='upper right')

In [None]:
# one wide axis for a single session
session_file = '../../DATA/lever/J43sac/bsa01042022s1c01_01.dat'
fig, ax1 = plt.subplots(1, figsize=(40, 10))
_plot(session_file, ax1, 'Test title')

# IV. Applying the function to multiple files at once
*ie.* "putting the function in a function"

Now that you are able to plot one session, it would be nice to be able to plot multiple sessions all at once.

To do that, the simplest way is to group the files in a folder structure as follows:

MAIN FOLDER
- SESSION 10
    + bsaxxxxxxxc12
    + bsaxxxxxxxc23
    + ...
- SESSION 11
    + ...
- ...

Additionally, to automatically assign a rat name to a file based on its cage number, you should create a correspondence dictionary (see below).

*NB: As the following information extraction techniques are solely based on file nomenclature (as there is currently no session metadata inside the .dat file), this nomenclature should stay consistent (date order, underscores, etc.)*

## IV.1 Metadata extraction

# where the information is located
datafolder  = '../../DATA/lever/'
# to save pictures
imagefolder = '../../DATA/graphical_summaries/'

In [None]:
# correspondence dictionary: rat name -> cage number
correspondance = dict([
    ('rat 2',  1),
    ('rat 10', 2),
    ('rat 11', 3),
    ('rat 14', 4),
    ('rat 31', 6),
    ('rat 32', 7),
    ('rat 39', 8),
])

In [None]:
# every entry of every sub-folder of datafolder, as '<datafolder><sub-folder>/<entry>'
paths = [
    f"{datafolder}{session}/{dat_file}"
    for session in os.listdir(datafolder)
    for dat_file in os.listdir(datafolder + session)
]
paths[:2]

In [None]:
# file name without its folder and without the '.dat' extension
names = [p.rsplit('/', 1)[-1].split('.dat')[0] for p in paths]
names[:2]

In [None]:
# characters 3-4 of the name are the day, 5-6 the month and 7-10 the year
dates = [
    datetime.datetime(int(stem[7:11]), int(stem[5:7]), int(stem[3:5]))
    for stem in names
]
dates[:2]

In [None]:
# the cage number sits between the last 'c' of the name and the following underscore
cages = [int(stem.rsplit('c', 1)[-1].partition('_')[0]) for stem in names]
cages[:2]

In [None]:
# one row per .dat file; stored under its own name so that the session
# dataframe `df` used by the cells of section II is not overwritten
df_files = pd.DataFrame({'name' : names,
                         'cage' : cages,
                         'date' : dates,
                         'path' : paths})
df_files

## IV.2 Multi-plotting

In [None]:
def multiplot(data_folder,image_folder,correspondance_dict,by='cage'):
    """Plot every session found under `data_folder` and save the figure.

    One axis per .dat file is drawn with `_plot`, stacked vertically. The
    figure is saved in `image_folder` as '<YYYY-MM-DD>_<by>' with the .png,
    .svg and .pdf extensions.

    Parameters
    ----------
    data_folder : str
        Folder holding one sub-folder per session. It is concatenated as-is
        with the sub-folder names, so it must end with a path separator.
    image_folder : str
        Folder where the pictures are saved; must end with a path separator
        for the same reason.
    correspondance_dict : dict
        Maps a rat name to its cage number.
    by : str, default 'cage'
        'cage' sorts the sessions by cage then date, 'date' by date then cage.
        Any other value leaves the listing order untouched.

    Raises
    ------
    ValueError
        If no .dat file is found under `data_folder`.
    """
    # Metadata extraction (only sub-folders and .dat files are considered, so a
    # stray file in the tree does not break the listing or the name parsing)
    paths = [data_folder+folder+'/'+i
             for folder in sorted(os.listdir(data_folder))
             if os.path.isdir(data_folder+folder)
             for i in sorted(os.listdir(data_folder+folder))
             if i.endswith('.dat')]
    if not paths:
        raise ValueError(f"no .dat file found under {data_folder!r}")
    names = [i.split('/')[-1].split('.dat')[0] for i in paths]
    dates = [datetime.datetime(day=int(a[3:5]),month=int(a[5:7]),year=int(a[7:11])) for a in names]
    cages = [int(i.split('c')[-1].split('_')[0]) for i in names]
    df = pd.DataFrame({'name' : names,
                       'cage' : cages,
                       'date' : dates,
                       'path' : paths})

    if by == 'cage': df = df.sort_values(['cage','date'])
    if by == 'date': df = df.sort_values(['date', 'cage'])
    size = len(df)
    # squeeze=False always returns a 2-D array of axes, even for a single session
    fig,axes = plt.subplots(size,figsize=(40,10*size),squeeze=False)

    # Rows are walked in their sorted order; indexing by position with the
    # original index labels would visit them in a scrambled order.
    for ax,(_name,cage,date,path) in zip(axes[:,0],df.itertuples(index=False,name=None)):
        # a cage missing from the dictionary is labelled explicitly instead of crashing
        rat = next((a for a,b in correspondance_dict.items() if b == cage),'unknown rat')
        title = f"{rat.upper()} (c{cage}) {str(date)[:10]} ({path})"
        _plot(path,title=title,ax=ax)

    # a single date stamp so that the three files always share the same name
    stem = image_folder+str(datetime.datetime.now())[:10]+'_'+by
    fig.savefig(stem+'.png')
    fig.savefig(stem+'.svg',dpi=400)
    fig.savefig(stem+'.pdf',dpi=400)

In [None]:
# rat name -> cage number
correspondance = {
    'rat 2': 1,
    'rat 10': 2,
    'rat 11': 3,
    'rat 14': 4,
    'rat 31': 6,
    'rat 32': 7,
    'rat 39': 8,
}

# one figure with every session, ordered by cage
multiplot("../../DATA/lever/", "../../DATA/graphical_summaries/", correspondance, by='cage')