# Intro to Visualisation

In [None]:
from matplotlib import pyplot as plt  # main basic plotting library in Python

In [None]:
# Load the tracking data set used in the previous session.
# Only the first 200 rows and the columns at positions 1-6 are kept.
import pandas as pd
dlc_path = '../files/20230428_VGC_0647_LaserOFFDLC_testdataset.csv'
dlc = pd.read_csv(dlc_path).iloc[:200, 1:7]
dlc

In [None]:
# The first two rows hold the two parts of each column name (e.g. 'nose' and 'x');
# join them with an underscore to get one flat name per column ('nose_x', ...).
name_head, name_tail = dlc.iloc[0], dlc.iloc[1]
dlc.columns = name_head + '_' + name_tail
# Drop the two header rows, renumber the index from 0 and convert the values to floats.
dlc = dlc.iloc[2:].reset_index(drop=True).astype(float)
dlc

## Static plotting

### Pandas

In [None]:
# pandas data frames ship with a built-in plotting accessor (very cool!);
# a line plot is what a bare `.plot()` call produces.
dlc.plot.line()

In [None]:
# Nose trajectory as a line: x-position against y-position, with axis labels,
# fixed axis limits and a title.
dlc.plot.line(
    x='nose_x',
    y='nose_y',
    title='Mouse nose tracking data',
    xlabel='Position [x]',
    ylabel='Position[y]',
    xlim=[450, 550],
    ylim=[200, 550],
)

In [None]:
# Same nose data as individual points instead of a connected line.
dlc.plot.scatter(
    x='nose_x',
    y='nose_y',
    title='Mouse nose tracking data',
    xlabel='Position [x]',
    ylabel='Position[y]',
    xlim=[450, 550],
    ylim=[200, 550],
)

The full list of pandas plotting parameters is documented here: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html#pandas.DataFrame.plot

### Matplotlib

partly modified from https://matplotlib.org/stable/tutorials/pyplot.html

In [None]:
# Overlay the nose and left-paw trajectories in one figure.
# Each line gets a label so the legend tells the two series apart.
plt.plot(dlc.nose_x, dlc.nose_y, '-g.', label='nose')  # plot green lines with dots
plt.plot(dlc['left paw_x'], dlc['left paw_y'], '-b.', label='left paw')  # plot blue lines with dots
plt.xlabel('Position [x]')
plt.ylabel('Position[y]')
plt.legend()
plt.title('Mouse nose and left paw tracking data')  # the figure shows both body parts

In [None]:
# Switch to the dark theme from this cell onwards.
plt.style.use('dark_background')

# Column names are looked up in `dlc` through the `data` argument:
# a thin line for the nose path, then the points coloured by their likelihood.
plt.plot('nose_x', 'nose_y', data=dlc, linewidth=0.5)
plt.scatter(x='nose_x', y='nose_y', c='nose_likelihood', cmap='gist_rainbow', data=dlc)
plt.colorbar()

In [None]:
# Back to matplotlib's default theme.
plt.style.use('default')

# Two side-by-side panels that share both axes: nose on the left, left paw on the right.
fig, axs = plt.subplots(1, 2, sharex=True, sharey=True)
nose_ax, paw_ax = axs
scatter_opts = dict(data=dlc, cmap='gist_rainbow')
nose_ax.scatter('nose_x', 'nose_y', c='nose_likelihood', **scatter_opts)
paw_ax.scatter('left paw_x', 'left paw_y', c='left paw_likelihood', **scatter_opts)
nose_ax.set_title('Nose')
paw_ax.set_title('Left paw')

### Seaborn

In [None]:
import seaborn as sns

In [None]:
# Nose position with two extra columns mapped onto the markers:
# colour encodes the nose likelihood, marker size the left-paw likelihood.
sns.scatterplot(
    x='nose_x',
    y='nose_y',
    hue='nose_likelihood',
    size='left paw_likelihood',
    palette='viridis',
    data=dlc,
)

In [None]:
# Pairwise correlation matrix of all columns (Pearson is the pandas default)
dlc.corr(method='pearson')

In [None]:
# Heatmap of the pairwise correlations between all columns: the diverging colour map
# is centred on 0 and every cell is annotated with its coefficient.
# `label` is not a heatmap parameter (unknown keywords are only forwarded to the
# underlying mesh artist), so the stray `label=True` has been removed.
sns.heatmap(dlc.corr(), center=0, cmap='seismic', annot=True)

In [None]:
# Hand the whole (wide-format) data frame to seaborn's line plot
sns.lineplot(data=dlc)

In [None]:
# Load the EPM data set and keep only the first level of its column labels.
epm_path = '../files/EPM_data.feather'
epm = pd.read_feather(epm_path)
epm.columns = epm.columns.get_level_values(level=0)
epm.head(n=5)

### Transforming data from wide into long-table format

While many of us are used to collecting data in "wide format", in which different kinds of measurements are simply added as additional columns, this format quickly breaks down when we're dealing with multidimensional data (imagine we were collecting both behavioural performance values for each mouse per day and their respective weights). To get the most out of data frames, we therefore need to transform them into a long format, in which the kind of measurement is specified in one column and the values are collected in another. This enables enhanced data interrogation and statistics.

In [None]:
# Wide -> long: 'Group', 'Mouse' and 'Opto' stay as identifier columns; every other
# column is stacked into a 'measurement' (name) / 'value' pair.
epm_long = pd.melt(
    epm,
    id_vars=['Group', 'Mouse', 'Opto'],
    var_name='measurement',
)
epm_long.head(n=5)

In [None]:
# Grid of bar plots from the long-format table: one row per group, one column per
# measurement, bars split by the 'Opto' column.
sns.catplot(
    kind='bar',
    data=epm_long,
    y='value',
    hue='Opto',
    row='Group',
    col='measurement',
)

## Interactive plotting

In [None]:
# Render this plot with the interactive visualisation platform 'plotly';
# `backend=` selects the plotting backend for this call.
dlc.plot(backend='plotly')  # set pandas plotting backend to the interactive visualisation platform 'plotly'

In [None]:
# Interactive box plot of the data frame, drawn with the plotly backend
dlc.plot(backend='plotly', kind='box')

In [None]:
# Interactive scatter plot of the data frame, drawn with the plotly backend
dlc.plot(backend='plotly', kind='scatter')

# Statistics

## Classical statistics

In [None]:
dlc  # display the tracking data frame again before running statistics on it

In [None]:
dlc.describe()  # basic summary statistics from pandas; NA values are ignored

Pingouin offers a wide range of test statistics that are both easy and intuitive to deploy and interpret; it also offers some nice plotting options. All functions can be found here: https://pingouin-stats.org/build/html/api.html#

In [None]:
import pingouin as pg
# Simple independent t-test between the nose and left-paw x-values
pg.ttest(x=dlc['nose_x'], y=dlc['left paw_x'])

In [None]:
# Same comparison, this time treating the two columns as paired samples
pg.ttest(
    x=dlc['nose_x'],
    y=dlc['left paw_x'],
    paired=True,
)

In [None]:
# Correlation between the nose x- and y-positions
pg.corr(x=dlc['nose_x'], y=dlc['nose_y'])

In [None]:
plt.ioff()  # switch matplotlib's interactive mode off so plots are not displayed automatically (otherwise plots below are shown twice)

In [None]:
# pingouin can also do some plotting: shift plot of nose vs. left-paw x-values
pg.plot_shift(x=dlc['nose_x'], y=dlc['left paw_x'])

In [None]:
# Let's make a sample data frame (20 rows x 4 columns of uniform random numbers)
# and transform it into long format.
import numpy as np
rng = np.random.default_rng(42)  # fixed seed: Restart & Run All reproduces the same numbers and statistics
array = rng.random((20, 4))  # uniform samples in [0, 1), like np.random.rand(20, 4)
df = pd.DataFrame(data=array, index=range(20), columns=['col1', 'col2', 'col3', 'col4'])
df['id'] = df.index  # we're adding an 'id' column based on the data frame index
df_long = df.melt(var_name='measurement', id_vars='id')  # transformation into long format
df_long

In [None]:
plt.ion()  # switch matplotlib's interactive mode back on (it was turned off with plt.ioff() above)

In [None]:
# Let's plot the data pretending we have four measurements from 20 subjects
pg.plot_paired(
    data=df_long,
    dv='value',
    within='measurement',
    subject='id',
)
plt.show()

In [None]:
# Repeated-measures ANOVA across the measurements, with 'id' identifying the subject
pg.rm_anova(
    data=df_long,
    dv='value',
    within='measurement',
    subject='id',
)

In [None]:
# Post-hoc pairwise tests between measurements using Bonferroni correction
pg.pairwise_tests(
    data=df_long,
    dv='value',
    within='measurement',
    subject='id',
    padjust='bonf',
)

# Exercise:
Are there differences in the time mice spent in the open arm between times with and without optogenetic stimulation? Are there differences between ChR-injected and control mice? Is there an interaction?
Investigate the EPM dataframe by plotting and running statistics on it.