# Dynamical Neuroscience in Ukraine Academy: Day 6
# Real data with Python
Twitter: @mmyros

__Content creator:__ Maxym Myroshnychenko

__Content reviewer:__ Anneke Duin

__Data ownership:__ UCL Cortexlab data.cortexlab.net

In [1]:
# @title Helper functions
# Install packages
# Warning: the following two commands will not work on Windows. There, run pip install from a terminal instead, without the leading exclamation mark (and without any >/dev/null redirection)
!pip install -q -U hvplot bokeh
!pip install -q matplotlib pandas xarray numpy tqdm spykes
import os

import pandas as pd
import xarray as xr
import requests
from pathlib import Path
import zipfile


def download_data():
    """
    Download the example dataset and unpack it into the ``data/`` folder.

    The archive is only fetched when ``data/dataset.zip`` does not exist yet,
    so re-running the cell reuses the cached file instead of downloading again.
    The archive is (re-)extracted next to itself on every call.
    :return: None
    :raises requests.HTTPError: if the server answers with an error status
    """
    path = Path('data/dataset.zip')
    path.parent.mkdir(parents=True, exist_ok=True)
    if not path.exists():
        print('Downloading data... Please wait. Should take less than 1 min')
        # Get link:
        r = requests.get('http://data.cortexlab.net/singlePhase3/data/dataset.zip')
        # Fail loudly here rather than saving an error page as "dataset.zip",
        # which would make every later run fail while unzipping
        r.raise_for_status()
        with open(path, "wb") as fid:
            # Write out content of link:
            fid.write(r.content)
    # Unzip
    with zipfile.ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall(path.parent)


def load_spikes_from_phy(path_to_data='/Users/myroshnychenkm2/Downloads/dataset/', sampling_frequency=30000):
    """
    Get the spikes of "good" units from a kilosort/phy result folder
    :param path_to_data: folder holding cluster_groups.csv, spike_clusters.npy and
        spike_times.npy (a trailing path separator is optional)
    :param sampling_frequency: value the stored spike times are divided by
        (presumably the recording rate in Hz, giving times in seconds)
    :return: tuple (ids, ts)
    :ids: cluster id of every retained spike, 1xN
    :ts: corresponding spiketime divided by sampling_frequency, 1xN
    """
    groupfname = os.path.join(path_to_data, 'cluster_groups.csv')
    groups = pd.read_csv(groupfname, delimiter='\t')

    # load spike times and cluster IDs; os.path.join keeps this working
    # whether or not path_to_data ends with a separator
    ids = np.load(os.path.join(path_to_data, 'spike_clusters.npy')).flatten()
    ts = np.load(os.path.join(path_to_data, 'spike_times.npy')).flatten()

    # Create the list of our "good" labeled units
    ids_to_take = groups[(groups.group == 'good')].cluster_id
    # Find which spikes belong to our "good" groups, one index array per unit
    # (spikes therefore come out grouped by unit, in the order of ids_to_take)
    per_unit_indices = [
        np.flatnonzero(ids == i)
        for i in tqdm(ids_to_take, desc='Selecting only good spikes')
    ]
    if per_unit_indices:
        spikes_to_take = np.concatenate(per_unit_indices)
    else:
        # No unit is labeled "good": return empty arrays instead of failing
        spikes_to_take = np.array([], dtype=np.intp)
    # only take spikes that are in our list
    ids = ids[spikes_to_take]
    ts = ts[spikes_to_take].astype(float) / sampling_frequency

    return ids, ts


def bin_neuron(spike_times, bin_size=.100, window=None):
    """
    Count the spikes of a single neuron in consecutive, equally wide time bins
    :param spike_times: array of spike times of one neuron
    :param bin_size: bin width, in sec
    :param window: [start, stop] of the range to bin; when omitted the range
        runs from 0 to the latest spike time
    :return: array holding the number of spikes in each bin
    """
    if window is None:
        start, stop = 0, spike_times.max()
    else:
        start, stop = window[0], window[1]
    # Extend the range by one bin width so the stop time itself gets covered
    edges = np.arange(start, stop + bin_size, bin_size)
    counts, _ = np.histogram(spike_times, edges)
    return counts


# Fetch the dataset archive and unpack it into data/ for the cells below
download_data()

Downloading data... Please wait. Should take less than 1 min


In [2]:
# Import basic libraries
import matplotlib.pylab as plt
import numpy as np
from tqdm import tqdm

In [3]:
# Load the "good" units from the unpacked dataset: both arrays have one entry per spike
# (the spike's cluster id and its spike time)
Neuron_IDs_huge, spike_times_all_neurons_huge = load_spikes_from_phy('data/')

Selecting only good spikes: 100%|██████████| 242/242 [00:03<00:00, 66.29it/s]


# Extra credit 1
Plot a neuron's ISI distribution
Review: What are interspike intervals (ISIs)? We covered this in lecture

In [4]:
# Pick one unit and keep only its spike times.
# NOTE(review): Neuron_IDs_huge holds one cluster id per spike, so index 241 picks the unit
# that owns the 242nd retained spike, not the 242nd unit - confirm this is intended.
id_of_interest = Neuron_IDs_huge[241]
# Boolean mask selects the spikes whose cluster id matches the chosen unit
one_neuron_spike_times = spike_times_all_neurons_huge[Neuron_IDs_huge == id_of_interest]


NameError: name 'Neuron_IDs' is not defined

In [None]:
# Interspike intervals: differences between consecutive spike times of the chosen neuron
isis = np.diff(one_neuron_spike_times)
# Histogram bin edges: from 0 up to (not including) 4, in steps of 0.01
x = np.arange(0, 4, .01)

# density=True rescales the bars so that their total area is 1,
# so the y axis shows a probability density rather than raw counts
plt.hist(isis, bins=x, density=True)
plt.xlabel('Bins of interspike intervals')
plt.ylabel('Probability density');

**Questions**
1. Are there more long intervals between spikes, or short ones?
2. During periods of high neuronal activity, should there be more long intervals, or short ones?

In [None]:
# TODO for students:
# Find the mean of ISIs in this neuron

In [None]:
# to_remove solution
# Mean interspike interval; np.mean runs in native code instead of
# adding the array elements one by one with the builtin sum
np.mean(isis)

**Questions**
1. How does this describe our ISI distribution? How would the mean ISI change for a slower neuron?
A faster neuron?

# Extra credit 2
Fit a statistical model to reproduce a neuron's ISI distribution

Prerequisites:
1. Understand the concept of distribution
1. Play with sliders at https://distribution-explorer.github.io/continuous/gamma.html#pdf-and-cdf-plots

Questions:
1. Look at the PDF plot (left). Does it resemble the ISI distribution at any slider positions? What are they?
1. What does changing the gamma distribution's shape parameter (alpha) do? How does the mean change?
1. What about its rate parameter (beta, the inverse of the scale)? How does the variance change?


In [None]:
# @title Maintenance
# Version for Colab:
!pip install -q numpyro@git+https://github.com/pyro-ppl/numpyro arviz
# Version for Python on your own computer:
# !pip install numpyro jax jaxlib arviz scipy
from jax import random
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS
import arviz as az
import numpyro
import scipy.stats as stats

In [None]:
def model(isis):
    """
    Gamma model of the interspike intervals with two free parameters

    NOTE: numpyro's dist.Gamma takes (concentration, rate). The site named
    'scale' is therefore used as the concentration (a.k.a. shape) and the site
    named 'shape' as the rate (1 / scale). The site names are left as they are
    because they become the variable names of the posterior.
    :param isis: observed interspike intervals
    """
    # Our initial broad guesses (priors) for the two parameters
    scale_prior = dist.Gamma(1., 4)
    shape_prior = dist.Gamma(1., 4)
    scale = numpyro.sample('scale', scale_prior)
    shape = numpyro.sample('shape', shape_prior)
    # Provide observed data:
    likelihood = dist.Gamma(scale, shape)
    numpyro.sample('obs', likelihood, obs=isis)


# Estimate the two model parameters from the observed ISIs with the NUTS sampler:
# 1000 warm-up iterations followed by 2000 retained posterior draws
nuts_kernel = NUTS(model)
mcmc = MCMC(nuts_kernel, num_warmup=1000, num_samples=2000)
mcmc.run(random.PRNGKey(0), isis)

# Hand the sampler output over to ArviZ
data = az.from_numpyro(mcmc)
# Average every parameter over all chains and draws
best_guess = data.posterior.mean(['chain', 'draw'])

In [None]:
# Display the posterior means of the sampled parameters
best_guess

In [None]:
# Evaluate the fitted gamma density on the histogram grid.
# The model hands the 'scale' site to numpyro's Gamma as the concentration and the
# 'shape' site as the rate, while scipy's gamma expects the shape `a` and a
# scale equal to 1 / rate - so the rate has to be inverted here.
fitted_concentration = float(best_guess['scale'])
fitted_rate = float(best_guess['shape'])
isi_fit = stats.gamma.pdf(x=x,
                          a=fitted_concentration,
                          scale=1 / fitted_rate)

plt.plot(x, isi_fit)
plt.hist(isis, bins=x, density=True);
