# Slope Analysis

This project uses the change in fluorescence intensity slope to identify responders in calcium imaging experiments.

## Analysis Steps

The `getBaselineAndMaxStimulationSlopeFromCSV` function smooths the raw data with a moving window whose size is set by `filterSize`, analyzes the smoothed Ca intensity in a CSV file, and returns the baseline slope and the drug slope.

The _slope of baseline_ is calculated as the linear regression slope over the 3-minute period before stimulation onset.

In addition, the smoothed data are divided into segments, each containing n = regressionSize data points. The linear regression slope is then calculated for each segment.

The _peak slope of stimulation_ is the most negative slope during the chosen stimulation period.

## Set-Up the Environment

In [None]:
%load_ext autoreload
import sys
sys.path.append("../src")
import os
import glob
import slopeTools
import plotTools
import statsTools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
from scipy.optimize import curve_fit

# Pandas DataFrame as a Database

This script creates a single dataframe containing AFU data for every structure in all experiments. 

Columns define group, drug, distance, structure, etc.

This large dataframe can be queried as needed, or exported as a CSV.

In [None]:
# Master table: one row per ROI trace, filled in by addExperiment().
databaseColumns = ['Group', 'Distance', 'MCN', 'Sniffer', 'AFU']
database = pd.DataFrame(columns=databaseColumns)

def addGroup(folderPath, groupName):
    """
    Add every experiment file found in a folder to the global database.

    Only files directly inside `folderPath` whose names end in "um.xls" are
    loaded. Each one is passed to `addExperiment` together with `groupName`,
    which becomes the 'Group' value of the rows created from that file.
    """
    for filePath in glob.glob(os.path.join(folderPath, "*.xls")):
        # Skip any other .xls files that may sit next to the distance-named exports
        if filePath.endswith("um.xls"):
            addExperiment(filePath, groupName)
        
def addExperiment(filePath, groupName):
    """
    Load one experiment file and add one row per ROI to the global database.

    The file is read as tab-delimited text. The first column is skipped; every
    remaining column is treated as one ROI whose header has the form
    "<MCN>.<distance>um.<sniffer>" (a distance of "25um" is stored as 25).
    The column's values are stored as an array in the 'AFU' field, and
    `groupName` is stored in the 'Group' field.

    Raises ValueError if a header does not split into exactly three
    dot-separated parts, or if its distance part is not an integer.
    """
    global database  # needed because the module-level name is rebound below
    df = pd.read_csv(filePath, delimiter="\t")

    rows = []
    for roiName in df.columns[1:]:  # every column except the first is an ROI
        mcn, distance, sniffer = roiName.split(".")
        rows.append({
            'Group': groupName,
            'Distance': int(distance.replace("um", "")),
            'MCN': mcn,
            'Sniffer': sniffer,
            'AFU': df[roiName].values,
        })

    if not rows:
        return

    # DataFrame.append was removed in pandas 2.0. Concatenating once per file
    # also avoids re-copying the whole table for every ROI.
    database = pd.concat([database, pd.DataFrame(rows)], ignore_index=True)
        
# Group label -> folder holding that group's exported "*um.xls" files
groupFolders = {
    "CsGlu": R"X:\Data\OT-Cre\OT-GCaMP-nonspecific\04-03-19 evoke OT\04-30-2020 Cs-GLU analyze",
    "KGlu": R"X:\Data\OT-Cre\OT-GCaMP-nonspecific\04-03-19 evoke OT\04-30-2020 K-GLU analyze",
    "L368": R"X:\Data\OT-Cre\OT-GCaMP-nonspecific\04-03-19 evoke OT\04-30-2020 L368 - Cs analyze",
}

for groupLabel, groupFolder in groupFolders.items():
    addGroup(groupFolder, groupLabel)

# Last expression of the cell, so the notebook renders the assembled table
database

# Convert raw AFU into dF/F (%)

In [None]:
# Baseline window, given as indexes into each raw AFU trace
baselineStartIndex = 20
treatmentStartIndex = 30
experimentPeriod = 5/60  # min between consecutive samples (i.e. 5 s)

# dF/F (%) = (F - F0) * 100 / F0, where F0 is the mean AFU over the baseline
# window. Iterating over the values (rather than looking rows up by label)
# keeps this correct even if the dataframe index is not 0..n-1.
dFoFs = []
for afu in database["AFU"].values:
    baselineMean = np.mean(afu[baselineStartIndex:treatmentStartIndex])
    dFoFs.append((afu - baselineMean) * 100 / baselineMean)

# Work on a copy so the raw database stays untouched; the raw AFU column is
# replaced by the normalized trace.
analyzed = database.copy()
analyzed["Raw dF/F (%)"] = dFoFs
analyzed = analyzed.drop(columns=["AFU"])

analyzed

# Smooth raw data by filtersize

In [None]:
filterSize = 5        # window size handed to statsTools.smoothY
regressionSize = 10   # window size handed to statsTools.getMovingWindowSegments later on

rawTraces = analyzed["Raw dF/F (%)"].values

# One shared time axis (in minutes), long enough for the longest recording,
# smoothed the same way as the traces.
length = [len(trace) for trace in rawTraces]
maxLength = max(length)
time = experimentPeriod * np.arange(maxLength)
smoothTimes = statsTools.smoothY(time, filterSize)

smoothedTraces = []
for trace in rawTraces:
    smoothedTraces.append(statsTools.smoothY(trace, filterSize))
analyzed["dF/F (%)"] = smoothedTraces

display(analyzed)



# Calculate the slope difference for each sniffer cell

# Plot the calcium signal and slopes over time for each individual cell

In [None]:
baselineStartIndex = 15  # index is right shifted for 2.5 indexes after smoothing
treatmentStartIndex = 30
treatmentDuration = 2

baselineSlopes = []
stimulationSlopes = []
slopeDifference = []

# Everything in this section depends only on the shared time axis, so it is
# computed once instead of once per cell.
baselineTime = smoothTimes[baselineStartIndex:treatmentStartIndex]
baselineRegressionXs = np.linspace(smoothTimes[baselineStartIndex], smoothTimes[treatmentStartIndex])
# NOTE(review): segment times are derived with filterSize although the segments
# themselves are built with regressionSize - confirm this is intended.
segTimes = statsTools.smoothY(smoothTimes, filterSize)
treatmentStartTime = segTimes[treatmentStartIndex]
treatmentEndTime = treatmentStartTime + treatmentDuration

cells = zip(
    analyzed["Group"].values,
    analyzed["Distance"].values,
    analyzed["MCN"].values,
    analyzed["Sniffer"].values,
    analyzed["dF/F (%)"].values,
)

for cellGroup, cellDistance, cellMcn, cellSniffer, dFoF in cells:

    # Baseline slope: one linear regression over the pre-treatment window
    baselineSlope, baselineIntercept, r, p, stdErr = scipy.stats.linregress(
        baselineTime, dFoF[baselineStartIndex:treatmentStartIndex])
    baselineRegressionYs = baselineRegressionXs * baselineSlope + baselineIntercept
    baselineSlopes.append(baselineSlope)

    # Top panel: smoothed trace, baseline fit (dashed) and treatment window (shaded)
    plt.figure(figsize=(8, 6))
    ax1 = plt.subplot(211)
    plt.plot(smoothTimes[:len(dFoF)], dFoF, 'o-', color='b')
    plt.plot(baselineRegressionXs, baselineRegressionYs, color='b', ls='--')
    plt.title(f"{cellGroup}-{cellDistance}-{cellMcn}{cellSniffer}")
    plt.ylabel("dF/F (%)")
    plt.axvspan(smoothTimes[treatmentStartIndex], smoothTimes[treatmentStartIndex]+treatmentDuration, color='r', alpha=.1)

    # Bottom panel: slope of every moving-window segment
    plt.subplot(212, sharex=ax1)
    plt.ylabel("slope (%/min)")

    segments = statsTools.getMovingWindowSegments(dFoF, regressionSize)
    segSlopes = slopeTools.getAllSegmentSlopes(segments, experimentPeriod)
    plt.axvspan(treatmentStartTime, treatmentEndTime, color='r', alpha=.1)

    # Treatment slope: the value statsTools.rangeMax picks within the treatment window
    treatmentSlopeMax = statsTools.rangeMax(segSlopes, segTimes, treatmentStartTime, treatmentEndTime)
    stimulationSlopes.append(treatmentSlopeMax)

    plt.plot(segTimes[:len(segSlopes)], segSlopes, 'o-', color='r')
    plt.axhline(baselineSlope, color='b', ls='--')
    plt.axhline(treatmentSlopeMax, color='r', ls='--')

    slopeDifference.append(treatmentSlopeMax - baselineSlope)

# Column names are read back by later cells, so they are kept exactly as they are
analyzed["Treatment Slop (%)"] = stimulationSlopes
analyzed["SlopeDifference (%)"] = slopeDifference



In [None]:
# Write the per-cell results to disk, then leave the table as the cell's last
# expression so the notebook actually displays it (a bare expression that is
# not the last statement produces no output).
analyzed.to_csv('AllSniffer.csv')
analyzed

# Plot response rate by groups

In [None]:
def identifyResponders(df, groupName, distance, slopeDifferenceThreshold, treatmentSlopeThreshold=5):
    """
    Select the responding cells of one group at one distance.

    A cell counts as a responder when its 'SlopeDifference (%)' exceeds
    `slopeDifferenceThreshold` AND its 'Treatment Slop (%)' exceeds
    `treatmentSlopeThreshold` (both comparisons are strict).

    Parameters:
        df: dataframe with 'Group', 'Distance', 'SlopeDifference (%)' and
            'Treatment Slop (%)' columns.
        groupName: value of 'Group' to keep.
        distance: value of 'Distance' to keep.
        slopeDifferenceThreshold: minimum slope difference for a responder.
        treatmentSlopeThreshold: minimum treatment slope for a responder.

    Returns:
        (matching, totalCellCount, responderCount), where `matching` holds the
        responder rows, `totalCellCount` is the number of cells in the
        group/distance selection and `responderCount` is len(matching).
    """
    # Filter on the arguments, not on notebook globals, so the function gives
    # the right answer for any dataframe and group passed in.
    inSelection = (df['Group'] == groupName) & (df['Distance'] == distance)
    candidates = df.loc[inSelection]
    totalCellCount = len(candidates)

    isResponder = (
        (candidates['SlopeDifference (%)'] > slopeDifferenceThreshold)
        & (candidates['Treatment Slop (%)'] > treatmentSlopeThreshold)
    )
    matching = candidates.loc[isResponder]
    responderCount = len(matching)
    return matching, totalCellCount, responderCount
    

groups = ["KGlu", "CsGlu", "L368"]
#groups = [ "KGlu","L368"]
distances = [25, 50, 75, 100, 125]
threshold = 10


for group in groups:
    responseByDistance = []
    for distance in distances:
        matching, totalCellCount, responderCount = identifyResponders(analyzed, group, distance, threshold)
        # A group may have no cells at a given distance; record NaN (drawn as a
        # gap in the line) instead of failing with a division by zero.
        if totalCellCount:
            responseRate = responderCount*100/totalCellCount
        else:
            responseRate = np.nan
        responseByDistance.append(responseRate)
        # One-row summary table for this group/distance combination
        responseDftemperol = pd.DataFrame({
            'Distance (µm)': [distance],
            'Group': [group],
            'Responder': [responderCount],
            'non-responder': [totalCellCount-responderCount],
        })
        display(responseDftemperol)
    plt.plot(distances, responseByDistance, 'o-', label=group)

plt.legend()
plt.ylabel("Response Rate (%)")
plt.xlabel("Distance (µm)")
plt.title(f"Threshold = {threshold}%")
plt.show()





# Group sniffers by MCNs

In [None]:
groups = ["KGlu", "CsGlu", "L368"]
distances = [25, 50, 75, 100, 125]
columnsForCellCounts = ['Group', 'MCN', 'Distance (µm)', 'Sniffer Count']
cellCounts = pd.DataFrame(columns=columnsForCellCounts)  # stays empty if `groups` is empty

for group in groups:
    inGroup = analyzed.loc[analyzed['Group'] == group]
    countRows = []   # one entry per (MCN, distance) pair of this group
    mcnCount = []    # number of distinct MCNs at each distance
    for distance in distances:
        matching = inGroup.loc[inGroup['Distance'] == distance]
        MCNs = sorted(set(matching['MCN']))
        mcnCount.append(len(MCNs))
        for MCN in MCNs:
            matchingSnifferCount = len(matching.loc[matching['MCN'] == MCN])
            countRows.append({
                'Group': group,
                'MCN': MCN,
                'Distance (µm)': distance,
                'Sniffer Count': matchingSnifferCount,
            })

    # Build the table in one go: DataFrame.append was removed in pandas 2.0
    cellCounts = pd.DataFrame(countRows, columns=columnsForCellCounts)
    display(cellCounts)
    plt.plot(distances, mcnCount, 'o-', label=group)

plt.legend()
plt.ylabel("n number")
plt.xlabel("Distance (µm)")
plt.title("MCN number at each distance")
plt.show()

# Delete sniffers that show severe exponential decay 

In [None]:
def monoExp(x, m, t, b):
    """Evaluate the single-exponential model m * exp(-t * x) + b at x."""
    decay = np.exp(-t * x)
    return m * decay + b

def calcRsquared(actual, expected):
    """
    Calculate rSquared discretely so we don't need sklearn.

    Computes 1 - SS_res / SS_tot, where SS_res is the sum of squared
    differences between `actual` and `expected`, and SS_tot is the sum of
    squared differences between `actual` and its own mean.

    Both arguments may be any array-like of equal length (lists, tuples or
    numpy arrays). If every value of `actual` is identical, SS_tot is zero and
    the result is not a finite number.
    """
    # Convert up front so plain Python sequences work, not only numpy arrays
    actual = np.asarray(actual, dtype=float)
    expected = np.asarray(expected, dtype=float)

    squaredDiffs = np.power(actual - expected, 2)
    squaredDiffsFromMean = np.power(actual - np.mean(actual), 2)
    rSquared = 1 - np.sum(squaredDiffs) / np.sum(squaredDiffsFromMean)
    return rSquared

def exponentialDecayFit(xs, ys, bounds=(-40, (2000, 30, 50))):
    """
    Fit the monoExp model to (xs, ys) and return the R-squared of the fit.

    Parameters:
        xs, ys: array-likes of equal length holding the data to fit.
        bounds: (lower, upper) limits for the parameters (m, t, b), passed
            straight to scipy.optimize.curve_fit. A scalar applies to all
            three parameters. The default allows negative t, so the model is
            not restricted to decaying curves.

    Returns:
        R-squared between `ys` and the fitted curve, as computed by
        calcRsquared.

    Raises:
        Whatever curve_fit raises; per the SciPy documentation this is
        RuntimeError when the minimization fails and ValueError when the data
        contain NaNs.
    """
    xs = np.array(xs)
    ys = np.array(ys)

    # Only the fitted parameters are needed; the covariance matrix is discarded
    params, _ = curve_fit(monoExp, xs, ys, bounds=bounds)

    expCurveFitted = monoExp(xs, *params)
    return calcRsquared(ys, expCurveFitted)


# Traces that a single exponential fits better than this are treated as
# severe exponential decay and removed.
rSquaredThreshold = 0.90

# Positions (row numbers) of the traces that exceed the threshold
indexes = [
    position
    for position, ys in enumerate(analyzed["dF/F (%)"].values)
    if exponentialDecayFit(smoothTimes[:len(ys)], ys) > rSquaredThreshold
]

# Translate positions into index labels before dropping, so this also works
# when the dataframe index is not 0..n-1. `analyzed` itself is left untouched.
analyzedCleaned = analyzed.drop(index=analyzed.index[indexes])

analyzedCleaned
