# Spatial distribution of L5 cells in M1/M2

## List data files

In [75]:
import os
import openpyxl as px
import numpy as np
import matplotlib.pyplot as plt
from sklearn import decomposition as deco
from matplotlib.mlab import PCA as mlabPCA
import scipy
from scipy.spatial import ConvexHull
import math
from tabulate import tabulate
import xlsxwriter
import scipy.stats.stats as st


path_data=r'data/'    
path_images='images/'
log_filename ="input/10x_depth-log_blank.xlsx"
z_resolution = 2.89 #um thickness of the 3D slice

def list_files(path):
    # returns a list of names (with extension, without full path) of all files 
    # in folder path
    files = []
    for name in os.listdir(path):
        if os.path.isfile(os.path.join(path, name)):
            files.append(name)
    return files

def get_xydata(ws):
    # returns xy data 2D array from the work sheet ws
    last_row = ws.get_highest_row() + 1
    sheetdata = []
    for row in range(2,last_row):
            for col in ["A","B"]:
                cellname=col+str(row)
                if ws[cellname].value:
                    sheetdata.append(ws[cellname].value)

    xydata = np.reshape(sheetdata, (len(sheetdata)/2, 2))
    xydata.shape
    return xydata

def areatype(sheet):
    # define area type from the text in the sheet name
    if "m1" in sheet:
        return "M1"
    elif "M1" in sheet:
        return "M1"
    else:
        return "M2"

def genotype(file):
    # define genotype from the text in the file name
    if "CKO" in file:
        return "CKO"
    elif "WT" in file:
        return "WT"
    else:
        return "NA"    
    
def plot_pcadata(path_images,cadata,hull,file,sheet):
    # plot pcadata and hull around
    fig = plt.figure()
    ay = fig.add_subplot(1, 1, 1)
    if "M1" in areatype(sheet):
        ay.scatter(pcadata[:,0], pcadata[:,1], color="blue")
    else:
        ay.scatter(pcadata[:,0], pcadata[:,1], color="red")

 

    for simplex in hull.simplices:
        plt.plot(pcadata[simplex, 0], pcadata[simplex, 1], 'k-')
    plt.grid()
    plt.axis('equal')
    plt.title(file+"   ["+sheet+"]", fontsize=10)


    plt.axis([-1000, 1000, -1000, 1000])
    plt.savefig(path_images+file+'_'+sheet+'.png', bbox_inches='tight')
    
def file_log(log_filename, path_data):
    # write preliminary file log in file
    workbook_depth = xlsxwriter.Workbook(log_filename)
    worksheet_depth = workbook_depth.add_worksheet()


    row = 0
    col = 0

    # executing the main script
    files = list_files(path_data)

    for file in files:
        worksheet_depth.write(0 + row, 0, file)
        row = row +1

    workbook_depth.close()

## Create a file log

In [76]:
# this is just to create the log of all files and then to use it to populate 
file_log(log_filename, path_data)

In [77]:
wb_depth = px.load_workbook("input/10x_depth-log.xlsx")
ws_depth = wb_depth["Sheet1"]

last_row_depth = ws_depth.get_highest_row() + 1
logdata = []

for row in range(2,last_row_depth):
        for col in ["A","B","C"]:
            cellname=col+str(row)
            logdata.append(ws_depth[cellname].value)
alllog = np.reshape(logdata, (len(logdata)/3, 3))
print(alllog)

[['10x position_EMXCKO1-slice-3.xlsx' '24' '16']
 ['10x position_EMXCKO1-slice-5.xlsx' '23' '23']
 ['10x position_EMXCKO2-slice-3.xlsx' '26' '15']
 ['10x position_EMXCKO2-slice-4.xlsx' '29' '19']
 ['10x position_EMXCKO2-slice-5.xlsx' '24' '23']
 ['10x position_EMXCKO3-slice-3-stitch2.xlsx' '24' '16']
 ['10x position_EMXCKO3-slice-4.xlsx' '26' '25']
 ['10x position_EMXCKO4-column-1-slice-7.xlsx' '18' '16']
 ['10x position_EMXCKO4-column-1-slice-8.xlsx' '15' '16']
 ['10x position_EMXCKO4-column-2-slice-7-stitch2.xlsx' '17' '16']
 ['10x position_EMXCKO4-column-2-slice-8.xlsx' '24' '16']
 ['10x position_EMXCKO5-column-1-slice-3.xlsx' '18' '23']
 ['10x position_EMXCKO5-column-2-slice-1.xlsx' '20' '17']
 ['10x position_EMXCKO5-column-2-slice-2.xlsx' '22' '16']
 ['10x position_EMXCKO5-column-2-slice-3.xlsx' '26' '25']
 ['10x position_WT1-N5-column-1-slice-1.xlsx' '29' '16']
 ['10x position_WT1-N5-column-2-slice-1.xlsx' '25' '15']
 ['10x position_WT1-N5-column-2-slice-2.xlsx' '40' '22']
 ['10x

## Open selected sheet 

In [79]:
# preparing the file for the results to be written to
analysisdata = []
analysisdata.append( ["Filename", "Area", "Neurons", "Density", "H", "L", "SD_H", "SD_L", "Genotype"] )

workbook = xlsxwriter.Workbook('results/10x_results.xlsx')
worksheet = workbook.add_worksheet()

for roww in range(0,len(analysisdata[0])):
    worksheet.write(0, 0 + roww, analysisdata[0][roww])

row = 1
col = 0

# executing the main script
files = list_files(path_data)
fileno = 0

for file in files:
    wb = px.load_workbook(path_data+file)
    print("")
    print("=================================================")
    print(file)
    sheets = wb.get_sheet_names()
 
    z_depth=alllog[fileno][1]
    print(z_depth)


    for sheet in sheets:
        # listing the sheets in the doc
        print(sheet)
        ws = wb[sheet]
        # extract data from the sheet
        xydata = get_xydata(ws)
        
        # perform PCA
        pca = deco.PCA(2) # n_components is the components number after reduction
        pcadata = pca.fit(xydata).transform(xydata)

        # LH is dimetion array keeping ranges of the area covered by cells
        LH = np.ptp(pcadata, axis=0)
        print("NEURONS: "+str(xydata.shape[0]))
        print("LENGTH: "+str(LH[0]))
        print("HEIGHT: "+str(LH[1]))
        # Calculate ConvexHull
        hull = ConvexHull(pcadata, qhull_options="FA")
        #print(hull)
        SD_H = np.std(pcadata[0])
        SD_L = np.std(pcadata[1])
        
        print("SD_H: "+str(SD_H))
        print("SD_L: "+str(SD_L))
        print ("H_skewness:", st.skew(pcadata[0], bias=False))
    
        
        densityvolume = SD_L*SD_H*math.pi*int(z_resolution)*int(z_depth)
        
        #density calculation
        print("DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: "+str(xydata.shape[0]/(densityvolume)))
        analysisdata.append( [file[13:], areatype(sheet), xydata.shape[0], xydata.shape[0]/(densityvolume), LH[0], LH[1], SD_H, SD_L, genotype(file)] )

        print("")
        plot_pcadata(path_images,pcadata,hull,file,sheet)
        
        worksheet.write(row, col + 0, file[13:])
        worksheet.write(row, col + 1, areatype(sheet))
        worksheet.write(row, col + 2, xydata.shape[0])
        worksheet.write(row, col + 3, xydata.shape[0]/(densityvolume))
        worksheet.write(row, col + 4, LH[0])
        worksheet.write(row, col + 5, LH[1])
        worksheet.write(row, col + 6, SD_H)
        worksheet.write(row, col + 7, SD_L)
        worksheet.write(row, col + 8, genotype(file))
        
        row += 1
    
    
    fileno=fileno+1
    print("")

print(tabulate(analysisdata, tablefmt="fancy_grid",headers="firstrow"))
workbook.close()


10x position_EMXCKO1-slice-3.xlsx
24
1-M1
NEURONS: 192
LENGTH: 1270.75128879
HEIGHT: 514.57825698
SD_H: 339.930281575
SD_L: 197.545094335
H_skewness: 0.0
DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: 1.89606846578e-05

1-M2
NEURONS: 62
LENGTH: 858.12890116
HEIGHT: 770.249828369
SD_H: 50.0471798046
SD_L: 134.351641523
H_skewness: 8.126047694861378e-16
DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: 6.11473996064e-05

2-M1
NEURONS: 196
LENGTH: 1350.13621237
HEIGHT: 557.789068854
SD_H: 441.881793028
SD_L: 128.03547404
H_skewness: 0.0
DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: 2.29735727457e-05

2-M2
NEURONS: 48
LENGTH: 760.261830496
HEIGHT: 644.799444471
SD_H: 223.091311193
SD_L: 19.769848624
H_skewness: 0.0
DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: 7.21712333848e-05



10x position_EMXCKO1-slice-5.xlsx
23
1-m1
NEURONS: 122
LENGTH: 1301.18957527
HEIGHT: 433.951601712
SD_H: 157.905131405
SD_L: 185.873654495
H_skewness: -5.322233423353847e-16
DENSITY NOT ADJUSTED TO BRAIN SLICE TH

  if self._edgecolors == str('face'):



NEURONS: 52
LENGTH: 1006.19622771
HEIGHT: 777.367253361
SD_H: 223.811140888
SD_L: 9.14776504597
H_skewness: 0.0
DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: 0.000155472443662

2-M1
NEURONS: 140
LENGTH: 925.371382074
HEIGHT: 444.344063661
SD_H: 287.754681424
SD_L: 109.942383848
H_skewness: 0.0
DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: 2.70886443998e-05

2-M2
NEURONS: 79
LENGTH: 1012.03117956
HEIGHT: 912.065264146
SD_H: 306.956734539
SD_L: 40.8170815886
H_skewness: 0.0
DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: 3.85971074836e-05



10x position_EMXCKO2-slice-4.xlsx
29
1-m1
NEURONS: 148
LENGTH: 1399.52184686
HEIGHT: 382.738802894
SD_H: 310.336107831
SD_L: 176.76965229
H_skewness: 0.0
DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: 1.48062075348e-05

1-m2
NEURONS: 42
LENGTH: 827.76912482
HEIGHT: 409.803620679
SD_H: 200.915836978
SD_L: 42.9895318694
H_skewness: 1.1483059559099953e-16
DENSITY NOT ADJUSTED TO BRAIN SLICE THICKNESS: 2.6686679341e-05

2-m1
NEURONS: 136
LENGTH: 1409.6