In [1]:
import numpy as np
import pandas as pd
import sklearn.linear_model
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import json

In [2]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)
import warnings
# Silence pandas PerformanceWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

-------

# Preprocess Data

Reformat and filter original data set to meet the expectations of our pipeline.

In [3]:
# Directory holding the raw input CSV files read by this notebook (path is relative to the working directory).
rawDataDir = './data/kinsler-2020-elife-data/'

In [4]:
# NOTE(review): presumably the directory where the preprocessed tables are written;
# it is not used in the cells visible here -- confirm against the export step.
outdir     = './data/kinsler-2020-preprocessed-data/'

## Load & organize raw data

##### Load the raw Kinsler et al. barcode counts data table

(rows: variants; cols: barcode counts for each experimental treatment/rep/timept and variant metadata)

In [5]:
# Read the merged barcode-counts table from rawDataDir, then display it.
# Besides the per-sample count columns it carries the metadata columns
# 'additional_muts', 'barcode', 'class', 'gene', 'ploidy' and 'type'.
rawCountsTable = pd.read_csv(rawDataDir+'BarcodeCounts_merged+flaskswapcorrected_092519_withBCinfo.csv')
rawCountsTable

Unnamed: 0,1.4%-R1-T1,1.4%-R1-T2,1.4%-R1-T3,1.4%-R1-T4,1.4%-R2-T1,1.4%-R2-T2,1.4%-R2-T3,1.4%-R2-T4,1.4%-T0,1.5%-R1-T1,1.5%-R1-T2,1.5%-R1-T3,1.5%-R1-T4,1.5%-R2-T1,1.5%-R2-T2,1.5%-R2-T3,1.5%-R2-T4,1.5%-T0,1.6%-R1-T1,1.6%-R1-T2,1.6%-R1-T3,1.6%-R1-T4,1.6%-R2-T1,1.6%-R2-T2,1.6%-R2-T3,1.6%-R2-T4,1.6%-T0,1.7%-R1-T1,1.7%-R1-T2,1.7%-R1-T3,1.7%-R1-T4,1.7%-R2-T1,1.7%-R2-T2,1.7%-R2-T3,1.7%-R2-T4,1.7%-T0,1.8%-R1-T1,1.8%-R1-T2,1.8%-R1-T3,1.8%-R1-T4,1.8%-R2-T1,1.8%-R2-T2,1.8%-R2-T3,1.8%-R2-T4,1.8%-T0,13-0,13-0_pool,13-1-1,13-1-2,13-1-3,13-2-1,13-2-2,13-2-3,13-3-1,13-3-2,13-3-3,13-4-1,13-4-2,13-4-3,18-1-1,18-1-2,18-1-3,18-2-1,18-2-2,18-2-3,18-3-1,18-3-2,18-3-3,18-4-1,18-4-2,18-4-3,2.5%-R1-T1,2.5%-R1-T2,2.5%-R1-T3,2.5%-R1-T4,2.5%-R2-T1,2.5%-R2-T2,2.5%-R2-T3,2.5%-R2-T4,2.5%-T0,20-1-1,20-1-2,20-1-3,20-2-1,20-2-2,20-2-3,20-3-1,20-3-2,20-3-3,20-4-1,20-4-2,20-4-3,21+wt,21-0-1,21-0-2,21-0-4,21-1-1,21-1-2,21-1-3,21-2-1,21-2-2,21-2-3,21-3-1,21-3-2,21-3-3,21-4-1,21-4-2,21-4-3,23-0,23-1-1,23-1-2,23-1-3,23-2-1,23-2-2,23-2-3,23-3-1,23-3-2,23-3-3,23-4-1,23-4-2,23-4-3,3-0,3-1-1,3-1-2,3-1-3,3-2-1,3-2-2,3-2-3,3-3-1,3-3-2,3-3-3,3-4-1,3-4-2,3-4-3,6-0,6-1-1,6-1-2,6-1-3,6-2-1,6-2-2,6-2-3,6-3-1,6-3-2,6-3-3,6-4-1,6-4-2,6-4-3,A0,A1,A2,A3,A4,AA0,AA1,AA2,AA3,AA4,B0,B1,B2,B3,B4,BB0,BB1,BB2,BB3,BB4,Ben0.4-R1-T1,Ben0.4-R1-T2,Ben0.4-R1-T3,Ben0.4-R1-T4,Ben0.4-R2-T1,Ben0.4-R2-T2,Ben0.4-R2-T3,Ben0.4-R2-T4,Ben0.4-T0,Ben10-R1-T1,Ben10-R1-T2,Ben10-R1-T3,Ben10-R1-T4,Ben10-R2-T1,Ben10-R2-T2,Ben10-R2-T3,Ben10-R2-T4,Ben10-T0,Ben2-R1-T1,Ben2-R1-T2,Ben2-R1-T3,Ben2-R1-T4,Ben2-R2-T1,Ben2-R2-T2,Ben2-R2-T3,Ben2-R2-T4,Ben2-T0,C0,C1,C2,C3,C4,CC0,CC1,CC2,CC3,CC4,D0,D1,D2,D3,D4,DD0,DD1,DD2,DD3,DD4,E0,E1,E2,E3,E4,EE0,EE1,EE2,EE3,EE4,F0,F1,F2,F3,F4,FF0,FF1,FF2,FF3,FF4,G0,G1,G2,G3,G4,GG0,GG1,GG2,GG3,GG4,H0,H1,H2,H3,H4,I0,I1,I2,I3,I4,J0,J1,J2,J3,J4,K0,K1,K2,K3,K4,L0,L1,L2,L3,L4,M0,M1,M2,M3,M4,N0,N1,N2,N3,N4,O0,O1,O2,O3,O4,P0,P1,P2,P3,P4,Q0,Q1,Q2,Q3,Q4,R0,R1,R2,R3,R4,S0,S1,S2,S3,S4,T0,T1,T2,T3,T4,Tneg,U0,U1,U2,U3,U4,V0,V1,V2,V3,V4,W0,W
1,W2,W3,W4,X0,X1,X2,X3,X4,Y0,Y1,Y2,Y3,Y4,Z0,Z1,Z2,Z3,Z4,additional_muts,barcode,class,gene,ploidy,type
0,174.0,109.0,85.0,68.0,3.0,91.0,56.0,42.0,22.0,74.0,85.0,64.0,70.0,66.0,69.0,66.0,68.0,51.0,121.0,113.0,120.0,245.0,102.0,212.0,146.0,116.0,56.0,130.0,227.0,122.0,109.0,108.0,141.0,85.0,142.0,95.0,102.0,181.0,184.0,216.0,176.0,239.0,45.0,42.0,69.0,2.0,10.0,512.0,700.0,594.0,594.0,403.0,104.0,319.0,299.0,361.0,51.0,70.0,59.0,90.0,133.0,122.0,40.0,65.0,481.0,365.0,102.0,574.0,486.0,516.0,380.0,27.0,19.0,17.0,24.0,10.0,11.0,7.0,6.0,20.0,773.0,2336.0,2192.0,2785.0,2493.0,2199.0,1855.0,1569.0,1775.0,954.0,1495.0,1990.0,0.0,3276.0,3105.0,3372.0,962.0,760.0,1889.0,1883.0,574.0,1691.0,985.0,1144.0,1945.0,1025.0,1070.0,1302.0,2786.0,2019.0,2406.0,1856.0,2104.0,2892.0,2817.0,1529.0,2292.0,1772.0,1172.0,1750.0,1686.0,111.0,244.0,376.0,223.0,183.0,164.0,177.0,81.0,95.0,104.0,101.0,69.0,79.0,212.0,190.0,140.0,1.0,65.0,1.0,38.0,40.0,50.0,51.0,23.0,24.0,22.0,5430.0,6191.0,8361.0,574.0,2148.0,5659.0,2713.0,3435.0,198.0,236.0,18106.0,2775.0,17050.0,10057.0,18295.0,5765.0,3076.0,1751.0,167.0,193.0,25.0,21.0,29.0,42.0,34.0,22.0,32.0,45.0,15.0,52.0,18.0,46.0,10.0,0.0,36.0,36.0,46.0,0.0,80.0,74.0,0.0,84.0,47.0,37.0,80.0,104.0,63.0,2367.0,8268.0,15562.0,1846.0,11209.0,5624.0,1263.0,739.0,272.0,312.0,2722.0,5916.0,3468.0,3720.0,2945.0,6186.0,1103.0,520.0,662.0,395.0,5251.0,11213.0,3158.0,908.0,4044.0,2456.0,7453.0,3962.0,1371.0,237.0,3315.0,8696.0,4901.0,7853.0,4079.0,4798.0,12477.0,21019.0,3361.0,1481.0,1065.0,5289.0,1933.0,249.0,495.0,3758.0,1879.0,442.0,299.0,607.0,1239.0,4131.0,1553.0,150.0,235.0,6639.0,6157.0,2544.0,380.0,466.0,1539.0,4670.0,2662.0,363.0,341.0,1013.0,9165.0,5227.0,1091.0,1569.0,9030.0,4746.0,0.0,870.0,1449.0,3110.0,10152.0,10870.0,1247.0,907.0,5903.0,11731.0,0.0,0.0,0.0,8704.0,6361.0,7231.0,780.0,1934.0,7391.0,11230.0,6963.0,1183.0,1828.0,4329.0,3765.0,4583.0,3246.0,314.0,2835.0,6727.0,3805.0,502.0,87.0,1371.0,4424.0,1987.0,1161.0,204.0,1859.0,3879.0,6424.0,1474.0,3.0,714.0,6671.0,8651.0,5340.0,1204.0,486.0,4538.0,6111.0,48518.0,1625.0,441.0,4993.0,4660.0,1320.0,316
1.0,1561.0,3778.0,0.0,1979.0,169311.0,1133.0,1125.0,1940.0,5496.0,2243.0,786.0,535.0,477.0,281.0,15.0,128.0,TIP1-upstream_point_variant; YKR012C-upstream_...,53,Diploid,Diploid,Diploid,Diploid
1,7374.0,18479.0,44673.0,96279.0,104.0,13081.0,26243.0,50295.0,629.0,2713.0,8975.0,19736.0,63889.0,2161.0,8175.0,19028.0,50199.0,1102.0,5096.0,13055.0,30405.0,141123.0,3330.0,17235.0,32195.0,65247.0,975.0,3784.0,15314.0,33243.0,34457.0,3473.0,8792.0,13059.0,80332.0,1314.0,3447.0,13525.0,32522.0,97939.0,6641.0,18834.0,7138.0,19516.0,1379.0,2625.0,3914.0,43106.0,54385.0,42068.0,160273.0,107883.0,20536.0,399304.0,308884.0,267212.0,223043.0,241795.0,145296.0,22844.0,14132.0,7847.0,6260.0,14745.0,66171.0,179028.0,49741.0,171156.0,491772.0,661420.0,367527.0,1017.0,810.0,1284.0,4544.0,211.0,612.0,670.0,2104.0,328.0,5215.0,14656.0,12802.0,26566.0,24995.0,21013.0,35823.0,31769.0,25010.0,34463.0,49171.0,41128.0,0.0,23186.0,22200.0,22599.0,14470.0,11899.0,27178.0,50457.0,16668.0,36587.0,47319.0,49533.0,58512.0,79224.0,76326.0,70336.0,16449.0,18428.0,21698.0,16341.0,40070.0,42476.0,40354.0,57945.0,64251.0,43842.0,83239.0,93228.0,73539.0,48524.0,54722.0,88884.0,49070.0,102971.0,127795.0,95476.0,101137.0,187235.0,150720.0,217453.0,209290.0,229493.0,37374.0,114306.0,83526.0,167.0,186670.0,6951.0,108068.0,396925.0,568191.0,452685.0,1128412.0,370212.0,496030.0,102665.0,165765.0,422624.0,62916.0,367191.0,91956.0,98775.0,289984.0,30196.0,67673.0,200215.0,66810.0,915046.0,1357544.0,4230603.0,90000.0,122581.0,173982.0,31380.0,56512.0,39.0,15.0,14.0,30.0,41.0,23.0,13.0,33.0,69.0,8.0,3.0,13.0,3.0,1.0,3.0,10.0,6.0,0.0,218.0,56.0,1.0,21.0,129.0,19.0,28.0,85.0,496.0,36142.0,224264.0,751263.0,187575.0,1956473.0,55424.0,86131.0,31299.0,21039.0,47331.0,23585.0,121243.0,138770.0,293532.0,443175.0,49635.0,108918.0,15508.0,33281.0,52225.0,34768.0,134512.0,75757.0,39864.0,404244.0,23205.0,98207.0,54107.0,25283.0,5671.0,24826.0,123926.0,128460.0,407379.0,497347.0,37028.0,121413.0,201175.0,49571.0,27860.0,14677.0,136115.0,103853.0,25061.0,105103.0,67840.0,59385.0,17611.0,16887.0,31111.0,14822.0,123119.0,93059.0,15122.0,53362.0,69494.0,107559.0,89549.0,20215.0,55134.0,16720.0,94363.0,105430.0,23514.0,
54599.0,10470.0,66131.0,22936.0,4449.0,4553.0,87303.0,43685.0,0.0,2850.0,4560.0,21635.0,7665.0,589.0,27.0,6.0,48439.0,1958.0,0.0,1.0,0.0,94463.0,59101.0,47035.0,3860.0,7283.0,81043.0,5267.0,237.0,46.0,46.0,74138.0,126185.0,189699.0,246186.0,49288.0,56031.0,284912.0,204316.0,59761.0,21219.0,20454.0,108000.0,100327.0,98361.0,20794.0,34760.0,126723.0,240864.0,122373.0,199.0,5331.0,47233.0,67560.0,227000.0,79618.0,67880.0,32584.0,42930.0,2308950.0,136251.0,42379.0,56138.0,106525.0,7687.0,12892.0,6216.0,46925.0,13.0,8215.0,390970.0,2018.0,33696.0,59033.0,1073318.0,1056267.0,562172.0,15967.0,10445.0,67856.0,3506.0,184186.0,SEH1-missense_variant; ZIP1-missense_variant; ...,151,PKA,IRA1,Haploid,stop_gained
2,461.0,256.0,200.0,152.0,3.0,271.0,117.0,65.0,118.0,196.0,177.0,122.0,112.0,140.0,203.0,131.0,119.0,165.0,421.0,267.0,254.0,419.0,261.0,508.0,288.0,236.0,160.0,330.0,460.0,227.0,243.0,325.0,278.0,137.0,336.0,302.0,286.0,423.0,425.0,426.0,471.0,549.0,69.0,97.0,268.0,15.0,46.0,2909.0,2977.0,2776.0,2582.0,1640.0,442.0,1524.0,1028.0,1036.0,195.0,161.0,209.0,466.0,681.0,613.0,149.0,331.0,2356.0,1429.0,417.0,2165.0,1460.0,1711.0,1312.0,109.0,36.0,41.0,74.0,26.0,42.0,24.0,33.0,72.0,562.0,2189.0,2077.0,2223.0,2185.0,1772.0,1335.0,1225.0,1724.0,542.0,1056.0,1680.0,0.0,2938.0,2818.0,3181.0,701.0,437.0,1650.0,1422.0,415.0,1471.0,680.0,761.0,1614.0,897.0,887.0,1146.0,2595.0,1794.0,1867.0,1674.0,1776.0,2385.0,2276.0,1117.0,1902.0,1406.0,935.0,1350.0,1448.0,677.0,1148.0,1873.0,1199.0,751.0,733.0,703.0,280.0,443.0,369.0,214.0,205.0,233.0,1389.0,961.0,553.0,0.0,289.0,3.0,163.0,149.0,218.0,173.0,114.0,64.0,57.0,4357.0,4856.0,6961.0,523.0,1654.0,4272.0,2120.0,2587.0,147.0,198.0,13083.0,2054.0,11295.0,7406.0,12487.0,4183.0,2314.0,1222.0,124.0,156.0,67.0,65.0,65.0,101.0,80.0,67.0,67.0,74.0,32.0,141.0,36.0,67.0,19.0,0.0,116.0,74.0,123.0,0.0,297.0,235.0,0.0,206.0,181.0,119.0,213.0,294.0,314.0,1697.0,5251.0,11435.0,1357.0,8488.0,4353.0,956.0,561.0,277.0,216.0,2214.0,4522.0,2632.0,2995.0,2326.0,4836.0,914.0,489.0,591.0,307.0,4169.0,8543.0,2355.0,798.0,2852.0,1800.0,5765.0,2518.0,1037.0,171.0,2569.0,6808.0,3839.0,6019.0,2907.0,3799.0,10434.0,13601.0,2604.0,1106.0,877.0,3970.0,1650.0,195.0,355.0,2810.0,1647.0,379.0,267.0,462.0,966.0,3238.0,1243.0,129.0,218.0,4786.0,4727.0,1970.0,306.0,440.0,1395.0,3453.0,2171.0,312.0,272.0,759.0,6451.0,3838.0,756.0,1250.0,6843.0,3465.0,0.0,714.0,1216.0,2523.0,8470.0,6964.0,1012.0,608.0,4609.0,10477.0,0.0,0.0,0.0,6215.0,4498.0,5200.0,605.0,1666.0,5370.0,8196.0,5068.0,1016.0,1497.0,3401.0,2931.0,3039.0,2237.0,202.0,2249.0,5060.0,2420.0,359.0,54.0,1025.0,3185.0,1375.0,881.0,97.0,1276.0,2653.0,3939.0,978.0,0.0,439.0,5721.0,7159.0,3924.0,866.0,313.0,3780.0,5084.
0,31675.0,1318.0,314.0,4419.0,3943.0,1000.0,2582.0,1082.0,2480.0,0.0,1405.0,128274.0,838.0,743.0,1405.0,3214.0,1366.0,413.0,351.0,344.0,119.0,6.0,111.0,NotSequenced,262,NotSequenced,NotSequenced,NotSequenced,NotSequenced
3,4294.0,5325.0,6792.0,7633.0,57.0,4170.0,4132.0,4534.0,525.0,1546.0,2666.0,3129.0,5504.0,1437.0,2785.0,3453.0,5872.0,1019.0,3180.0,4374.0,5983.0,15873.0,2385.0,6422.0,6826.0,8019.0,842.0,2908.0,6319.0,5848.0,6153.0,2618.0,3807.0,3588.0,14075.0,1363.0,2399.0,6302.0,9329.0,17580.0,4586.0,8297.0,2010.0,3422.0,1377.0,710.0,1187.0,36510.0,32361.0,30864.0,92924.0,59293.0,14493.0,149876.0,114393.0,102630.0,45139.0,50325.0,32077.0,8375.0,9166.0,5969.0,3659.0,10151.0,57614.0,110050.0,27822.0,106658.0,206957.0,240271.0,156816.0,1062.0,626.0,855.0,2473.0,213.0,499.0,451.0,1235.0,345.0,1617.0,4669.0,4572.0,9278.0,8024.0,6011.0,8884.0,8257.0,7533.0,6847.0,9549.0,10797.0,0.0,9515.0,8650.0,9261.0,4008.0,2997.0,8336.0,14116.0,3846.0,9536.0,9158.0,10795.0,14683.0,17543.0,17963.0,16884.0,7478.0,7283.0,8379.0,6361.0,13690.0,13810.0,13057.0,14120.0,17528.0,12541.0,19420.0,20655.0,19372.0,17986.0,44047.0,69906.0,40006.0,50323.0,62321.0,48286.0,33279.0,63313.0,54598.0,61405.0,51856.0,63709.0,43657.0,60648.0,45097.0,65.0,72380.0,1437.0,39614.0,72903.0,102486.0,89767.0,113298.0,40227.0,51220.0,31310.0,50871.0,111168.0,12981.0,64484.0,32593.0,28351.0,66414.0,5804.0,9854.0,85099.0,23085.0,272459.0,289504.0,759227.0,34029.0,35398.0,40071.0,6373.0,9042.0,100.0,44.0,15.0,26.0,79.0,41.0,9.0,14.0,89.0,4.0,1.0,3.0,1.0,0.0,11.0,9.0,1.0,0.0,637.0,334.0,0.0,90.0,341.0,80.0,32.0,46.0,882.0,13032.0,86444.0,230444.0,45782.0,390609.0,20268.0,22251.0,8321.0,4822.0,7310.0,11648.0,42988.0,42505.0,73141.0,91313.0,20791.0,29997.0,5055.0,8339.0,9154.0,18407.0,67806.0,33209.0,16715.0,145974.0,9009.0,28690.0,11812.0,4228.0,806.0,13585.0,64492.0,64553.0,193034.0,213798.0,17216.0,42711.0,60135.0,12029.0,5901.0,5623.0,43059.0,28458.0,5785.0,18732.0,23773.0,17469.0,4596.0,4148.0,6269.0,6046.0,39863.0,26022.0,3529.0,9660.0,25108.0,35574.0,26487.0,4974.0,10050.0,6912.0,30660.0,29291.0,5363.0,9120.0,3890.0,31964.0,16440.0,3543.0,4241.0,33488.0,18158.0,0.0,2253.0,4084.0,10134.0,14992.0,7803.0,507.0,132.0,20754.0,10223.
0,0.0,2.0,0.0,33758.0,25329.0,26904.0,2371.0,5193.0,27004.0,10366.0,1853.0,97.0,54.0,28938.0,46180.0,58783.0,50718.0,9006.0,27435.0,123059.0,78374.0,15711.0,5270.0,8283.0,34409.0,22785.0,16668.0,3877.0,12974.0,36159.0,65792.0,20704.0,61.0,2122.0,20232.0,28189.0,71305.0,20435.0,14749.0,14245.0,19954.0,854789.0,37758.0,11734.0,25540.0,39161.0,3372.0,5917.0,2949.0,23507.0,2.0,4155.0,221003.0,1176.0,11928.0,17243.0,228659.0,166093.0,78766.0,5738.0,3892.0,21563.0,935.0,26450.0,,273,PKA,IRA1,Haploid,frameshift_variant
4,2827.0,3556.0,5306.0,7026.0,39.0,2642.0,2744.0,2915.0,357.0,936.0,1763.0,2240.0,3824.0,796.0,1665.0,2167.0,3269.0,509.0,2047.0,3268.0,4498.0,13125.0,1330.0,4385.0,4424.0,5924.0,523.0,1608.0,3845.0,4014.0,4108.0,1358.0,2034.0,1759.0,7099.0,685.0,1486.0,3484.0,5506.0,9684.0,2580.0,4854.0,1031.0,1895.0,791.0,373.0,631.0,21956.0,23483.0,22294.0,46277.0,33707.0,7626.0,79897.0,57846.0,56967.0,25842.0,25732.0,14473.0,5276.0,6048.0,3852.0,1810.0,6676.0,23651.0,44467.0,14450.0,58251.0,98782.0,123080.0,70724.0,396.0,266.0,327.0,920.0,88.0,188.0,180.0,438.0,163.0,2401.0,9021.0,9131.0,15875.0,15272.0,11231.0,17416.0,12063.0,10482.0,8853.0,11886.0,16967.0,0.0,12496.0,12151.0,11895.0,6132.0,4184.0,14282.0,20009.0,5851.0,17392.0,13641.0,17577.0,23714.0,22004.0,25565.0,18469.0,10128.0,10771.0,13978.0,10719.0,21947.0,21518.0,19548.0,19119.0,22756.0,19028.0,24951.0,25391.0,21651.0,8509.0,21699.0,30903.0,27190.0,30602.0,30253.0,41487.0,20805.0,34032.0,35132.0,34762.0,27915.0,42078.0,18841.0,35168.0,26369.0,44.0,35198.0,928.0,19909.0,43792.0,54895.0,53047.0,63584.0,26462.0,32320.0,16656.0,30016.0,61338.0,5636.0,26463.0,18681.0,13604.0,27372.0,1546.0,2836.0,48423.0,11163.0,119669.0,134225.0,307002.0,19814.0,17285.0,14678.0,1765.0,2547.0,114.0,96.0,72.0,97.0,135.0,75.0,41.0,73.0,75.0,1.0,1.0,2.0,0.0,0.0,1.0,3.0,1.0,0.0,738.0,506.0,0.0,294.0,449.0,174.0,150.0,168.0,947.0,5961.0,36974.0,102814.0,19134.0,156159.0,13557.0,9308.0,3895.0,1795.0,2864.0,6281.0,25379.0,19195.0,28529.0,32723.0,14891.0,10486.0,2108.0,3310.0,3126.0,11115.0,50235.0,22132.0,9878.0,82992.0,4194.0,14662.0,6913.0,1945.0,246.0,7641.0,47021.0,45894.0,131776.0,150927.0,7818.0,22029.0,34358.0,4755.0,1470.0,2977.0,24466.0,14523.0,1956.0,9075.0,12230.0,6691.0,1490.0,1241.0,1698.0,3251.0,24428.0,12904.0,1217.0,3397.0,15516.0,22488.0,11867.0,1657.0,3783.0,3364.0,17533.0,12238.0,2012.0,3159.0,1851.0,21584.0,10500.0,1671.0,2832.0,21306.0,11646.0,0.0,1195.0,2245.0,5965.0,18999.0,17636.0,1468.0,399.0,11200.0,20180.0,0.0,0.0,0.0,21
049.0,14632.0,14957.0,1163.0,3159.0,17811.0,13880.0,3763.0,255.0,284.0,13721.0,19168.0,25367.0,25924.0,2452.0,11565.0,49725.0,34320.0,4872.0,1058.0,3960.0,15064.0,10940.0,5214.0,1073.0,6075.0,16985.0,33175.0,8555.0,8.0,969.0,11993.0,16413.0,30346.0,6889.0,2909.0,8716.0,12362.0,389512.0,14733.0,2813.0,12027.0,16477.0,2468.0,4632.0,1393.0,9803.0,2.0,3616.0,226901.0,663.0,4410.0,7485.0,69827.0,49628.0,17444.0,1970.0,1470.0,3452.0,163.0,6759.0,NotSequenced,323,NotSequenced,NotSequenced,NotSequenced,NotSequenced
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,1.0,4.0,4.0,3.0,0.0,2.0,1.0,1.0,0.0,1.0,4.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,6.0,4.0,4.0,0.0,17.0,12.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,3.0,0.0,2.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1952.0,4932.0,9663.0,821.0,8528.0,4001.0,3732.0,6003.0,628.0,1488.0,4097.0,1263.0,13183.0,14277.0,33577.0,3266.0,4113.0,3531.0,711.0,1109.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,510.0,4356.0,6071.0,1651.0,7489.0,2600.0,3312.0,977.0,665.0,1250.0,798.0,3536.0,2261.0,3046.0,3010.0,1873.0,4905.0,343.0,718.0,534.0,702.0,1475.0,856.0,170.0,1288.0,1050.0,2349.0,560.0,187.0,21.0,346.0,1791.0,871.0,1308.0,846.0,1588.0,2744.0,1753.0,460.0,130.0,442.0,5216.0,4282.0,732.0,2500.0,4442.0,3042.0,876.0,670.0,951.0,533.0,5348.0,3651.0,556.0,1664.0,2594.0,3322.0,1762.0,399.0,945.0,521.0,2350.0,2023.0,358.0,676.0,342.0,665.0,56.0,1.0,0.0,3088.0,675.0,0.0,3.0,1.0,1481.0,46.0,0.0,0.0,0.0,3047.0,14.0,0.0,0.0,0.0,3628.0,624.0,142.0,3.0,2.0,2777.0,4.0,1.0,1.0,0.0,3089.0,2708.0,1523.0,2579.0,672.0,2045.0,4496.0,1683.0,501.0,238.0,762.0,3016.0,1284.0,1498.0,587.0,807.0,2375.0,2940.0,1585.0,3.0,169.0,3610.0,3128.0,6139.0,2356.0,1527.0,2751.0,1591.0,53247.0,3218.0,658.0,3646.0,5032.0,245.0,504.0,171.0,1949.0,0.0,162.0,5848.0,20.0,1592.0,1990.0,28920.0,19958.0,12090.0,454.0,264.0,1794.0,120.0,2833.0,,9000107,PKA,IRA1,Haploid,stop_gained
546,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,9000108,PKA,IRA1,Haploid,stop_gained
547,4.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,3.0,4.0,0.0,1.0,0.0,0.0,3.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,14.0,18.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2256.0,4515.0,11090.0,975.0,6747.0,2648.0,2670.0,6360.0,361.0,876.0,3880.0,1278.0,13314.0,19459.0,41518.0,2175.0,2604.0,2461.0,302.0,518.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,522.0,3595.0,9610.0,2260.0,16705.0,1959.0,2164.0,526.0,354.0,748.0,495.0,2361.0,1691.0,3294.0,3389.0,1197.0,2537.0,169.0,399.0,341.0,518.0,1444.0,516.0,172.0,1634.0,489.0,1014.0,278.0,86.0,13.0,269.0,1380.0,710.0,1878.0,1480.0,707.0,1183.0,907.0,172.0,51.0,241.0,3202.0,2575.0,345.0,1651.0,2695.0,2054.0,394.0,341.0,479.0,279.0,3641.0,2334.0,298.0,944.0,1796.0,2101.0,1093.0,183.0,548.0,308.0,1718.0,1430.0,214.0,472.0,164.0,432.0,28.0,2.0,1.0,2143.0,462.0,0.0,1.0,1.0,655.0,96.0,7.0,0.0,0.0,1236.0,7.0,0.0,0.0,0.0,2577.0,454.0,97.0,4.0,1.0,2152.0,11.0,1.0,0.0,0.0,1178.0,1090.0,592.0,1225.0,229.0,857.0,1946.0,865.0,182.0,78.0,288.0,1183.0,575.0,598.0,158.0,351.0,1207.0,1524.0,751.0,2.0,112.0,1456.0,1345.0,2896.0,1043.0,513.0,1137.0,717.0,26963.0,1368.0,201.0,1335.0,2140.0,120.0,203.0,42.0,825.0,0.0,73.0,3243.0,10.0,633.0,1120.0,21616.0,18073.0,5050.0,192.0,135.0,767.0,58.0,1121.0,,9000109,PKA,IRA1,Haploid,stop_gained
548,15.0,9.0,8.0,1.0,0.0,7.0,5.0,2.0,6.0,11.0,0.0,0.0,1.0,7.0,2.0,2.0,3.0,6.0,16.0,6.0,5.0,5.0,7.0,9.0,6.0,5.0,5.0,11.0,12.0,1.0,2.0,6.0,5.0,4.0,12.0,16.0,15.0,17.0,16.0,7.0,84.0,100.0,12.0,11.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,1.0,2.0,4.0,3.0,6.0,5.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12302.0,23460.0,53630.0,6713.0,47657.0,14320.0,12215.0,30117.0,2092.0,4474.0,22549.0,8837.0,71001.0,96137.0,239720.0,12059.0,13369.0,13336.0,2072.0,3214.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,4.0,3142.0,20600.0,51870.0,10435.0,84038.0,10076.0,10879.0,2867.0,2097.0,3741.0,2803.0,13971.0,10716.0,22450.0,23981.0,6378.0,15684.0,1097.0,2225.0,2059.0,3177.0,7507.0,3271.0,1315.0,12668.0,2924.0,5291.0,1779.0,478.0,53.0,1910.0,7143.0,4500.0,11890.0,9349.0,3949.0,6475.0,5485.0,1052.0,309.0,1315.0,17418.0,11478.0,1999.0,7981.0,15236.0,13194.0,2281.0,1805.0,2719.0,1485.0,17215.0,10768.0,1832.0,4960.0,10570.0,10760.0,5677.0,1065.0,2705.0,1753.0,9102.0,6952.0,1218.0,2492.0,1350.0,2696.0,207.0,12.0,4.0,12163.0,2462.0,0.0,8.0,6.0,3947.0,90.0,60.0,4.0,2.0,7269.0,58.0,0.0,0.0,0.0,15908.0,2612.0,505.0,6.0,6.0,12053.0,57.0,4.0,1.0,1.0,8008.0,7607.0,4291.0,6204.0,2106.0,5707.0,12772.0,5668.0,1269.0,860.0,1909.0,7794.0,3850.0,4631.0,1670.0,2398.0,7293.0,8907.0,4397.0,15.0,965.0,7979.0,8836.0,15623.0,5309.0,4980.0,6035.0,3492.0,149156.0,7830.0,1859.0,8036.0,10700.0,605.0,902.0,352.0,5982.0,1.0,401.0,13572.0,47.0,4630.0,6353.0,125579.0,105963.0,36411.0,1316.0,709.0,6922.0,431.0,7834.0,,9000110,PKA,IRA1,Haploid,stop_gained


##### Separate the dataset into variant and ancestor data

In [6]:
# Ancestor rows are those whose 'class' entry is exactly 'Ancestor'.
is_ancestor_row = rawCountsTable['class'].eq('Ancestor')
ancCountsTable = rawCountsTable.loc[is_ancestor_row]

In [7]:
# Every row whose 'class' entry is anything other than 'Ancestor' is kept as a variant row.
is_variant_row = rawCountsTable['class'].ne('Ancestor')
mutCountsTable = rawCountsTable.loc[is_variant_row]

##### Extract the counts data (excluding metadata) from the variant and ancestor datasets

In [8]:
# Non-count columns of the raw table. They are dropped to obtain the pure count tables
# and selected on their own to obtain the per-variant metadata table.
metadata_columns = ['additional_muts', 'barcode', 'class', 'gene', 'ploidy', 'type']

In [9]:
# Ancestor counts only: strip the metadata columns, leaving the remaining columns untouched.
rawAncCounts = ancCountsTable.drop(metadata_columns, axis='columns')

In [10]:
# Variant counts only: strip the metadata columns, leaving the remaining columns untouched.
rawCounts = mutCountsTable.drop(metadata_columns, axis='columns')

##### Extract the metadata for variants from the variants dataset

In [11]:
# Take an independent copy of the metadata columns so that later edits to variantsInfo
# are not treated by pandas as writes to a slice of mutCountsTable.
variantsInfo = mutCountsTable.loc[:, metadata_columns].copy()

##### Drop certain exception samples

In [12]:
# Sample columns removed from both count tables before the sample labels are parsed.
# '21-0-1' is not in the list, so that column is retained.
drop_samples = ['13-0_pool', 'Tneg', '21+wt', '21-0-2', '21-0-4'] # '21-0-1'

In [13]:
# Remove the exception samples from the ancestor counts.
rawAncCounts = rawAncCounts.drop(drop_samples, axis='columns')

In [14]:
# Remove the exception samples from the variant counts.
rawCounts = rawCounts.drop(drop_samples, axis='columns')

##### Parse treatment, rep, and timepoint information from sample labels (column names)

In [15]:
# Original sample labels: the column names left in rawCounts after dropping metadata and exception samples.
sample_columns_raw = rawCounts.columns

In [16]:
# Load table that maps alpha-character labeled samples to their corresponding treatments and replicate numbers.
# The label-parsing cell reads its 'sample_label', 'treatment' and 'rep' columns.
# NOTE(review): this is an absolute, user-specific path, unlike the relative rawDataDir used for the
# other inputs; the notebook will not run on another machine until this file is made available at a
# relative/configurable location -- confirm where the CSV should live.
sampleAlphaLabelInfo = pd.read_csv('/Users/ryan/Dropbox/Projects/ResearchProjects/tikhonov/debias_evo_timeseries/Code/pipeline/okletsdothis/sample-alphalabel_treatment-rep.csv')

In [17]:
def _trailing_int(token):
    """Return the run of decimal digits that ends `token` as an int.

    Examples: 'R2' -> 2, 'T10' -> 10, '3' -> 3.
    Reading the whole digit run (rather than only the last character) keeps
    multi-digit replicate/timepoint numbers intact.

    Raises:
        ValueError: if `token` does not end in a digit.
    """
    digits = token[len(token.rstrip('0123456789')):]
    if not digits:
        raise ValueError(f"Sample label component {token!r} does not end in a number")
    return int(digits)


def _parse_sample_label(sample, alpha_label_lookup):
    """Split one original sample label into treatment, replicate and timepoint.

    Args:
        sample: original column name, in one of the formats handled below.
        alpha_label_lookup: dict mapping an alpha-character sample label to its
            (treatment, rep) pair.

    Returns:
        dict with keys 'kinsler_sample_label', 'treatment', 'rep', 'timept'.

    Raises:
        KeyError: if an alpha-character label is not present in `alpha_label_lookup`.
        ValueError: if a replicate/timepoint component is not numeric.
    """
    sample_components = sample.split('-')
    #------------------------------
    if len(sample_components) == 3:
        # Sample label is in {treatment}-R{rep#}-T{timept#} -or- {treatment}-{timept#}-{rep#} format
        treatment = sample_components[0]
        rep       = _trailing_int(sample_components[1]) if 'R' in sample_components[1] else int(sample_components[2])
        timept    = _trailing_int(sample_components[2]) if 'T' in sample_components[2] else int(sample_components[1])
    elif len(sample_components) == 2:
        # Sample label is in {treatment}-{timept#} format (e.g. '1.4%-T0', '13-0');
        # there is no replicate component, so the replicate is recorded as 0.
        treatment = sample_components[0]
        rep       = 0
        timept    = _trailing_int(sample_components[1])
    else:
        # Sample label is in {treatment/rep label}{timept#} format (e.g. 'A0', 'AA3').
        # Replicate num needs to be interpreted from treatment/rep label,
        #   and label should be replaced with descriptive treatment label.
        # The timepoint is taken to be the single last character, because a longer digit
        # run could not be told apart from a label that itself ends in a digit.
        alpha_token = sample_components[0]
        label       = alpha_token[:-1]
        if label not in alpha_label_lookup:
            raise KeyError(f"Sample label {label!r} (from column {sample!r}) not found in sampleAlphaLabelInfo")
        treatment, rep = alpha_label_lookup[label]
        rep       = int(rep)
        timept    = int(alpha_token[-1])
    #------------------------------
    # Treatments with simply numeric labels are 'evolutionary conditions' (EC) treatments
    if treatment.isdigit():
        treatment = 'EC'+treatment
    # Treatments that read simply 1.6% should be edited to 1.6%Gluc
    if treatment == '1.6%':
        treatment = '1.6%Gluc'
    #------------------------------
    return {'kinsler_sample_label': sample, 'treatment': treatment, 'rep': rep, 'timept': timept}


# Build the alpha-label lookup once instead of filtering the table for every sample.
# setdefault keeps the first row for a repeated label, matching a first-match lookup.
alpha_label_lookup = {}
for alpha_label, alpha_treatment, alpha_rep in zip(sampleAlphaLabelInfo['sample_label'],
                                                   sampleAlphaLabelInfo['treatment'],
                                                   sampleAlphaLabelInfo['rep']):
    alpha_label_lookup.setdefault(alpha_label, (alpha_treatment, alpha_rep))

# One record per original sample column, in column order.
samples_info = [_parse_sample_label(sample, alpha_label_lookup) for sample in sample_columns_raw]
#------------------------------
samplesInfo = pd.DataFrame(samples_info)

##### Reconcile handling of zero timepoints across treatments/replicates

For most treatments, there is a single zero timepoint that is interpreted as shared by all replicates of that treatment (at this point, the common zero timepoint is listed as replicate 0).
However, some treatments have a separate zero timepoint for each replicate, and other treatments don't have a zero timepoint listed.
This step reconciles the different handling of zero timepoints across treatments by doing the following:
- If a treatment does not have a zero timepoint -- i.e., the first timepoint is T=1 for each replicate -- then shift the timepoint labels down such that the first timepoint is labeled timepoint T=0.
- If a treatment has a single, shared zero timepoint, expand the table such that there is a separate copy of that zero timepoint data for each replicate.

This results in all treatments having a zero timepoint listing for each replicate.

In [18]:
# For every treatment: (1) shift its timepoint labels so that the earliest one is 0, then
# (2) if exactly one of its rows is at timepoint 0, replace that row with one copy per replicate.
for treatment in samplesInfo['treatment'].unique():
    #------------------------------
    # If the treatment data is missing the zero timepoint, this will relabel the first timepoint as timepoint zero.
    # (Subtracting the minimum leaves the labels unchanged when the treatment already starts at 0.)
    treatmentInfo = samplesInfo[samplesInfo['treatment'] == treatment]
    treatment_timepts = treatmentInfo['timept'].values
    treatment_timepts_relabeled = treatment_timepts - treatment_timepts.min()  
    samplesInfo.loc[samplesInfo['treatment'] == treatment, 'timept'] = treatment_timepts_relabeled
    #------------------------------
    treatmentInfo = samplesInfo[samplesInfo['treatment'] == treatment] # reload in case timept vals shifted above
    # value_counts() is indexed by timepoint value, so [0] is the number of rows at timepoint 0.
    if(treatmentInfo['timept'].value_counts()[0] == 1):
        #------------------------------
        # There is a single zero timepoint that is the same for all replicates; expand the table.
        # The copies are numbered 1..max(rep), i.e. replicates are taken to be numbered consecutively from 1.
        treatment_numReps = int(treatmentInfo['rep'].max())
        treatmentInfo_timept0 = treatmentInfo[treatmentInfo['timept'] == 0]
        treatmentInfo_timept0_expanded = pd.concat([treatmentInfo_timept0] * treatment_numReps)
        treatmentInfo_timept0_expanded.loc[:, 'rep'] = range(1, treatment_numReps+1)
        # Drop the existing shared zero timept row for this treatment
        samplesInfo.drop(samplesInfo[ (samplesInfo['treatment'] == treatment) & (samplesInfo['timept'] == 0) ].index, inplace=True)
        # Append the dataframe with the expanded replicate zero timept rows
        # (the copies keep the index label of the row they were copied from, so the index
        #  of samplesInfo is no longer unique once a treatment with several replicates is expanded)
        samplesInfo = pd.concat([samplesInfo, treatmentInfo_timept0_expanded])


In [19]:
# Standardized per-sample label of the form {treatment}-R{rep}-T{timept}.
rep_suffix = '-R' + samplesInfo['rep'].astype(str)
timept_suffix = '-T' + samplesInfo['timept'].astype(str)
samplesInfo['sample'] = samplesInfo['treatment'] + rep_suffix + timept_suffix

In [20]:
# Standardized per-assay label of the form {treatment}-R{rep}
# (one assay = one unique serial transfer, i.e. one treatment/replicate pair).
assay_rep_suffix = '-R' + samplesInfo['rep'].astype(str)
samplesInfo['assay'] = samplesInfo['treatment'] + assay_rep_suffix

In [21]:
# Order rows by treatment (descending), then replicate and timepoint (ascending),
# and renumber the index 0..n-1 so it is unique again.
samplesInfo = samplesInfo.sort_values(
    by=['treatment', 'rep', 'timept'],
    ascending=[False, True, True],
    ignore_index=True,
)

##### Update the counts dataframes to reflect the changes from reconciliation of zero timepoints and standardized sample labels

In [22]:
# Start from column-less frames that share the row index of the existing count tables;
# the relabeled sample columns are filled in by the next cell.
rawCounts_updated = pd.DataFrame(index=rawCounts.index)
rawAncCounts_updated = pd.DataFrame(index=rawAncCounts.index)

In [23]:
# Map each standardized sample label to the original Kinsler column that holds its counts.
# If a standardized label occurs on more than one samplesInfo row, the first row wins,
# which is what a first-match lookup per label yields.
sample_label_map = samplesInfo.drop_duplicates(subset='sample', keep='first')
new_sample_labels = sample_label_map['sample'].tolist()
kinsler_sample_labels = sample_label_map['kinsler_sample_label'].tolist()

# Select every source column in one operation and relabel, instead of scanning samplesInfo
# once per sample and inserting columns one at a time (which fragments the frame).
# A source column may be selected more than once: a shared zero timepoint supplies the
# zero-timepoint column of every replicate of its treatment.
rawCounts_updated = rawCounts[kinsler_sample_labels].copy()
rawCounts_updated.columns = new_sample_labels
rawAncCounts_updated = rawAncCounts[kinsler_sample_labels].copy()
rawAncCounts_updated.columns = new_sample_labels

rawCounts = rawCounts_updated.copy()
rawAncCounts = rawAncCounts_updated.copy()

##### Reorder columns for cosmetic reasons

In [24]:
# Put the identifying labels first, followed by their parsed components.
samplesInfo_column_order = ['kinsler_sample_label', 'sample', 'assay', 'treatment', 'rep', 'timept']
samplesInfo = samplesInfo.loc[:, samplesInfo_column_order]

In [25]:
# Display the reconciled, relabeled sample table.
samplesInfo

Unnamed: 0,kinsler_sample_label,sample,assay,treatment,rep,timept
0,A0,M3-R1-T0,M3-R1,M3,1,0
1,A1,M3-R1-T1,M3-R1,M3,1,1
2,A2,M3-R1-T2,M3-R1,M3,1,2
3,A3,M3-R1-T3,M3-R1,M3,1,3
4,A4,M3-R1-T4,M3-R1,M3,1,4
...,...,...,...,...,...,...
349,O0,0.2MKCl-R1-T0,0.2MKCl-R1,0.2MKCl,1,0
350,O1,0.2MKCl-R1-T1,0.2MKCl-R1,0.2MKCl,1,1
351,O2,0.2MKCl-R1-T2,0.2MKCl-R1,0.2MKCl,1,2
352,O3,0.2MKCl-R1-T3,0.2MKCl-R1,0.2MKCl,1,3


## Filter samples by total counts

In [26]:
# Create a column in samplesInfo to indicate which samples are excluded from analysis.
# Every sample starts as included; the filtering cells below set the flag to True.
samplesInfo['exclude_from_analysis'] = False

##### Exclude treatments that have one or more samples (other than zero-timepoint samples) with total count below threshold

In [27]:
# Minimum total raw count (summed over variants) that a sample must reach;
# zero-timepoint samples are exempt from this threshold in the coverage filter.
MIN_SAMPLE_COUNT_TOTAL = 1e5

In [28]:
# Total raw count of each sample = sum of that sample's column over all variant rows.
# Computed once for all columns and then looked up per row, instead of re-filtering
# samplesInfo for every sample.
sample_count_totals = rawCounts.sum()

# Fail loudly if a listed sample has no counts column, rather than recording a NaN total.
samples_without_counts = sorted(set(samplesInfo['sample']) - set(sample_count_totals.index))
if samples_without_counts:
    raise KeyError(f"Samples listed in samplesInfo have no column in rawCounts: {samples_without_counts}")

samplesInfo['total_raw_count'] = samplesInfo['sample'].map(sample_count_totals).astype(float)

In [29]:
# A treatment passes only if each of its samples is either a zero-timepoint sample or has a
# total raw count of at least MIN_SAMPLE_COUNT_TOTAL. Passing treatments contribute all of
# their samples to good_coverage_samples; failing treatments are flagged for exclusion.
good_coverage_samples = []
low_coverage_treatments = []
for treatment_label, treatment_rows in samplesInfo.groupby('treatment', sort=False):
    is_zero_timept = treatment_rows['timept'].eq(0)
    meets_threshold = treatment_rows['total_raw_count'].ge(MIN_SAMPLE_COUNT_TOTAL)
    if (is_zero_timept | meets_threshold).all():
        good_coverage_samples.extend(treatment_rows['sample'].values)
    else:
        low_coverage_treatments.append(treatment_label)

# Flag every sample of the failing treatments in a single assignment.
samplesInfo.loc[samplesInfo['treatment'].isin(low_coverage_treatments), 'exclude_from_analysis'] = True

In [30]:
# Restrict both count tables to the columns of samples that passed the coverage filter.
rawCounts    = rawCounts[good_coverage_samples]
rawAncCounts = rawAncCounts[good_coverage_samples]

##### Exclude other abnormal treatments

In [31]:
# Treatments removed from the data set regardless of their sequencing coverage.
drop_treatments = ['1.7%', '1%Gly', '2ugFlu', '8.5uMGdA']
for drop_treatment in drop_treatments:
    # regex=False: the treatment names contain '.', which str.contains would otherwise
    # interpret as a regex wildcard instead of a literal character.
    rawCounts    = rawCounts.loc[:, ~rawCounts.columns.str.contains(drop_treatment, regex=False)]
    # Filter the ancestor counts the same way so both count tables keep identical columns,
    # as they do after the coverage filter.
    rawAncCounts = rawAncCounts.loc[:, ~rawAncCounts.columns.str.contains(drop_treatment, regex=False)]
    # NOTE(review): count columns are dropped by substring match on the sample label, whereas
    # samplesInfo rows are flagged by exact match on the treatment name — confirm these agree.
    samplesInfo.loc[samplesInfo['treatment'] == drop_treatment, 'exclude_from_analysis'] = True

##### Drop excluded samples from the data set

In [32]:
# Keep only the samples not flagged for exclusion, then discard the flag column itself.
samplesInfo = (
    samplesInfo[~samplesInfo['exclude_from_analysis']]
    .drop(columns=['exclude_from_analysis'])
)

In [33]:
# List the treatments that remain after filtering, then show how many there are.
print(samplesInfo['treatment'].unique())
samplesInfo['treatment'].nunique()

['M3' 'EC3' 'EC23' 'EC21' 'EC20' 'EC18' 'EC13' 'Baffled' '1.8%Gluc'
 '1.6%Gluc' '1.5%Suc1%Raf' '1.4%Gluc' '0.5MKCl' '0.5%Raf' '0.2MKCl']


15

In [34]:
# List the assays that remain after filtering, then show how many there are.
print(samplesInfo['assay'].unique())
samplesInfo['assay'].nunique()

['M3-R1' 'M3-R2' 'M3-R3' 'M3-R4' 'EC3-R1' 'EC3-R2' 'EC3-R3' 'EC23-R1'
 'EC23-R2' 'EC23-R3' 'EC21-R1' 'EC21-R2' 'EC21-R3' 'EC20-R1' 'EC20-R2'
 'EC20-R3' 'EC18-R1' 'EC18-R2' 'EC18-R3' 'EC13-R1' 'EC13-R2' 'EC13-R3'
 'Baffled-R1' 'Baffled-R2' '1.8%Gluc-R1' '1.8%Gluc-R2' '1.6%Gluc-R1'
 '1.6%Gluc-R2' '1.5%Suc1%Raf-R1' '1.4%Gluc-R1' '1.4%Gluc-R2' '0.5MKCl-R1'
 '0.5%Raf-R1' '0.5%Raf-R2' '0.2MKCl-R1']


35

------

## Get barcode sequence along with GC-content for each variant

In [35]:
# Load the table that maps each barcode ID (BCID) to its barcode sequences.
barcodesInfo = pd.read_csv(f'{rawDataDir}Kinsler_et_al_2020_BCID_to_barcode_sequence.csv')
barcodesInfo

Unnamed: 0,barcode,BCID,barcode_fasta,barcode_lowcomplexity,barcode_highcomplexity,set
0,CGCTAAAGACATAATGTGGTTTGTTG_AACGTAAGTCTAAACGTAT...,53,AACGTAAGTCTAAACGTATTTGAGAACGCTAAAGACATAATGTGGT...,CGCTAAAGACATAATGTGGTTTGTTG,AACGTAAGTCTAAACGTATTTGAGAA,Test
1,CGCTAAAGACATAATGTGGTTTGTTG_TCCATAATTGGGAATTGGA...,151,TCCATAATTGGGAATTGGATTTTGGCCGCTAAAGACATAATGTGGT...,CGCTAAAGACATAATGTGGTTTGTTG,TCCATAATTGGGAATTGGATTTTGGC,Train
2,CGCTAAAGACATAATGTGGTTTGTTG_TTAATAAACAAGAAACGGG...,262,TTAATAAACAAGAAACGGGTTGTGGACGCTAAAGACATAATGTGGT...,CGCTAAAGACATAATGTGGTTTGTTG,TTAATAAACAAGAAACGGGTTGTGGA,
3,CGCTAAAGACATAATGTGGTTTGTTG_TCACTAAGAAGGAAGTCCA...,273,TCACTAAGAAGGAAGTCCATTATTACCGCTAAAGACATAATGTGGT...,CGCTAAAGACATAATGTGGTTTGTTG,TCACTAAGAAGGAAGTCCATTATTAC,Test
4,CGCTAAAGACATAATGTGGTTTGTTG_AGCCAAAAAAGTAATATTT...,323,AGCCAAAAAAGTAATATTTTTGACCACGCTAAAGACATAATGTGGT...,CGCTAAAGACATAATGTGGTTTGTTG,AGCCAAAAAAGTAATATTTTTGACCA,
...,...,...,...,...,...,...
526,CGCTAAAGACATAATGTGGTTTGTTG_ACAAGAAGTTGTAAAGGAC...,113483,ACAAGAAGTTGTAAAGGACTTTGGGGCGCTAAAGACATAATGTGGT...,CGCTAAAGACATAATGTGGTTTGTTG,ACAAGAAGTTGTAAAGGACTTTGGGG,
527,CGCTAAAGACATAATGTGGTTTGTTG_ATCTAAAGGTCCAAAAGTA...,134852,ATCTAAAGGTCCAAAAGTATTGTTAGCGCTAAAGACATAATGTGGT...,CGCTAAAGACATAATGTGGTTTGTTG,ATCTAAAGGTCCAAAAGTATTGTTAG,
528,CGCTAAAGACATAATGTGGTTTGTTG_CTCGTAATAAGAAAAATAG...,263665,CTCGTAATAAGAAAAATAGTTGAAGTCGCTAAAGACATAATGTGGT...,CGCTAAAGACATAATGTGGTTTGTTG,CTCGTAATAAGAAAAATAGTTGAAGT,
529,CGCTAAAGACATAATGTGGTTTGTTG_ATAAAAACAAGAATGCCAT...,316954,ATAAAAACAAGAATGCCATTAATATACGCTAAAGACATAATGTGGT...,CGCTAAAGACATAATGTGGTTTGTTG,ATAAAAACAAGAATGCCATTAATATA,


In [36]:
# Annotate each variant with its high-complexity barcode sequence and that sequence's
# GC ratio, i.e. (number of 'G' + number of 'C') / sequence length.
#
# The BCID -> sequence lookup is built once instead of scanning barcodesInfo for every
# variant. keep='first' reproduces the choice of the first matching row when a BCID
# appears more than once.
barcodesInfo_firstPerBCID = barcodesInfo.drop_duplicates(subset='BCID', keep='first')
barcodeSeq_by_BCID = dict(zip(barcodesInfo_firstPerBCID['BCID'],
                              barcodesInfo_firstPerBCID['barcode_highcomplexity']))

variants_barcodes = []
variants_GCratios = []
for variant_barcodeID in variantsInfo['barcode']:
    variant_barcodeSeq = barcodeSeq_by_BCID.get(variant_barcodeID)
    if isinstance(variant_barcodeSeq, str) and len(variant_barcodeSeq) > 0:
        variant_barcodeGCratio = (variant_barcodeSeq.count('G') + variant_barcodeSeq.count('C'))/len(variant_barcodeSeq)
    else:
        # No usable sequence: the barcode ID is absent from barcodesInfo, or its sequence
        # entry is missing or empty. Record both fields as missing rather than raising.
        variant_barcodeSeq     = None
        variant_barcodeGCratio = None
    variants_barcodes.append(variant_barcodeSeq)
    variants_GCratios.append(variant_barcodeGCratio)

variantsInfo['barcode_sequence'] = variants_barcodes
variantsInfo['barcode_GCratio']  = variants_GCratios
variantsInfo

Unnamed: 0,additional_muts,barcode,class,gene,ploidy,type,barcode_sequence,barcode_GCratio
0,TIP1-upstream_point_variant; YKR012C-upstream_...,53,Diploid,Diploid,Diploid,Diploid,AACGTAAGTCTAAACGTATTTGAGAA,0.307692
1,SEH1-missense_variant; ZIP1-missense_variant; ...,151,PKA,IRA1,Haploid,stop_gained,TCCATAATTGGGAATTGGATTTTGGC,0.384615
2,NotSequenced,262,NotSequenced,NotSequenced,NotSequenced,NotSequenced,TTAATAAACAAGAAACGGGTTGTGGA,0.346154
3,,273,PKA,IRA1,Haploid,frameshift_variant,TCACTAAGAAGGAAGTCCATTATTAC,0.346154
4,NotSequenced,323,NotSequenced,NotSequenced,NotSequenced,NotSequenced,AGCCAAAAAAGTAATATTTTTGACCA,0.269231
...,...,...,...,...,...,...,...,...
544,,9000106,PKA,IRA1,Haploid,stop_gained,,
545,,9000107,PKA,IRA1,Haploid,stop_gained,,
546,,9000108,PKA,IRA1,Haploid,stop_gained,,
547,,9000109,PKA,IRA1,Haploid,stop_gained,,


------

## Save processed data to file

In [37]:
# Write the filtered sample metadata to samples.csv in the output directory.
# index=False leaves the DataFrame index out of the file.
samplesInfo.to_csv(outdir+'samples.csv', index=False)
display(samplesInfo)

Unnamed: 0,kinsler_sample_label,sample,assay,treatment,rep,timept,total_raw_count
0,A0,M3-R1-T0,M3-R1,M3,1,0,3347507.0
1,A1,M3-R1-T1,M3-R1,M3,1,1,4885816.0
2,A2,M3-R1-T2,M3-R1,M3,1,2,8910028.0
3,A3,M3-R1-T3,M3-R1,M3,1,3,1036411.0
4,A4,M3-R1-T4,M3-R1,M3,1,4,5206525.0
...,...,...,...,...,...,...,...
349,O0,0.2MKCl-R1-T0,0.2MKCl-R1,0.2MKCl,1,0,4797684.0
350,O1,0.2MKCl-R1-T1,0.2MKCl-R1,0.2MKCl,1,1,2930331.0
351,O2,0.2MKCl-R1-T2,0.2MKCl-R1,0.2MKCl,1,2,2883775.0
352,O3,0.2MKCl-R1-T3,0.2MKCl-R1,0.2MKCl,1,3,296368.0


In [38]:
# Write the variant metadata, including the barcode sequence and GC ratio columns,
# to variants.csv in the output directory. index=False leaves the DataFrame index out of the file.
variantsInfo.to_csv(outdir+'variants.csv', index=False)
variantsInfo

Unnamed: 0,additional_muts,barcode,class,gene,ploidy,type,barcode_sequence,barcode_GCratio
0,TIP1-upstream_point_variant; YKR012C-upstream_...,53,Diploid,Diploid,Diploid,Diploid,AACGTAAGTCTAAACGTATTTGAGAA,0.307692
1,SEH1-missense_variant; ZIP1-missense_variant; ...,151,PKA,IRA1,Haploid,stop_gained,TCCATAATTGGGAATTGGATTTTGGC,0.384615
2,NotSequenced,262,NotSequenced,NotSequenced,NotSequenced,NotSequenced,TTAATAAACAAGAAACGGGTTGTGGA,0.346154
3,,273,PKA,IRA1,Haploid,frameshift_variant,TCACTAAGAAGGAAGTCCATTATTAC,0.346154
4,NotSequenced,323,NotSequenced,NotSequenced,NotSequenced,NotSequenced,AGCCAAAAAAGTAATATTTTTGACCA,0.269231
...,...,...,...,...,...,...,...,...
544,,9000106,PKA,IRA1,Haploid,stop_gained,,
545,,9000107,PKA,IRA1,Haploid,stop_gained,,
546,,9000108,PKA,IRA1,Haploid,stop_gained,,
547,,9000109,PKA,IRA1,Haploid,stop_gained,,


In [39]:
# Write the filtered barcode counts (one column per retained sample) to counts.csv.
# index=False leaves the row index out of the file, so rows are identified by position only.
# NOTE(review): presumably rows line up one-to-one with the rows of variants.csv — confirm.
rawCounts.to_csv(outdir+'counts.csv', index=False)
rawCounts

Unnamed: 0,M3-R1-T0,M3-R1-T1,M3-R1-T2,M3-R1-T3,M3-R1-T4,M3-R2-T0,M3-R2-T1,M3-R2-T2,M3-R2-T3,M3-R2-T4,M3-R3-T0,M3-R3-T1,M3-R3-T2,M3-R3-T3,M3-R3-T4,M3-R4-T0,M3-R4-T1,M3-R4-T2,M3-R4-T3,M3-R4-T4,EC3-R1-T0,EC3-R1-T1,EC3-R1-T2,EC3-R1-T3,EC3-R1-T4,EC3-R2-T0,EC3-R2-T1,EC3-R2-T2,EC3-R2-T3,EC3-R2-T4,EC3-R3-T0,EC3-R3-T1,EC3-R3-T2,EC3-R3-T3,EC3-R3-T4,EC23-R1-T0,EC23-R1-T1,EC23-R1-T2,EC23-R1-T3,EC23-R1-T4,EC23-R2-T0,EC23-R2-T1,EC23-R2-T2,EC23-R2-T3,EC23-R2-T4,EC23-R3-T0,EC23-R3-T1,EC23-R3-T2,EC23-R3-T3,EC23-R3-T4,EC21-R1-T0,EC21-R1-T1,EC21-R1-T2,EC21-R1-T3,EC21-R1-T4,EC21-R2-T0,EC21-R2-T1,EC21-R2-T2,EC21-R2-T3,EC21-R2-T4,EC21-R3-T0,EC21-R3-T1,EC21-R3-T2,EC21-R3-T3,EC21-R3-T4,EC20-R1-T0,EC20-R1-T1,EC20-R1-T2,EC20-R1-T3,EC20-R2-T0,EC20-R2-T1,EC20-R2-T2,EC20-R2-T3,EC20-R3-T0,EC20-R3-T1,EC20-R3-T2,EC20-R3-T3,EC18-R1-T0,EC18-R1-T1,EC18-R1-T2,EC18-R1-T3,EC18-R2-T0,EC18-R2-T1,EC18-R2-T2,EC18-R2-T3,EC18-R3-T0,EC18-R3-T1,EC18-R3-T2,EC18-R3-T3,EC13-R1-T0,EC13-R1-T1,EC13-R1-T2,EC13-R1-T3,EC13-R1-T4,EC13-R2-T0,EC13-R2-T1,EC13-R2-T2,EC13-R2-T3,EC13-R2-T4,EC13-R3-T0,EC13-R3-T1,EC13-R3-T2,EC13-R3-T3,EC13-R3-T4,Baffled-R1-T0,Baffled-R1-T1,Baffled-R1-T2,Baffled-R1-T3,Baffled-R1-T4,Baffled-R2-T0,Baffled-R2-T1,Baffled-R2-T2,Baffled-R2-T3,Baffled-R2-T4,1.8%Gluc-R1-T0,1.8%Gluc-R1-T1,1.8%Gluc-R1-T2,1.8%Gluc-R1-T3,1.8%Gluc-R1-T4,1.8%Gluc-R2-T0,1.8%Gluc-R2-T1,1.8%Gluc-R2-T2,1.8%Gluc-R2-T3,1.8%Gluc-R2-T4,1.6%Gluc-R1-T0,1.6%Gluc-R1-T1,1.6%Gluc-R1-T2,1.6%Gluc-R1-T3,1.6%Gluc-R1-T4,1.6%Gluc-R2-T0,1.6%Gluc-R2-T1,1.6%Gluc-R2-T2,1.6%Gluc-R2-T3,1.6%Gluc-R2-T4,1.5%Suc1%Raf-R1-T0,1.5%Suc1%Raf-R1-T1,1.5%Suc1%Raf-R1-T2,1.5%Suc1%Raf-R1-T3,1.5%Suc1%Raf-R1-T4,1.4%Gluc-R1-T0,1.4%Gluc-R1-T1,1.4%Gluc-R1-T2,1.4%Gluc-R1-T3,1.4%Gluc-R1-T4,1.4%Gluc-R2-T0,1.4%Gluc-R2-T1,1.4%Gluc-R2-T2,1.4%Gluc-R2-T3,1.4%Gluc-R2-T4,0.5MKCl-R1-T0,0.5MKCl-R1-T1,0.5MKCl-R1-T2,0.5MKCl-R1-T3,0.5MKCl-R1-T4,0.5%Raf-R1-T0,0.5%Raf-R1-T1,0.5%Raf-R1-T2,0.5%Raf-R1-T3,0.5%Raf-R1-T4,0.5%Raf-R2-T0,0.5%Raf-R2-T1,0.5%Raf-R2-T2,0.5%Raf-R2-T3,0.5%Raf-R2-T4,0.2MK
Cl-R1-T0,0.2MKCl-R1-T1,0.2MKCl-R1-T2,0.2MKCl-R1-T3,0.2MKCl-R1-T4
0,5430.0,6191.0,8361.0,574.0,2148.0,18106.0,2775.0,17050.0,10057.0,18295.0,2367.0,8268.0,15562.0,1846.0,11209.0,2722.0,5916.0,3468.0,3720.0,2945.0,111.0,244.0,183.0,81.0,101.0,111.0,376.0,164.0,95.0,69.0,111.0,223.0,177.0,104.0,79.0,2786.0,2019.0,2104.0,1529.0,1172.0,2786.0,2406.0,2892.0,2292.0,1750.0,2786.0,1856.0,2817.0,1772.0,1686.0,3276.0,962.0,1883.0,985.0,1025.0,3276.0,760.0,574.0,1144.0,1070.0,3276.0,1889.0,1691.0,1945.0,1302.0,773.0,2785.0,1855.0,954.0,2336.0,2493.0,1569.0,1495.0,2192.0,2199.0,1775.0,1990.0,90.0,40.0,365.0,486.0,133.0,65.0,102.0,516.0,122.0,481.0,574.0,380.0,2.0,512.0,594.0,319.0,51.0,2.0,700.0,403.0,299.0,70.0,2.0,594.0,104.0,361.0,59.0,5251.0,11213.0,3158.0,908.0,4044.0,3315.0,8696.0,4901.0,7853.0,4079.0,6639.0,6157.0,2544.0,380.0,466.0,1539.0,4670.0,2662.0,363.0,341.0,56.0,121.0,113.0,120.0,245.0,56.0,102.0,212.0,146.0,116.0,3758.0,1879.0,442.0,299.0,607.0,1065.0,5289.0,1933.0,249.0,495.0,1239.0,4131.0,1553.0,150.0,235.0,7391.0,11230.0,6963.0,1183.0,1828.0,5659.0,2713.0,3435.0,198.0,236.0,5765.0,3076.0,1751.0,167.0,193.0,8704.0,6361.0,7231.0,780.0,1934.0
1,102665.0,165765.0,422624.0,62916.0,367191.0,200215.0,66810.0,915046.0,1357544.0,4230603.0,36142.0,224264.0,751263.0,187575.0,1956473.0,23585.0,121243.0,138770.0,293532.0,443175.0,48524.0,54722.0,102971.0,101137.0,217453.0,48524.0,88884.0,127795.0,187235.0,209290.0,48524.0,49070.0,95476.0,150720.0,229493.0,16449.0,18428.0,40070.0,57945.0,83239.0,16449.0,21698.0,42476.0,64251.0,93228.0,16449.0,16341.0,40354.0,43842.0,73539.0,23186.0,14470.0,50457.0,47319.0,79224.0,23186.0,11899.0,16668.0,49533.0,76326.0,23186.0,27178.0,36587.0,58512.0,70336.0,5215.0,26566.0,35823.0,34463.0,14656.0,24995.0,31769.0,49171.0,12802.0,21013.0,25010.0,41128.0,22844.0,6260.0,179028.0,491772.0,14132.0,14745.0,49741.0,661420.0,7847.0,66171.0,171156.0,367527.0,2625.0,43106.0,160273.0,399304.0,223043.0,2625.0,54385.0,107883.0,308884.0,241795.0,2625.0,42068.0,20536.0,267212.0,145296.0,34768.0,134512.0,75757.0,39864.0,404244.0,24826.0,123926.0,128460.0,407379.0,497347.0,69494.0,107559.0,89549.0,20215.0,55134.0,16720.0,94363.0,105430.0,23514.0,54599.0,975.0,5096.0,13055.0,30405.0,141123.0,975.0,3330.0,17235.0,32195.0,65247.0,67840.0,59385.0,17611.0,16887.0,31111.0,14677.0,136115.0,103853.0,25061.0,105103.0,14822.0,123119.0,93059.0,15122.0,53362.0,81043.0,5267.0,237.0,46.0,46.0,91956.0,98775.0,289984.0,30196.0,67673.0,90000.0,122581.0,173982.0,31380.0,56512.0,94463.0,59101.0,47035.0,3860.0,7283.0
2,4357.0,4856.0,6961.0,523.0,1654.0,13083.0,2054.0,11295.0,7406.0,12487.0,1697.0,5251.0,11435.0,1357.0,8488.0,2214.0,4522.0,2632.0,2995.0,2326.0,677.0,1148.0,751.0,280.0,214.0,677.0,1873.0,733.0,443.0,205.0,677.0,1199.0,703.0,369.0,233.0,2595.0,1794.0,1776.0,1117.0,935.0,2595.0,1867.0,2385.0,1902.0,1350.0,2595.0,1674.0,2276.0,1406.0,1448.0,2938.0,701.0,1422.0,680.0,897.0,2938.0,437.0,415.0,761.0,887.0,2938.0,1650.0,1471.0,1614.0,1146.0,562.0,2223.0,1335.0,542.0,2189.0,2185.0,1225.0,1056.0,2077.0,1772.0,1724.0,1680.0,466.0,149.0,1429.0,1460.0,681.0,331.0,417.0,1711.0,613.0,2356.0,2165.0,1312.0,15.0,2909.0,2582.0,1524.0,195.0,15.0,2977.0,1640.0,1028.0,161.0,15.0,2776.0,442.0,1036.0,209.0,4169.0,8543.0,2355.0,798.0,2852.0,2569.0,6808.0,3839.0,6019.0,2907.0,4786.0,4727.0,1970.0,306.0,440.0,1395.0,3453.0,2171.0,312.0,272.0,160.0,421.0,267.0,254.0,419.0,160.0,261.0,508.0,288.0,236.0,2810.0,1647.0,379.0,267.0,462.0,877.0,3970.0,1650.0,195.0,355.0,966.0,3238.0,1243.0,129.0,218.0,5370.0,8196.0,5068.0,1016.0,1497.0,4272.0,2120.0,2587.0,147.0,198.0,4183.0,2314.0,1222.0,124.0,156.0,6215.0,4498.0,5200.0,605.0,1666.0
3,31310.0,50871.0,111168.0,12981.0,64484.0,85099.0,23085.0,272459.0,289504.0,759227.0,13032.0,86444.0,230444.0,45782.0,390609.0,11648.0,42988.0,42505.0,73141.0,91313.0,17986.0,44047.0,50323.0,33279.0,61405.0,17986.0,69906.0,62321.0,63313.0,51856.0,17986.0,40006.0,48286.0,54598.0,63709.0,7478.0,7283.0,13690.0,14120.0,19420.0,7478.0,8379.0,13810.0,17528.0,20655.0,7478.0,6361.0,13057.0,12541.0,19372.0,9515.0,4008.0,14116.0,9158.0,17543.0,9515.0,2997.0,3846.0,10795.0,17963.0,9515.0,8336.0,9536.0,14683.0,16884.0,1617.0,9278.0,8884.0,6847.0,4669.0,8024.0,8257.0,9549.0,4572.0,6011.0,7533.0,10797.0,8375.0,3659.0,110050.0,206957.0,9166.0,10151.0,27822.0,240271.0,5969.0,57614.0,106658.0,156816.0,710.0,36510.0,92924.0,149876.0,45139.0,710.0,32361.0,59293.0,114393.0,50325.0,710.0,30864.0,14493.0,102630.0,32077.0,18407.0,67806.0,33209.0,16715.0,145974.0,13585.0,64492.0,64553.0,193034.0,213798.0,25108.0,35574.0,26487.0,4974.0,10050.0,6912.0,30660.0,29291.0,5363.0,9120.0,842.0,3180.0,4374.0,5983.0,15873.0,842.0,2385.0,6422.0,6826.0,8019.0,23773.0,17469.0,4596.0,4148.0,6269.0,5623.0,43059.0,28458.0,5785.0,18732.0,6046.0,39863.0,26022.0,3529.0,9660.0,27004.0,10366.0,1853.0,97.0,54.0,32593.0,28351.0,66414.0,5804.0,9854.0,34029.0,35398.0,40071.0,6373.0,9042.0,33758.0,25329.0,26904.0,2371.0,5193.0
4,16656.0,30016.0,61338.0,5636.0,26463.0,48423.0,11163.0,119669.0,134225.0,307002.0,5961.0,36974.0,102814.0,19134.0,156159.0,6281.0,25379.0,19195.0,28529.0,32723.0,8509.0,21699.0,30602.0,20805.0,34762.0,8509.0,30903.0,30253.0,34032.0,27915.0,8509.0,27190.0,41487.0,35132.0,42078.0,10128.0,10771.0,21947.0,19119.0,24951.0,10128.0,13978.0,21518.0,22756.0,25391.0,10128.0,10719.0,19548.0,19028.0,21651.0,12496.0,6132.0,20009.0,13641.0,22004.0,12496.0,4184.0,5851.0,17577.0,25565.0,12496.0,14282.0,17392.0,23714.0,18469.0,2401.0,15875.0,17416.0,8853.0,9021.0,15272.0,12063.0,11886.0,9131.0,11231.0,10482.0,16967.0,5276.0,1810.0,44467.0,98782.0,6048.0,6676.0,14450.0,123080.0,3852.0,23651.0,58251.0,70724.0,373.0,21956.0,46277.0,79897.0,25842.0,373.0,23483.0,33707.0,57846.0,25732.0,373.0,22294.0,7626.0,56967.0,14473.0,11115.0,50235.0,22132.0,9878.0,82992.0,7641.0,47021.0,45894.0,131776.0,150927.0,15516.0,22488.0,11867.0,1657.0,3783.0,3364.0,17533.0,12238.0,2012.0,3159.0,523.0,2047.0,3268.0,4498.0,13125.0,523.0,1330.0,4385.0,4424.0,5924.0,12230.0,6691.0,1490.0,1241.0,1698.0,2977.0,24466.0,14523.0,1956.0,9075.0,3251.0,24428.0,12904.0,1217.0,3397.0,17811.0,13880.0,3763.0,255.0,284.0,18681.0,13604.0,27372.0,1546.0,2836.0,19814.0,17285.0,14678.0,1765.0,2547.0,21049.0,14632.0,14957.0,1163.0,3159.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,2742.0,5721.0,13094.0,1444.0,9658.0,5922.0,1915.0,18682.0,26408.0,61689.0,716.0,4850.0,11697.0,2683.0,21182.0,604.0,3423.0,2537.0,4702.0,4900.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,705.0,1890.0,628.0,243.0,2449.0,356.0,1855.0,1138.0,2678.0,2198.0,2617.0,2920.0,1399.0,204.0,623.0,424.0,2361.0,1656.0,218.0,598.0,0.0,4.0,4.0,2.0,0.0,0.0,7.0,4.0,4.0,3.0,4231.0,2913.0,482.0,376.0,600.0,323.0,4772.0,3058.0,427.0,2371.0,391.0,4812.0,3003.0,346.0,1157.0,3207.0,24.0,2.0,1.0,0.0,3814.0,3415.0,8156.0,435.0,996.0,3124.0,3676.0,3439.0,430.0,640.0,4222.0,669.0,149.0,6.0,1.0
545,1952.0,4932.0,9663.0,821.0,8528.0,4097.0,1263.0,13183.0,14277.0,33577.0,510.0,4356.0,6071.0,1651.0,7489.0,798.0,3536.0,2261.0,3046.0,3010.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,702.0,1475.0,856.0,170.0,1288.0,346.0,1791.0,871.0,1308.0,846.0,2594.0,3322.0,1762.0,399.0,945.0,521.0,2350.0,2023.0,358.0,676.0,4.0,4.0,3.0,0.0,2.0,4.0,1.0,1.0,0.0,1.0,4442.0,3042.0,876.0,670.0,951.0,442.0,5216.0,4282.0,732.0,2500.0,533.0,5348.0,3651.0,556.0,1664.0,2777.0,4.0,1.0,1.0,0.0,4001.0,3732.0,6003.0,628.0,1488.0,3266.0,4113.0,3531.0,711.0,1109.0,3628.0,624.0,142.0,3.0,2.0
546,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
547,2256.0,4515.0,11090.0,975.0,6747.0,3880.0,1278.0,13314.0,19459.0,41518.0,522.0,3595.0,9610.0,2260.0,16705.0,495.0,2361.0,1691.0,3294.0,3389.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,518.0,1444.0,516.0,172.0,1634.0,269.0,1380.0,710.0,1878.0,1480.0,1796.0,2101.0,1093.0,183.0,548.0,308.0,1718.0,1430.0,214.0,472.0,0.0,3.0,4.0,0.0,1.0,0.0,0.0,0.0,3.0,1.0,2695.0,2054.0,394.0,341.0,479.0,241.0,3202.0,2575.0,345.0,1651.0,279.0,3641.0,2334.0,298.0,944.0,2152.0,11.0,1.0,0.0,0.0,2648.0,2670.0,6360.0,361.0,876.0,2175.0,2604.0,2461.0,302.0,518.0,2577.0,454.0,97.0,4.0,1.0
