In [1]:
import pandas as pd

In [2]:
# NOTE(review): absolute, user-specific path -- point this at your own copy of
# the dashboard export before running.
filename = '/Users/jamesmcgann/Downloads/Dashboard 1_20221025_142800.csv'

df = pd.read_csv(filename)

# Filter: spatial (non-bulk) ATAC-seq runs that have a Run Id, where BOTH
# chips have 25 um channels and are designated 'Production'.
# The trailing "# N runs" notes are row counts recorded by the original author
# after each successive condition; they may be stale for a newer export.
df = df[
    (df['Workflow'] == 'ATAC-seq')
    # Explicit null check instead of using the string column itself as a mask
    # (read_csv turns empty cells into NaN, so this keeps the same rows).
    & df['Run Id'].notna()
    & (df['Bulk?'] != True)  # 413 runs
    & (df['Chip A ROI Channel Width (um)'] == 25)  # 308 runs
    & (df['Chip B ROI Channel Width (um)'] == 25)  # 284 runs
    & (df['Chip B Designation'] == 'Production')  # 191 runs
    & (df['Chip A Designation'] == 'Production')  # 189 runs
]

columns = [
    'Run Id',
    'Flow A Leak',
    'Flow A Blocks',
    'Flow A Crosses',
    'Flow B Leak',
    'Flow B Blocks',
    'Flow B Crosses',
]
# Keep only relevant columns. Take an explicit copy: later cells add new
# columns to this frame, and assigning into a filtered slice of the original
# frame triggers pandas' SettingWithCopyWarning.
df = df[columns].copy()
df.head()


Unnamed: 0,Run Id,Flow A Leak,Flow A Blocks,Flow A Crosses,Flow B Leak,Flow B Blocks,Flow B Crosses
78,D00974,,,,,,
80,D00972,,,,,,
81,D00971,,,,,,
82,D00970,,,,,,
85,D00967,,,,,,


In [3]:
# Define functions for making new columns

def count_channels(flow):
    """Count the channel entries in a comma-separated channel string.

    Parameters
    ----------
    flow : object
        A cell value such as ``"26, 27, 28"``. Any value that is not a ``str``
        (for example the float NaN pandas uses for an empty cell, or ``None``)
        is treated as "no channels".

    Returns
    -------
    int
        Number of non-blank comma-separated entries; 0 for non-strings and
        for empty or whitespace-only strings.
    """
    # Missing cells arrive as float NaN, so only strings can carry channels.
    if not isinstance(flow, str):
        return 0
    # Skip blank tokens so a trailing comma or stray whitespace is not
    # miscounted as an extra channel.
    return sum(1 for token in flow.split(',') if token.strip())
    
                   
def get_bad_channels(df, chip, n_channels=50):
    """Return the bad channels of one flow of one run as a ", "-joined string.

    Parameters
    ----------
    df : mapping-like
        Despite the name, this is a single row (it is called through
        ``DataFrame.apply(..., axis=1)``). It must be indexable by the column
        names ``'Flow {chip} Leak'``, ``'Flow {chip} Blocks'`` and
        ``'Flow {chip} Crosses'``.
    chip : str
        Flow/chip label used to build the column names, e.g. ``'A'`` or ``'B'``.
    n_channels : int, optional
        Number of channels reported as bad when the flow leaked (default 50).

    Returns
    -------
    str
        If the flow leaked: every channel ``"1, 2, ..., n_channels"``.
        Otherwise: the unique blocked and crossed channels, sorted numerically
        (non-numeric tokens, if any, sort after the numbers). Empty string
        when there are none.
    """
    # Compare with `== True` rather than relying on truthiness: a missing
    # value is NaN, which is truthy but must not count as a leak.
    if df[f'Flow {chip} Leak'] == True:  # noqa: E712
        # If the flow leaked, consider all channels bad.
        return ", ".join(str(channel) for channel in range(1, n_channels + 1))

    # Otherwise combine the unique entries of blocks and crosses.
    bad = set()
    for fail in ('Blocks', 'Crosses'):
        value = df[f'Flow {chip} {fail}']
        # Non-string cells (NaN) carry no channels.
        if isinstance(value, str):
            bad.update(
                token.strip() for token in value.split(',') if token.strip()
            )

    def _channel_order(token):
        # Numeric channels first, in numeric order; anything else afterwards.
        if token.isdecimal():
            return (0, int(token), '')
        return (1, 0, token)

    # Sort so the output is deterministic (set iteration order is not).
    return ", ".join(sorted(bad, key=_channel_order))
    

In [4]:
# Add columns with counts of chip blocks and crosses to df
for chip in ['A', 'B']:
    for fail in ['Blocks', 'Crosses']:
        col_name = f'Flow {chip} {fail}'
        df[f'{col_name} count'] = df[col_name].map(count_channels)

# Add columns with chip bad channels and bad channel counts
for chip in ['A', 'B']:
    # `args` must be a tuple: `(chip)` is just the string itself, which only
    # worked because each chip label happens to be a single character.
    df[f'{chip} bad channels'] = df.apply(get_bad_channels, args=(chip,), axis=1)
    df[f'{chip} bad channels count'] = df[f'{chip} bad channels'].map(count_channels)

# Save the augmented table next to the notebook, then preview it.
df.to_csv('request_jg_102422.csv')
df.head(10)


Unnamed: 0,Run Id,Flow A Leak,Flow A Blocks,Flow A Crosses,Flow B Leak,Flow B Blocks,Flow B Crosses,Flow A Blocks count,Flow A Crosses count,Flow B Blocks count,Flow B Crosses count,A bad channels,A bad channels count,B bad channels,B bad channels count
78,D00974,,,,,,,0,0,0,0,,0,,0
80,D00972,,,,,,,0,0,0,0,,0,,0
81,D00971,,,,,,,0,0,0,0,,0,,0
82,D00970,,,,,,,0,0,0,0,,0,,0
85,D00967,,,,,,,0,0,0,0,,0,,0
86,D00966,,,,,,,0,0,0,0,,0,,0
89,D00963,,,"26, 27, 28",,26.0,,0,3,1,0,"26, 27, 28",3,26,1
90,D00962,,,,,,,0,0,0,0,,0,,0
91,D00961,,,,True,,,0,0,0,0,,0,"1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...",50
92,D00960,,,,,,,0,0,0,0,,0,,0


In [5]:
# Summary statistics over the filtered runs

# Each run has two flows (A and B); each flow is treated as having 50 channels
# (the same figure used when a leaked flow is marked as entirely bad).
CHIPS_PER_RUN = 2
CHANNELS_PER_CHIP = 50

# Runs and channels
number_runs = len(df)
total_channels = CHIPS_PER_RUN * CHANNELS_PER_CHIP * number_runs

# Blocks
a_blocks = df['Flow A Blocks count'].sum()
b_blocks = df['Flow B Blocks count'].sum()
total_blocks = a_blocks + b_blocks

# Crosses
a_crosses = df['Flow A Crosses count'].sum()
b_crosses = df['Flow B Crosses count'].sum()
total_crosses = a_crosses + b_crosses

# Leaks (per flow): .sum() skips NaN and counts each True as 1
a_leaks = df['Flow A Leak'].sum()
b_leaks = df['Flow B Leak'].sum()
total_leaks = a_leaks + b_leaks
leaked_channels = CHANNELS_PER_CHIP * total_leaks

# Leaks (per run): a run that leaked on BOTH flows must be counted once, so
# count rows where either flow leaked instead of adding the per-flow totals.
runs_with_leak = ((df['Flow A Leak'] == True) | (df['Flow B Leak'] == True)).sum()

# All fails (leaked flows contribute all of their channels here)
a_fails = df['A bad channels count'].sum()
b_fails = df['B bad channels count'].sum()
total_fails = a_fails + b_fails

normal_channels = total_channels - total_fails

print(f"In {number_runs} spatial ATAC-seq runs:")
print(f"{total_blocks/total_channels*100:.2f}% of channels had blocks")
print(f"{total_crosses/total_channels*100:.2f}% of channels had crosses")
print(f"{runs_with_leak/number_runs*100:.2f}% of runs had leaks")
print(f"{normal_channels/total_channels*100:.2f}% of channels flowed normally")


In 190 spatial ATAC-seq runs:
1.35% of channels had blocks
0.68% of channels had crosses
10.53% of runs had leaks
93.09% of channels flowed normally


False