In [2]:
from __future__ import division
import numpy as np
import pandas as pd

# Script A
Takes a KIN file that contains one curve per set. The input file is a set file saved from such a KIN file:

| Set name | Curve index |
|---|---|
| Target_1 | 1 |
| Target_2 | 3 |
| Target_3 | 6 |
| ... | ... |

The following cell produces a group file that can be loaded into the original KIN file. It generates two groups: one contains all sets whose name is 8 characters long, the other contains all sets whose name is 9 characters long. Empty sets are omitted, even if their name has 8 or 9 characters.

Note that the Curve index **is not the same** as the Set index: the Set index is the position of the set's row in the input file.

| Set name | Set index |
|---|---|
| Length 8 | 1,2,4,5,9,... |
| Length 9 | 3,6,8,11,... |

In [97]:
# Saved set files (input) and the group files written for them (output),
# paired by position.
files_in  = ['scrubbed sets 0.txt',
             'scrubbed sets 1.txt',
             'scrubbed sets 2.txt']

files_out = ['scrubbed length groups 0.txt',
             'scrubbed length groups 1.txt',
             'scrubbed length groups 2.txt']

# set name lengths that each get their own group in the output file
name_lengths = [8, 9]

for file_in, file_out in zip(files_in, files_out):
    # column 0 = set name, column 1 = curve index
    # keep_default_na = False: strings such as 'NA' are read literally
    # instead of being converted to NaN
    sets = pd.read_csv(file_in, sep = '\t', header = None, keep_default_na = False)

    # add set index (row position in the file, counted from 0)
    sets[2] = range(len(sets))

    # drop empty sets: keep only rows whose curve index is >= 0
    # .copy() makes the filtered frame independent, so the column assignment
    # below does not raise SettingWithCopyWarning
    sets = sets[sets[1] >= 0].copy()

    # replace each set name by its length, then collect the set indices of
    # every length into one comma separated string (curve indices are not needed)
    sets[0] = sets[0].map(len)
    groups = sets.groupby(0)[2].agg(lambda x : ','.join([str(e) for e in x]))

    # keep only groups of sets with name length 8 or 9
    # reindex (unlike .loc with a list) does not raise KeyError when a file has
    # no set of one of these lengths; that group is written with an empty index list
    groups = groups.reindex(name_lengths).reset_index()
    groups[0] = ['Length %d' % n for n in name_lengths]
    groups.to_csv(file_out, sep = '\t', index = False, header = False)

# Script B
Takes a KIN file that contains one target (one labeled curve) per set. The set name matches the curve label. Labels can repeat. The input file is a set file saved from such a KIN file:

| Set name | Curve index |
|---|---|
| Target_1 | 1 |
| Target_2 | 3 |
| Target_3 | 6 |
| ... | ... |

The following cell produces a set file that can be loaded into the original KIN file. The new set file defines sets in which all curves with the same label are put into one set named after that label.

| Set name | Curve index |
|---|---|
| Target_1 | 1,14,207 |
| Target_2 | 3,18,100,267 |
| Target_3 | 6,22 |
| ... | ... |

In [99]:
file_in  = 'set indices scrubbed length 9.txt'
file_out = 'sets scrubbed length 9 regrouped.txt'

# alternative input / output pair
# file_in  = 'mixed sets.txt'
# file_out = 'mixed sets regrouped.txt'

# column 0 = set name, column 1 = curve index
df = pd.read_csv(file_in, sep = '\t', header = None)

# one row per set name; the curve indices of rows sharing a name are joined
# into a single comma separated string
df = df.groupby(0).agg(lambda x : ','.join([str(e) for e in x])).reset_index()

# order by name length first and by name second, so that e.g. 'Target_2'
# comes before 'Target_10'; the helper column is dropped again before writing
# sort_values replaces DataFrame.sort, which was removed in pandas 0.20
df['len'] = df[0].map(len)
df = df.sort_values(by = ['len', 0]).drop('len', axis = 1)
df.to_csv(file_out, sep = '\t', index = False, header = False)

# Script C

Takes the fit and endpoint output txt files of two KIN files and stitches them together, dropping the repeated columns.

In [3]:
# Fit and endpoint tables of the two KIN files, and the combined csv to write.
file_fit1 = '1-to-1 Fit Streptavidin scrubbed length 8.txt'
file_fit2 = '1-to-1 Fit Streptavidin scrubbed length 9.txt'
file_ep1 = 'Endpoint Streptavidin scrubbed length 8.txt'
file_ep2 = 'Endpoint Streptavidin scrubbed length 9.txt'
file_out = '1-to-1 Fit and Endpoint Streptavidin scrubbed length 8 and 9.csv'


def _read_table(path):
    """Read a tab separated txt file whose first row holds the column names."""
    return pd.read_csv(path, sep = '\t', header = 0)


def _fit_with_endpoint(fit, ep):
    """Place the endpoint columns from position 7 onwards to the right of the fit columns.

    The first 7 endpoint columns are skipped; presumably these are the columns
    that repeat those of the fit table -- verify against the actual files.
    """
    return pd.concat([fit, ep.iloc[:, 7:]], axis = 1)


fit1 = _read_table(file_fit1)
fit2 = _read_table(file_fit2)
ep1 = _read_table(file_ep1)
ep2 = _read_table(file_ep2)

fit1_ep1 = _fit_with_endpoint(fit1, ep1)
fit2_ep2 = _fit_with_endpoint(fit2, ep2)

# rows of the first file pair followed by the rows of the second pair
fit_ep = pd.concat([fit1_ep1, fit2_ep2])
fit_ep.to_csv(file_out, index = False)