In [None]:

# Script to extract and process CASA data from the Hamilton Thorne IVOS II system.
# It reads the .DBS and .DBT files, selects and orders the relevant columns, tags each row with its sample name, and exports the results to .csv and .xlsx files.


In [9]:
import pandas as pd
import os

In [10]:
# === Block 2: Set date and file location ===
# `date` is used below as the prefix of the exported file names; `folder` is
# where the .DBS/.DBT inputs are read from and where the exports are written.
date = 20250715  # Change to today's date or the experiment batch date
folder = 'DBST_files'  # Folder containing your .DBS and .DBT files

# Fail early, with a clear message, before any file is read.
if not os.path.exists(folder):
    raise FileNotFoundError(f"The folder '{folder}' does not exist.")
# A regular file with the same name would pass the existence check above and
# only fail later inside os.listdir(); reject it here instead.
if not os.path.isdir(folder):
    raise NotADirectoryError(f"'{folder}' exists but is not a folder.")

In [13]:
# Column selections for the two exports. The same nine metric columns occur in
# every list, so they are spelled out once and the lists are assembled from
# shared pieces.
_metrics = ['VAP', 'VSL', 'VCL', 'ALH', 'BCF', 'STR', 'LIN', 'ELONGATION', 'SIZE']
_metrics_sd = [f'{name}_SD' for name in _metrics]
_headline_counts = ['TOTAL_COUNT', 'MOTILE_COUNT', 'PROGRESSIVE_COUNT']
_class_counts = ['RAPID_COUNT', 'MEDIUM_COUNT', 'SLOW_COUNT', 'STATIC_COUNT']

# Columns to keep in the average metrics files
keep = (_metrics + _metrics_sd + _headline_counts + _class_counts
        + ['SORT1_COUNT', 'SORT2_COUNT'])

# Final column order of the average metrics export. SORT1_COUNT/SORT2_COUNT are
# listed under the HYP1_COUNT/HYP2_COUNT names they are renamed to before export.
sort = (['SAMPLE'] + _headline_counts + ['HYP1_COUNT', 'HYP2_COUNT']
        + _metrics + _metrics_sd + _class_counts)

# Columns to keep and order in the single-sperm tracking data
keepsperm = (['FIELD#', 'TRACK#', 'TRACK_TYPE', 'POINTS'] + _metrics
             + ['SIZE_PIXELS', 'INTENSITY', 'SORTED'])

# Same columns as keepsperm, with the sample label placed first.
sortsperm = ['SAMPLE'] + keepsperm


# --- Compile the .DBS files into one table of average metrics ---
# Every .DBS file in `folder` contributes its `keep` columns plus a SAMPLE
# column holding the file name without its extension.
dbs_frames = []

# os.listdir() returns names in arbitrary order; sorting keeps runs reproducible.
for filename in sorted(os.listdir(folder)):
    if not filename.endswith('.DBS'):
        continue
    file = os.path.join(folder, filename)
    condition = os.path.splitext(filename)[0]
    totaldf = pd.read_csv(file, sep='\t', encoding='ascii')
    # Select the wanted columns directly. Do not drop all-empty columns before
    # this selection: a kept column that is blank in this file would be removed
    # and the selection would then raise KeyError.
    totalclean = totaldf[keep].copy()
    totalclean['SAMPLE'] = condition
    dbs_frames.append(totalclean)

# Without this guard an empty folder surfaces as an obscure KeyError on the
# column selection below.
if not dbs_frames:
    raise FileNotFoundError(f"No .DBS files found in '{folder}'.")

# Concatenate once instead of growing the frame inside the loop.
compiledf = pd.concat(dbs_frames, ignore_index=True)
compiledf = compiledf.rename(columns={'SORT1_COUNT': 'HYP1_COUNT', 'SORT2_COUNT': 'HYP2_COUNT'})
compiledf = compiledf[sort]
# A stable sort keeps rows that share a SAMPLE in the order they were read.
compiledf = compiledf.sort_values(by='SAMPLE', kind='mergesort').reset_index(drop=True)
compiledf.to_csv(f'{folder}/{date}_CASA.csv', index=False)
compiledf.to_excel(f'{folder}/{date}_CASA.xlsx', index=False)

# --- Compile the .DBT files into one table of single-sperm tracks ---
# Every .DBT file in `folder` contributes whichever `keepsperm` columns it has,
# plus a SAMPLE column holding the file name without its extension.
sperm_frames = []

# os.listdir() returns names in arbitrary order; sorting keeps runs reproducible.
for filename in sorted(os.listdir(folder)):
    if not filename.endswith('.DBT'):
        continue
    file = os.path.join(folder, filename)
    conditionsperm = os.path.splitext(filename)[0]
    spermdf = pd.read_csv(file, sep='\t', encoding='ascii')
    available_cols = [col for col in keepsperm if col in spermdf.columns]
    if not available_cols:
        print(f'Skipping {filename}: no relevant columns found')
        continue
    spermclean = spermdf[available_cols].copy()
    spermclean['SAMPLE'] = conditionsperm
    sperm_frames.append(spermclean)

# Without this guard, a folder with no usable .DBT file surfaces as an obscure
# KeyError on 'SAMPLE' in the sort below.
if not sperm_frames:
    raise FileNotFoundError(f"No usable .DBT files found in '{folder}'.")

# Concatenate once instead of growing the frame inside the loop.
compilesperm = pd.concat(sperm_frames, ignore_index=True)
# The loop tolerates files that lack some columns, so a column may be missing
# from every file. reindex() adds such a column as empty instead of raising
# KeyError, and fixes the final column order before sorting.
compilesperm = compilesperm.reindex(columns=sortsperm)
compilesperm = compilesperm.sort_values(by=['SAMPLE', 'FIELD#', 'TRACK#']).reset_index(drop=True)

# Export files:

compilesperm.to_csv(f'{folder}/{date}_sperm.csv', index=False)
compilesperm.to_excel(f'{folder}/{date}_sperm.xlsx', index=False)

In [7]:
# Preview the compiled single-sperm table built by the processing cell above;
# pandas abbreviates the middle rows of a long frame in the rendered output.
# NOTE(review): this cell's execution count (7) is lower than the processing
# cell's (13), so the saved output may predate the latest run -- confirm with
# Restart Kernel -> Run All.
compilesperm

Unnamed: 0,SAMPLE,FIELD#,TRACK#,TRACK_TYPE,POINTS,VAP,VSL,VCL,ALH,BCF,STR,LIN,ELONGATION,SIZE,SIZE_PIXELS,INTENSITY,SORTED
0,6BN60,1,5,Medium,30,20.7,17.8,31.2,1.3,33.6,86,57,67,5.5,10.2,137.6,YES
1,6BN60,1,6,Rapid,30,84.6,12.2,143.0,8.4,41.5,14,9,45,7.9,12.1,136.5,YES
2,6BN60,1,8,Rapid,12,67.2,29.8,116.3,6.1,0.0,44,26,34,11.5,24.0,138.5,YES
3,6BN60,1,9,Medium,29,40.0,17.9,104.7,6.7,45.7,45,17,31,11.2,29.4,134.8,YES
4,6BN60,1,10,Rapid,19,63.4,42.7,134.6,9.9,60.0,67,32,40,4.9,14.1,120.2,YES
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2725,NCT60H,2,90,Rapid,21,171.3,125.8,333.7,14.8,40.0,73,38,71,27.2,11.1,125.9,YES
2726,NCT60H,2,96,Rapid,23,126.1,30.6,326.3,23.6,55.2,24,9,78,8.0,15.5,110.8,YES
2727,NCT60H,2,132,Rapid,12,181.3,59.4,313.7,9.3,37.5,33,19,31,16.6,22.8,125.9,YES
2728,NCT60H,2,133,Rapid,20,104.0,25.0,277.1,16.3,44.2,24,9,77,5.7,13.2,121.5,YES
