In [4]:
import pandas as pd
from datetime import datetime
from functools import partial

# SETUP OPTIONAL PANDAS OPTIONS
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000
pd.set_option('display.max_colwidth', None)  # or 199



In [5]:
# READ THE DATA FROM CSV TO PANDAS

# You will need to download the .csv from Jikky, as was originally posted:
# You will need to rename the file so its a .csv extension same as its written on line 9 below
# https://files.catbox.moe/i544mb.zip

# Download the CSV, rename it, add it to project folder.
df = pd.read_csv(
    filepath_or_buffer="c4591001-A-D-adva.csv",
    low_memory=False,
)

# CONVERT THE ADT DATE TO DATETIME SO WE CAN FILTER IT LATER
df['ADT'] = pd.to_datetime(df['ADT'])

In [6]:
# QUERY DATA 
# Looking for ALL Placebo patients NEGATIVE at VISIT 1
# We add to that ALL Placebo patients POSITIVE at VISIT 3
placebo_neg_pos = df[['USUBJID', 'ACTARM','PARAM', 'VISIT', 'AVALC','ADT']][
    ((df['ACTARM'] == 'Placebo') & (df['PARAM'] == 'N-binding antibody - N-binding Antibody Assay')) &
    ((df['VISIT'] == 'V1_DAY1_VAX1_L') & (df['AVALC'] == 'NEG')) | 
    ((df['VISIT'] == 'V3_MONTH1_POSTVAX2_L') & (df['AVALC'] == 'POS') & (df['ADT'] <= datetime(2020, 11, 15)))
    ]

# This step counts the unique subject IDs, returning those which count to two.
# Subjects that appear twice had both v1_ NEGATIVE and v3_ Positive
placebo_neg_pos_results = placebo_neg_pos[placebo_neg_pos.groupby('USUBJID').USUBJID.transform('count') == 2]

# Repeat, this time for vaccinated
vaccinated_neg_pos = df[['USUBJID', 'ACTARM','PARAM', 'VISIT', 'AVALC', 'ADT']][
    ((df['ACTARM']=='BNT162b2 Phase 2/3 (30 mcg)') & (df['PARAM'] == 'N-binding antibody - N-binding Antibody Assay')) &
    ((df['VISIT'] == 'V1_DAY1_VAX1_L') & (df['AVALC'] == 'NEG')) | 
    ((df['VISIT'] == 'V3_MONTH1_POSTVAX2_L') & (df['AVALC'] == 'POS')& (df['ADT'] <= datetime(2020, 11, 15)))
    ]

vaccinated_neg_pos_results = vaccinated_neg_pos[vaccinated_neg_pos.groupby('USUBJID').USUBJID.transform('count') == 2]

# PRINT TO CSV
placebo_neg_pos_results.to_csv('placebo_neg_pos.csv')
vaccinated_neg_pos_results.to_csv('vaccinated_neg_pos.csv')

# PRINT THE RESULTS
print(f"Placebo: {placebo_neg_pos_results.count()/2} Vaccinated: {vaccinated_neg_pos_results.count()/2}")

Placebo: USUBJID    160.0
ACTARM     160.0
PARAM      160.0
VISIT      160.0
AVALC      160.0
ADT        160.0
dtype: float64 Vaccinated: USUBJID    75.0
ACTARM     75.0
PARAM      75.0
VISIT      75.0
AVALC      75.0
ADT        75.0
dtype: float64
