In [14]:
import pandas as pd
from datetime import datetime
from functools import partial
f = partial(pd.to_datetime, dayfirst=True)

pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000
pd.set_option('display.max_colwidth', None)  # or 199

# READ THE DATA FROM CSV TO PANDAS
# Download the CSV, add it to project folder. Change filepath below, or if you add file it will just work.
df = pd.read_csv(
    filepath_or_buffer="c4591001-A-D-adva.csv",
    low_memory=False,
)

# CONVERT THE ADT DATE TO DATETIME SO WE CAN FILTER IT LATER
df['ADT'] = pd.to_datetime(df['ADT'])

In [34]:

# QUERY DATA 
# Looking for ALL Placebo patients NEGATIVE at VISIT 1
# We add to that ALL Placebo patients POSITIVE at VISIT 3
placebo_neg_pos = df[['USUBJID', 'ACTARM','PARAM', 'VISIT', 'AVALC','ADT']][
    ((df.ACTARM.str.contains('Placebo')) & (df['PARAM'] == 'N-binding antibody - N-binding Antibody Assay') & (df['ADT'] <= datetime(2020, 9, 15))) &
    ((df['VISIT'] == 'V1_DAY1_VAX1_L') & (df['AVALC'] == 'NEG')) | 
    ((df['VISIT'] == 'V3_MONTH1_POSTVAX2_L') & (df['AVALC'] == 'POS'))
    ].drop_duplicates()

# This step counts the results, returning those which count to two.
# Patients that appear twice had both v1_ NEGATIVE and v3_ Positive
placebo_neg_pos_results = placebo_neg_pos[placebo_neg_pos.groupby('USUBJID').USUBJID.transform('count') == 2]

# Repeat, this time for vaccinated
vaccinated_neg_pos = df[['USUBJID', 'ACTARM','PARAM', 'VISIT', 'AVALC', 'ADT']][
    ((df.ACTARM.str.contains('BNT162b2')) & (df['PARAM'] == 'N-binding antibody - N-binding Antibody Assay') & (df['ADT'] <= datetime(2020, 9, 15))) &
    ((df['VISIT'] == 'V1_DAY1_VAX1_L') & (df['AVALC'] == 'NEG')) | 
    ((df['VISIT'] == 'V3_MONTH1_POSTVAX2_L') & (df['AVALC'] == 'POS'))
    ].drop_duplicates()

vaccinated_neg_pos_results = vaccinated_neg_pos[vaccinated_neg_pos.groupby('USUBJID').USUBJID.transform('count') == 2]

# PRINT TO CSV
placebo_neg_pos_results.to_csv('placebo_neg_pos.csv')
vaccinated_neg_pos_results.to_csv('vaccinated_neg_pos.csv')

# PRINT THE RESULTS
print(f"Placebo: {placebo_neg_pos_results.count()/2} Vaccinated: {vaccinated_neg_pos_results.count()/2}")

Placebo: USUBJID    146.0
ACTARM     146.0
PARAM      146.0
VISIT      146.0
AVALC      146.0
ADT        146.0
dtype: float64 Vaccinated: USUBJID    62.0
ACTARM     62.0
PARAM      62.0
VISIT      62.0
AVALC      62.0
ADT        62.0
dtype: float64
