In [1]:
# Import required libraries
import json
import pandas as pd
import datetime

In [2]:
# Load the patient bundle into `patients`.
# The encoding is given explicitly: JSON is UTF-8 by specification, while
# open() would otherwise fall back to the platform's locale encoding.
with open('./dataset/build/patients.json', encoding='utf-8') as f:
    patients = json.load(f)

# Load the observation bundle into `observations` (same encoding rationale).
with open('./dataset/build/observations.json', encoding='utf-8') as f1:
    observations = json.load(f1)

In [3]:
# Empty frame that will later hold, per observation: the patient id, the
# measured value and the effective date/time.
observationColumns = ['patId', 'value', 'effectiveDateTime']
df = pd.DataFrame(columns=observationColumns)

In [4]:
# Collect, for every observation coded 32623-1, three parallel lists:
#   patIds                - subject reference with the 'urn:uuid:' prefix removed
#   valueList             - valueQuantity.value of the observation
#   effectiveDateTimeList - 'YYYY-MM-DD HH:MM:SS' wall-clock string
patIds = []
valueList = []
effectiveDateTimeList = []

# Iterate through each entry of the observation bundle
for entries in observations['entry']:
    resource = entries['resource']
    for item in resource['code']['coding']:
        # .get(): a coding without a 'code' key is simply not a match
        if item.get('code') == '32623-1':
            # Store the patient id
            patId = resource['subject']['reference']
            patIds.append(patId.replace('urn:uuid:', ''))

            # Store the measured value for this patient
            valueList.append(resource['valueQuantity']['value'])

            # Split the timestamp at 'T'. Only the first 8 characters of the
            # time part (HH:MM:SS) are kept, so any suffix - '-05:00',
            # '+01:00', 'Z' or fractional seconds - is dropped and the string
            # always matches the '%Y-%m-%d %H:%M:%S' format parsed later.
            # NOTE(review): the UTC offset is discarded, so timestamps are
            # compared as local wall-clock times - confirm this is acceptable.
            effectiveDate, _, effectiveTime = resource['effectiveDateTime'].partition('T')
            # A date-only value has no time part; treat it as midnight.
            effectiveDateTimeList.append(effectiveDate + ' ' + (effectiveTime[:8] or '00:00:00'))

            # One row per observation, even if the code is listed twice
            break

In [5]:
# Sanity check: the three parallel lists must all have the same length.
# Prints one length per line, in the order patIds, valueList, effectiveDateTimeList.
for collected in (patIds, valueList, effectiveDateTimeList):
    print(len(collected))

234118
234118
234118


In [6]:
# Build the observation frame from the three parallel lists.
# The frame is constructed from scratch (rather than by filling columns of a
# pre-existing `df`) so that this cell can be re-run even after later cells
# have reduced or reshaped `df`.
# NOTE(review): `patIds` is reassigned by the patient-extraction cell further
# down; re-running this cell after that one needs the observation-extraction
# cell to be re-run first (a length mismatch raises ValueError here).
df = pd.DataFrame({
    'patId': patIds,
    'value': valueList,
    # Parse the 'YYYY-MM-DD HH:MM:SS' strings into datetime values
    'effectiveDateTime': pd.to_datetime(effectiveDateTimeList, format='%Y-%m-%d %H:%M:%S'),
})

# Sort by patient id ascending and, within a patient, newest observation first;
# then renumber the rows from 0.
df = df.sort_values(['patId', 'effectiveDateTime'], ascending=[True, False])
df = df.reset_index(drop=True)

In [7]:
# `df` is sorted newest-first within each patient, so the first row of every
# patient group is that patient's most recent observation.
latestPerPatient = df.groupby(['patId']).head(1).reset_index(drop=True)

# Serum required per patient, derived from the measured value: 20 * value + 10
serumRequiredList = [(20 * float(val)) + 10 for val in latestPerPatient['value']]

# Attach the computed quantity and keep only what later steps need:
# - effectiveDateTime has served its purpose (latest row already selected);
# - value is superseded by serumRequired.
# Assumption carried over from the original: the final choice of patients is
# based solely on serum available versus serum required.
df = (
    latestPerPatient
    .assign(serumRequired=serumRequiredList)
    .drop(columns=['effectiveDateTime', 'value'])
)

# Display the dataframe
df

Unnamed: 0,patId,serumRequired
0,00002ef2-e29a-433b-91cc-4c7516aea2f1,338.955746
1,0000db5c-14e8-4bbf-b5d3-5ab54a0f34d6,272.189680
2,0001af34-fb61-484f-9382-3d26639f4283,255.191680
3,000341dd-e395-4e8e-bb33-d52f8bc9e957,268.964422
4,00050350-bfdc-478c-a039-c2168c47dff4,337.193915
...,...,...
87170,fffbdcfe-0cd6-487d-b44d-53eb34de9e6a,542.591490
87171,fffc5e21-fbc0-4cb1-a474-c781ccfeb16d,428.981754
87172,fffe5553-3112-4621-a32d-525855eb6437,469.475689
87173,fffedf9b-9791-4ea8-b8f8-635b6a8f19bf,332.277355


In [8]:
# Collect two parallel lists from the patient bundle:
#   patIds - the resource id of every patient
#   gpList - for each patient, the list of general practitioner ids
#            (references with the 'urn:uuid:' prefix removed)
# NOTE(review): this reassigns `patIds`, which earlier held the observation
# patient ids; cells that rely on the earlier contents must not be re-run
# after this one without re-running the observation extraction first.
patIds = []
gpList = []

# GP ids of the patient currently being processed
gpListperPatient = []

# Iterate through each entry
for entries in patients['entry']:
    resource = entries['resource']

    # Store the patient id
    patIds.append(resource['id'])

    # 'generalPractitioner' may be absent from a patient resource; such a
    # patient gets an empty GP list instead of raising KeyError.
    gpListperPatient = [
        gpIds['reference'].replace('urn:uuid:', '')
        for gpIds in resource.get('generalPractitioner', [])
    ]

    # Store all the gp ids for this patient
    gpList.append(gpListperPatient)

In [9]:
# One row per patient: the patient id and the list of that patient's gp ids.
df1 = pd.DataFrame(
    {'patId': patIds, 'gpId': gpList},
    columns=['patId', 'gpId'],
)

# Display the dataframe
df1

Unnamed: 0,patId,gpId
0,00058c64-996b-42de-8692-d9db634ef859,"[ccbe592e-f5cd-4a1f-adb2-75c9ab3e8bd0, 75c752a..."
1,000eff0a-e969-4b00-941f-579519186980,"[9da2216e-3a62-4150-841a-895cd03521b6, 242c8c1..."
2,00159b3f-3d73-4164-a06c-8213afa3ef08,"[19576a22-c2bb-4688-ad56-38974af2f20d, 0aed01c..."
3,001ca35e-9353-47bb-945d-e78eca0f2cd5,"[7ba77cf5-16bc-47aa-824c-d16c0299e54e, de516e2..."
4,00277ad6-0534-4d7a-aec8-c3805c9c6750,"[35148109-cff4-4703-a066-2470e0a26d67, ee8728e..."
...,...,...
99995,ffdc058c-7a8e-4ed0-8255-68f8b16f6e44,"[d08a343d-7195-498a-9030-4309607f09a1, 162ffee..."
99996,ffe1dbc1-a5b0-4732-8b7e-6a3336957475,"[e17b2f27-d11c-4d49-ae7a-83b46dbc8f01, 37eefa5..."
99997,ffeb4e99-67de-4e22-9538-340728809706,[13331cac-2b83-4d0f-855f-ca48f03f860e]
99998,fff4a29e-a0ab-4a76-bc7c-ef2020868900,"[24cd731f-ae70-438b-ac5b-78c539393ba9, 5a7d554..."


In [10]:
# Flatten df1 so that every (gp, patient) pair gets its own row.
# Patients whose gp list is empty contribute no rows.
gpIdList = []
patIdList = []

# Iterate over the two columns directly; positional `.iloc[i]` lookups build a
# Series for every access and are needlessly slow for a frame of this size.
for patientId, gpIdsOfPatient in zip(df1['patId'], df1['gpId']):
    for eachId in gpIdsOfPatient:
        gpIdList.append(eachId)
        patIdList.append(patientId)

# Dump gp and patient ids into a dataframe (gpId first, then patId)
df2 = pd.DataFrame({'gpId': gpIdList, 'patId': patIdList}, columns=['gpId', 'patId'])

# Display the dataframe
df2

Unnamed: 0,gpId,patId
0,ccbe592e-f5cd-4a1f-adb2-75c9ab3e8bd0,00058c64-996b-42de-8692-d9db634ef859
1,75c752a4-a41a-47bc-82b7-12eff5c38a57,00058c64-996b-42de-8692-d9db634ef859
2,9da2216e-3a62-4150-841a-895cd03521b6,000eff0a-e969-4b00-941f-579519186980
3,242c8c1f-a8b6-4813-b4a2-a10d4139aba1,000eff0a-e969-4b00-941f-579519186980
4,206bc163-d325-459e-b2b4-a79d111f82ef,000eff0a-e969-4b00-941f-579519186980
...,...,...
288451,d08a343d-7195-498a-9030-4309607f09a1,ffe1dbc1-a5b0-4732-8b7e-6a3336957475
288452,13331cac-2b83-4d0f-855f-ca48f03f860e,ffeb4e99-67de-4e22-9538-340728809706
288453,24cd731f-ae70-438b-ac5b-78c539393ba9,fff4a29e-a0ab-4a76-bc7c-ef2020868900
288454,5a7d554e-957e-4d53-909d-13f25584d55e,fff4a29e-a0ab-4a76-bc7c-ef2020868900


In [11]:
# Left join df2 (gp/patient pairs) with df (serum required per patient) on the
# patient id; pairs whose patient has no serum figure get NaN.
finalDf = pd.merge(df2, df, on='patId', how='left')

# Replace the NaN values with 0.
# Assign the result back instead of calling fillna(inplace=True) on the
# selected column: the in-place form acts on an intermediate object and does
# not reliably update finalDf (it is a no-op under pandas Copy-on-Write).
finalDf['serumRequired'] = finalDf['serumRequired'].fillna(0)

# Display the dataframe
finalDf

Unnamed: 0,gpId,patId,serumRequired
0,ccbe592e-f5cd-4a1f-adb2-75c9ab3e8bd0,00058c64-996b-42de-8692-d9db634ef859,353.017669
1,75c752a4-a41a-47bc-82b7-12eff5c38a57,00058c64-996b-42de-8692-d9db634ef859,353.017669
2,9da2216e-3a62-4150-841a-895cd03521b6,000eff0a-e969-4b00-941f-579519186980,50.357743
3,242c8c1f-a8b6-4813-b4a2-a10d4139aba1,000eff0a-e969-4b00-941f-579519186980,50.357743
4,206bc163-d325-459e-b2b4-a79d111f82ef,000eff0a-e969-4b00-941f-579519186980,50.357743
...,...,...,...
288451,d08a343d-7195-498a-9030-4309607f09a1,ffe1dbc1-a5b0-4732-8b7e-6a3336957475,260.428794
288452,13331cac-2b83-4d0f-855f-ca48f03f860e,ffeb4e99-67de-4e22-9538-340728809706,471.523986
288453,24cd731f-ae70-438b-ac5b-78c539393ba9,fff4a29e-a0ab-4a76-bc7c-ef2020868900,175.971974
288454,5a7d554e-957e-4d53-909d-13f25584d55e,fff4a29e-a0ab-4a76-bc7c-ef2020868900,175.971974


In [12]:
# Read the input file into a list of lines with the newline characters removed.
# The first line is later parsed as N, the remaining lines as test cases.
with open('./input_files/Q3/P1/input.txt', 'r') as inputFile:
    inputText = [rawLine.replace('\n', '') for rawLine in inputFile.readlines()]

In [13]:
# The first line of the input file is the declared number of test cases.
# It is parsed but not enforced; every non-blank remaining line is processed.
N = int(inputText[0])

# Each remaining line holds a gp id followed by the available serum quantity.
# Blank lines (e.g. a trailing empty line at the end of the file) are skipped,
# because they carry no test case and cannot be split into two fields.
lines = [entry for entry in inputText[1:] if entry.strip()]

In [14]:
def _countPatientsSaved(serumRequired, availableSerum):
    """Return how many leading entries of `serumRequired` fit into `availableSerum`.

    `serumRequired` is expected in ascending order, so serving patients in
    that order maximises the number served. The quantities are subtracted one
    by one and counting stops at the first one that would push the remaining
    serum below zero. An empty list yields 0.
    """
    counter = 0
    for required in serumRequired:
        availableSerum = availableSerum - required
        if availableSerum >= 0:
            counter = counter + 1
        else:
            break
    return counter


# List to store the max no. of patients that each gp can save
result = []

# Iterate through each test case line
for line in lines:
    # Split on any run of whitespace so that repeated spaces or tabs between
    # the gp id and the quantity do not produce empty fields.
    fields = line.split()
    genPracId = fields[0]
    availableSerum = float(fields[1])

    # Rows of this gp only
    filteredDf = finalDf[finalDf['gpId'] == genPracId][['patId', 'serumRequired']]

    # A patient can only be treated once, even if the same gp is listed more
    # than once for that patient.
    filteredDf = filteredDf.drop_duplicates(subset='patId')

    # Exclude patients without a serum figure (serumRequired == 0) and order
    # the rest from the smallest requirement to the largest.
    filteredDf = filteredDf[filteredDf['serumRequired'] != 0].sort_values(['serumRequired'])

    # Convert the column into a list for the sequential subtraction
    serumRequired = filteredDf['serumRequired'].tolist()

    # Store the number of patients this gp can save
    result.append(_countPatientsSaved(serumRequired, availableSerum))

In [15]:
# Write one line per test case, numbered from 1, in the form "Test <k>: <count>".
with open('./problemC1Output.txt', 'w') as f:
    for caseNumber, savedCount in enumerate(result, start=1):
        f.write("Test " + str(caseNumber) + ": " + str(savedCount) + "\n")