In [1]:
import pandas as pd

In [2]:
# Colab-only step: expose the user's Google Drive under /content/drive so the
# /content/drive/MyDrive/... paths used by the load and export cells resolve.
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Location of the source CBC workbook on the mounted Drive.
xlsx_path = '/content/drive/MyDrive/blood_data/cbc information.xlsx'

# Read the workbook into the working frame that the later cells annotate
# (no sheet is named, so pandas reads the first sheet).
dataset = pd.read_excel(xlsx_path)

In [4]:
# Reference interval for each analyte, stored as (lower bound, upper bound).
# The keys double as column labels: the range checks in this notebook look a
# column up here by its exact name and flag a value only when it is strictly
# outside the pair.
# NOTE(review): units are not recorded anywhere in this notebook -- confirm the
# bounds against the analyzer's reference sheet before reusing them.
normal_ranges = dict(
    # WBC and its differential ("p" / "n" suffixes presumably mean percentage
    # and absolute count -- confirm).
    WBC=(4.0, 10.0),
    LYMp=(20.0, 40.0),
    MIDp=(1.0, 15.0),
    NEUTp=(50.0, 70.0),
    LYMn=(0.6, 4.1),
    MIDn=(0.1, 1.8),
    NEUTn=(2.0, 7.8),
    # RBC and the indices reported alongside it.
    RBC=(3.50, 5.50),
    HGB=(11.0, 16.0),
    HCT=(36.0, 48.0),
    MCV=(80.0, 99.0),
    MCH=(26.0, 32.0),
    MCHC=(32.0, 36.0),
    RDWSD=(37.0, 54.0),
    RDWCV=(11.5, 14.5),
    # PLT and the indices reported alongside it.
    PLT=(100, 400),
    MPV=(7.4, 10.4),
    PDW=(10.0, 17.0),
    PCT=(0.10, 0.28),
    PLCR=(13.0, 43.0),
)

In [5]:
# Based on research: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7094281/

def check_row_pneumonia(row):
    """Heuristic pneumonia flag computed from one row of CBC values.

    Two patterns are evaluated against the bounds in ``normal_ranges``:

    * viral-like: ``WBC`` below its lower bound and ``LYMn`` below its lower
      bound;
    * bacterial-like: ``PCT`` above its upper bound, or ``WBC`` and ``NEUTp``
      both above their upper bounds.

    NOTE(review): the ``PCT`` bounds in ``normal_ranges`` (0.10-0.28) look
    like a plateletcrit interval. If the rule this is based on refers to
    procalcitonin, this column is the wrong input -- confirm with the data
    owner.

    Args:
        row: Record indexable by the column labels ``WBC``, ``LYMn``,
            ``PCT`` and ``NEUTp``.

    Returns:
        1 if either pattern matches, otherwise 0.
    """
    # Low white-cell count together with a low lymphocyte count.
    looks_viral = (
        row['WBC'] < normal_ranges['WBC'][0]
        and row['LYMn'] < normal_ranges['LYMn'][0]
    )

    # High PCT on its own, or high white-cell count with a high NEUTp value.
    looks_bacterial = (
        row['PCT'] > normal_ranges['PCT'][1]
        or (
            row['WBC'] > normal_ranges['WBC'][1]
            and row['NEUTp'] > normal_ranges['NEUTp'][1]
        )
    )

    return 1 if (looks_viral or looks_bacterial) else 0

In [6]:
def check_col_general_deviation(row, col):
    """Tell whether ``row[col]`` lies outside the normal range for ``col``.

    Args:
        row: Record indexable by column label.
        col: Column label; it must also be a key of ``normal_ranges``.

    Returns:
        1 when the value is strictly below the lower bound or strictly above
        the upper bound, otherwise 0. A value equal to either bound counts as
        normal, and a NaN value fails both comparisons and is therefore
        reported as 0.
    """
    too_low = row[col] < normal_ranges[col][0]
    # `or` keeps the upper-bound comparison lazy: a low value already decides.
    out_of_range = too_low or row[col] > normal_ranges[col][1]
    return 1 if out_of_range else 0

def apply_check_row(row):
    """Flag a row that has at least one analyte outside its normal range.

    Only labels that have an entry in ``normal_ranges`` are checked. Columns
    without a reference interval -- the identifier column, or derived columns
    such as ``deviation`` / ``pneumonia`` when the labelling cell is re-run --
    are skipped instead of raising ``KeyError``.

    Args:
        row: Record with an ``index`` of column labels (a ``pandas.Series``
            when called through ``DataFrame.apply(..., axis=1)``).

    Returns:
        1 if any checked column deviates from its range, otherwise 0.
    """
    for col in row.index:
        # Membership test first: it is what keeps unknown columns away from
        # the normal_ranges lookup inside check_col_general_deviation.
        if col in normal_ranges and check_col_general_deviation(row, col):
            return 1
    return 0

In [7]:
# Columns written by this cell. They are kept out of the frame handed to the
# row-wise range scan so that re-running the cell does not feed its own output
# back in (there is no normal range for either label).
derived_cols = ('deviation', 'pneumonia')
# The leading identifier column is deliberately kept in the selection so the
# column order seen by apply_check_row is the same as on a fresh load.
measurement_cols = [col for col in dataset.columns if col not in derived_cols]

# 1 when any analyte in the row is outside its reference interval.
dataset['deviation'] = dataset[measurement_cols].apply(apply_check_row, axis=1)
# 1 when the row matches the viral-like or bacterial-like pattern.
dataset['pneumonia'] = dataset.apply(check_row_pneumonia, axis=1)

In [8]:
# Show the annotated frame; the rich repr elides the middle rows and columns,
# so only the first and last records appear in the output.
dataset

Unnamed: 0,ID,WBC,LYMp,MIDp,NEUTp,LYMn,MIDn,NEUTn,RBC,HGB,...,MCHC,RDWSD,RDWCV,PLT,MPV,PDW,PCT,PLCR,deviation,pneumonia
0,1,10.0,43.2,6.7,50.1,4.3,0.7,5.0,2.77,7.3,...,30.1,35.3,11.4,189.0,9.2,12.5,0.17,22.30,1,0
1,2,10.0,42.4,5.3,52.3,4.2,0.5,5.3,2.84,7.3,...,20.2,35.3,11.4,180.0,8.9,12.5,0.16,19.50,1,0
2,3,7.2,30.7,8.6,60.7,2.2,0.6,4.4,3.97,9.0,...,29.5,37.2,13.7,148.0,10.1,14.3,0.14,30.50,1,0
3,4,6.0,30.2,6.3,63.5,1.8,0.4,3.8,4.22,3.8,...,29.8,46.5,17.0,143.0,8.6,11.3,0.12,16.40,1,0
4,5,4.2,39.1,7.2,53.7,1.6,0.3,2.3,3.93,0.4,...,29.7,42.7,15.1,236.0,19.5,12.8,0.22,24.80,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,496,2.7,43.4,7.1,49.5,1.2,0.2,1.3,4.77,13.2,...,31.7,37.2,12.2,169.0,10.1,14.3,0.17,28.60,1,0
496,497,6.2,35.0,6.9,57.6,2.2,0.4,3.6,4.82,11.6,...,31.4,37.2,13.8,177.0,9.9,13.6,0.18,29.00,1,0
497,498,8.4,29.2,7.3,63.5,2.0,0.5,4.3,4.40,9.9,...,33.0,38.0,10.6,133.1,9.6,12.8,0.12,24.60,1,0
498,499,7.4,19.0,8.5,72.5,0.8,0.3,2.9,3.34,7.4,...,30.9,36.2,11.0,125.0,10.7,15.9,0.13,33.60,1,0


In [9]:
# Persist the annotated frame in the same Drive folder as the source workbook;
# index=False keeps the positional row index out of the exported sheet.
dataset.to_excel('/content/drive/MyDrive/blood_data/cbc-dataset-refined.xlsx', index=False)