In [1]:
# Importing the libraries
import pandas as pd

In [2]:
# Load the health-monitoring records from CSV and preview the first five rows
data = pd.read_csv('healthmonitoring.csv')
data.head(n=5)

Unnamed: 0,PatientID,Age,Gender,HeartRate,BloodPressure,RespiratoryRate,BodyTemperature,ActivityLevel,OxygenSaturation,SleepQuality,StressLevel,Timestamp
0,1,69,Male,60.993428,130/85,15,98.885236,resting,95.0,excellent,low,2024-04-26 17:28:55.286711
1,2,32,Male,98.723471,120/80,23,98.281883,walking,97.0,good,high,2024-04-26 17:23:55.286722
2,3,78,Female,82.295377,130/85,13,98.820286,resting,98.0,fair,high,2024-04-26 17:18:55.286726
3,4,38,Female,80.0,111/78,19,98.412594,running,98.0,poor,moderate,2024-04-26 17:13:55.286728
4,5,41,Male,87.531693,120/80,14,99.369871,resting,98.0,good,low,2024-04-26 17:08:55.286731


In [3]:
# Count the missing entries in every column
data.isna().sum()

PatientID             0
Age                   0
Gender                0
HeartRate             0
BloodPressure         0
RespiratoryRate       0
BodyTemperature      18
ActivityLevel         0
OxygenSaturation    163
SleepQuality          0
StressLevel           0
Timestamp             0
dtype: int64

In [4]:
# Fill the gaps in BodyTemperature and OxygenSaturation with each column's own median.
# Both medians are computed from the not-yet-filled frame, and a single fillna call
# returns a new frame, so the frame is not mutated in place.
data = data.fillna({
    'BodyTemperature': data['BodyTemperature'].median(),
    'OxygenSaturation': data['OxygenSaturation'].median(),
})

In [21]:
# Checking data types
# NOTE(review): the output captured under this cell already lists derived columns
# (e.g. ACategory, SystolicPressure) that are only created by later cells, so it
# appears to come from an out-of-order run; re-run the notebook top to bottom to refresh it.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   PatientID          500 non-null    int64         
 1   Age                500 non-null    int64         
 2   ACategory          500 non-null    object        
 3   Gender             500 non-null    object        
 4   HeartRate          500 non-null    float64       
 5   HRCategory         500 non-null    object        
 6   BloodPressure      500 non-null    object        
 7   SystolicPressure   500 non-null    int64         
 8   DiastolicPressure  500 non-null    int64         
 9   BPCategory         500 non-null    object        
 10  RespiratoryRate    500 non-null    int64         
 11  RRCategory         500 non-null    object        
 12  BodyTemperature    500 non-null    float64       
 13  BTCategory         500 non-null    object        
 14  ActivityLe

In [19]:
# Changing Timestamp to datetime
# Parse the Timestamp column with pd.to_datetime and store the result back in the same column.
data['Timestamp']=pd.to_datetime(data['Timestamp'])

In [5]:
# Viewing the descriptive statistics
# (count, mean, std, min, quartiles and max of the numeric columns)
data.describe()

Unnamed: 0,PatientID,Age,HeartRate,RespiratoryRate,BodyTemperature,OxygenSaturation
count,500.0,500.0,500.0,500.0,500.0,500.0
mean,250.5,51.146,80.131613,17.524,98.584383,96.296
std,144.481833,19.821566,9.606273,3.382352,0.461502,1.408671
min,1.0,18.0,60.169259,12.0,97.094895,94.0
25%,125.75,34.0,75.0,15.0,98.281793,96.0
50%,250.5,51.0,80.0,17.5,98.609167,96.0
75%,375.25,69.0,86.276413,20.0,98.930497,97.0
max,500.0,84.0,99.925508,23.0,99.48915,99.0


In [6]:
# Extract the systolic and diastolic values from the "systolic/diastolic" BloodPressure strings.
# The vectorized string accessor splits every reading once (instead of two row-wise
# apply/lambda passes that each re-split the string); both parts are stored as integers.
bp_parts = data['BloodPressure'].str.split('/', expand=True)
data['SystolicPressure'] = bp_parts[0].astype('int64')
data['DiastolicPressure'] = bp_parts[1].astype('int64')


In [7]:
# Viewing the data after extraction
# Preview the first rows to confirm SystolicPressure and DiastolicPressure were added.
data.head()

Unnamed: 0,PatientID,Age,Gender,HeartRate,BloodPressure,RespiratoryRate,BodyTemperature,ActivityLevel,OxygenSaturation,SleepQuality,StressLevel,Timestamp,SystolicPressure,DiastolicPressure
0,1,69,Male,60.993428,130/85,15,98.885236,resting,95.0,excellent,low,2024-04-26 17:28:55.286711,130,85
1,2,32,Male,98.723471,120/80,23,98.281883,walking,97.0,good,high,2024-04-26 17:23:55.286722,120,80
2,3,78,Female,82.295377,130/85,13,98.820286,resting,98.0,fair,high,2024-04-26 17:18:55.286726,130,85
3,4,38,Female,80.0,111/78,19,98.412594,running,98.0,poor,moderate,2024-04-26 17:13:55.286728,111,78
4,5,41,Male,87.531693,120/80,14,99.369871,resting,98.0,good,low,2024-04-26 17:08:55.286731,120,80


In [8]:
# Function to categorize BloodPressure column
def blood_pressure_category(systolic, diastolic):
    """Classify a blood-pressure reading into a named category.

    Categories are checked from the most severe to the least severe, so a
    reading that satisfies several ranges is assigned the highest applicable
    category and the 'Hypertensive Crisis' branch is reachable.

    Parameters
    ----------
    systolic : int or float
        Systolic pressure (presumably mmHg; confirm against the data source).
    diastolic : int or float
        Diastolic pressure (same unit as ``systolic``).

    Returns
    -------
    str or None
        One of 'Normal', 'Elevated',
        'High Blood Pressure (Hypertension) Stage 1',
        'High Blood Pressure (Hypertension) Stage 2' or 'Hypertensive Crisis'.
        None when no comparison holds (e.g. a NaN input).
    """
    # Most severe first: either component alone is enough to escalate.
    if systolic > 180 or diastolic > 120:
        return 'Hypertensive Crisis'
    if systolic >= 140 or diastolic >= 90:
        return 'High Blood Pressure (Hypertension) Stage 2'
    if systolic >= 130 or diastolic >= 80:
        return 'High Blood Pressure (Hypertension) Stage 1'
    # From here on both components are below the Stage 1 thresholds
    # (or not comparable, in which case the checks below fail as well).
    if systolic >= 120 and diastolic < 80:
        return 'Elevated'
    if systolic < 120 and diastolic < 80:
        return 'Normal'
    return None

In [9]:
# Function to categorize BodyTemperature column
def body_temperature_category(temp):
    """Label a body-temperature value.

    Cut-offs used: below 95 -> 'Low'; 95 to 98.6 inclusive -> 'Normal';
    above 98.6 up to 100.4 inclusive -> 'Fever'; above 100.4 -> 'High Fever'.
    A value for which every comparison fails (e.g. NaN) yields None.
    """
    label = None
    # Walk the thresholds from the top down; each branch implies the upper
    # bound of its band because the branch above it did not match.
    if temp > 100.4:
        label = 'High Fever'
    elif temp > 98.6:
        label = 'Fever'
    elif temp >= 95:
        label = 'Normal'
    elif temp < 95:
        label = 'Low'
    return label

In [10]:
# Function to categorize OxygenSaturation column
def oxygen_saturation_category(saturation):
    """Return 'Low' for a saturation below 95 and 'Normal' for 95 or above.

    A value for which neither comparison holds (e.g. NaN) yields None.
    """
    if saturation < 95:
        return 'Low'
    if saturation >= 95:
        return 'Normal'
    return None

In [11]:
# Function to categorize HeartRate column
def heart_rate_category(rate):
    """Label a heart rate as 'Low' (< 60), 'Normal' (60-100 inclusive) or 'High' (> 100).

    A value for which no comparison holds (e.g. NaN) yields None.
    """
    # Handle the two out-of-range sides first, then the inclusive normal band.
    if rate < 60:
        return 'Low'
    if rate > 100:
        return 'High'
    if 60 <= rate <= 100:
        return 'Normal'
    return None

In [12]:
# Function to categorize Age column
def age_category(age):
    """Map an age to 'Child' (< 18), 'Adult' (18-64 inclusive) or 'Senior' (> 64).

    A value for which no comparison holds (e.g. NaN) yields None.
    """
    group = None
    # Checked from oldest to youngest; each branch implies the upper bound
    # of its band because the branch above it did not match.
    if age > 64:
        group = 'Senior'
    elif age >= 18:
        group = 'Adult'
    elif age < 18:
        group = 'Child'
    return group

In [13]:
# Function to categorize RespiratoryRate column
def respiratory_rate_category(rate):
    """Label a respiratory rate as 'Normal' (12-16 inclusive), 'Low' (< 12) or 'High' (> 16).

    A value for which no comparison holds (e.g. NaN) yields None.
    """
    # (label, condition) pairs, tested in the listed order; the first match wins.
    checks = (
        ('Normal', 12 <= rate <= 16),
        ('Low', rate < 12),
        ('High', rate > 16),
    )
    for label, matched in checks:
        if matched:
            return label
    return None

In [14]:
# Derive one category column per measurement using the categorizer functions.
# Blood pressure needs two inputs, so it is computed row by row.
data['BPCategory'] = data.apply(
    lambda row: blood_pressure_category(row['SystolicPressure'], row['DiastolicPressure']),
    axis=1,
)
# The remaining categorizers take a single value and are passed to Series.apply directly.
data['BTCategory'] = data['BodyTemperature'].apply(body_temperature_category)
data['OSCategory'] = data['OxygenSaturation'].apply(oxygen_saturation_category)
data['HRCategory'] = data['HeartRate'].apply(heart_rate_category)
data['ACategory'] = data['Age'].apply(age_category)
data['RRCategory'] = data['RespiratoryRate'].apply(respiratory_rate_category)

In [15]:
# Reorder the columns so every derived category follows the measurement(s) it was computed from
data = data[[
    'PatientID',
    'Age', 'ACategory',
    'Gender',
    'HeartRate', 'HRCategory',
    'BloodPressure', 'SystolicPressure', 'DiastolicPressure', 'BPCategory',
    'RespiratoryRate', 'RRCategory',
    'BodyTemperature', 'BTCategory',
    'ActivityLevel',
    'OxygenSaturation', 'OSCategory',
    'SleepQuality',
    'StressLevel',
    'Timestamp',
]]

In [16]:
# Viewing the data after categorization
# Preview the first rows with the category columns in their new positions.
data.head()

Unnamed: 0,PatientID,Age,ACategory,Gender,HeartRate,HRCategory,BloodPressure,SystolicPressure,DiastolicPressure,BPCategory,RespiratoryRate,RRCategory,BodyTemperature,BTCategory,ActivityLevel,OxygenSaturation,OSCategory,SleepQuality,StressLevel,Timestamp
0,1,69,Senior,Male,60.993428,Normal,130/85,130,85,High Blood Pressure (Hypertension) Stage 1,15,Normal,98.885236,Fever,resting,95.0,Normal,excellent,low,2024-04-26 17:28:55.286711
1,2,32,Adult,Male,98.723471,Normal,120/80,120,80,High Blood Pressure (Hypertension) Stage 1,23,High,98.281883,Normal,walking,97.0,Normal,good,high,2024-04-26 17:23:55.286722
2,3,78,Senior,Female,82.295377,Normal,130/85,130,85,High Blood Pressure (Hypertension) Stage 1,13,Normal,98.820286,Fever,resting,98.0,Normal,fair,high,2024-04-26 17:18:55.286726
3,4,38,Adult,Female,80.0,Normal,111/78,111,78,Normal,19,High,98.412594,Normal,running,98.0,Normal,poor,moderate,2024-04-26 17:13:55.286728
4,5,41,Adult,Male,87.531693,Normal,120/80,120,80,High Blood Pressure (Hypertension) Stage 1,14,Normal,99.369871,Fever,resting,98.0,Normal,good,low,2024-04-26 17:08:55.286731


In [22]:
# Checking the data types
# Lists each column's non-null count and dtype for the reordered frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   PatientID          500 non-null    int64         
 1   Age                500 non-null    int64         
 2   ACategory          500 non-null    object        
 3   Gender             500 non-null    object        
 4   HeartRate          500 non-null    float64       
 5   HRCategory         500 non-null    object        
 6   BloodPressure      500 non-null    object        
 7   SystolicPressure   500 non-null    int64         
 8   DiastolicPressure  500 non-null    int64         
 9   BPCategory         500 non-null    object        
 10  RespiratoryRate    500 non-null    int64         
 11  RRCategory         500 non-null    object        
 12  BodyTemperature    500 non-null    float64       
 13  BTCategory         500 non-null    object        
 14  ActivityLe

In [17]:
# Saving the data to a new csv file
# index=False keeps the DataFrame's row index out of the written file.
data.to_csv('processed_data.csv',index=False)