In [157]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
from scipy.stats import linregress, pearsonr
from pathlib import Path

In [158]:
# Path to the CSV file.
# Prefer a copy of the dataset in the notebook's working directory so the notebook can run
# on any machine; if it is not there, fall back to the original absolute location.
maternal_data = Path("maternalhealth.csv")
if not maternal_data.exists():
    maternal_data = Path(r"C:\Users\laris\Documents\Project1group3\Project1group3\maternalhealth.csv")
maternal_data_df = pd.read_csv(maternal_data)

# Display the DataFrame
maternal_data_df

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk
...,...,...,...,...,...,...,...
1009,22,120,60,15.0,98.0,80,high risk
1010,55,120,90,18.0,98.0,60,high risk
1011,35,85,60,19.0,98.0,86,high risk
1012,43,120,90,18.0,98.0,70,high risk


In [159]:
# Tally the records per risk level (low, mid, high) in the raw data,
# i.e. before any duplicate rows are removed.
risk_level = maternal_data_df.loc[:, 'RiskLevel'].value_counts()
risk_level

RiskLevel
low risk     406
mid risk     336
high risk    272
Name: count, dtype: int64

In [160]:
# Remove rows that are exact duplicates across every column, keeping the first
# occurrence of each. The pandas defaults (subset=None, keep='first', inplace=False)
# already do this; the surviving rows keep their original index labels.
new_maternal_data_df = maternal_data_df.drop_duplicates()
new_maternal_data_df

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk
...,...,...,...,...,...,...,...
673,12,100,50,6.4,98.0,70,mid risk
674,15,100,60,6.0,98.0,80,low risk
703,15,100,49,7.6,98.0,77,low risk
704,12,100,50,6.0,98.0,70,mid risk


In [161]:
# Tally the records per risk level (low, mid, high) again, this time on the
# de-duplicated data. This rebinds `risk_level`, replacing the pre-deduplication counts.
risk_level = new_maternal_data_df.RiskLevel.value_counts()
risk_level

RiskLevel
low risk     234
high risk    112
mid risk     106
Name: count, dtype: int64

In [166]:
# Add a new column 'Woman_ID' with unique identifiers 1..N (one per remaining row).
# .assign() builds a new DataFrame instead of writing a column into the frame returned by
# drop_duplicates(); assigning into that filtered selection with df[...] = ... is what
# triggers pandas' SettingWithCopyWarning.
new_maternal_data_df = new_maternal_data_df.assign(
    Woman_ID=range(1, len(new_maternal_data_df) + 1)
)

# Reorder the DataFrame columns so 'Woman_ID' comes first; the others keep their order.
columns = ['Woman_ID'] + [col for col in new_maternal_data_df.columns if col != 'Woman_ID']
new_maternal_data_df = new_maternal_data_df[columns]

# Display the first rows
new_maternal_data_df.head()

Unnamed: 0,Woman_ID,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,1,25,130,80,15.0,98.0,86,high risk
1,2,35,140,90,13.0,98.0,70,high risk
2,3,29,90,70,8.0,100.0,80,high risk
3,4,30,140,85,7.0,98.0,70,high risk
4,5,35,120,60,6.1,98.0,76,low risk


In [169]:
# Count how many women are being studied in this DataFrame after dropping the duplicates
# (the number of distinct 'Woman_ID' values).
women = new_maternal_data_df.Woman_ID.nunique()
women

452

In [167]:
# Give the columns more readable labels. rename() returns a new DataFrame, so
# new_maternal_data_df keeps its original column names.
renamed_df = new_maternal_data_df.rename(
    columns={
        "Woman_ID": "Woman ID",
        "SystolicBP": "Systolic Blood Pressure",
        "DiastolicBP": "Diastolic Blood Pressure",
        "BS": "Blood Sugar",
        "BodyTemp": "Body Temp.",
        "HeartRate": "Heart Rate",
        "RiskLevel": "Risk Level",
    }
)
renamed_df.head()

Unnamed: 0,Woman ID,Age,Systolic Blood Pressure,Diastolic Blood Pressure,Blood Sugar,Body Temp.,Heart Rate,Risk Level
0,1,25,130,80,15.0,98.0,86,high risk
1,2,35,140,90,13.0,98.0,70,high risk
2,3,29,90,70,8.0,100.0,80,high risk
3,4,30,140,85,7.0,98.0,70,high risk
4,5,35,120,60,6.1,98.0,76,low risk


In [168]:
# Create a new column "Blood Pressure" by combining "Systolic Blood Pressure" and
# "Diastolic Blood Pressure" into a single "systolic/diastolic" string (e.g. "130/80").
renamed_df.loc[:, 'Blood Pressure'] = (
    renamed_df['Systolic Blood Pressure'].astype(str)
    + '/'
    + renamed_df['Diastolic Blood Pressure'].astype(str)
)

# Take the new column back out so it can be re-inserted next to the two columns it was
# built from. Popping first also keeps this cell safe to re-run.
column_to_move = renamed_df.pop('Blood Pressure')

# Derive the target position from the "Diastolic Blood Pressure" column rather than a
# hard-coded index, so the placement does not depend on the upstream column order.
insert_at = renamed_df.columns.get_loc('Diastolic Blood Pressure') + 1
renamed_df.insert(insert_at, 'Blood Pressure', column_to_move)

# Display the first rows
renamed_df.head()

Unnamed: 0,Woman ID,Age,Systolic Blood Pressure,Diastolic Blood Pressure,Blood Pressure,Blood Sugar,Body Temp.,Heart Rate,Risk Level
0,1,25,130,80,130/80,15.0,98.0,86,high risk
1,2,35,140,90,140/90,13.0,98.0,70,high risk
2,3,29,90,70,90/70,8.0,100.0,80,high risk
3,4,30,140,85,140/85,7.0,98.0,70,high risk
4,5,35,120,60,120/60,6.1,98.0,76,low risk
