## Predictors of Pregnancy Risks

In [1]:
# Dependencies and Setup
%matplotlib inline

import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import scipy.stats as st
from scipy.stats import linregress, pearsonr

In [2]:
# Specifying path to CSV file
file_path = 'C:\\Users\\lnata\\GitHub\\Project1group3\\Resources\\maternal_health.csv'

# Read the .csv file
maternal_health = pd.read_csv(file_path)

In [3]:
# Print DataFrame
maternal_health.head()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk


In [4]:
# Adding a new column 'Woman_ID' with unique identifiers starting from 1
maternal_health['Woman_ID'] = range(1, len(maternal_health) + 1)

# Reordering the DataFrame columns
columns = ['Woman_ID'] + [col for col in maternal_health.columns if col != 'Woman_ID']
maternal_health = maternal_health[columns]

# Print
maternal_health.head()

Unnamed: 0,Woman_ID,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,1,25,130,80,15.0,98.0,86,high risk
1,2,35,140,90,13.0,98.0,70,high risk
2,3,29,90,70,8.0,100.0,80,high risk
3,4,30,140,85,7.0,98.0,70,high risk
4,5,35,120,60,6.1,98.0,76,low risk


In [5]:
# Renaming columns for better understanding
maternal_renamed = maternal_health.rename(columns={"Woman_ID":"Woman ID", "BS":"Blood Sugar", "SystolicBP":"Systolic Blood Pressure", 
                                                   "DiastolicBP":"Diastolic Blood Pressure", "BodyTemp":"Body Temp.", 
                                                   "HeartRate":"Heart Rate", "RiskLevel":"Risk Level"})
maternal_renamed.head()

Unnamed: 0,Woman ID,Age,Systolic Blood Pressure,Diastolic Blood Pressure,Blood Sugar,Body Temp.,Heart Rate,Risk Level
0,1,25,130,80,15.0,98.0,86,high risk
1,2,35,140,90,13.0,98.0,70,high risk
2,3,29,90,70,8.0,100.0,80,high risk
3,4,30,140,85,7.0,98.0,70,high risk
4,5,35,120,60,6.1,98.0,76,low risk


In [6]:
# Create a new column "Blood Pressure" by combining "Systolic Blood Pressure" and "Diastolic Blood Pressure"
maternal_renamed.loc[:, 'Blood Pressure'] = maternal_renamed['Systolic Blood Pressure'].astype(str) + '/' + maternal_renamed['Diastolic Blood Pressure'].astype(str)
column_to_move = maternal_renamed.pop('Blood Pressure')
maternal_renamed.insert(4, 'Blood Pressure', column_to_move)

# Print
maternal_renamed.head()

Unnamed: 0,Woman ID,Age,Systolic Blood Pressure,Diastolic Blood Pressure,Blood Pressure,Blood Sugar,Body Temp.,Heart Rate,Risk Level
0,1,25,130,80,130/80,15.0,98.0,86,high risk
1,2,35,140,90,140/90,13.0,98.0,70,high risk
2,3,29,90,70,90/70,8.0,100.0,80,high risk
3,4,30,140,85,140/85,7.0,98.0,70,high risk
4,5,35,120,60,120/60,6.1,98.0,76,low risk


In [9]:
# Dropping duplicate data
maternal_df = maternal_renamed.drop_duplicates(subset=None, keep='first', inplace=False)

In [11]:
# Calculating how many women are being studied in this DF
women = maternal_df["Woman ID"].nunique()
women

1014

In [12]:
# Count each risk level (low, mid, high)
risk_level_counts = maternal_df["Risk Level"].value_counts()

# Print
risk_level_counts

Risk Level
low risk     406
mid risk     336
high risk    272
Name: count, dtype: int64