In [38]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.stattools import adfuller

In [None]:
# Load the maternal-health CSV from its raw GitHub URL into a DataFrame
url = "https://raw.githubusercontent.com/sanjida-akhtar/maternal-health-analysis/main/data/maternal_health.csv"
df = pd.read_csv(url)
# Preview the first rows of the loaded frame
df.head()

In [None]:
# Work on a copy so the frame loaded into `df` is left as read from the CSV
data = df.copy()
# Preview the first rows of the working copy
data.head()

In [41]:
# Remove the trailing space from the "District " column name
data = data.rename(columns={"District ": "District"})

In [42]:
# Drop the columns that are not needed for the rest of the notebook
data = data.drop(
    columns=[
        "Division",
        "Total",
        "ANC1(%)",
        "ANC2(%)",
        "ANC3(%)",
        "ANC4(%)",
        "NVD(%)",
        "Ceasarean(%)",
        "Total Death",
        "Maternal Death(%)",
        "Maternal Death Review",
        "Total PNC",
        "PNC1(%)",
        "PNC2(%)",
    ]
)

In [None]:
# Build a "Date" column from Year and Month, order the rows by it,
# then make it the index of the frame
data = (
    data
    .assign(Date=pd.to_datetime(data["Year"].astype(str) + "-" + data["Month"].astype(str)))
    .sort_values("Date")
    .set_index("Date")
)
data.head()

In [None]:
# Year and Month are now encoded in the Date index, so remove both columns
data = data.drop(columns=["Year", "Month"])


In [None]:
# Preview the first rows of `data`
data.head()

In [None]:
# Show the (rows, columns) shape of the dataset
data.shape

In [None]:
# Print the summary produced by DataFrame.info() for the dataset
data.info()

In [None]:
# Show summary statistics for every column (include="all" also covers non-numeric ones)
data.describe(include="all")

In [None]:
# Check the data type of each column
data.dtypes

In [None]:
# Count how many records each district has
data["District"].value_counts()

In [53]:
def line_plot(df, dist):
  """
  Draw "Maternal Death" against "Date" as a line chart for one district.

  df   : Frame in which seaborn can look up "Date" and "Maternal Death" by
         name (the cells in this notebook pass frames indexed by "Date").
  dist : District name, appended to the chart title.
  """
  sns.set(style="whitegrid")
  fig, ax = plt.subplots(figsize=(10, 6))
  sns.lineplot(
      data=df,
      x="Date",
      y="Maternal Death",
      color="blue",
      label="Number of maternal death",
      ax=ax,
  )
  ax.set_xlabel("Date")
  ax.set_ylabel("Maternal Death")
  ax.set_title("Number of maternal death over time in " + dist)
  plt.show()

In [54]:
def plot_seasonality(df, dist, lags = 30):
  """
  Plot the autocorrelation function (ACF) of "Maternal Death" for a district.

  df   : DataFrame holding a "Maternal Death" column. It is only read; the
         frame passed in is not modified. "Date" is not needed here because
         the ACF is drawn against the lag number.
  dist : District name, appended to the plot title.
  lags : Largest lag to draw, as an integer (default 30). It is capped at one
         less than the number of observations so short series still plot.

  Raises ValueError when fewer than two observations are available.
  """
  series = df["Maternal Death"]
  n_obs = len(series)
  if n_obs < 2:
    raise ValueError("Need at least two observations to plot autocorrelation for " + dist)

  # Never request more lags than the series can provide.
  max_lag = min(lags, n_obs - 1)

  # Hand plot_acf an explicit axes: without one it opens a figure of its own,
  # which would leave the sized figure created here empty.
  fig, ax = plt.subplots(figsize = (10, 6))
  plot_acf(series, lags = max_lag, ax = ax)
  ax.set_xlabel("Lag")
  ax.set_ylabel("Autocorrelation")
  ax.set_title("Autocorrelation Function Plot for " + dist)
  plt.show()

In [55]:
def testing_stationarity(data, dist):
  """
  Run adfuller on the "Maternal Death" column and print the outcome.

  data : Frame containing a "Maternal Death" column.
  dist : District name, printed next to the p-value.

  Prints the test statistic, the p-value and the critical values
  (items 0, 1 and 4 of the adfuller result).
  """
  adf_stat, p_value, _used_lag, _n_obs, critical_values, *_rest = adfuller(data["Maternal Death"])

  print("ADF Statistic: ", adf_stat)
  print("p-value for " + dist, " : ", p_value)
  print("Critical Values: ", critical_values)

In [56]:
def differencing(data, dist):
  """
  Plot Maternal Death together with its first difference.

  data : DataFrame with a "Maternal Death" column. A "Maternal Death Diff"
         column (row-to-row difference, NaN in the first row) is added to
         this frame.
  dist : District name, appended to the plot title.
  """

  data["Maternal Death Diff"] = data["Maternal Death"].diff()

  # Plot from the frame that was passed in, not from a notebook-level global.
  fig, ax = plt.subplots(figsize = (10, 6))
  ax.plot(data["Maternal Death"], label = "Actual Maternal Death", color = "red")
  ax.plot(data["Maternal Death Diff"], label = "Differenced Maternal Death", linestyle = "--", color = "blue")
  ax.set_xlabel("Date")
  ax.set_ylabel("Maternal Death")
  ax.legend()
  ax.set_title("Actual Maternal Death vs Differenced Maternal Death in " + dist)
  plt.show()


In [57]:
def smoothing(data, dist, window_size = 30):
  """
  Plot Maternal Death together with its moving average.

  data        : DataFrame with a "Maternal Death" column. A
                "Smoothed Maternal Death" column holding the rolling mean is
                added to this frame.
  dist        : District name, appended to the plot title.
  window_size : Number of consecutive rows averaged by the rolling mean
                (default 30). Rows before the first full window are NaN.
  """

  data["Smoothed Maternal Death"] = data["Maternal Death"].rolling(window = window_size).mean()

  fig, ax = plt.subplots(figsize = (10, 6))
  ax.plot(data["Maternal Death"], label = "Actual Maternal Death", color = "red")
  ax.plot(data["Smoothed Maternal Death"], label = f"Moving average (window_size): {window_size}", linestyle = "--", color = "blue")
  ax.set_xlabel("Date")
  ax.set_ylabel("Maternal Death")
  # The district belongs in the title text; passing it as a second positional
  # argument would be taken as the font dictionary.
  ax.set_title("Actual vs Moving average for " + dist)
  ax.legend()
  plt.show()


In [58]:
def after_smoothing(data, dist):
  """
  Display the actual and smoothed Maternal Death values side by side.

  data : DataFrame with "Maternal Death" and "Smoothed Maternal Death"
         columns (the latter is the column added by smoothing()).
  dist : District name, printed in the heading.

  Rows whose moving average is NaN are left out of the result, so the
  printed preview starts at the first smoothed value. The frame passed in
  is not modified.

  Returns the cleaned two-column DataFrame; its first rows are printed.
  Raises KeyError if either column is missing from data.
  """

  new_data = data[["Maternal Death", "Smoothed Maternal Death"]].dropna(subset = ["Smoothed Maternal Death"])
  print("New data for ", dist, ": ")
  print(new_data.head())
  return new_data


In [None]:
# Keep only the "Maternal Death" column of the Sunamganj rows
sunamganj = data.loc[data["District"] == "Sunamganj", ["Maternal Death"]]
# Preview the first rows only, like the other district cells, instead of
# rendering the whole frame
sunamganj.head()

In [None]:
# Maternal deaths over time in Sunamganj
line_plot(df=sunamganj, dist="Sunamganj")

In [None]:
# Autocorrelation plot for Sunamganj, used to look for seasonality
plot_seasonality(df=sunamganj, dist="Sunamganj")

In [None]:
# Stationarity test for the Sunamganj series
testing_stationarity(data=sunamganj, dist="Sunamganj")

In [None]:
# Select records for Sirajganj
sirajganj = data.loc[data["District"] == "Sirajganj", ["Maternal Death"]]
sirajganj.head()

In [None]:
# Maternal deaths over time in Sirajganj
line_plot(df=sirajganj, dist="Sirajganj")

In [None]:
# Autocorrelation plot for Sirajganj, used to look for seasonality
plot_seasonality(df=sirajganj, dist="Sirajganj")

In [None]:
# Stationarity test for the Sirajganj series
testing_stationarity(data=sirajganj, dist="Sirajganj")

In [None]:
# Select records for Netrokona
netrokona = data.loc[data["District"] == "Netrokona", ["Maternal Death"]]
netrokona.head()

In [None]:
# Maternal deaths over time in Netrokona
line_plot(df=netrokona, dist="Netrokona")

In [None]:
# Autocorrelation plot for Netrokona, used to look for seasonality
plot_seasonality(df=netrokona, dist="Netrokona")

In [None]:
# Stationarity test for the Netrokona series
testing_stationarity(data=netrokona, dist="Netrokona")

In [None]:
# Select records for Rangpur District
rangpur = data.loc[data["District"] == "Rangpur", ["Maternal Death"]]
rangpur.head()

In [None]:
# Maternal deaths over time in Rangpur
line_plot(df=rangpur, dist="Rangpur")

In [None]:
# Autocorrelation plot for Rangpur, used to look for seasonality
plot_seasonality(df=rangpur, dist="Rangpur")

In [None]:
# Stationarity test for the Rangpur series
testing_stationarity(data=rangpur, dist="Rangpur")

In [None]:
# Select records for Chandpur district
chandpur = data.loc[data["District"] == "Chandpur", ["Maternal Death"]]
chandpur.head()

In [None]:
# Maternal deaths over time in Chandpur
line_plot(df=chandpur, dist="Chandpur")

In [None]:
# Autocorrelation plot for Chandpur, used to look for seasonality
plot_seasonality(df=chandpur, dist="Chandpur")

# Stationarity test for the Chandpur series
testing_stationarity(data=chandpur, dist="Chandpur")

In [None]:
# Select records for Dhaka district
dhaka = data.loc[data["District"] == "Dhaka", ["Maternal Death"]]
dhaka.head()

In [None]:
# Maternal deaths over time in Dhaka
line_plot(df=dhaka, dist="Dhaka")

In [None]:
# Autocorrelation plot for Dhaka, used to look for seasonality
plot_seasonality(df=dhaka, dist="Dhaka")

# Stationarity test for the Dhaka series
testing_stationarity(data=dhaka, dist="Dhaka")

In [None]:
# Select records of Chuadanga
chuadanga = data.loc[data["District"] == "Chuadanga", ["Maternal Death"]]
chuadanga.head()

In [None]:
# Maternal deaths over time in Chuadanga
line_plot(df=chuadanga, dist="Chuadanga")

In [None]:
# Autocorrelation plot for Chuadanga, used to look for seasonality
plot_seasonality(df=chuadanga, dist="Chuadanga")

# Stationarity test for the Chuadanga series
testing_stationarity(data=chuadanga, dist="Chuadanga")

In [None]:
# Select records of Bhola
bhola = data.loc[data["District"] == "Bhola", ["Maternal Death"]]
bhola.head()

In [None]:
# Autocorrelation plot for Bhola, used to look for seasonality
plot_seasonality(df=bhola, dist="Bhola")

# Stationarity test for the Bhola series
testing_stationarity(data=bhola, dist="Bhola")