# Analyze Manufacturer and User Facility Device Experience (MAUDE) Data Set
- Reports the total number of records
- Reports the total size of the data set in compressed (zip) format
- Reports the total size of the data set uncompressed

In [None]:
%pip install --quiet pandas lxml

In [None]:
import pandas as pd

# Fetch every HTML table found on the fda.gov MAUDE web page
maude_page_url = "https://www.fda.gov/medical-devices/mandatory-reporting-requirements-manufacturers-importers-and-device-user-facilities/about-manufacturer-and-user-facility-device-experience-maude"
tables = pd.read_html(maude_page_url)

# The page is expected to contain a single table; take the first one
df = tables[0]

# The leading row exists only for web-page formatting, so remove it in place
formatting_row_label = df.index[0]
df.drop(formatting_row_label, inplace=True)

# Replace the scraped headers with clean names (no tabs) and label the
# final column 'Description'
scraped_column_names = [
    "File Name",
    "Compressed Size in Bytes",
    "Uncompressed Size in Bytes",
    "Total Records",
    "Description",
]
df.columns = scraped_column_names

# Final layout: 'Description' and 'Total Records' sit right after the file name
display_column_order = [
    "File Name",
    "Description",
    "Total Records",
    "Compressed Size in Bytes",
    "Uncompressed Size in Bytes",
]

# Cast the record counts to integers, then apply the final column order
df = df.astype({"Total Records": "int"}).reindex(columns=display_column_order)

df

In [None]:
# Unit constants (binary multiples)
bytes_in_kb = 1024
bytes_in_gb = 1024**3
bytes_in_tb = 1024**4


def _kb_to_bytes(sizes):
    """Convert a Series of size strings such as ``"1234KB"`` to byte counts.

    The ``KB`` suffix and any thousands separators are stripped before the
    numeric conversion. The result is explicitly ``int64``: a plain ``int``
    is only 32 bits wide on some platforms (e.g. Windows with NumPy < 2.0),
    where multiplying by 1024 or summing could overflow for multi-gigabyte
    files. Values carrying any other suffix still raise ``ValueError``.
    """
    digits = (
        sizes.str.replace("KB", "", regex=False)
        .str.replace(",", "", regex=False)
    )
    return digits.astype("int64") * bytes_in_kb


# Convert 'Compressed Size in Bytes' and 'Uncompressed Size in Bytes' to bytes
df["Compressed Size in Bytes-Int"] = _kb_to_bytes(df["Compressed Size in Bytes"])
df["Uncompressed Size in Bytes-Int"] = _kb_to_bytes(df["Uncompressed Size in Bytes"])

# Sum the columns
total_records_sum = df["Total Records"].sum()
compressed_size_sum = df["Compressed Size in Bytes-Int"].sum()
uncompressed_size_sum = df["Uncompressed Size in Bytes-Int"].sum()

# Conversions to the units used in the report
total_records_millions = total_records_sum / 1_000_000
compressed_size_gb = compressed_size_sum / bytes_in_gb
uncompressed_size_gb = uncompressed_size_sum / bytes_in_gb

# Report the number of records and their total size

In [None]:
from IPython.display import display, Markdown

# One Markdown heading per headline figure, each formatted to two decimals
summary_lines = [
    f"## - {total_records_millions:.2f} M Records",
    f"## - {compressed_size_gb:.2f} GB Compressed",
    f"## - {uncompressed_size_gb:.2f} GB Uncompressed",
]

# Wrap the headings in a leading and a trailing newline, then render them
markdown_text = "\n" + "\n".join(summary_lines) + "\n"
display(Markdown(markdown_text))

# Print the entire dataframe

In [None]:
import IPython.display


def display_left_aligned_df(df):
    """Display *df* as an HTML table with left-aligned cells and headers.

    The frame is converted with ``to_html`` (notebook styling, no index
    column); every bare ``<td>`` and ``<th>`` tag then receives an inline
    ``text-align:left`` style before the markup is shown through IPython.
    """
    markup = df.to_html(notebook=True, index=False)
    # Data cells first, then header cells; only attribute-free tags match
    for tag in ("td", "th"):
        markup = markup.replace(f"<{tag}>", f'<{tag} style="text-align:left">')
    IPython.display.display(IPython.display.HTML(markup))


# Render the MAUDE file table with left-aligned text
display_left_aligned_df(df)