# Analyze Manufacturer and User Facility Device Experience (MAUDE) Data Set
- Reports the total number of records
- Reports the total size of the data set in compressed (zip) format
- Reports the total size of the data set when uncompressed

In [1]:
%pip install --quiet pandas lxml

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd

# Read every HTML table found on the MAUDE overview page at fda.gov
tables = pd.read_html(
    "https://www.fda.gov/medical-devices/mandatory-reporting-requirements-manufacturers-importers-and-device-user-facilities/about-manufacturer-and-user-facility-device-experience-maude"
)

# The read should return one table; keep it untouched in `tables[0]` and
# derive the working dataframe from it without in-place mutation
raw_table = tables[0]

# Column labels for the table: include 'Description' and remove tabs
column_names = [
    "File Name",
    "Compressed Size in Bytes",
    "Uncompressed Size in Bytes",
    "Total Records",
    "Description",
]

# Final layout: 'Description' and 'Total Records' sit next to the file name
column_order = [
    "File Name",
    "Description",
    "Total Records",
    "Compressed Size in Bytes",
    "Uncompressed Size in Bytes",
]

df = (
    raw_table
    # Drop the first row which is only used for formatting on the web page
    .drop(index=raw_table.index[0])
    .set_axis(column_names, axis="columns")
    # Convert total records to integer; the explicit 64-bit width keeps the
    # dtype independent of the platform's default integer size
    .astype({"Total Records": "int64"})
    .reindex(columns=column_order)
)

df

Unnamed: 0,File Name,Description,Total Records,Compressed Size in Bytes,Uncompressed Size in Bytes
1,mdrfoi.zip,MAUDE Base records received to date for 2023,1766723,58880KB,560954KB
2,mdrfoithru2023.zip,Master Record through 2023,18118160,598951KB,5800337KB
3,mdrfoiadd.zip,New MAUDE Base records for the current month.,179856,6052KB,57102KB
4,mdrfoichange.zip,MAUDE Base data updates: changes to existing B...,786149,27210KB,267107KB
5,patient.zip,MAUDE Patient records received to date for 2023,1766486,9439KB,71703KB
...,...,...,...,...,...
70,foitext2022.zip,Narrative Data for 2022,5927375,354865KB,2632509KB
71,foitext2023.zip,Narrative Data for 2023,4228146,306475KB,2173130KB
72,foitext.zip,Narrative Data received to date for 2023,3191200,228840KB,1624064KB
73,foitextadd.zip,New MAUDE Narrative data for the current month.,319976,23518KB,165967KB


In [3]:
def _kb_to_bytes(sizes):
    """Convert a Series of strings such as "58880KB" to integer byte counts.

    The "KB" suffix is removed as a literal (not a regex), the remainder is
    parsed as a 64-bit integer, and the value is multiplied by 1024.
    An explicit int64 is used because a plain ``int`` can resolve to a 32-bit
    dtype on some platforms, where the larger byte counts would overflow.
    """
    return sizes.str.replace("KB", "", regex=False).astype("int64") * 1024


# The scraped size columns hold kilobyte strings (e.g. "58880KB") despite their
# "in Bytes" labels; store the real byte counts in separate "-Int" columns
df["Compressed Size in Bytes-Int"] = _kb_to_bytes(df["Compressed Size in Bytes"])
df["Uncompressed Size in Bytes-Int"] = _kb_to_bytes(df["Uncompressed Size in Bytes"])

# Sum the columns
total_records_sum = df["Total Records"].sum()
compressed_size_sum = df["Compressed Size in Bytes-Int"].sum()
uncompressed_size_sum = df["Uncompressed Size in Bytes-Int"].sum()

# Conversions (binary units: 1 GB = 1024**3 bytes, 1 TB = 1024**4 bytes)
bytes_in_gb = 1024**3
bytes_in_tb = 1024**4
total_records_millions = total_records_sum / 1_000_000
compressed_size_gb = compressed_size_sum / bytes_in_gb
uncompressed_size_gb = uncompressed_size_sum / bytes_in_gb

# Report the number of records and their total size

In [4]:
from IPython.display import display, Markdown

# One level-2 markdown heading per headline figure, each shown to two decimals
summary_lines = [
    f"## - {total_records_millions:.2f} M Records",
    f"## - {compressed_size_gb:.2f} GB Compressed",
    f"## - {uncompressed_size_gb:.2f} GB Uncompressed",
]

# Surround the headings with a leading and a trailing newline before rendering
markdown_text = "\n" + "\n".join(summary_lines) + "\n"
display(Markdown(markdown_text))


## - 144.11 M Records
## - 5.08 GB Compressed
## - 33.24 GB Uncompressed


# Print the entire dataframe

In [5]:
import IPython.display


def display_left_aligned_df(df):
    """Display ``df`` as an HTML table with left-aligned cells and headers.

    The frame is rendered with ``DataFrame.to_html`` (notebook styling, index
    column omitted). Every bare ``<td>`` and ``<th>`` tag then receives an
    inline ``text-align:left`` style, and the resulting markup is shown
    through ``IPython.display``.

    Args:
        df: The pandas DataFrame to display.
    """
    left_align = ' style="text-align:left"'
    markup = df.to_html(notebook=True, index=False)
    # Only attribute-free tags are rewritten; tags that already carry
    # attributes do not match the plain "<td>" / "<th>" patterns.
    for tag in ("td", "th"):
        markup = markup.replace(f"<{tag}>", f"<{tag}{left_align}>")
    IPython.display.display(IPython.display.HTML(markup))


# Show the dataframe as an HTML table with left-aligned cells and headers
display_left_aligned_df(df)

File Name,Description,Total Records,Compressed Size in Bytes,Uncompressed Size in Bytes,Compressed Size in Bytes-Int,Uncompressed Size in Bytes-Int
mdrfoi.zip,MAUDE Base records received to date for 2023,1766723,58880KB,560954KB,60293120,574416896
mdrfoithru2023.zip,Master Record through 2023,18118160,598951KB,5800337KB,613325824,5939545088
mdrfoiadd.zip,New MAUDE Base records for the current month.,179856,6052KB,57102KB,6197248,58472448
mdrfoichange.zip,MAUDE Base data updates: changes to existing Ba...,786149,27210KB,267107KB,27863040,273517568
patient.zip,MAUDE Patient records received to date for 2023,1766486,9439KB,71703KB,9665536,73423872
patientthru2023.zip,Patient Record through 2023,18104231,95312KB,687674KB,97599488,704178176
patientadd.zip,New MAUDE Patient records for the current month.,179802,980KB,7363KB,1003520,7539712
patientchange.zip,MAUDE Patient data updates: changes to existing...,785692,4710KB,32258KB,4823040,33032192
patientproblemcode.zip,MAUDE Patient records for problemcode,17844128,127591KB,1229424KB,130653184,1258930176
patientproblemdata.zip,Patient Problem Data,998,11KB,25KB,11264,25600
