In [1]:
import os
from getpass import getpass

import pandas as pd
!pip install "pymongo[srv]"
import pymongo
from pymongo import MongoClient


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Establish a connection to the MongoDB server using MongoClient.
# The connection URI embeds the username and password, so it must never be
# stored in the notebook: it is read from the MONGODB_URI environment variable,
# and if that is unset the user is prompted for it (input is not echoed).
# NOTE(review): the password that was previously committed in this cell should be rotated.
MONGODB_URI = os.environ.get("MONGODB_URI") or getpass("MongoDB connection URI: ")
client = MongoClient(MONGODB_URI)

# Access the 'blancco' database
db = client["blancco"]

# Access the 'all_reports' collection within the 'blancco' database
all_reports_collection = db["all_reports"]

# 'all_reports_collection' can now be used for find, insert, update, or aggregate operations

In [14]:
# Aggregation: for each group of disk serials whose regular wipe failed,
# collect the regular-wipe failure messages together with the state and
# messages of the rig-wipe reports that reference the same serial.
pipeline = [
    # Produce one document per entry of the 'erasures' array
    { "$unwind": "$erasures" },

    # Match failed regular wipes with failure messages
    {
        "$match": {
            "erasures.state": "Failed",
            "erasures.erase_messages.failure": { "$exists": True, "$ne": [] },  # failure messages exist
            "system": { "$ne": {} }  # ensures it's a regular wipe (system field is not empty)
        }
    },

    # Group by the report's disk serial(s). NOTE: the recorded output shows this
    # key as a list of serials (one entry per disk in the report), not a scalar.
    # (A former "$addFields wipeType" stage was removed here: this $group
    # discards every field it does not reference, so that stage had no effect.)
    {
        "$group": {
            "_id": { "serial": "$disks.serial" },
            "failureMessages": { "$push": "$erasures.erase_messages.failure" }  # Collect all failure messages
        }
    },

    # Self-join on the same collection to find reports sharing a serial -- matching regular to rig
    {
        "$lookup": {
            "from": "all_reports",
            "localField": "_id.serial",
            "foreignField": "disks.serial",  # Match the serial number in the rig wipe reports
            "as": "rigWipeData"
        }
    },

    # Unwind the rigWipeData array to process each matched report separately
    { "$unwind": "$rigWipeData" },

    # Keep only matched reports where the system field is empty (indicating a rig wipe)
    {
        "$match": {
            "rigWipeData.system": { "$eq": {} }
        }
    },

    # Add the rig wipe state and messages, with defaults when they are missing
    {
        "$addFields": {
            "rigWipeStatus": {
                "$ifNull": [
                    "$rigWipeData.erasures.state",  # State(s) of the rig wipe erasures
                    "Unknown"  # Default value if no state is found (a plain string, not a list)
                ]
            },
            "rigWipeFailureMessages": {
                "$ifNull": [
                    # Despite the field name, this is the whole erase_messages
                    # object (information / failure / exception lists)
                    "$rigWipeData.erasures.erase_messages",
                    []
                ]
            }
        }
    },

    # Group by serial number, collecting failure messages and statuses
    {
        "$group": {
            "_id": "$_id.serial",  # Group by serial number
            "failureMessages": { "$first": "$failureMessages" },  # Failure messages from the regular wipe
            "rigWipeStatuses": { "$addToSet": "$rigWipeStatus" },  # Distinct rig wipe statuses (Successful/Failed)
            "rigWipeFailureMessages": { "$push": "$rigWipeFailureMessages" }  # Rig wipe messages, one entry per rig report
        }
    },

    # Project the desired output
    {
        "$project": {
            "serial": "$_id",
            "failureMessages": 1,
            "rigWipeStatuses": 1,
            "rigWipeFailureMessages": 1
        }
    }
]
result = all_reports_collection.aggregate(pipeline)
result_list = list(result)
# Build the frame with an explicit column list: this leaves out Mongo's '_id'
# and keeps the expected columns even when the aggregation returns no
# documents (dropping '_id' from a column-less empty frame raised KeyError).
df2 = pd.DataFrame(
    result_list,
    columns=["failureMessages", "rigWipeStatuses", "rigWipeFailureMessages", "serial"],
)
df2

Unnamed: 0,failureMessages,rigWipeStatuses,rigWipeFailureMessages,serial
0,[[FORMAT NVMe (user data erase) command failed]],[[Successful]],"[[{'information': ['Exceeds NIST-Purge'], 'failure': [], 'exception': []}]]",[S443NX0M247743]
1,"[[ENHANCED SECURE ERASE command failed, SECURE ERASE command failed]]",[[Successful]],"[[{'information': ['Exceeds NIST-Clear', 'Self-test completed without error.'], 'failure': [], 'exception': ['Device is SSD, see manual for more information']}]]",[TW05MFP0550855780009]
2,[[FORMAT NVMe (user data erase) command failed]],[[Failed]],"[[{'information': [], 'failure': ['FORMAT NVMe (user data erase) command failed'], 'exception': []}]]",[S364NA0J936916]
3,"[[Cryptographic erasure has failed, Shadow MBR area content was not erased by the user]]","[[Successful], [Failed]]","[[{'information': ['Device has been set back to factory defaults and cryptographically erased with TCG security features, see manual for more information', 'Exceeds NIST-Purge', 'Self-test completed without error.'], 'failure': [], 'exception': []}], [{'information': ['Self-test completed without error.'], 'failure': ['Cryptographic erasure has failed', 'Shadow MBR area content was not erased by the user'], 'exception': []}]]",[174619ACAE68]
4,"[[Previously written pattern found, Verification failed, Read errors count reaches or exceeds the configured threshold, sector(s) failed the verification]]",[[Failed]],"[[{'information': ['Self-test completed without error.'], 'failure': ['Previously written pattern found', 'Verification failed', 'Read errors count reaches or exceeds the configured threshold', 'sector(s) failed the verification'], 'exception': []}]]",[2I4920053065]
5,"[[Verification failed, Read errors count reaches or exceeds the configured threshold, sector(s) failed the verification]]",[[Successful]],"[[{'information': ['Exceeds NIST-Purge'], 'failure': [], 'exception': []}]]",[FBFB180605C0003667]
6,"[[Verification failed, Read errors count reaches or exceeds the configured threshold, sector(s) failed the verification, Self-test completed with read failure.]]",[[Successful]],"[[{'information': [], 'failure': [], 'exception': ['Self-test completed with read failure.']}]]",[WD-WCC3F0CTU036]
7,[[Cryptographic erasure has failed]],[[Successful]],"[[{'information': ['Exceeds NIST-Purge'], 'failure': [], 'exception': []}]]",[39IS1128TMFQ]
8,"[[Erasure target is offline., Write errors count reaches or exceeds the configured threshold, sector(s) failed to overwrite, Remapped sectors area was not erased by the user, Self-test aborted by internal timeout.]]",[[Successful]],"[[{'information': ['Self-test completed without error.'], 'failure': [], 'exception': ['Remapped sectors area was not erased by the user']}]]",[83KZ0ZHKS]
9,[[Erasure target is offline.]],[[Successful]],"[[{'information': ['Exceeds NIST-Purge', 'Self-test completed without error.'], 'failure': [], 'exception': []}]]","[Z9AYS9T8, 1844E1D4C3F6]"


In [15]:
def keep_successful(status_list):
    """Collapse a (possibly nested) list of wipe statuses to a single-item list.

    Args:
        status_list: A list whose items are status strings and/or lists of
            status strings. Any non-list value is returned unchanged.

    Returns:
        ``["Successful"]`` if any status equals "Successful"; otherwise a
        one-item list holding the first status; ``[]`` when there are no
        statuses. Non-list input is returned as is.
    """
    if not isinstance(status_list, list):
        return status_list

    # Flatten one level, item by item. Only list items are expanded, so a plain
    # string sitting next to a nested list (e.g. ["Unknown", ["Successful"]])
    # is kept whole instead of being split into individual characters.
    flat_list = []
    for item in status_list:
        if isinstance(item, list):
            flat_list.extend(item)
        else:
            flat_list.append(item)

    if "Successful" in flat_list:
        return ["Successful"]
    # First status if there is one, otherwise an empty list
    return flat_list[:1]
# Reduce each row's rig-wipe statuses to a single entry, preferring "Successful"
df2["rigWipeStatuses"] = df2["rigWipeStatuses"].apply(keep_successful)


def keep_latest(serial_list):
    """Reduce a non-empty list to a one-item list holding its last element.

    Args:
        serial_list: Usually a list of serial numbers; any other value is
            accepted and passed through.

    Returns:
        A new one-item list with the last element when ``serial_list`` is a
        non-empty list; otherwise ``serial_list`` itself, unchanged.
    """
    # Guard clause: non-lists and empty lists are handed back untouched
    if not isinstance(serial_list, list) or not serial_list:
        return serial_list
    # Slice from the final position to get just the last item, still as a list
    return serial_list[-1:]
# Keep only the last serial from each row's serial list
df2["serial"] = df2["serial"].apply(keep_latest)

# Normalise the list-valued columns one at a time: unwrap one-item lists to
# their sole element, then give every remaining list element its own row.
for column in ('failureMessages', 'rigWipeStatuses', 'serial'):
    # A list holding exactly one item is replaced by that item
    unwrapped = df2[column].apply(
        lambda cell: cell[0] if isinstance(cell, list) and len(cell) == 1 else cell
    )
    df2[column] = unwrapped

    # Values are never split on commas; rows are only expanded when the
    # column still holds at least one real list after unwrapping
    has_lists = unwrapped.apply(lambda cell: isinstance(cell, list)).any()
    if has_lists:
        df2 = df2.explode(column)

In [16]:
# Lift pandas' display limits so the frame below renders in full: all rows,
# all columns, no horizontal truncation, and untruncated cell contents.
for option_name in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(option_name, None)

# Display the flattened frame using the display options set above
df2

Unnamed: 0,failureMessages,rigWipeStatuses,rigWipeFailureMessages,serial
0,FORMAT NVMe (user data erase) command failed,Successful,"[[{'information': ['Exceeds NIST-Purge'], 'failure': [], 'exception': []}]]",S443NX0M247743
1,ENHANCED SECURE ERASE command failed,Successful,"[[{'information': ['Exceeds NIST-Clear', 'Self-test completed without error.'], 'failure': [], 'exception': ['Device is SSD, see manual for more information']}]]",TW05MFP0550855780009
1,SECURE ERASE command failed,Successful,"[[{'information': ['Exceeds NIST-Clear', 'Self-test completed without error.'], 'failure': [], 'exception': ['Device is SSD, see manual for more information']}]]",TW05MFP0550855780009
2,FORMAT NVMe (user data erase) command failed,Failed,"[[{'information': [], 'failure': ['FORMAT NVMe (user data erase) command failed'], 'exception': []}]]",S364NA0J936916
3,Cryptographic erasure has failed,Successful,"[[{'information': ['Device has been set back to factory defaults and cryptographically erased with TCG security features, see manual for more information', 'Exceeds NIST-Purge', 'Self-test completed without error.'], 'failure': [], 'exception': []}], [{'information': ['Self-test completed without error.'], 'failure': ['Cryptographic erasure has failed', 'Shadow MBR area content was not erased by the user'], 'exception': []}]]",174619ACAE68
3,Shadow MBR area content was not erased by the user,Successful,"[[{'information': ['Device has been set back to factory defaults and cryptographically erased with TCG security features, see manual for more information', 'Exceeds NIST-Purge', 'Self-test completed without error.'], 'failure': [], 'exception': []}], [{'information': ['Self-test completed without error.'], 'failure': ['Cryptographic erasure has failed', 'Shadow MBR area content was not erased by the user'], 'exception': []}]]",174619ACAE68
4,Previously written pattern found,Failed,"[[{'information': ['Self-test completed without error.'], 'failure': ['Previously written pattern found', 'Verification failed', 'Read errors count reaches or exceeds the configured threshold', 'sector(s) failed the verification'], 'exception': []}]]",2I4920053065
4,Verification failed,Failed,"[[{'information': ['Self-test completed without error.'], 'failure': ['Previously written pattern found', 'Verification failed', 'Read errors count reaches or exceeds the configured threshold', 'sector(s) failed the verification'], 'exception': []}]]",2I4920053065
4,Read errors count reaches or exceeds the configured threshold,Failed,"[[{'information': ['Self-test completed without error.'], 'failure': ['Previously written pattern found', 'Verification failed', 'Read errors count reaches or exceeds the configured threshold', 'sector(s) failed the verification'], 'exception': []}]]",2I4920053065
4,sector(s) failed the verification,Failed,"[[{'information': ['Self-test completed without error.'], 'failure': ['Previously written pattern found', 'Verification failed', 'Read errors count reaches or exceeds the configured threshold', 'sector(s) failed the verification'], 'exception': []}]]",2I4920053065
