In [1]:
!pip install "pymongo[srv]"
import pymongo
from pymongo import MongoClient
import pandas as pd


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Database access credentials: to establish a connection to the MongoDB server using the MongoClient.
# The connection string includes the username (user), password(123), and database details.
client = MongoClient("mongodb+srv://user:123@cluster0.9d0ja.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0")

# Access the 'blancco' database
db = client["blancco"]

# Access the 'all_reports' collection within the 'blancco' database
all_reports_collection = db["all_reports"]

# Can now use 'all_reports_collection' to perform operations like find, insert, update, or aggregate data

In [9]:
pipeline = [
    # Unwind to process each erasure and disk entry separately
    { "$unwind": "$disks" },
    { "$unwind": "$erasures" },
    
    # Match on SATA/SSD interface type and successful erasure state
    {
        "$match": {
            "disks.interface_type": "SATA/SSD",
            "erasures.interface_type": "SATA/SSD",
            "erasures.state": "Successful"
        }
    },

    # Project required fields, including converted capacity, elapsed time, and erasure rate
    {
        "$project": {
            "_id": 0,
            "interface_type": "$erasures.interface_type",
            "model": "$erasures.model",
            "serial_number": "$erasures.serial",
            "capacity_gb": { "$divide": ["$erasures.capacity", 1073741824] },  # Convert bytes to GB
            "elapsed_time_seconds": {
                "$add": [
                    { "$multiply": [{ "$toInt": { "$arrayElemAt": [{ "$split": ["$erasures.elapsed_time", ":"] }, 0] } }, 3600] },
                    { "$multiply": [{ "$toInt": { "$arrayElemAt": [{ "$split": ["$erasures.elapsed_time", ":"] }, 1] } }, 60] },
                    { "$toInt": { "$arrayElemAt": [{ "$split": ["$erasures.elapsed_time", ":"] }, 2] } }
                ]
            }
        }
    },

    # Calculate erasure rate in GB/min and handle zero-division cases
    {
        "$project": {
            "interface_type": 1,
            "model": 1,
            "serial_number": 1,
            "capacity_gb": 1,
            "elapsed_time_minutes": { "$divide": ["$elapsed_time_seconds", 60] },
            "erasure_rate_gb_per_min": {
                "$cond": {
                    "if": { "$gt": ["$elapsed_time_seconds", 0] },  # Ensure elapsed time is > 0
                    "then": { "$divide": ["$capacity_gb", { "$divide": ["$elapsed_time_seconds", 60] }] },
                    "else": None  # Set to null if elapsed time is zero
                }
            }
        }
    },

    # Sort results by serial number for clarity and analysis
    { "$sort": { "serial_number": 1, "erasure_rate_gb_per_min": 1 } }
]


In [10]:
results = list(all_reports_collection.aggregate(pipeline))

# Output results
for result in results:
    print(result)


{'interface_type': 'SATA/SSD', 'model': 'SSD', 'serial_number': '00000000012022910093', 'capacity_gb': 29.818199157714844, 'elapsed_time_minutes': 59.833333333333336, 'erasure_rate_gb_per_min': 0.49835430347155724}
{'interface_type': 'SATA/SSD', 'model': 'SSD', 'serial_number': '0000000001202291010D', 'capacity_gb': 29.818199157714844, 'elapsed_time_minutes': 63.06666666666667, 'erasure_rate_gb_per_min': 0.4728044263908273}
{'interface_type': 'SATA/SSD', 'model': 'C300-CTFDDAC128MAG', 'serial_number': '00000000100500006F68', 'capacity_gb': 119.24251556396484, 'elapsed_time_minutes': 37.6, 'erasure_rate_gb_per_min': 3.171343499041618}
{'interface_type': 'SATA/SSD', 'model': 'C300-CTFDDAC064MAG', 'serial_number': '00000000104502FEF229', 'capacity_gb': 59.626304626464844, 'elapsed_time_minutes': 33.21666666666667, 'erasure_rate_gb_per_min': 1.7950718904103815}
{'interface_type': 'SATA/SSD', 'model': 'C300-CTFDDAC064MAG', 'serial_number': '00000000104502FEF229', 'capacity_gb': 59.626304626