In [2]:
pip install pymongo

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pymongo

In [4]:
# Connection string for the MongoDB server on localhost, port 27017.
MONGO_URI = "mongodb://localhost:27017/"
client = pymongo.MongoClient(MONGO_URI)

In [5]:
# Name of the database that holds the project's collections.
DB_NAME = 'dbproject'
db = client[DB_NAME]

In [6]:
# Keep a separate handle for each collection. Previously all three were
# assigned to the same name, so only the last one (victims) was reachable.
collisions_collection = db['collisions']
parties_collection = db['parties']
victims_collection = db['victims']

# The cells below query victim fields through `collection`, so it stays
# bound to the victims collection exactly as before.
collection = victims_collection

In [7]:
# Cursor over every document in `collection` (empty filter = match all).
cursor = collection.find({})

In [56]:
# Preview a few documents instead of dumping the whole collection.
# A fresh cursor is opened here so the cell prints the same thing every time
# it is run; a cursor shared with another cell is exhausted after one pass.
PREVIEW_LIMIT = 5
for document in collection.find().limit(PREVIEW_LIMIT):
    print(document)


In [40]:
def age_range_count_pipeline(min_age, max_age):
    """Build an aggregation pipeline that counts documents in an age range.

    Args:
        min_age: Inclusive lower bound applied to ``victim_age`` (``$gte``).
        max_age: Exclusive upper bound applied to ``victim_age`` (``$lt``).

    Returns:
        A new two-stage pipeline list: a ``$match`` on the age range followed
        by a ``$group`` with ``_id: None`` that sums 1 per document into
        ``count``.
    """
    return [
        {
            "$match": {
                "victim_age": {"$gte": min_age, "$lt": max_age}
            }
        },
        {
            "$group": {
                "_id": None,
                "count": {"$sum": 1}
            }
        }
    ]


# Age range 10 to 20
pipeline_10_to_20 = age_range_count_pipeline(10, 20)

# Age range 20 to 30
pipeline_20_to_30 = age_range_count_pipeline(20, 30)

# Age range 30 to 40
pipeline_30_to_40 = age_range_count_pipeline(30, 40)

# Age range 40 to 50
pipeline_40_to_50 = age_range_count_pipeline(40, 50)

# Execute the pipelines
pipelines = [pipeline_10_to_20, pipeline_20_to_30, pipeline_30_to_40, pipeline_40_to_50]

# The printed bounds are derived from the list position, so the pipelines
# must stay in ascending 10-year order for the labels to be right.
for i, pipeline in enumerate(pipelines):
    result = collection.aggregate(pipeline)
    # The $group stage yields no document when nothing matched; fall back to
    # a zero count so an empty range is still reported instead of skipped.
    count = next(result, {"count": 0})["count"]
    print("Number of accidents with victim age between", i*10 + 10, "and", i*10 + 19, ":", count)


Number of accidents with victim age between 10 and 19 : 31978
Number of accidents with victim age between 20 and 29 : 92886
Number of accidents with victim age between 30 and 39 : 65872
Number of accidents with victim age between 40 and 49 : 50627


In [55]:
# Execute the pipelines
pipelines = [pipeline_10_to_20, pipeline_20_to_30, pipeline_30_to_40, pipeline_40_to_50]

# Counts every document in the collection, regardless of age.
total_accidents_pipeline = [{"$group": {"_id": None, "total_accidents": {"$sum": 1}}}]

# Get the total count of accidents (0 when the collection is empty and the
# $group stage therefore returns no document).
total_accidents_result = collection.aggregate(total_accidents_pipeline)
total_accidents_count = next(total_accidents_result, {"total_accidents": 0})["total_accidents"]

# Execute the age range pipelines and calculate percentage
for i, pipeline in enumerate(pipelines):
    result = collection.aggregate(pipeline)
    age_range_count = next(result, {"count": 0})["count"]
    # Guard the division: with an empty collection the total is 0, and the
    # share is reported as 0.0 rather than raising ZeroDivisionError.
    if total_accidents_count:
        percentage = (age_range_count / total_accidents_count) * 100
    else:
        percentage = 0.0
    print("Number of accidents with victim age between", i*10 + 10, "and", i*10 + 19, ":", age_range_count)
    print("Percentage of accidents within this age range:", percentage, "%")

Number of accidents with victim age between 10 and 19 : 31978
Percentage of accidents within this age range: 9.909728441583797 %
Number of accidents with victim age between 20 and 29 : 92886
Percentage of accidents within this age range: 28.78463431186918 %
Number of accidents with victim age between 30 and 39 : 65872
Percentage of accidents within this age range: 20.41321007892951 %
Number of accidents with victim age between 40 and 49 : 50627
Percentage of accidents within this age range: 15.688905554195474 %


In [45]:
# Stage 1: keep documents with victim_age in [10, 30] (both ends inclusive)
# whose victim_sex is "male".
match_stage = {
    "$match": {
        "victim_age": {"$gte": 10, "$lte": 30},
        "victim_sex": "male",
    }
}

# 1 when the document's degree of injury is exactly "complaint of pain", else 0.
# NOTE(review): only this one injury category is counted - confirm that is intended.
is_complaint_of_pain = {
    "$cond": [{"$eq": ["$victim_degree_of_injury", "complaint of pain"]}, 1, 0]
}

# Stage 2: one group per age with the group size and the number of
# "complaint of pain" documents.
group_stage = {
    "$group": {
        "_id": "$victim_age",
        "total_count": {"$sum": 1},
        "injury_count": {"$sum": is_complaint_of_pain},
    }
}

# Stage 3: keep the counts and add injury_count / total_count as a percentage.
project_stage = {
    "$project": {
        "_id": 1,
        "total_count": 1,
        "injury_count": 1,
        "injury_percentage": {
            "$multiply": [{"$divide": ["$injury_count", "$total_count"]}, 100]
        },
    }
}

# Stage 4: order the groups by ascending age.
sort_stage = {"$sort": {"_id": 1}}

pipeline = [match_stage, group_stage, project_stage, sort_stage]

# Run the per-age aggregation and print one four-line report per age group.
result = collection.aggregate(pipeline)

# (label, document key) pairs for the fields printed without a unit suffix.
plain_fields = (
    ("Age:", "_id"),
    ("Total count:", "total_count"),
    ("Injury count:", "injury_count"),
)

for doc in result:
    for label, key in plain_fields:
        print(label, doc[key])
    # The percentage line carries a trailing "%" marker.
    print("Injury percentage:", doc["injury_percentage"], "%")


Age: 10
Total count: 850
Injury count: 70
Injury percentage: 8.235294117647058 %
Age: 11
Total count: 888
Injury count: 84
Injury percentage: 9.45945945945946 %
Age: 12
Total count: 1022
Injury count: 103
Injury percentage: 10.078277886497064 %
Age: 13
Total count: 1140
Injury count: 127
Injury percentage: 11.140350877192983 %
Age: 14
Total count: 1306
Injury count: 165
Injury percentage: 12.633996937212864 %
Age: 15
Total count: 1515
Injury count: 177
Injury percentage: 11.683168316831685 %
Age: 16
Total count: 1801
Injury count: 236
Injury percentage: 13.103831204886173 %
Age: 17
Total count: 2323
Injury count: 330
Injury percentage: 14.20576840292725 %
Age: 18
Total count: 3772
Injury count: 677
Injury percentage: 17.948038176033933 %
Age: 19
Total count: 5349
Injury count: 1130
Injury percentage: 21.125444008225834 %
Age: 20
Total count: 6905
Injury count: 1434
Injury percentage: 20.76755973931933 %
Age: 21
Total count: 7863
Injury count: 1792
Injury percentage: 22.790283606765865 