In [26]:
pip install pymongo

Note: you may need to restart the kernel to use updated packages.


In [27]:
from pymongo import MongoClient

try:
    # Connect to MongoDB (default localhost:27017)
    client = MongoClient("mongodb://localhost:27017/")

    # Send a ping command to check connection
    client.admin.command("ping")

    print("✅ Connected to MongoDB!")

    # Optional: List databases
    print("Databases:", client.list_database_names())

except Exception as e:
    print("❌ Could not connect to MongoDB:", e)


✅ Connected to MongoDB!
Databases: ['Nobel_Prices', 'admin', 'config', 'local']


In [28]:
from pymongo import MongoClient

# Replace with your MongoDB URI
#client = MongoClient("mongodb://localhost:27017")

# Select database and collection
db = client["Nobel_Prices"]        # your database name
collection = db["awards"]  # your collection name


In [29]:
import pandas as pd
import altair as alt


In [30]:
# Laureates by country of birth → shows which countries produced most laureates.

pipeline = [
    {
        "$group": {
            "_id": "$laureates.bornCountry",   # group by country of birth
            "count": {"$sum": 1}
        }
    },
    {
        "$sort": {"count": -1}       # sort descending
    }
]

result = list(collection.aggregate(pipeline))
df_country = pd.DataFrame(result)
df_country.head()


Unnamed: 0,_id,count
0,"[Prussia (now Germany), the Netherlands, the N...",1


In [37]:
# VISUALISED CHART FROM ALTAIR

chart_country = alt.Chart(df_country).mark_bar().encode(
    x=alt.X('_id', sort='-y', title='Country of Birth'),
    y=alt.Y('count', title='Number of Laureates'),
    color=alt.Color('_id', title='Country Category'),
    tooltip=['_id', 'count']
).properties(
    title='Nobel Laureates by Country of Birth'
)
chart_country


In [32]:
# Total prizes by category → shows distribution across Physics, Chemistry, etc.
pipeline = [
    {
        "$group": {
            "_id": "$laureates.prizes.category",
            "count": {"$sum": 1}
        }
    },
    {"$sort": {"count": -1}}
]

result = list(collection.aggregate(pipeline))
df_category = pd.DataFrame(result)
df_category.head()




Unnamed: 0,_id,count
0,"[[physics], [physics], [physics], [physics], [...",1


In [33]:
pipeline = [
    {"$unwind": "$laureates"},              # Unwind laureates array first
    {"$unwind": "$laureates.prizes"},       # Then unwind prizes for each laureate
    {"$group": {
        "_id": "$laureates.prizes.category",
        "count": {"$sum": 1}
    }},
    {"$sort": {"count": -1}}
]

result = list(collection.aggregate(pipeline))
df_category = pd.DataFrame(result)
print(df_category.head())



          _id  count
0    medicine    229
1     physics    227
2   chemistry    197
3       peace    142
4  literature    121


In [34]:
chart_category = alt.Chart(df_category).mark_bar().encode(
    x=alt.X('_id', sort='-y', title='Category'),
    y=alt.Y('count', title='Number of Prizes'),
    color=alt.Color('_id', title='Prize Category'),
    tooltip=['_id', 'count']
).properties(
    title='Total Nobel Prizes by Category'
)
chart_category
