<h1><center>Exploring the Polish laureates using MongoDB</center></h1>

<b>Let's import the necessary libraries</b>

In [38]:
import datetime
from pymongo import MongoClient
import requests

<b>This sets up the MongoDB client</b>

In [2]:
# Connect to the MongoDB server at localhost:27017 and select the "nobel" database.
client = MongoClient(host='localhost', port=27017)
db = client.nobel

In [3]:
# One-time data load: downloads the "prize" and "laureate" JSON feeds from the Nobel Prize API
# and inserts their documents into the `prizes` and `laureates` collections.
# Kept commented out so that re-running the notebook does not insert duplicate records.

# for collection_name in ["prizes", "laureates"]:
#     response = requests.get("http://api.nobelprize.org/v1/{}.json".format(collection_name[:-1] ))
#     documents = response.json()[collection_name]
#     db[collection_name].insert_many(documents)

'This code gets the Laureates and Prizes collections and creates a database; commented out not to duplicate records'

In [4]:
# Drops the `prizes` and `laureates` collections; uncomment and run if records were duplicated ;)
# db.prizes.drop()
# db.laureates.drop()

'Drops the databases if the duplications happen ;)'

<b>Let's take a look at what a document in the Prizes collection looks like. </b>

In [34]:
# Fetch a single document from the `prizes` collection and print it to inspect its structure.
print(db["prizes"].find_one())

{'_id': ObjectId('6238db184fafdb6f26002856'), 'year': '2021', 'category': 'chemistry', 'laureates': [{'id': '1002', 'firstname': 'Benjamin', 'surname': 'List', 'motivation': '"for the development of asymmetric organocatalysis"', 'share': '2'}, {'id': '1003', 'firstname': 'David', 'surname': 'MacMillan', 'motivation': '"for the development of asymmetric organocatalysis"', 'share': '2'}]}


<b>Let's see a document from the Laureates collection. </b>

In [35]:
# Fetch a single document from the `laureates` collection and print it to inspect its structure.
print(db["laureates"].find_one())

{'_id': ObjectId('6238db184fafdb6f26002ae8'), 'id': '1', 'firstname': 'Wilhelm Conrad', 'surname': 'Röntgen', 'born': '1845-03-27', 'died': '1923-02-10', 'bornCountry': 'Prussia (now Germany)', 'bornCountryCode': 'DE', 'bornCity': 'Lennep (now Remscheid)', 'diedCountry': 'Germany', 'diedCountryCode': 'DE', 'diedCity': 'Munich', 'gender': 'male', 'prizes': [{'year': '1901', 'category': 'physics', 'share': '1', 'motivation': '"in recognition of the extraordinary services he has rendered by the discovery of the remarkable rays subsequently named after him"', 'affiliations': [{'name': 'Munich University', 'city': 'Munich', 'country': 'Germany'}]}]}


<b>What is the count of documents?</b>

In [7]:
# Print the number of documents in each collection: prizes first, then laureates.
for name in ("prizes", "laureates"):
    print(db[name].count_documents({}))

658
968


<b>Let's look at Nobel Prize winners whose country of birth mentions Poland.</b>

In [8]:
# Unanchored regex: matches every bornCountry value that contains "Poland" anywhere in the string.
poland_filter = {'bornCountry': {'$regex': 'Poland'}}
# Show only the name and the country of birth; hide the ObjectId.
name_and_country = {'firstname': 1, 'surname': 1, 'bornCountry': 1, '_id': 0}
born_country_poland = db.laureates.find(poland_filter, name_and_country)

for laureate in born_country_poland:
    print(laureate)

{'firstname': 'Marie', 'surname': 'Curie', 'bornCountry': 'Russian Empire (now Poland)'}
{'firstname': 'Albert A.', 'surname': 'Michelson', 'bornCountry': 'Prussia (now Poland)'}
{'firstname': 'Otto', 'surname': 'Stern', 'bornCountry': 'Germany (now Poland)'}
{'firstname': 'Isidor Isaac', 'surname': 'Rabi', 'bornCountry': 'Austria-Hungary (now Poland)'}
{'firstname': 'Max', 'surname': 'Born', 'bornCountry': 'Germany (now Poland)'}
{'firstname': 'Maria', 'surname': 'Goeppert Mayer', 'bornCountry': 'Germany (now Poland)'}
{'firstname': 'Klaus', 'surname': 'von Klitzing', 'bornCountry': 'German-occupied Poland (now Poland)'}
{'firstname': 'Georges', 'surname': 'Charpak', 'bornCountry': 'Poland'}
{'firstname': 'Fritz', 'surname': 'Haber', 'bornCountry': 'Prussia (now Poland)'}
{'firstname': 'Walther', 'surname': 'Nernst', 'bornCountry': 'Prussia (now Poland)'}
{'firstname': 'Friedrich', 'surname': 'Bergius', 'bornCountry': 'Germany (now Poland)'}
{'firstname': 'Kurt', 'surname': 'Alder', '

<b>Let's see which countries of birth are found when we filter by the regular expression: Poland </b>

In [9]:
# Distinct bornCountry values among the documents whose bornCountry contains "Poland".
born_country_poland = db.laureates.distinct('bornCountry', {'bornCountry': {'$regex': 'Poland'}})

for country in born_country_poland:
    print(country)

Austria-Hungary (now Poland)
Free City of Danzig (now Poland)
German-occupied Poland (now Poland)
Germany (now Poland)
Poland
Poland (now Belarus)
Poland (now Lithuania)
Poland (now Ukraine)
Prussia (now Poland)
Russian Empire (now Poland)


<b>Now let's find only the Nobel laureates whose country of birth starts with "Poland": born in Poland, or in a place that was Poland then but is now a different country.</b>

In [10]:
# The '^' anchor keeps only bornCountry values that START with "Poland".
starts_with_poland = {'bornCountry': {'$regex': '^Poland'}}
# Show the name, the prize categories and the country of birth; hide the ObjectId.
shown_fields = {'firstname': 1, 'surname': 1, 'prizes.category':1, 'bornCountry': 1, '_id': 0}
born_country_poland = db.laureates.find(starts_with_poland, shown_fields)

for laureate in born_country_poland:
    print(laureate)

{'firstname': 'Georges', 'surname': 'Charpak', 'bornCountry': 'Poland', 'prizes': [{'category': 'physics'}]}
{'firstname': 'Roald', 'surname': 'Hoffmann', 'bornCountry': 'Poland (now Ukraine)', 'prizes': [{'category': 'chemistry'}]}
{'firstname': 'Tadeus', 'surname': 'Reichstein', 'bornCountry': 'Poland', 'prizes': [{'category': 'medicine'}]}
{'firstname': 'Andrew V.', 'surname': 'Schally', 'bornCountry': 'Poland (now Lithuania)', 'prizes': [{'category': 'medicine'}]}
{'firstname': 'Lech', 'surname': 'Walesa', 'bornCountry': 'Poland', 'prizes': [{'category': 'peace'}]}
{'firstname': 'Shimon', 'surname': 'Peres', 'bornCountry': 'Poland (now Belarus)', 'prizes': [{'category': 'peace'}]}
{'firstname': 'Henryk', 'surname': 'Sienkiewicz', 'bornCountry': 'Poland', 'prizes': [{'category': 'literature'}]}
{'firstname': 'Wislawa', 'surname': 'Szymborska', 'bornCountry': 'Poland', 'prizes': [{'category': 'literature'}]}
{'firstname': 'Olga', 'surname': 'Tokarczuk', 'bornCountry': 'Poland', 'priz

<b>Let's see the <code>share</code> value of each Polish laureate's prize (a share of 1 means the prize was not shared).</b>

In [11]:
# For laureates whose bornCountry starts with "Poland", show the name and the `share` of each prize.
starts_with_poland = {'bornCountry': {'$regex': '^Poland'}}
name_and_share = {'firstname': 1, 'surname': 1, 'prizes.share': 1, '_id': 0}
polish_laureates_share = db.laureates.find(starts_with_poland, name_and_share)

for laureate in polish_laureates_share:
    print(laureate)

{'firstname': 'Georges', 'surname': 'Charpak', 'prizes': [{'share': '1'}]}
{'firstname': 'Roald', 'surname': 'Hoffmann', 'prizes': [{'share': '2'}]}
{'firstname': 'Tadeus', 'surname': 'Reichstein', 'prizes': [{'share': '3'}]}
{'firstname': 'Andrew V.', 'surname': 'Schally', 'prizes': [{'share': '4'}]}
{'firstname': 'Lech', 'surname': 'Walesa', 'prizes': [{'share': '1'}]}
{'firstname': 'Shimon', 'surname': 'Peres', 'prizes': [{'share': '3'}]}
{'firstname': 'Henryk', 'surname': 'Sienkiewicz', 'prizes': [{'share': '1'}]}
{'firstname': 'Wislawa', 'surname': 'Szymborska', 'prizes': [{'share': '1'}]}
{'firstname': 'Olga', 'surname': 'Tokarczuk', 'prizes': [{'share': '1'}]}


<b>Let's see the Polish Nobel laureates who shared the prize! </b>

In [12]:
# pipeline1: full documents of laureates whose bornCountry starts with "Poland" and who have
# a prize with share greater than '1'. Note the comparison value is the string '1', not a number.
pipeline1 = [
    {'$match': {'bornCountry': {'$regex': '^Poland'},
                'prizes.share': {'$gt': '1'}}},
]

# pipeline2: the same match stage, followed by a projection that keeps only name and share.
pipeline2 = pipeline1 + [
    {'$project': {'firstname': 1, 'surname': 1, 'prizes.share': 1, '_id': 0}},
]

In [32]:
# Run pipeline1 and print every matching document with a 1-based counter,
# followed by an empty line to separate the long records.
polish_laureates_share = db.laureates.aggregate(pipeline1)

for position, laureate in enumerate(polish_laureates_share, start=1):
    print(f'{position}) ', laureate)
    print()

1)  {'_id': ObjectId('6238db184fafdb6f26002be4'), 'id': '258', 'firstname': 'Roald', 'surname': 'Hoffmann', 'born': '1937-07-18', 'died': '0000-00-00', 'bornCountry': 'Poland (now Ukraine)', 'bornCountryCode': 'UA', 'bornCity': 'Zloczov', 'gender': 'male', 'prizes': [{'year': '1981', 'category': 'chemistry', 'share': '2', 'motivation': '"for their theories, developed independently, concerning the course of chemical reactions"', 'affiliations': [{'name': 'Cornell University', 'city': 'Ithaca, NY', 'country': 'USA'}]}]}

2)  {'_id': ObjectId('6238db184fafdb6f26002c40'), 'id': '350', 'firstname': 'Tadeus', 'surname': 'Reichstein', 'born': '1897-07-20', 'died': '1996-08-01', 'bornCountry': 'Poland', 'bornCountryCode': 'PL', 'bornCity': 'Wloclawek', 'diedCountry': 'Switzerland', 'diedCountryCode': 'CH', 'diedCity': 'Basel', 'gender': 'male', 'prizes': [{'year': '1950', 'category': 'medicine', 'share': '3', 'motivation': '"for their discoveries relating to the hormones of the adrenal cortex,

In [14]:
# Run pipeline2 (match + projection) and print one compact line per laureate.
polish_laureates_share = db.laureates.aggregate(pipeline2)

for laureate in polish_laureates_share:
    print(laureate)

{'firstname': 'Roald', 'surname': 'Hoffmann', 'prizes': [{'share': '2'}]}
{'firstname': 'Tadeus', 'surname': 'Reichstein', 'prizes': [{'share': '3'}]}
{'firstname': 'Andrew V.', 'surname': 'Schally', 'prizes': [{'share': '4'}]}
{'firstname': 'Shimon', 'surname': 'Peres', 'prizes': [{'share': '3'}]}


<b>It seems that laureates who won a Nobel Prize together have the same value in the motivation field; let's explore this!</b>

In [15]:
# Keep only the prize motivations of the laureates whose bornCountry starts with "Poland"
# and who have a prize with share greater than the string '1'.
pipeline = [
    {'$match': {'bornCountry': {'$regex': '^Poland'}, 'prizes.share': {'$gt': '1'}}},
    {'$project': {'prizes.motivation': 1, '_id': 0}},
]
cursor = db.laureates.aggregate(pipeline)

# Flatten: one entry per prize of every returned document, in cursor order.
motivations = [prize['motivation'] for document in cursor for prize in document['prizes']]

print(motivations)

['"for their theories, developed independently, concerning the course of chemical reactions"', '"for their discoveries relating to the hormones of the adrenal cortex, their structure and biological effects"', '"for their discoveries concerning the peptide hormone production of the brain"', '"for their efforts to create peace in the Middle East"']


<b>We have the motivations stored; now let's find all the laureates with those motivations.</b>

In [16]:
# The projection is identical for every motivation, so it is built once outside the loop.
shown_fields = {'$project':
                    {'firstname': 1, 'surname': 1, 'bornCountry':1, 'prizes.share':1, 'prizes.motivation':1, '_id': 0}}

# For each stored motivation, list every laureate that has a prize with exactly that motivation.
for number, motivation in enumerate(motivations):
    pipeline = [{'$match': {'prizes.motivation': motivation}}, shown_fields]
    cursor = db.laureates.aggregate(pipeline)

    # Numbered heading surrounded by blank lines.
    print('')
    print(f'{number+1}) The laureates of shared nobel prize:')
    print('')

    for document in cursor:
        print(document)



1) The laureates of shared nobel prize:

{'firstname': 'Kenichi', 'surname': 'Fukui', 'bornCountry': 'Japan', 'prizes': [{'share': '2', 'motivation': '"for their theories, developed independently, concerning the course of chemical reactions"'}]}
{'firstname': 'Roald', 'surname': 'Hoffmann', 'bornCountry': 'Poland (now Ukraine)', 'prizes': [{'share': '2', 'motivation': '"for their theories, developed independently, concerning the course of chemical reactions"'}]}

2) The laureates of shared nobel prize:

{'firstname': 'Edward C.', 'surname': 'Kendall', 'bornCountry': 'USA', 'prizes': [{'share': '3', 'motivation': '"for their discoveries relating to the hormones of the adrenal cortex, their structure and biological effects"'}]}
{'firstname': 'Tadeus', 'surname': 'Reichstein', 'bornCountry': 'Poland', 'prizes': [{'share': '3', 'motivation': '"for their discoveries relating to the hormones of the adrenal cortex, their structure and biological effects"'}]}
{'firstname': 'Philip S.', 'surna

<b>Which of the Polish laureates are still alive?</b>

In [17]:
# This query treats a `died` value of '0000-00-00' as "still alive".
# Plain equality is used here; it is equivalent to the explicit $eq operator for a string value.
living_polish = {'$match': {'bornCountry': {'$regex': '^Poland'}, 'died': '0000-00-00'}}
name_and_country = {'$project': {'firstname': 1, 'surname': 1, 'bornCountry': 1, '_id': 0}}

pipeline = [living_polish, name_and_country]
cursor = db.laureates.aggregate(pipeline)

for document in cursor:
    print(document)

{'firstname': 'Roald', 'surname': 'Hoffmann', 'bornCountry': 'Poland (now Ukraine)'}
{'firstname': 'Andrew V.', 'surname': 'Schally', 'bornCountry': 'Poland (now Lithuania)'}
{'firstname': 'Lech', 'surname': 'Walesa', 'bornCountry': 'Poland'}
{'firstname': 'Olga', 'surname': 'Tokarczuk', 'bornCountry': 'Poland'}


<b>What is the age of the living Polish Nobel laureates?</b>

In [18]:
# Today's date (local clock) as a 'YYYY-MM-DD' string, so MongoDB can parse it with $dateFromString.
now = datetime.datetime.now()
current_time = now.strftime('%Y-%m-%d')

# Milliseconds in a 365.25-day year; $subtract on two dates returns milliseconds.
# Dividing by an average year length makes the age an approximation of whole years lived.
MS_PER_YEAR = 31557600000

# Laureates whose bornCountry starts with "Poland" and whose `died` is the '0000-00-00' placeholder.
living_polish = {'$match': {'bornCountry': {'$regex': '^Poland'}, 'died': '0000-00-00'}}

# Convert today's date string and the `born` string into real dates.
as_dates = {'$addFields': {'today': {'$dateFromString': {'dateString': current_time}},
                           'born_date': {'$dateFromString': {'dateString': '$born'}}}}

# age = floor((today - born_date) / one year)
with_age = {'$project': {'firstname': 1, 'surname': 1, 'bornCountry': 1, '_id': 0,
                         'age': {'$floor': {'$divide': [{'$subtract': ['$today', '$born_date']}, MS_PER_YEAR]}}}}

pipeline = [living_polish, as_dates, with_age]

cursor = db.laureates.aggregate(pipeline)

for document in cursor:
    print(document)

{'firstname': 'Roald', 'surname': 'Hoffmann', 'bornCountry': 'Poland (now Ukraine)', 'age': 84.0}
{'firstname': 'Andrew V.', 'surname': 'Schally', 'bornCountry': 'Poland (now Lithuania)', 'age': 95.0}
{'firstname': 'Lech', 'surname': 'Walesa', 'bornCountry': 'Poland', 'age': 78.0}
{'firstname': 'Olga', 'surname': 'Tokarczuk', 'bornCountry': 'Poland', 'age': 60.0}


<b>How many years did the deceased Polish Nobel laureates live?</b>

In [19]:
# Laureates whose bornCountry starts with "Poland" and whose `died` is a real date
# (anything other than the '0000-00-00' placeholder).
deceased_polish = {'$match': {'bornCountry': {'$regex': '^Poland'}, 'died': {'$ne': '0000-00-00'}}}

# Convert the `died` and `born` strings into real dates.
as_dates = {'$addFields': {'died_date': {'$dateFromString': {'dateString': '$died'}},
                           'born_date': {'$dateFromString': {'dateString': '$born'}}}}

# age = floor((died_date - born_date) / one year); 31557600000 ms is a 365.25-day year.
with_age = {'$project': {'firstname': 1, 'surname': 1, 'bornCountry': 1, '_id': 0,
                         'age': {'$floor': {'$divide': [{'$subtract': ['$died_date', '$born_date']}, 31557600000]}}}}

pipeline = [deceased_polish, as_dates, with_age]

cursor = db.laureates.aggregate(pipeline)

for document in cursor:
    print(document)

{'firstname': 'Georges', 'surname': 'Charpak', 'bornCountry': 'Poland', 'age': 86.0}
{'firstname': 'Tadeus', 'surname': 'Reichstein', 'bornCountry': 'Poland', 'age': 99.0}
{'firstname': 'Shimon', 'surname': 'Peres', 'bornCountry': 'Poland (now Belarus)', 'age': 93.0}
{'firstname': 'Henryk', 'surname': 'Sienkiewicz', 'bornCountry': 'Poland', 'age': 70.0}
{'firstname': 'Wislawa', 'surname': 'Szymborska', 'bornCountry': 'Poland', 'age': 88.0}


<b>Now let's find the age of both living and deceased Polish laureates.</b>

In [20]:
# Age of every laureate whose bornCountry starts with "Poland", living or deceased.
# Relies on `current_time` (today's date as a 'YYYY-MM-DD' string) defined in an earlier cell.
# A `died` value of '0000-00-00' is treated as "still alive"; the condition is written once
# and reused for both the end date and the vitalStatus label.
is_alive = {'$eq': ['$died', '0000-00-00']}

pipeline = [
    {'$match': {'bornCountry': {'$regex': '^Poland'}}},
    {'$addFields': {
        # End of the age interval: today for living laureates, the date of death otherwise.
        'died_date': {'$dateFromString': {'dateString': {'$cond': {'if': is_alive, 'then': current_time, 'else': '$died'}}}},
        'born_date': {'$dateFromString': {'dateString': '$born'}},
        'vitalStatus': {'$cond': {'if': is_alive, 'then': 'alive', 'else': 'deceased'}},
        # The previously added `today` field was never read by any stage, so it is dropped.
    }},
    # $subtract on two dates yields milliseconds; 31557600000 ms is a 365.25-day year.
    {'$project': {'firstname': 1, 'surname': 1, 'bornCountry': 1, '_id': 0, 'vitalStatus': 1,
                  'age': {'$floor': {'$divide': [{'$subtract': ['$died_date', '$born_date']}, 31557600000]}}}},
]

cursor = db.laureates.aggregate(pipeline)

for document in cursor:
    print(document)

{'firstname': 'Georges', 'surname': 'Charpak', 'bornCountry': 'Poland', 'vitalStatus': 'deceased', 'age': 86.0}
{'firstname': 'Roald', 'surname': 'Hoffmann', 'bornCountry': 'Poland (now Ukraine)', 'vitalStatus': 'alive', 'age': 84.0}
{'firstname': 'Tadeus', 'surname': 'Reichstein', 'bornCountry': 'Poland', 'vitalStatus': 'deceased', 'age': 99.0}
{'firstname': 'Andrew V.', 'surname': 'Schally', 'bornCountry': 'Poland (now Lithuania)', 'vitalStatus': 'alive', 'age': 95.0}
{'firstname': 'Lech', 'surname': 'Walesa', 'bornCountry': 'Poland', 'vitalStatus': 'alive', 'age': 78.0}
{'firstname': 'Shimon', 'surname': 'Peres', 'bornCountry': 'Poland (now Belarus)', 'vitalStatus': 'deceased', 'age': 93.0}
{'firstname': 'Henryk', 'surname': 'Sienkiewicz', 'bornCountry': 'Poland', 'vitalStatus': 'deceased', 'age': 70.0}
{'firstname': 'Wislawa', 'surname': 'Szymborska', 'bornCountry': 'Poland', 'vitalStatus': 'deceased', 'age': 88.0}
{'firstname': 'Olga', 'surname': 'Tokarczuk', 'bornCountry': 'Polan

<b>What is the country of birth of Marie Skłodowska-Curie?</b>

In [26]:
# Look up Marie Curie by first name and surname and show her country of birth.
# `_id` is not excluded from the projection, so the ObjectId is printed as well.
marie_curie = {'$match': {'firstname':'Marie','surname':'Curie'}}
name_and_country = {'$project': {'firstname': 1, 'surname': 1, 'bornCountry': 1}}

pipeline = [marie_curie, name_and_country]

cursor = db.laureates.aggregate(pipeline)

for document in cursor:
    print(document)

{'_id': ObjectId('6238db184fafdb6f26002aed'), 'firstname': 'Marie', 'surname': 'Curie', 'bornCountry': 'Russian Empire (now Poland)'}


<b>How many prizes did Marie Curie win?</b>

In [22]:
# Count Marie Curie's prizes: take the length of the `prizes` array of each matching document,
# then sum those lengths per (firstname, surname) pair.
prize_count_pipeline = [
    {'$match': {'firstname': 'Marie','surname': 'Curie'}},
    {'$project': {'n_prizes': {'$size': '$prizes'}, 'firstname':1, 'surname':1}},
    {'$group': {'_id': {'firstname': '$firstname','surname':'$surname'},
                'total_prizes': {'$sum': '$n_prizes'}}},
]

# Last expression of the cell, so the resulting list is displayed as the cell output.
list(db.laureates.aggregate(prize_count_pipeline))

[{'_id': {'firstname': 'Marie', 'surname': 'Curie'}, 'total_prizes': 2}]

<b>Let's see the prizes.</b>

In [33]:
# List each of Marie Curie's prizes as a separate document.
# The match includes the surname as well as the first name, like the other Marie Curie queries
# in this notebook; matching on 'firstname' alone would also return any other laureate named Marie.
pipeline = [{'$match': {'firstname': 'Marie', 'surname': 'Curie'}},
            # $unwind emits one document per element of the `prizes` array.
            {'$unwind': '$prizes'},
            {'$project':{'firstname':1,'surname':1, 'prizes':1}}]

cursor = db.laureates.aggregate(pipeline)

for document in cursor:
    print(document)
    print("")

{'_id': ObjectId('6238db184fafdb6f26002aed'), 'firstname': 'Marie', 'surname': 'Curie', 'prizes': {'year': '1903', 'category': 'physics', 'share': '4', 'motivation': '"in recognition of the extraordinary services they have rendered by their joint researches on the radiation phenomena discovered by Professor Henri Becquerel"', 'affiliations': [[]]}}

{'_id': ObjectId('6238db184fafdb6f26002aed'), 'firstname': 'Marie', 'surname': 'Curie', 'prizes': {'year': '1911', 'category': 'chemistry', 'share': '1', 'motivation': '"in recognition of her services to the advancement of chemistry by the discovery of the elements radium and polonium, by the isolation of radium and the study of the nature and compounds of this remarkable element"', 'affiliations': [{'name': 'Sorbonne University', 'city': 'Paris', 'country': 'France'}]}}



<h2> Thank you for exploring with me :)</h2>