#PyMongo Week3 Notebook

In [1]:
import pymongo
import sys
import os

In [2]:
# I'm in the week3 folder, which contains all of the materials
# from the given zip file
os.getcwd()

'/home/mmulholland/Dropbox/Docs/mongodb/week3'

In [3]:
# Now, let's try to do the first homework assignment
# Write a program in the language of your choice that will
# remove the lowest homework score for each student. Since
# there is a single document for each student containing an
# array of scores, you will need to update the scores array
# and remove the homework.

In [4]:
# Connect to the local MongoDB server and bind the `students`
# collection of the `school` database for use in later cells
connection = pymongo.MongoClient("mongodb://localhost")
db = connection["school"]
students = db["students"]

In [5]:
# Let's take a look at some entries to get familiar with the
# data (limit on the server side rather than pulling the whole
# collection into memory just to slice off five documents)
list(students.find().limit(5))

[{u'_id': 1.0,
  u'name': u'Aurelia Menendez',
  u'scores': [{u'score': 60.06045071030959, u'type': u'exam'},
   {u'score': 52.79790691903873, u'type': u'quiz'},
   {u'score': 71.76133439165544, u'type': u'homework'},
   {u'score': 34.85718117893772, u'type': u'homework'}]},
 {u'_id': 2.0,
  u'name': u'Corliss Zuk',
  u'scores': [{u'score': 67.03077096065002, u'type': u'exam'},
   {u'score': 6.301851677835235, u'type': u'quiz'},
   {u'score': 20.18160621941858, u'type': u'homework'},
   {u'score': 66.28344683278382, u'type': u'homework'}]},
 {u'_id': 3.0,
  u'name': u'Bao Ziglar',
  u'scores': [{u'score': 71.64343899778332, u'type': u'exam'},
   {u'score': 24.80221293650313, u'type': u'quiz'},
   {u'score': 1.694720653897219, u'type': u'homework'},
   {u'score': 42.26147058804812, u'type': u'homework'}]},
 {u'_id': 0.0,
  u'name': u'aimee Zank',
  u'scores': [{u'score': 1.463179736705023, u'type': u'exam'},
   {u'score': 11.78273309957772, u'type': u'quiz'},
   {u'score': 6.67617606065

In [6]:
# Let's see if we can look in one particular entry's hw
# assignments
list(students.find({'name': 'Zachary Langlais'}))

[{u'_id': 4.0,
  u'name': u'Zachary Langlais',
  u'scores': [{u'score': 78.68385091304332, u'type': u'exam'},
   {u'score': 90.2963101368042, u'type': u'quiz'},
   {u'score': 34.41620148042529, u'type': u'homework'},
   {u'score': 19.21886443577987, u'type': u'homework'}]},
 {u'_id': 118.0,
  u'name': u'Zachary Langlais',
  u'scores': [{u'score': 62.20457822364115, u'type': u'exam'},
   {u'score': 61.03733414415722, u'type': u'quiz'},
   {u'score': 8.548735651522431, u'type': u'homework'},
   {u'score': 82.41688205392703, u'type': u'homework'}]}]

In [7]:
# So, there are apparently multiple entries for at least
# some students
# Let's work on getting all of the unique names
# We can use the find method for this, specifying a "projection"

In [8]:
# Project every document down to its name; an inclusion projection
# returns the _id as well unless it is explicitly switched off
names = list(students.find({}, {'name': 1}))
names[:5]

[{u'_id': 1.0, u'name': u'Aurelia Menendez'},
 {u'_id': 2.0, u'name': u'Corliss Zuk'},
 {u'_id': 3.0, u'name': u'Bao Ziglar'},
 {u'_id': 0.0, u'name': u'aimee Zank'},
 {u'_id': 4.0, u'name': u'Zachary Langlais'}]

In [9]:
# So, we will need to iterate over all of the names in 'names' and
# get all assignment entries that correspond to each document's
# 'scores' key
# Recall that a student entry's 'scores' key maps to a list of
# sub-documents, one for each assignment:
list(students.find({'name': 'Zachary Langlais'},
                   {'scores': 1, '_id': 0}))

[{u'scores': [{u'score': 78.68385091304332, u'type': u'exam'},
   {u'score': 90.2963101368042, u'type': u'quiz'},
   {u'score': 34.41620148042529, u'type': u'homework'},
   {u'score': 19.21886443577987, u'type': u'homework'}]},
 {u'scores': [{u'score': 62.20457822364115, u'type': u'exam'},
   {u'score': 61.03733414415722, u'type': u'quiz'},
   {u'score': 8.548735651522431, u'type': u'homework'},
   {u'score': 82.41688205392703, u'type': u'homework'}]}]

In [10]:
# One way to do this (there are probably easier ways, both for this
# and for what I did above): pull out each matching document's
# 'scores' list, then collect the 'score' of every sub-doc whose
# 'type' is 'homework'.
# We keep each source document's _id alongside its scores for later use.
entry_lists = []
for entry in students.find({'name': 'Zachary Langlais'}):
    entry_lists.append(dict(scores=entry['scores'], _id=entry['_id']))
entry_lists

[{'_id': 4.0,
  'scores': [{u'score': 78.68385091304332, u'type': u'exam'},
   {u'score': 90.2963101368042, u'type': u'quiz'},
   {u'score': 34.41620148042529, u'type': u'homework'},
   {u'score': 19.21886443577987, u'type': u'homework'}]},
 {'_id': 118.0,
  'scores': [{u'score': 62.20457822364115, u'type': u'exam'},
   {u'score': 61.03733414415722, u'type': u'quiz'},
   {u'score': 8.548735651522431, u'type': u'homework'},
   {u'score': 82.41688205392703, u'type': u'homework'}]}]

In [11]:
# Gather every homework score from the matching documents, tagging
# each one with the _id of the document it came from (the _id is
# unique even when the student name is not)
hw_scores = [dict(score=_entry['score'], _id=entry_list['_id'])
             for entry_list in entry_lists
             for _entry in entry_list['scores']
             if _entry['type'] == 'homework']
# Always (re)bind worst_hw_score so that an empty hw_scores list can
# neither raise a NameError below nor leave a stale value from an
# earlier run of this cell; min() picks the lowest score directly
# without sorting the whole list
worst_hw_score = (min(hw_scores, key=lambda x: x['score'])
                  if hw_scores else None)
worst_hw_score

{'_id': 118.0, 'score': 8.548735651522431}

In [12]:
# So, here's the list of HW scores for 'Zachary Langlais'
hw_scores

[{'_id': 4.0, 'score': 34.41620148042529},
 {'_id': 4.0, 'score': 19.21886443577987},
 {'_id': 118.0, 'score': 8.548735651522431},
 {'_id': 118.0, 'score': 82.41688205392703}]

In [13]:
# What we would have to do now is use the update_one method,
# specifying a student's name and the value of the score to be
# dropped from the list corresponding to 'scores'
# Here's the worst HW score again
worst_hw_score

{'_id': 118.0, 'score': 8.548735651522431}

In [14]:
list(students.find({'name': 'Zachary Langlais',
                    '_id': worst_hw_score['_id']}))

[{u'_id': 118.0,
  u'name': u'Zachary Langlais',
  u'scores': [{u'score': 62.20457822364115, u'type': u'exam'},
   {u'score': 61.03733414415722, u'type': u'quiz'},
   {u'score': 8.548735651522431, u'type': u'homework'},
   {u'score': 82.41688205392703, u'type': u'homework'}]}]

In [15]:
# Let's get the relevant document and then manually edit its
# 'scores' value (in a new list) and then we can use update_one to
# substitute the edited 'scores' list back into the doc
# find_one returns the single matching document directly (or None if
# nothing matches) instead of materializing a cursor into a list
orig_doc = students.find_one({'name': 'Zachary Langlais',
                              '_id': worst_hw_score['_id']})
orig_doc

{u'_id': 118.0,
 u'name': u'Zachary Langlais',
 u'scores': [{u'score': 62.20457822364115, u'type': u'exam'},
  {u'score': 61.03733414415722, u'type': u'quiz'},
  {u'score': 8.548735651522431, u'type': u'homework'},
  {u'score': 82.41688205392703, u'type': u'homework'}]}

In [16]:
orig_doc_scores = orig_doc['scores']
# Work on a copy so that orig_doc_scores keeps the original list and
# can be used to restore the document afterwards
substitute_doc_scores = list(orig_doc_scores)
# The sub-document to drop: the lowest-scoring homework entry
lowest_hw_doc = dict(score=worst_hw_score['score'], type='homework')
# Remove exactly one matching entry. Deleting from a list while
# iterating over it skips elements and removes an unpredictable
# number of duplicates, so use a single membership test + remove
if lowest_hw_doc in substitute_doc_scores:
    substitute_doc_scores.remove(lowest_hw_doc)
substitute_doc_scores

[{u'score': 62.20457822364115, u'type': u'exam'},
 {u'score': 61.03733414415722, u'type': u'quiz'},
 {u'score': 82.41688205392703, u'type': u'homework'}]

In [17]:
substitute_doc_scores

[{u'score': 62.20457822364115, u'type': u'exam'},
 {u'score': 61.03733414415722, u'type': u'quiz'},
 {u'score': 82.41688205392703, u'type': u'homework'}]

In [18]:
# Overwrite the stored 'scores' array with the edited copy.
# modified_count (unlike matched_count) confirms that the document
# was actually changed, not merely selected by the filter
result = students.update_one({'name': 'Zachary Langlais',
                              '_id': worst_hw_score['_id']},
                             {'$set': {'scores': substitute_doc_scores}})
result.modified_count

1

In [19]:
# So, it looks like one doc was updated
# Let's take a look at the updated doc
list(students.find({'name': 'Zachary Langlais',
                    '_id': worst_hw_score['_id']}))

[{u'_id': 118.0,
  u'name': u'Zachary Langlais',
  u'scores': [{u'score': 62.20457822364115, u'type': u'exam'},
   {u'score': 61.03733414415722, u'type': u'quiz'},
   {u'score': 82.41688205392703, u'type': u'homework'}]}]

In [20]:
# Great, it looks like the 'homework' with the lowest
# score has been dropped!

In [21]:
# Now, let's put orig_doc_scores back in
# modified_count (unlike matched_count) confirms that the document
# was actually changed, not merely selected by the filter
result = students.update_one({'name': 'Zachary Langlais',
                              '_id': worst_hw_score['_id']},
                             {'$set': {'scores': orig_doc_scores}})
result.modified_count

1

In [22]:
list(students.find({'name': 'Zachary Langlais',
                    '_id': worst_hw_score['_id']}))

[{u'_id': 118.0,
  u'name': u'Zachary Langlais',
  u'scores': [{u'score': 62.20457822364115, u'type': u'exam'},
   {u'score': 61.03733414415722, u'type': u'quiz'},
   {u'score': 8.548735651522431, u'type': u'homework'},
   {u'score': 82.41688205392703, u'type': u'homework'}]}]

In [23]:
# Now, the entry is back to its original state!

In [29]:
# Ok, now I'll paste in the script I wrote and then
# I'll run it from this session to see what happens
! head -50 homework_3_1/remove_lowest_hw_score_each_student.py

#!/usr/env python2.7
import pymongo
import sys

if __name__ == '__main__':

    # establish a connection to the database
    connection = pymongo.MongoClient("mongodb://localhost")
    db = connection.school
    students = db.students

    # Let's get all the unique names first
    names = list((students.find({}, {'name': 1, '_id': 0})))
    names = [name['name'] for name in names]

    # Now, iterate over all names, getting all entries corresponding to each
    # name and then we can all homeworks
    for name in names:
        entry_lists = [dict(scores=entry['scores'], _id=entry['_id']) for
                       entry in list(students.find({'name':
                                                    'Zachary Langlais'}))]
        hw_scores = []
        for entry_list in entry_lists:
            for _entry in entry_list['scores']:
                if _entry['type'] == 'homework':
                    hw_scores.append(dict(score=_entry['score'],
              

In [30]:
# Let's try to run it now
! python2.7 homework_3_1/remove_lowest_hw_score_each_student.py

Traceback (most recent call last):
  File "homework_3_1/remove_lowest_hw_score_each_student.py", line 33, in <module>
    substitute_doc_scores = orig_doc['scores']
TypeError: 'NoneType' object has no attribute '__getitem__'


In [32]:
! git status

On branch master
Your branch is ahead of 'origin/master' by 2 commits.
  (use "git push" to publish your local commits)

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git checkout -- <file>..." to discard changes in working directory)

	[31mmodified:   .ipynb_checkpoints/PyMongo_week3-checkpoint.ipynb[m
	[31mmodified:   PyMongo_week3.ipynb[m
	[31mmodified:   homework_3_1/remove_lowest_hw_score_each_student.py[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)

	[31m../week2/.ipynb_checkpoints/[m
	[31m../week2/homework_2_3/login_logout_signup/login_logout_signup/sessionDAO.pyc[m
	[31m../week2/homework_2_3/login_logout_signup/login_logout_signup/userDAO.pyc[m
	[31m../week2/pymongo_find_and_modify/using_find_and_modify.pyc[m
	[31m../week2/pymongo_updating/using_update.pyc[m
	[31m../week2/pymongo_updating_data_using_replace/using_replace_one.pyc[m

no changes adde

In [33]:
! git add PyMongo_week3.ipynb homework_3_1/remove_lowest_hw_score_each_student.py

In [34]:
! git commit -m "Updated week3 notebook, homework 1 script."

[master 0eb935e] Updated week3 notebook, homework 1 script.
 2 files changed, 161 insertions(+), 34 deletions(-)


In [35]:
! git push

Counting objects: 37, done.
Delta compression using up to 4 threads.
Compressing objects: 100% (33/33), done.
Writing objects: 100% (36/36), 288.01 KiB | 0 bytes/s, done.
Total 36 (delta 8), reused 0 (delta 0)
remote: error: GH001: Large files detected.[K
remote: error: Trace: fb58b8b0611873554982f65c16089542[K
remote: error: See http://git.io/iEPt8g for more information.[K
remote: error: File week3/handling_blobs/using_gridfs/sample_128_mb.txt is 128.00 MB; this exceeds GitHub's file size limit of 100 MB[K
To https://github.com/mulhod/mongodb_course.git
 ! [remote rejected] master -> master (pre-receive hook declined)
error: failed to push some refs to 'https://github.com/mulhod/mongodb_course.git'
