In [6]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd
import json

In [7]:
# Connect to the MongoDB server running on this machine.
# 'localhost' is PyMongo's default host, spelled out here for readability.
mongo = MongoClient(host='localhost', port=27017)

In [8]:
# Sanity check: "emissions" should appear among the server's databases.
database_names = mongo.list_database_names()
print(database_names)

['admin', 'classDB', 'config', 'emissions', 'epa', 'fruits_db', 'local', 'met', 'petsitly_marketing', 'uk_food']


In [9]:
# Handle to the "emissions" database used by every query below.
db = mongo.get_database('emissions')

In [10]:
# Show which collections exist in the "emissions" database.
collection_names = db.list_collection_names()
print(collection_names)

['countries']


In [11]:
# Handle to the "countries" collection, the only collection listed above.
countries = db.get_collection('countries')

In [12]:
# Verify the shell clean-up: no document should have a Year before 1945.
# 'Year' holds strings in this collection, so '$lt' compares text; for
# four-digit years that ordering matches the numeric one.
year_before_1945 = {'$lt': '1945'}
query = {'Year': year_before_1945}
results = countries.find(filter=query)
for document in results:
    print(document)

In [13]:
# Verify the shell update: the French Equatorial Africa documents should now
# carry an ISO code.
query = {'Country': 'French Equatorial Africa'}

# Ask the server for at most one document. Indexing a cursor (results[0])
# issues a separate skip/limit query per index and raises IndexError when
# nothing matches; a limited cursor runs one query and simply yields no
# documents in that case.
results = countries.find(query).limit(1)

# Pretty print the first result (prints nothing if there is no match)
for result in results:
    pprint(result)

{'Cement': '',
 'Coal': '',
 'Country': 'French Equatorial Africa',
 'Flaring': '',
 'Gas': '',
 'ISO': 'FEA',
 'Oil': '',
 'Other': '',
 'Total': '',
 'Year': '1945',
 '_id': ObjectId('64273cf7cfb518dff1f26bac')}


In [14]:
# Verify the shell update: the country name should now be spelled "Curacao".
query = {'Country': 'Curacao'}

# Fetch at most two documents with a single query. Indexing the cursor
# (results[0], results[1]) would run one skip/limit query per index and
# raise IndexError if fewer than two documents match.
results = countries.find(query).limit(2)

# Pretty print the first two results (fewer if fewer documents match)
for result in results:
    pprint(result)

{'Cement': '',
 'Coal': '',
 'Country': 'Curacao',
 'Flaring': '',
 'Gas': '',
 'ISO': 'CUW',
 'Oil': '',
 'Other': '',
 'Total': '',
 'Year': '1945',
 '_id': ObjectId('64273cf6cfb518dff1f2544c')}
{'Cement': '',
 'Coal': '',
 'Country': 'Curacao',
 'Flaring': '',
 'Gas': '',
 'ISO': 'CUW',
 'Oil': '',
 'Other': '',
 'Total': '',
 'Year': '1946',
 '_id': ObjectId('64273cf6cfb518dff1f2544d')}


In [15]:
# Total number of documents in the collection (an empty filter matches all).
total_documents = countries.count_documents(filter={})
total_documents

17864

In [16]:
# Preview the documents from 2017 onward. 'Year' is stored as a string, so
# '$gte' compares text; for four-digit years that matches numeric order.
query = {'Year': {'$gte': '2017'}}

# Cap the preview at a handful of documents: the unbounded loop printed every
# match (over a thousand lines), which buries the rest of the notebook.
# The full result set is loaded into a DataFrame separately.
results = countries.find(query).limit(5)
for result in results:
    print(result)

{'_id': ObjectId('64273cf6cfb518dff1f22084'), 'Country': 'Afghanistan', 'Year': '2017', 'Total': '0.276852', 'Coal': '0.096731', 'Oil': '0.170436', 'Gas': '0.008429', 'Cement': '0.001256', 'Flaring': '0', 'Other': '', 'ISO': 'AFG'}
{'_id': ObjectId('64273cf6cfb518dff1f22085'), 'Country': 'Afghanistan', 'Year': '2018', 'Total': '0.294876', 'Coal': '0.104866', 'Oil': '0.180470', 'Gas': '0.007990', 'Cement': '0.001550', 'Flaring': '0', 'Other': '', 'ISO': 'AFG'}
{'_id': ObjectId('64273cf6cfb518dff1f22086'), 'Country': 'Afghanistan', 'Year': '2019', 'Total': '0.293401', 'Coal': '0.104708', 'Oil': '0.181177', 'Gas': '0.006502', 'Cement': '0.001015', 'Flaring': '0', 'Other': '', 'ISO': 'AFG'}
{'_id': ObjectId('64273cf6cfb518dff1f22087'), 'Country': 'Afghanistan', 'Year': '2020', 'Total': '0.299746', 'Coal': '0.106488', 'Oil': '0.185254', 'Gas': '0.006447', 'Cement': '0.001557', 'Flaring': '0', 'Other': '', 'ISO': 'AFG'}
{'_id': ObjectId('64273cf6cfb518dff1f22088'), 'Country': 'Afghanistan', 

In [17]:
# Load every document from 2017 onward into a DataFrame
# (one row per document, one column per field, including '_id').
query = {'Year': {'$gte': '2017'}}
results = countries.find(filter=query)
emissions2017_df = pd.DataFrame(data=results)

In [18]:
# Confirm the DataFrame holds every document returned by the query.
row_count = emissions2017_df.shape[0]
print("Rows in DataFrame: ", row_count)

Rows in DataFrame:  1160


In [None]:
# Export the DataFrame to JSON (column-oriented, indented by one space).
# The '_id' column holds BSON ObjectId values, which have no native JSON
# representation; default_handler=str makes pandas write each one as its
# hex string instead of relying on its fallback object serialisation.
# NOTE(review): the destination is an absolute, machine-specific path;
# consider a path relative to the notebook so others can re-run this cell.
emissions2017_df.to_json(
    r'C:\Users\LTabl\OneDrive\Desktop\Project_03\data.json',
    indent=1,
    orient="columns",
    default_handler=str,
)

In [1]:
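# NOTE: disabled scratch cell, kept for reference only. If re-enabled it would
# pretty-print all 17864 documents in the collection, one cursor lookup each.
#results = countries.find({})
#for i in range(17864):
    #pprint(results[i])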