# Eat Safe, Love

## Notebook Set Up

In [None]:
from pymongo import MongoClient
import pandas as pd
from pprint import pprint

In [None]:
# Create a MongoClient for the server on port 27017; no host is passed, so
# the driver's default host is used.
mongo = MongoClient(port=27017)

In [None]:
# Keep a handle on the uk_food database for the rest of the notebook
db = mongo.get_database('uk_food')

In [None]:
# Show which collections exist in the database
collection_names = db.list_collection_names()
print(collection_names)

In [None]:
# Keep a handle on the establishments collection used by every query below
establishments = db.get_collection('establishments')

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [None]:
# Find the establishments with a hygiene score of 20
query = {'scores.Hygiene': 20}

# Use count_documents to display the number of documents in the result.
# Printed explicitly: a bare expression is only echoed when it is the last
# line of a notebook cell.
result = establishments.count_documents(query)
print("Number of documents in result:", result)

# Display the first document in the results using pprint
pprint(establishments.find_one(query))

In [None]:
# Convert the result to a Pandas DataFrame.
# The filter must be passed again here: find() with no arguments returns
# every document in the collection, not just the hygiene-score matches.
query_result = establishments.find(query)
results = list(query_result)
result_df = pd.DataFrame(results)

# Display the number of rows in the DataFrame
print("Rows in DataFrame: ", len(result_df))

# Display the first 10 rows of the DataFrame
result_df.head(10)


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [None]:
# Find the establishments whose Local Authority name contains "London" and
# whose RatingValue is greater than or equal to 4.
# NOTE(review): assumes authority names merely contain "London" (hence the
# regex rather than an exact match) and that RatingValue is stored as a
# number -- confirm against the data.
query_london = {
    'LocalAuthorityName': {'$regex': 'London'},
    'RatingValue': {'$gte': 4},
}

# Use count_documents to display the number of documents in the result
result_london = establishments.count_documents(query_london)
print("Number of documents in result:", result_london)

# Display the first document in the results using pprint
# (find_one returns None when nothing matches)
pprint(establishments.find_one(query_london))

In [None]:
# Build a DataFrame from every document matching the London query
london_result = establishments.find(query_london)
result_london = list(london_result)
london_df = pd.DataFrame(data=result_london)

# Report how many rows the DataFrame holds
row_total = london_df.shape[0]
print("Rows in DataFrame: ", row_total)

# Show the first 10 rows
london_df.head(10)


### 3. What are the top 5 establishments with a `RatingValue` of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [None]:
# Search within 0.01 degree on either side of the latitude and longitude.
# Rating value must equal 5
# Sort by hygiene score, lowest first, and keep the top 5

degree_search = 0.01

# Read the new restaurant's coordinates from the collection rather than
# hard-coding them.
# NOTE(review): assumes the restaurant is stored with BusinessName
# "Penang Flavours" and numeric geocode.latitude / geocode.longitude -- confirm.
penang = establishments.find_one(
    {'BusinessName': 'Penang Flavours'},
    {'geocode': 1},
)
if penang is None:
    raise LookupError("'Penang Flavours' was not found in establishments")
latitude = penang['geocode']['latitude']
longitude = penang['geocode']['longitude']

# Plain range filters on the two coordinate fields; the embedded geocode
# document is not queried with a geospatial operator.
query = {
    'RatingValue': 5,
    'geocode.latitude': {
        '$gte': latitude - degree_search,
        '$lte': latitude + degree_search,
    },
    'geocode.longitude': {
        '$gte': longitude - degree_search,
        '$lte': longitude + degree_search,
    },
}

# The field name is case-sensitive ('scores.Hygiene'); direction 1 sorts
# ascending so the lowest hygiene scores come first.
sort = establishments.find(query).sort('scores.Hygiene', 1)
limit = establishments.find(query).sort('scores.Hygiene', 1).limit(5)

# Print the results
for result in limit:
    pprint(result)

# Iterating exhausted the cursor; rewind it so it can be read again later.
limit.rewind()

In [None]:
# Convert result to Pandas DataFrame.
# `limit` may already have been iterated by the print loop, and an exhausted
# cursor yields nothing. clone() returns an unevaluated cursor carrying the
# same filter, sort and limit, so the DataFrame is never silently empty.
result_geo = list(limit.clone())
geo_df = pd.DataFrame(result_geo)
geo_df

### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [None]:
# Create a pipeline that:
# 1. Matches establishments with a hygiene score of 0
# 2. Groups the matches by Local Authority, counting the documents in each
# 3. Sorts the groups by that count from highest to lowest
# Each stage is its own element of the pipeline list.
query_h = [
    {'$match': {'scores.Hygiene': 0}},
    {'$group': {'_id': '$LocalAuthorityName', 'count': {'$sum': 1}}},
    {'$sort': {'count': -1}},
]

results_hygiene = list(establishments.aggregate(query_h))

# Print the number of documents in the result
print("Number of documents:", len(results_hygiene))

# Print the first 10 results.
# Slice the list already fetched so the output matches the DataFrame order
# and no second aggregation is needed.
query_h_10_list = results_hygiene[:10]
for result in query_h_10_list:
    pprint(result)



In [None]:
# Load the aggregation output into a Pandas DataFrame
hygiene_df = pd.DataFrame(data=results_hygiene)

# Report how many rows the DataFrame holds
hygiene_row_total = hygiene_df.shape[0]
print("Rows in DataFrame: ", hygiene_row_total)

# Show the first 10 rows
hygiene_df.head(10)