# Eat Safe, Love

## Notebook Set Up

In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [3]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [4]:
# review the collections in our database
print(db.list_collection_names())

['establishments']


In [5]:
# assign the collection to a variable
establishments = db['establishments']

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [6]:
# Find the establishments with a hygiene score of 20
hygiene_query = {'scores.Hygiene': 20}
#find in establishments the hygiene requrements
hygiene_result = establishments.find(hygiene_query)

# Use count_documents to display the number of documents in the result
print('Number of establishments with a hygiene score of 20: ', establishments.count_documents(hygiene_query))
# Display the first document in the results using pprint
pprint(hygiene_result[0])

Number of establishments with a hygiene score of 20:  41
{'AddressLine1': '5-6 Southfields Road',
 'AddressLine2': 'Eastbourne',
 'AddressLine3': 'East Sussex',
 'AddressLine4': '',
 'BusinessName': 'The Chase Rest Home',
 'BusinessType': 'Caring Premises',
 'BusinessTypeID': 5,
 'ChangesByServerID': 0,
 'Distance': 4613.888288172291,
 'FHRSID': 110681,
 'LocalAuthorityBusinessID': '4029',
 'LocalAuthorityCode': '102',
 'LocalAuthorityEmailAddress': 'Customerfirst@eastbourne.gov.uk',
 'LocalAuthorityName': 'Eastbourne',
 'LocalAuthorityWebSite': 'http://www.eastbourne.gov.uk/foodratings',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'BN21 1BU',
 'RatingDate': '2021-09-23T00:00:00',
 'RatingKey': 'fhrs_0_en-gb',
 'RatingValue': '0',
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('6578c0d6b78d875b03ccf577'),
 'geocode': {'latitude': '50.769705', 'longitude': '0.27694'},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/110681',
            'rel':

In [7]:
# Convert the result to a Pandas DataFrame
hygiene_df = pd.DataFrame(hygiene_result)
# Display the number of rows in the DataFrame
print('Number of rows in dataframe: ', len(hygiene_df))
# Display the first 10 rows of the DataFrame
hygiene_df.head(10)

NameError: name 'pd' is not defined

### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [None]:
# Find the establishments with London as the Local Authority and has a RatingValue greater than or equal to 4.
london_query = {'LocalAuthorityName' : {'$regex' : 'London'}, 'RatingValue' : {'$gte' : '4'}}
#find in establishments the query requirements
london_result = establishments.find(london_query)

# Use count_documents to display the number of documents in the result
print('Number of establishments in London with a rating value of 4 or more: ', establishments.count_documents(london_query)
# Display the first document in the results using pprint
pprint(london_result[0])

In [None]:
# Convert the result to a Pandas DataFrame
london_df = pd.Dataframe(london_result)
# Display the number of rows in the DataFrame
print('Number of rows in dataframe: ', len(london_df)
# Display the first 10 rows of the DataFrame
london_df.head(10)

### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [None]:
# Search within 0.01 degree on either side of the latitude and longitude.
degree_search = 0.01
latitude = 51.490142
longitude = 0.08384

# Rating value must equal 5
penang_query = {'RatingValue' : '5', 'geocode.latitude' : {'geocode.longitude': ('$gte': longitude - degree_search, '$lte' : longitude + degree_search}}

# Sort by hygiene score
penang_sort =  [('scores.Hygiene', 1)]

#top five
penang_limit = 5

penang_result = list(establishments.find(penang_query).sort(penang_sort).limit(penang_imit)

# Print the results
pprint(penang_result)

In [None]:
# Convert result to Pandas DataFrame
penang_df = pd.DataFrame(penang_result)

### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [None]:
# Create a pipeline that: 
# 1. Matches establishments with a hygiene score of 0
authority_query = {'$match' : {'scores.Hygiene' : 0}}

# 2. Groups the matches by Local Authority
authority_group = {'$group' : {'_id' : '$LocalAuthorityName', 'count':{'$sum': 1}}}

# 3. Sorts the matches from highest to lowest
authority_sort = {'$sort': {'count' : -1}}

authority_merger = [authority_query, authority_group, authority_sort]

authority_results = list(establishments.aggregate(authority_merger))

# Print the number of documents in the result
print('Number of local authority areas: ', len(authority_results)
# Print the first 10 results
pprint(authority_results[0:10}

In [None]:
# Convert the result to a Pandas DataFrame
authority_df = pd.DataFrame(authority_results)
# Display the number of rows in the DataFrame
print('Number of rows in dataframe', len(authority_df))
# Display the first 10 rows of the DataFrame
authority_df.head(10)