# Eat Safe, Love

## Notebook Set Up

In [1]:
from pymongo import MongoClient
import pandas as pd
from pprint import pprint

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [3]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [4]:
# review the collections in our database
print(db.list_collection_names())
pprint(db.establishments.find_one())

['establishments']
{'AddressLine1': 'The Pines Garden',
 'AddressLine2': 'Beach Road',
 'AddressLine3': 'St Margarets Bay',
 'AddressLine4': 'Kent',
 'BusinessName': 'The Tea Room',
 'BusinessType': 'Restaurant/Cafe/Canteen',
 'BusinessTypeID': 1,
 'ChangesByServerID': 0,
 'Distance': 4587.362402580997,
 'FHRSID': 551803,
 'LocalAuthorityBusinessID': 'PI/000070948',
 'LocalAuthorityCode': '182',
 'LocalAuthorityEmailAddress': 'publicprotection@dover.gov.uk',
 'LocalAuthorityName': 'Dover',
 'LocalAuthorityWebSite': 'http://www.dover.gov.uk/',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'CT15 6DZ',
 'RatingDate': '2021-08-17T00:00:00',
 'RatingKey': 'fhrs_5_en-gb',
 'RatingValue': 5,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('660497e112e8c108be00ca61'),
 'geocode': {'latitude': Decimal128('51.1481330000000'),
             'longitude': Decimal128('1.38329800000000')},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/551803',
            'r

In [5]:
# assign the collection to a variable
establishments = db['establishments']

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [6]:
# Find establishments with a hygiene score of 20
query = {"scores.Hygiene": 20}

# Count the number of documents matching the query
result_count = establishments.count_documents(query)
print("Number of establishments with a hygiene score of 20:", result_count)

# Display the details of the first matching establishment
first_establishment = establishments.find_one(query)
print("First establishment with a hygiene score of 20:")
pprint(first_establishment)


Number of establishments with a hygiene score of 20: 41
First establishment with a hygiene score of 20:
{'AddressLine1': '5-6 Southfields Road',
 'AddressLine2': 'Eastbourne',
 'AddressLine3': 'East Sussex',
 'AddressLine4': '',
 'BusinessName': 'The Chase Rest Home',
 'BusinessType': 'Caring Premises',
 'BusinessTypeID': 5,
 'ChangesByServerID': 0,
 'Distance': 4613.888288172291,
 'FHRSID': 110681,
 'LocalAuthorityBusinessID': '4029',
 'LocalAuthorityCode': '102',
 'LocalAuthorityEmailAddress': 'Customerfirst@eastbourne.gov.uk',
 'LocalAuthorityName': 'Eastbourne',
 'LocalAuthorityWebSite': 'http://www.eastbourne.gov.uk/foodratings',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'BN21 1BU',
 'RatingDate': '2021-09-23T00:00:00',
 'RatingKey': 'fhrs_0_en-gb',
 'RatingValue': 0,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('660497e112e8c108be00e878'),
 'geocode': {'latitude': Decimal128('50.7697050000000'),
             'longitude': Decimal128('0.276940000000000'

In [7]:
# Convert MongoDB query result to DataFrame
df = pd.DataFrame(establishments.find(query))

# Display number of rows in DataFrame
print("Number of rows in the DataFrame:", len(df))

# Display first 10 rows of DataFrame
print("First 10 rows of the DataFrame:")
print(df.head(10))


Number of rows in the DataFrame: 41
First 10 rows of the DataFrame:
                        _id   FHRSID  ChangesByServerID  \
0  660497e112e8c108be00e878   110681                  0   
1  660497e112e8c108be00ebfc   612039                  0   
2  660497e112e8c108be00ef05   730933                  0   
3  660497e112e8c108be00f0f3   172735                  0   
4  660497e112e8c108be00f105   172953                  0   
5  660497e212e8c108be00faa0   512854                  0   
6  660497e212e8c108be00fcc0  1537089                  0   
7  660497e212e8c108be0111ef   155648                  0   
8  660497e212e8c108be011633  1012883                  0   
9  660497e212e8c108be011e42   644109                  0   

  LocalAuthorityBusinessID               BusinessName  \
0                     4029        The Chase Rest Home   
1                1970/FOOD                 Brenalwood   
2                1698/FOOD              Melrose Hotel   
3             PI/000023858              Seaford Pizza 

### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [8]:
# Count and print the number of establishments in London with a RatingValue >= 4
query = {'LocalAuthorityName': {'$regex':'London'}, "RatingValue": {"$gte": 4}}
print('Number of establishments:', establishments.count_documents(query))

# Print the details of the first matching establishment
print("First establishment details:")
print(establishments.find_one({"LocalAuthorityName": {'$regex':'London'}, "RatingValue": {"$gte": 4}}))


Number of establishments: 33
First establishment details:
{'_id': ObjectId('660497e212e8c108be010295'), 'FHRSID': 621707, 'ChangesByServerID': 0, 'LocalAuthorityBusinessID': 'PI/000025307', 'BusinessName': "Charlie's", 'BusinessType': 'Other catering premises', 'BusinessTypeID': 7841, 'AddressLine1': 'Oak Apple Farm Building 103 Sheernes Docks', 'AddressLine2': 'Sheppy Kent', 'AddressLine3': '', 'AddressLine4': '', 'PostCode': 'ME12', 'Phone': '', 'RatingValue': 4, 'RatingKey': 'fhrs_4_en-gb', 'RatingDate': '2021-10-18T00:00:00', 'LocalAuthorityCode': '508', 'LocalAuthorityName': 'City of London Corporation', 'LocalAuthorityWebSite': 'http://www.cityoflondon.gov.uk/Corporation/homepage.htm', 'LocalAuthorityEmailAddress': 'publicprotection@cityoflondon.gov.uk', 'scores': {'Hygiene': 5, 'Structural': 10, 'ConfidenceInManagement': 5}, 'SchemeType': 'FHRS', 'geocode': {'longitude': Decimal128('0.508551000000000'), 'latitude': Decimal128('51.3693210000000')}, 'RightToReply': '', 'Distance':

In [9]:
# Convert result to Pandas DataFrame
df = pd.DataFrame(establishments.find({"LocalAuthorityName": {'$regex':'London'}, "RatingValue": {"$gte": 4}}))

# Display number of rows in DataFrame
print("Number of rows in the DataFrame:", len(df))

# Display first 10 rows of DataFrame
print("First 10 rows of the DataFrame:")
print(df.head(10))

Number of rows in the DataFrame: 33
First 10 rows of the DataFrame:
                        _id   FHRSID  ChangesByServerID  \
0  660497e212e8c108be010295   621707                  0   
1  660497e212e8c108be0105bc  1130836                  0   
2  660497e212e8c108be011109   293783                  0   
3  660497e212e8c108be011f05   294474                  0   
4  660497e212e8c108be011f07  1315095                  0   
5  660497e212e8c108be012615   294900                  0   
6  660497e212e8c108be01471f   293756                  0   
7  660497e212e8c108be014ae7   878523                  0   
8  660497e212e8c108be014afe   293772                  0   
9  660497e212e8c108be014b01   295107                  0   

  LocalAuthorityBusinessID                          BusinessName  \
0             PI/000025307                             Charlie's   
1             PI/000034075               Mv City Cruises Erasmus   
2             PI/000002614             Benfleet Motor Yacht Club   
3         

### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [10]:
degree_search = 0.1
latitude = 51.5074
longitude = -0.1278

query = {
    'geocode.latitude': {"$gte": latitude - degree_search, "$lte": latitude + degree_search},
    'geocode.longitude': {"$gte": longitude - degree_search, "$lte": longitude + degree_search},
    'RatingValue': 5
}

sort = [('Hygiene', 1)]
limit = 10

results = list(establishments.find().sort(sort).limit(limit))

pprint(results)


[{'AddressLine1': 'Reach Court Farm',
  'AddressLine2': 'Reach Road',
  'AddressLine3': 'St Margarets At Cliffe',
  'AddressLine4': 'Kent',
  'BusinessName': 'The Barn',
  'BusinessType': 'Restaurant/Cafe/Canteen',
  'BusinessTypeID': 1,
  'ChangesByServerID': 0,
  'Distance': 4587.77622109052,
  'FHRSID': 429982,
  'LocalAuthorityBusinessID': 'PI/000043978',
  'LocalAuthorityCode': '182',
  'LocalAuthorityEmailAddress': 'publicprotection@dover.gov.uk',
  'LocalAuthorityName': 'Dover',
  'LocalAuthorityWebSite': 'http://www.dover.gov.uk/',
  'NewRatingPending': False,
  'Phone': '',
  'PostCode': 'CT15 6AQ',
  'RatingDate': '2017-09-21T00:00:00',
  'RatingKey': 'fhrs_5_en-gb',
  'RatingValue': 5,
  'RightToReply': '',
  'SchemeType': 'FHRS',
  '_id': ObjectId('660497e112e8c108be00ca69'),
  'geocode': {'latitude': Decimal128('51.1473790000000'),
              'longitude': Decimal128('1.37111500000000')},
  'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/429982',
     

In [21]:
# Define latitude, longitude, search range, and rating value
latitude, longitude, degree_search, rating_value = 51.5074, -0.1278, 0.01, 5

# Define query, sort, and limit
query = {'geocode.latitude': {"$gte": latitude - degree_search, "$lte": latitude + degree_search},
         'geocode.longitude': {"$gte": longitude - degree_search, "$lte": longitude + degree_search},
         'RatingValue': rating_value}
sort = [('scores.Hygiene', 1)]
limit = 10

# Execute and print the results
print (list(establishments.find(query).sort(sort).limit(limit)))


[]


In [12]:
from pprint import pprint

# Constants
degree_search = 0.01
latitude = 51.490142
longitude = 0.08384
rating_value = 5
limit = 5

# Query construction
query = {
    'geocode.latitude': {'$gte': latitude - degree_search, '$lte': latitude + degree_search},
    'geocode.longitude': {'$gte': longitude - degree_search, '$lte': longitude + degree_search},
    'RatingValue': rating_value
}

# Sorting criteria
sort = [('scores.Hygiene', 1)]  # Sorting by hygiene score in ascending order

# Retrieve and print the results
results = establishments.find(query).sort(sort).limit(limit)
pprint(list(results))


[{'AddressLine1': '130 - 132 Plumstead High Street',
  'AddressLine2': '',
  'AddressLine3': 'Plumstead',
  'AddressLine4': 'Greenwich',
  'BusinessName': 'Volunteer',
  'BusinessType': 'Pub/bar/nightclub',
  'BusinessTypeID': 7843,
  'ChangesByServerID': 0,
  'Distance': 4646.965634598608,
  'FHRSID': 694609,
  'LocalAuthorityBusinessID': 'PI/000116619',
  'LocalAuthorityCode': '511',
  'LocalAuthorityEmailAddress': 'health@royalgreenwich.gov.uk',
  'LocalAuthorityName': 'Greenwich',
  'LocalAuthorityWebSite': 'http://www.royalgreenwich.gov.uk',
  'NewRatingPending': False,
  'Phone': '',
  'PostCode': 'SE18 1JQ',
  'RatingDate': '2019-08-05T00:00:00',
  'RatingKey': 'fhrs_5_en-gb',
  'RatingValue': 5,
  'RightToReply': '',
  'SchemeType': 'FHRS',
  '_id': ObjectId('660497e212e8c108be0140d9'),
  'geocode': {'latitude': Decimal128('51.4873437000000'),
              'longitude': Decimal128('0.0920800000000000')},
  'links': [{'href': 'http://api.ratings.food.gov.uk/establishments/694609

In [13]:
#Constants
degree_search = 0.01
latitude, longitude, rating_value, limit = 51.490142, 0.08384, 5, 5

#Query
query = {
    'geocode.latitude': {'$gte': latitude - degree_search, '$lte': latitude + degree_search},
    'geocode.longitude': {'$gte': longitude - degree_search, '$lte': longitude + degree_search},
    'RatingValue': rating_value
}

#pandas DataFrame
df = pd.DataFrame(list(establishments.find(query).sort([('scores.Hygiene', 1)]).limit(limit)))

# Display the DataFrame
print(df)


                        _id   FHRSID  ChangesByServerID  \
0  660497e212e8c108be0140d9   694609                  0   
1  660497e212e8c108be0140ef   695241                  0   
2  660497e212e8c108be0140f6   694478                  0   
3  660497e212e8c108be0140ab   695223                  0   
4  660497e212e8c108be0140bb  1380578                  0   

  LocalAuthorityBusinessID                         BusinessName  \
0             PI/000116619                            Volunteer   
1             PI/000179088              Plumstead Manor Nursery   
2             PI/000086506                    Atlantic Fish Bar   
3             PI/000178842                              Iceland   
4                    14425  Howe and Co Fish and Chips - Van 17   

                            BusinessType  BusinessTypeID  \
0                      Pub/bar/nightclub            7843   
1                        Caring Premises               5   
2                 Takeaway/sandwich shop            7844   
3 

In [14]:
import pandas as pd

# Constants
latitude, longitude, degree_search, rating_value, limit = 51.490142, 0.08384, 0.01, 5, 5

# Define the query
query = {
    'geocode.latitude': {'$gte': latitude - degree_search, '$lte': latitude + degree_search},
    'geocode.longitude': {'$gte': longitude - degree_search, '$lte': longitude + degree_search},
    'RatingValue': rating_value
}

# Retrieve the results and convert to a pandas DataFrame
df = pd.DataFrame(list(establishments.find(query).sort([('scores.Hygiene', 1)]).limit(limit)))

# Display the number of rows and the first 10 rows of the DataFrame
print("Number of rows in the DataFrame:", len(df))
print("First 10 rows of the DataFrame:")
print(df.head(10))


Number of rows in the DataFrame: 5
First 10 rows of the DataFrame:
                        _id   FHRSID  ChangesByServerID  \
0  660497e212e8c108be0140d9   694609                  0   
1  660497e212e8c108be0140ef   695241                  0   
2  660497e212e8c108be0140f6   694478                  0   
3  660497e212e8c108be0140ab   695223                  0   
4  660497e212e8c108be0140bb  1380578                  0   

  LocalAuthorityBusinessID                         BusinessName  \
0             PI/000116619                            Volunteer   
1             PI/000179088              Plumstead Manor Nursery   
2             PI/000086506                    Atlantic Fish Bar   
3             PI/000178842                              Iceland   
4                    14425  Howe and Co Fish and Chips - Van 17   

                            BusinessType  BusinessTypeID  \
0                      Pub/bar/nightclub            7843   
1                        Caring Premises               