# Eat Safe, Love

## Notebook Set Up

In [48]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd
import pymongo
import re

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [3]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [4]:
# review the collections in our database
collection_names = db.list_collection_names()
collection_names

['establishments']

In [5]:
# assign the collection to a variable
establishments = db['establishments']

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [6]:
# Find the establishments with a hygiene score of 20
hygiene = establishments.find({"scores.Hygiene": 20})

# Use count_documents to display the number of documents in the result
count = establishments.count_documents({'scores.Hygiene': 20})
count


41

In [7]:
# Display the first document in the results using pprint
pprint(hygiene[0])

{'AddressLine1': '5-6 Southfields Road',
 'AddressLine2': 'Eastbourne',
 'AddressLine3': 'East Sussex',
 'AddressLine4': '',
 'BusinessName': 'The Chase Rest Home',
 'BusinessType': 'Caring Premises',
 'BusinessTypeID': 5,
 'ChangesByServerID': 0,
 'Distance': 4613.888288172291,
 'FHRSID': 110681,
 'LocalAuthorityBusinessID': '4029',
 'LocalAuthorityCode': '102',
 'LocalAuthorityEmailAddress': 'Customerfirst@eastbourne.gov.uk',
 'LocalAuthorityName': 'Eastbourne',
 'LocalAuthorityWebSite': 'http://www.eastbourne.gov.uk/foodratings',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'BN21 1BU',
 'RatingDate': '2021-09-23T00:00:00',
 'RatingKey': 'fhrs_0_en-gb',
 'RatingValue': 0,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('645ac7436468bb11210eb562'),
 'geocode': {'latitude': Decimal128('50.769705'),
             'longitude': Decimal128('0.27694')},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/110681',
            'rel': 'self'}],
 'meta': {'

In [8]:
# Convert the result to a Pandas DataFrame
hygiene_df = pd.DataFrame(hygiene)

# Display the number of rows in the DataFrame
print(len(hygiene_df))


41


In [9]:
# Display the first 10 rows of the DataFrame
print(hygiene_df.head(10))

                        _id   FHRSID  ChangesByServerID  \
0  645ac7436468bb11210eb562   110681                  0   
1  645ac7436468bb11210eb8e3   612039                  0   
2  645ac7436468bb11210ebbed   730933                  0   
3  645ac7436468bb11210ebddb   172735                  0   
4  645ac7436468bb11210ebdea   172953                  0   
5  645ac7436468bb11210ec788   512854                  0   
6  645ac7436468bb11210ec9a9  1537089                  0   
7  645ac7436468bb11210eded5   155648                  0   
8  645ac7436468bb11210ee319  1012883                  0   
9  645ac7446468bb11210eeb2d   644109                  0   

  LocalAuthorityBusinessID               BusinessName  \
0                     4029        The Chase Rest Home   
1                1970/FOOD                 Brenalwood   
2                1698/FOOD              Melrose Hotel   
3             PI/000023858              Seaford Pizza   
4             PI/000024532              Golden Palace   
5       

### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [68]:
# Define the regular expression pattern
pattern = re.compile(r"\bLondon\b", re.IGNORECASE)

# Find establishments in London with a rating value greater than or equal to 4
london_establishments = establishments.find(
    {
        "LocalAuthorityName": {"$regex": pattern},
        "RatingValue": {"$gte": 4}
    }
)

# Print the number of establishments in the result
print("Total number of establishments in London with a rating value greater than or equal to 4:", london_establishments.count())



Total number of establishments in London with a rating value greater than or equal to 4: 33


  print("Total number of establishments in London with a rating value greater than or equal to 4:", london_establishments.count())


In [69]:
# Display the first document in the results using pprint
pprint(london_establishments[0])

{'AddressLine1': 'Oak Apple Farm Building 103 Sheernes Docks',
 'AddressLine2': 'Sheppy Kent',
 'AddressLine3': '',
 'AddressLine4': '',
 'BusinessName': "Charlie's",
 'BusinessType': 'Other catering premises',
 'BusinessTypeID': 7841,
 'ChangesByServerID': 0,
 'Distance': 4627.439467780196,
 'FHRSID': 621707,
 'LocalAuthorityBusinessID': 'PI/000025307',
 'LocalAuthorityCode': '508',
 'LocalAuthorityEmailAddress': 'publicprotection@cityoflondon.gov.uk',
 'LocalAuthorityName': 'City of London Corporation',
 'LocalAuthorityWebSite': 'http://www.cityoflondon.gov.uk/Corporation/homepage.htm',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'ME12',
 'RatingDate': '2021-10-18T00:00:00',
 'RatingKey': 'fhrs_4_en-gb',
 'RatingValue': 4,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('645ac7436468bb11210ecf7d'),
 'geocode': {'latitude': Decimal128('51.369321'),
             'longitude': Decimal128('0.508551')},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishm

In [70]:
# Convert the cursor object to a list
london_establishments_list = list(london_establishments)

# Convert the list to a Pandas DataFrame
df_london_establishments = pd.DataFrame(london_establishments_list)


In [71]:
# Display the first 10 rows of the DataFrame
print(df_london_establishments.head(10))

                        _id   FHRSID  ChangesByServerID  \
0  645ac7436468bb11210ecf7d   621707                  0   
1  645ac7436468bb11210ed2a4  1130836                  0   
2  645ac7436468bb11210eddf0   293783                  0   
3  645ac7446468bb11210eebf0  1315095                  0   
4  645ac7446468bb11210eebf1   294474                  0   
5  645ac7446468bb11210ef2f9   294900                  0   
6  645ac7446468bb11210f1408   293756                  0   
7  645ac7446468bb11210f17ce   878523                  0   
8  645ac7446468bb11210f17e8   293772                  0   
9  645ac7446468bb11210f17e9   295107                  0   

  LocalAuthorityBusinessID                          BusinessName  \
0             PI/000025307                             Charlie's   
1             PI/000034075               Mv City Cruises Erasmus   
2             PI/000002614             Benfleet Motor Yacht Club   
3             PI/000036464  Coombs Catering t/a The Lock and Key   
4         

### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [20]:
print(db.establishments.find_one()["geocode"]["latitude"].__class__.__name__)
print(db.establishments.find_one()["geocode"]["longitude"].__class__.__name__)


Decimal128
Decimal128


In [76]:
# Find the new restaurant "Penang Flavours"
new_restaurant = establishments.find_one({"BusinessName": "Penang Flavours"})

# Define the search range
search_range = 0.01 

# Convert the latitude and longitude of the new restaurant to float
latitude = float(new_restaurant["geocode"]["latitude"].to_decimal())
longitude = float(new_restaurant["geocode"]["longitude"].to_decimal())

# Search for restaurants within 0.01 degree latitude and longitude of Penang Flavours and Rating value must equal 5
nearby = establishments.find(
    {
        "geocode.latitude": {"$gte": latitude - search_range, "$lte": latitude + search_range},
        "geocode.longitude": {"$gte": longitude - search_range, "$lte": longitude + search_range},
        "RatingValue": {"$eq": 5}
    }
).sort([("scores.Hygiene", pymongo.ASCENDING)])

# Print the number of establishments in the result
print("Total number of establishments in with rating value of 5 within 0.1 degree of Penang Flavours:", nearby.count())



Total number of establishments in with rating value of 5 within 0.1 degree of Penang Flavours: 50


  print("Total number of establishments in with rating value of 5 within 0.1 degree of Penang Flavours:", nearby.count())


In [77]:
pprint(nearby[0])

{'AddressLine1': '',
 'AddressLine2': 'Langley Crescent',
 'AddressLine3': '',
 'AddressLine4': 'Dagenham',
 'BusinessName': 'James Cambell Primary School',
 'BusinessType': 'School/college/university',
 'BusinessTypeID': 7845,
 'ChangesByServerID': 0,
 'Distance': 4647.385014639661,
 'FHRSID': 106490,
 'LocalAuthorityBusinessID': '3701',
 'LocalAuthorityCode': '501',
 'LocalAuthorityEmailAddress': 'foodsafety@lbbd.gov.uk',
 'LocalAuthorityName': 'Barking and Dagenham',
 'LocalAuthorityWebSite': 'http://www.lbbd.gov.uk/Pages/Home.aspx',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'RM9 6TD',
 'RatingDate': '2021-11-05T00:00:00',
 'RatingKey': 'fhrs_5_en-gb',
 'RatingValue': 5,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('645ac7446468bb11210f1055'),
 'geocode': {'latitude': Decimal128('51.536304'),
             'longitude': Decimal128('0.129873')},
 'links': [{'href': 'http://api.ratings.food.gov.uk/establishments/106490',
            'rel': 'self'}],
 'meta':

In [78]:
# Convert the result to a Pandas DataFrame
nearby_df = pd.DataFrame(nearby)

In [79]:
nearby_df.head(10)

Unnamed: 0,_id,FHRSID,ChangesByServerID,LocalAuthorityBusinessID,BusinessName,BusinessType,BusinessTypeID,AddressLine1,AddressLine2,AddressLine3,...,LocalAuthorityWebSite,LocalAuthorityEmailAddress,scores,SchemeType,geocode,RightToReply,Distance,NewRatingPending,meta,links
0,645ac7446468bb11210f1055,106490,0.0,3701,James Cambell Primary School,School/college/university,7845,,Langley Crescent,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.129873, 'latitude': 51.536304}",,4647.385015,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
1,645ac7446468bb11210f1064,128159,0.0,57988,Jo Richardson Secondary School,School/college/university,7845,,Gale Street,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 5, 'ConfidenceInM...",FHRS,"{'longitude': 0.126375, 'latitude': 51.533355}",,4647.404096,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
2,645ac7446468bb11210f1065,950704,0.0,69962,Castle Green Nursery,Caring Premises,5,,Gale Street,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.126375, 'latitude': 51.533355}",,4647.404096,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
3,645ac7446468bb11210f1066,1210200,0.0,75292,Jo Richardson Community School - Social Kitche...,Restaurant/Cafe/Canteen,1,,Gale Street,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.126375, 'latitude': 51.533355}",,4647.404096,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
4,645ac7446468bb11210f1102,128811,0.0,59086,Crowned Kids,Caring Premises,5,,523 Gale Street,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.127977, 'latitude': 51.539261}",,4647.562613,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
5,645ac7446468bb11210f1114,105018,0.0,3598,Angies Cafe and Kebab Ltd,Takeaway/sandwich shop,7844,,505 Gale Street,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.127901, 'latitude': 51.539793}",,4647.584914,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
6,645ac7446468bb11210f1115,91591,0.0,254,Co-op Welcome,Retailers - supermarkets/hypermarkets,7840,,513b Gale Street,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.127901, 'latitude': 51.539792}",,4647.584877,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
7,645ac7446468bb11210f1117,415585,0.0,42134,Keith Davis Cue Sports Club,Restaurant/Cafe/Canteen,1,,497-505 Gale Street,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.127901, 'latitude': 51.539793}",,4647.584914,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
8,645ac7446468bb11210f111d,110607,0.0,40230,Greggs,Retailers - other,4613,,12a Woodward Road,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.12701, 'latitude': 51.538928}",,4647.58559,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
9,645ac7446468bb11210f1122,1518914,0.0,78713,Martins,Retailers - other,4613,,16 Woodward Road,,...,http://www.lbbd.gov.uk/Pages/Home.aspx,foodsafety@lbbd.gov.uk,"{'Hygiene': 0, 'Structural': 0, 'ConfidenceInM...",FHRS,"{'longitude': 0.1268614, 'latitude': 51.5389039}",,4647.590115,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."


### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [97]:
# Create a pipeline that: 
# 1. Matches establishments with a hygiene score of 0
# 2. Groups the matches by Local Authority
# 3. Sorts the matches from highest to lowest

pipeline = [
    {"$match": {"scores.Hygiene": 0}},
    {"$group": {"_id": "$LocalAuthorityName", "count": {"$sum": 1}}},
    {"$sort": {"count": -1}}
]

local_hygiene = establishments.aggregate(pipeline)


In [98]:
# Convert the result to a Pandas DataFrame
hygiene_zero_df = pd.DataFrame(list(local_hygiene))


In [99]:
# Display the number of rows in the DataFrame
print(hygiene_zero_df)

                           _id  count
0                       Thanet   1130
1                    Greenwich    882
2                    Maidstone    713
3                       Newham    711
4                        Swale    686
5                   Chelmsford    680
6                       Medway    672
7                       Bexley    607
8              Southend-On-Sea    586
9                     Tendring    542
10                  Colchester    498
11             Tunbridge Wells    491
12        Folkestone and Hythe    480
13                  Eastbourne    478
14                    Hastings    464
15                     Bromley    460
16                     Ashford    427
17                    Havering    397
18                    Dartford    383
19                   Braintree    382
20                    Basildon    362
21                   Gravesham    339
22       Tonbridge and Malling    325
23                    Thurrock    313
24        Barking and Dagenham    301
25          

In [100]:
# Display the first 10 rows of the DataFrame
print(hygiene_zero_df.head(10))

               _id  count
0           Thanet   1130
1        Greenwich    882
2        Maidstone    713
3           Newham    711
4            Swale    686
5       Chelmsford    680
6           Medway    672
7           Bexley    607
8  Southend-On-Sea    586
9         Tendring    542
