# Eat Safe, Love

## Notebook Set Up

In [6]:
from pymongo import MongoClient
import pandas as pd
from pprint import pprint

In [7]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [8]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [9]:
# review the collections in our database
print(db.collection_names)

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'uk_food'), 'collection_names')


In [10]:
# assign the collection to a variable
establishments = db['establishments']

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [13]:
# Find the establishments with a hygiene score of 20
query =establishments.find({'scores.Hygiene':{'$eq':20}})
# Use count_documents to display the number of documents in the result
establishments.count_documents({'scores.Hygiene':{'$eq':20}})
# Display the first document in the results using pprint
pprint(establishments.find_one({'scores.Hygiene':{'$eq':20}}))

{'AddressLine1': '5-6 Southfields Road',
 'AddressLine2': 'Eastbourne',
 'AddressLine3': 'East Sussex',
 'AddressLine4': '',
 'BusinessName': 'The Chase Rest Home',
 'BusinessType': 'Caring Premises',
 'BusinessTypeID': 5,
 'ChangesByServerID': 0,
 'Distance': 4613.888288172291,
 'FHRSID': 110681,
 'LocalAuthorityBusinessID': '4029',
 'LocalAuthorityCode': '102',
 'LocalAuthorityEmailAddress': 'Customerfirst@eastbourne.gov.uk',
 'LocalAuthorityName': 'Eastbourne',
 'LocalAuthorityWebSite': 'http://www.eastbourne.gov.uk/foodratings',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'BN21 1BU',
 'RatingDate': '2021-09-23T00:00:00',
 'RatingKey': 'fhrs_0_en-gb',
 'RatingValue': 0,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('67d32cca31ad51a27e20ba20'),
 'geocode': {'latitude': Decimal128('50.769705'),
             'longitude': Decimal128('0.27694')},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/110681',
            'rel': 'self'}],
 'meta': {'

In [14]:
# Convert the result to a Pandas DataFrame
data=pd.DataFrame(list(query))
# Display the number of rows in the DataFrame
len(data)
# Display the first 10 rows of the DataFrame
print(data.head(10))


                        _id   FHRSID  ChangesByServerID  \
0  67d32cca31ad51a27e20ba20   110681                  0   
1  67d32cca31ad51a27e20bda5   612039                  0   
2  67d32cca31ad51a27e20c0ac   730933                  0   
3  67d32cca31ad51a27e20c29a   172735                  0   
4  67d32cca31ad51a27e20c2a7   172953                  0   
5  67d32ccb31ad51a27e20cc47   512854                  0   
6  67d32ccb31ad51a27e20ce67  1537089                  0   
7  67d32ccb31ad51a27e20e392   155648                  0   
8  67d32ccb31ad51a27e20e7d7  1012883                  0   
9  67d32ccb31ad51a27e20efe9   644109                  0   

  LocalAuthorityBusinessID               BusinessName  \
0                     4029        The Chase Rest Home   
1                1970/FOOD                 Brenalwood   
2                1698/FOOD              Melrose Hotel   
3             PI/000023858              Seaford Pizza   
4             PI/000024532              Golden Palace   
5       

### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [15]:
# Find the establishments with London in the LocalAuthorityName and has a RatingValue greater than or equal to 4.
query ={'$and':[{"LocalAuthorityName":{'$regex':".*London.*"}},{"RatingValue":{"$gte":4}}]}
establishments.find(query)

# Use count_documents to display the number of documents in the result
print(f'There are {establishments.count_documents(query)} establishments with London in the LocalAuthorityName and has a RatingValue greater than or equal to 4.')
# Display the first document in the results using pprint
pprint(establishments.find_one(query))

There are 33 establishments with London in the LocalAuthorityName and has a RatingValue greater than or equal to 4.
{'AddressLine1': 'Oak Apple Farm Building 103 Sheernes Docks',
 'AddressLine2': 'Sheppy Kent',
 'AddressLine3': '',
 'AddressLine4': '',
 'BusinessName': "Charlie's",
 'BusinessType': 'Other catering premises',
 'BusinessTypeID': 7841,
 'ChangesByServerID': 0,
 'Distance': 4627.439467780196,
 'FHRSID': 621707,
 'LocalAuthorityBusinessID': 'PI/000025307',
 'LocalAuthorityCode': '508',
 'LocalAuthorityEmailAddress': 'publicprotection@cityoflondon.gov.uk',
 'LocalAuthorityName': 'City of London Corporation',
 'LocalAuthorityWebSite': 'http://www.cityoflondon.gov.uk/Corporation/homepage.htm',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'ME12',
 'RatingDate': '2021-10-18T00:00:00',
 'RatingKey': 'fhrs_4_en-gb',
 'RatingValue': 4,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('67d32ccb31ad51a27e20d43a'),
 'geocode': {'latitude': Decimal128('51.369321')

In [16]:
# Convert the result to a Pandas DataFrame
df=pd.DataFrame(establishments.find(query))
# Display the number of rows in the DataFrame
print(len(df))
# Display the first 10 rows of the DataFrame
print(df.head(10))


33
                        _id   FHRSID  ChangesByServerID  \
0  67d32ccb31ad51a27e20d43a   621707                  0   
1  67d32ccb31ad51a27e20d764  1130836                  0   
2  67d32ccb31ad51a27e20e2af   293783                  0   
3  67d32ccb31ad51a27e20f0ae  1315095                  0   
4  67d32ccb31ad51a27e20f0b2   294474                  0   
5  67d32ccb31ad51a27e20f7b6   294900                  0   
6  67d32ccb31ad51a27e2118c5   293756                  0   
7  67d32ccb31ad51a27e211c8f   878523                  0   
8  67d32ccb31ad51a27e211ca5   294606                  0   
9  67d32ccb31ad51a27e211ca6   293772                  0   

  LocalAuthorityBusinessID                          BusinessName  \
0             PI/000025307                             Charlie's   
1             PI/000034075               Mv City Cruises Erasmus   
2             PI/000002614             Benfleet Motor Yacht Club   
3             PI/000036464  Coombs Catering t/a The Lock and Key   
4      

### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [17]:
# Search within 0.01 degree on either side of the latitude and longitude.
# Rating value must equal 5
# Sort by hygiene score

degree_search = 0.01
latitude =float(establishments.find_one({'BusinessName':'Penang Flavours'},{'geocode.latitude':1})['geocode']['latitude'].to_decimal().real)
longitude= float(establishments.find_one({'BusinessName':'Penang Flavours'},{'geocode.longitude':1})['geocode']['longitude'].to_decimal().real)

query ={'$and':[{'geocode.latitude':{'$lte':latitude+0.01,'$gte':latitude-0.01}},{'geocode.longitude':{'$gte':longitude-0.01,'$lte':longitude+0.01}},{'RatingValue':5}]}
sort = {'scores.Hygiene':1}
#limit =

# Print the results
for r in establishments.find(query).sort(sort):
    print(r)


{'_id': ObjectId('67d32ccb31ad51a27e2112e4'), 'FHRSID': 940026, 'ChangesByServerID': 0, 'LocalAuthorityBusinessID': 'PI/000177403', 'BusinessName': 'Abbi Wines', 'BusinessType': 'Retailers - other', 'BusinessTypeID': 4613, 'AddressLine1': '12 - 14 Lakedale Road', 'AddressLine2': '', 'AddressLine3': 'Plumstead', 'AddressLine4': 'Greenwich', 'PostCode': 'SE18 1PP', 'Phone': '', 'RatingValue': 5, 'RatingKey': 'fhrs_5_en-gb', 'RatingDate': '2021-08-23T00:00:00', 'LocalAuthorityCode': '511', 'LocalAuthorityName': 'Greenwich', 'LocalAuthorityWebSite': 'http://www.royalgreenwich.gov.uk', 'LocalAuthorityEmailAddress': 'health@royalgreenwich.gov.uk', 'scores': {'Hygiene': 0, 'Structural': 0, 'ConfidenceInManagement': 0}, 'SchemeType': 'FHRS', 'geocode': {'longitude': Decimal128('0.0908895'), 'latitude': Decimal128('51.4871401')}, 'RightToReply': '', 'Distance': 4647.001583335365, 'NewRatingPending': False, 'meta': {'dataSource': None, 'extractDate': '0001-01-01T00:00:00', 'itemCount': 0, 'retur

In [18]:
# Convert result to Pandas DataFrame
df=pd.DataFrame(establishments.find(query).sort(sort))
print(df)

                         _id   FHRSID  ChangesByServerID  \
0   67d32ccb31ad51a27e2112e4   940026                  0   
1   67d32ccb31ad51a27e211250   695223                  0   
2   67d32ccb31ad51a27e211261  1380578                  0   
3   67d32ccb31ad51a27e21127e   694609                  0   
4   67d32ccb31ad51a27e211297   695241                  0   
..                       ...      ...                ...   
82  67d32ccb31ad51a27e2114db  1300359                  0   
83  67d32ccb31ad51a27e21167d  1156702                  0   
84  67d32ccb31ad51a27e2114a7   694625                  0   
85  67d32ccb31ad51a27e21122b  1069652                  0   
86  67d32ccb31ad51a27e2115ba   947998                  0   

   LocalAuthorityBusinessID                         BusinessName  \
0              PI/000177403                           Abbi Wines   
1              PI/000178842                              Iceland   
2                     14425  Howe and Co Fish and Chips - Van 17   
3      

### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [None]:
# Create a pipeline that:
# 1. Matches establishments with a hygiene score of 0
# 2. Groups the matches by Local Authority
# 3. Sorts the matches from highest to lowest
match={'$match':{'scores.Hygiene':0}}
group={'$group':{'_id':"$LocalAuthorityName",'count':{'$sum':1}}}
sort={'$sort':{'count':-1}}
pipeline=[match,group,sort]
# Print the number of documents in the result
print(len(list(establishments.aggregate(pipeline))))
# Print the first 10 results
print(list(establishments.aggregate(pipeline))[:10])

55
[{'_id': 'Thanet', 'count': 1130}, {'_id': 'Greenwich', 'count': 882}, {'_id': 'Maidstone', 'count': 713}, {'_id': 'Newham', 'count': 711}, {'_id': 'Swale', 'count': 686}, {'_id': 'Chelmsford', 'count': 680}, {'_id': 'Medway', 'count': 672}, {'_id': 'Bexley', 'count': 607}, {'_id': 'Southend-On-Sea', 'count': 586}, {'_id': 'Tendring', 'count': 542}]


In [26]:
# Convert the result to a Pandas DataFrame
hygiene_df=pd.DataFrame(list(establishments.aggregate(pipeline)))
# Display the number of rows in the DataFrame
len(hygiene_df)
# Display the first 10 rows of the DataFrame
hygiene_df.head(10)

Unnamed: 0,_id,count
0,Thanet,1130
1,Greenwich,882
2,Maidstone,713
3,Newham,711
4,Swale,686
5,Chelmsford,680
6,Medway,672
7,Bexley,607
8,Southend-On-Sea,586
9,Tendring,542
