In [1]:
#*******************************************************************************************
 #
 #  File Name:  restaurant_analysis.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, restaurant_analysis.ipynb, uses the Python 
 #      module, pymongo to analyze restaurant ratings data in a MongoDB database, 
 #      uk_food, for the editors of a food magazine, Eat Safe, Love.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  09/18/2023      Initial Development                     Nicholas J. George
 #
 #******************************************************************************************/

import logx
import pandasx

import restaurant_analysisx

import pandas as pd

from pprint import pprint
from pymongo import MongoClient

pd.options.mode.chained_assignment = None

In [2]:
# Name of this notebook file.
# NOTE(review): not referenced by any other cell visible in this notebook --
# confirm it is still needed.
CONSTANT_LOCAL_FILE_NAME = 'restaurant_analysis.ipynb'

# Column names handed to the plotting helper through its
# hover_columns_string_list keyword argument.
# Every element must be followed by a comma: Python silently concatenates
# adjacent string literals, so a missing comma merges neighbouring names into
# a single non-existent column name such as 'HygieneBusinessTypeAddressLine1'.
hover_columns_string_list \
    = ['BusinessName', 'Hygiene', 'BusinessType',
       'AddressLine1', 'AddressLine2', 'AddressLine3',
       'PostCode', 'LocalAuthorityName']


# Pass False to the project-local logging helper's log-mode switch.
# NOTE(review): presumably this disables writing to a log file -- confirm
# against the logx module, which is not visible here.
logx.set_log_mode(False)

# Pass False to the helper's image-mode switch.
# NOTE(review): presumably this disables saving figures as image files -- confirm.
logx.set_image_mode(False)


# Mark the start of the run; the recorded output of this cell shows the
# message 'Program execution begins...'.
logx.begin_program('restaurant_analysis')

Program execution begins...



# <br> **Section 1: Database Connection**

## **1.1: MongoDB Client**

In [3]:
# Connect to the MongoDB server listening on port 27017. No host is supplied,
# so PyMongo falls back to its default host.
current_pymongo_client = MongoClient(port=27017)

# Hand the client object to the project logging helper.
logx.log_write_object(current_pymongo_client)

## **1.2: MongoDB Database Confirmation**

In [4]:
# List all the MongoDB databases and confirm that the database, uk_food, exists.
#
# The database names are fetched ONCE and reused for both the printed listing
# and the membership test. This guarantees the check is made against exactly
# the list that is displayed and avoids a second round trip to the server.
database_names_string_list = current_pymongo_client.list_database_names()

logx.print_and_log_text(
    '\033[1mThis is a List of the current MongoDB Databases:\n\033[0m'
    + str(database_names_string_list))

if 'uk_food' in database_names_string_list:

    logx.print_and_log_text(
        "\033[1m\nThe MongoDB Database, 'uk_food', is in the MongoDB database list.\033[0m")

else:

    # NOTE(review): this message points to NoSQLSetup.ipynb, whereas the
    # collection check later in this notebook points to mongodb_setup.ipynb --
    # confirm which notebook name is correct.
    logx.print_and_log_text(
        '\033[1m'
        + "\nThe MongoDB Database, 'uk_food', is NOT in the List. "
        + 'Please run NoSQLSetup.ipynb before proceeding!'
        + '\033[0m')

[1mThis is a List of the current MongoDB Databases:
[0m['admin', 'atlanta', 'boston', 'chicago', 'config', 'epa', 'fruits_db', 'local', 'met', 'offense_codes', 'petsitly_marketing', 'predictive_map', 'san_francisco', 'uk_food', 'washington_dc']
[1m
The MongoDB Database, 'uk_food', is in the MongoDB database list.[0m


## **1.3: MongoDB Database**

In [5]:
# Obtain a handle on the uk_food database from the client (item access is
# equivalent to attribute access on a MongoClient).
current_pymongo_database = current_pymongo_client['uk_food']

# Print and log the textual representation of the database handle.
logx.print_and_log_text(str(current_pymongo_database))

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'uk_food')


## **1.4: MongoDB Collection Confirmation**

In [6]:
# List all the collections in the MongoDB database, uk_food, and confirm that
# the collection, establishments, exists.
#
# The collection names are fetched ONCE and reused for both the printed listing
# and the membership test. This guarantees the check is made against exactly
# the list that is displayed and avoids a second round trip to the server.
collection_names_string_list = current_pymongo_database.list_collection_names()

logx.print_and_log_text(
    '\033[1m'
    + 'Here is a List of the current collections in the MongoDB Database, uk_food:\n'
    + '\033[0m'
    + str(collection_names_string_list))

if 'establishments' in collection_names_string_list:

    logx.print_and_log_text(
        "\033[1m\nThe Collection, 'establishments', is in the MongoDB Database, 'uk_food'.\033[0m")

else:

    logx.print_and_log_text(
        '\033[1m'
        + "\nThe collection, 'establishments', is NOT in the MongoDB database, 'uk_food'. "
        + 'Please run mongodb_setup.ipynb before proceeding!'
        + '\033[0m')

[1mHere is a List of the current collections in the MongoDB Database, uk_food:
[0m['establishments']
[1m
The Collection, 'establishments', is in the MongoDB Database, 'uk_food'.[0m


## **1.5: MongoDB Collection**

In [7]:
# Obtain a handle on the establishments collection from the database (item
# access is equivalent to attribute access on a PyMongo Database).
establishments_pymongo_collection = current_pymongo_database['establishments']

# Hand the collection object to the project logging helper.
logx.log_write_object(establishments_pymongo_collection)

# <br> **Section 2: Exploratory Analysis**

## **2.1 -- Which establishments have a hygiene score equal to 20 (the worst possible rating)?**

### **Number of Establishments from `count_documents`**

In [8]:
# Filter: documents whose nested field, scores.Hygiene, equals 20.
query_dictionary = {'scores.Hygiene': 20}

# Ask the server how many documents satisfy the filter.
establishments_count_integer = establishments_pymongo_collection.count_documents(query_dictionary)

# Report the count in bold (ANSI escape codes) with a thousands separator.
logx.print_and_log_text(
    '\033[1m'
    + 'There are {:,} establishments with a hygiene score equal to 20.'
        .format(establishments_count_integer)
    + '\033[0m')

[1mThere are 41 establishments with a hygiene score equal to 20.[0m


### **Display First Document**

In [9]:
# Run the hygiene-score query and materialise every matching document into a
# real list. find() returns a lazy Cursor; indexing a Cursor issues a separate
# skip/limit query against the server, so converting once here makes the
# variable's '_list' name truthful and lets the first-document display below
# reuse the already-fetched results.
query_results_dictionary_list = list(establishments_pymongo_collection.find(query_dictionary))

# Display the first document from the query results.
pprint(query_results_dictionary_list[0])

{'AddressLine1': '5-6 Southfields Road',
 'AddressLine2': 'Eastbourne',
 'AddressLine3': 'East Sussex',
 'AddressLine4': '',
 'BusinessName': 'The Chase Rest Home',
 'BusinessType': 'Caring Premises',
 'BusinessTypeID': 5,
 'ChangesByServerID': 0,
 'Distance': 4613.888288172291,
 'FHRSID': 110681,
 'LocalAuthorityBusinessID': '4029',
 'LocalAuthorityCode': '102',
 'LocalAuthorityEmailAddress': 'Customerfirst@eastbourne.gov.uk',
 'LocalAuthorityName': 'Eastbourne',
 'LocalAuthorityWebSite': 'http://www.eastbourne.gov.uk/foodratings',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'BN21 1BU',
 'RatingDate': '2021-09-23T00:00:00',
 'RatingKey': 'fhrs_0_en-gb',
 'RatingValue': None,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('662d9800758b1011cbb273bd'),
 'geocode': {'latitude': 50.769705, 'longitude': 0.27694},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/110681',
            'rel': 'self'}],
 'meta': {'dataSource': None,
          'extractD

### **Worst Hygiene Establishments Data Set**

In [10]:
# Load the query results into a DataFrame and pass it straight through the
# project helper that rearranges the establishment columns.
worst_hygiene_establishments_dataframe = (
    restaurant_analysisx.rearrange_establishments_dataframe_columns(
        pd.DataFrame(list(query_results_dictionary_list))))

# Hand the DataFrame to the project logging helper.
logx.log_write_object(worst_hygiene_establishments_dataframe)

### **Number of Rows in Worst Hygiene Establishments Data Set**

In [11]:
# Report how many rows the worst-hygiene DataFrame holds.
logx.print_and_log_text(
    '\033[1m'
    + 'The number of rows in the Lowest Hygiene Establishments DataFrame is {:,}.'
        .format(len(worst_hygiene_establishments_dataframe))
    + '\033[0m')

[1mThe number of rows in the Lowest Hygiene Establishments DataFrame is 41.[0m


### **Worst Hygiene Establishments' Locations Data Set**

In [12]:
# Show the column labels of the rearranged establishments DataFrame (displayed
# as the cell's output because it is the last expression).
worst_hygiene_establishments_dataframe.columns

Index(['_id', 'FHRSID', 'ChangesByServerID', 'LocalAuthorityBusinessID',
       'BusinessName', 'BusinessType', 'BusinessTypeID', 'AddressLine1',
       'AddressLine2', 'AddressLine3', 'AddressLine4', 'PostCode', 'geocode',
       'Distance', 'Phone', 'RatingValue', 'RatingKey', 'RatingDate',
       'NewRatingPending', 'scores', 'LocalAuthorityCode',
       'LocalAuthorityName', 'LocalAuthorityWebSite',
       'LocalAuthorityEmailAddress', 'SchemeType', 'RightToReply', 'meta',
       'links'],
      dtype='object')

In [13]:
# Derive the locations DataFrame from the establishments DataFrame.
# NOTE(review): the second argument, 'Hygiene', presumably selects the column
# that drives the MarkerSize column -- confirm in restaurant_analysisx.
worst_hygiene_locations_dataframe = restaurant_analysisx.return_locations_dataframe(
    worst_hygiene_establishments_dataframe,
    'Hygiene')

# Hand the DataFrame to the project logging helper.
logx.log_write_object(worst_hygiene_locations_dataframe)

### **Display Worst Hygiene Establishments**

In [14]:
# Render the captioned table and format RatingValue with no decimal places.
(
    pandasx
    .return_formatted_table(
        worst_hygiene_locations_dataframe,
        'Table 2.1: Worst Hygiene Establishments')
    .format({'RatingValue': '{:,.0f}'})
)

BusinessName,BusinessType,AddressLine1,AddressLine2,AddressLine3,AddressLine4,PostCode,RatingValue,RatingDate,NewRatingPending,LocalAuthorityName,Latitude,Longitude,Hygiene,Structural,ConfidenceInManagement,MarkerSize
The Chase Rest Home,Caring Premises,5-6 Southfields Road,Eastbourne,East Sussex,,BN21 1BU,,2021-09-23T00:00:00,False,Eastbourne,50.769705,0.27694,20,20,20,100.0
Brenalwood,Caring Premises,Hall Lane,Walton-on-the-Naze,Essex,,CO14 8HN,,2022-06-22T00:00:00,False,Tendring,51.857536,1.278721,20,15,30,100.0
Melrose Hotel,Hotel/bed & breakfast/guest house,53 Marine Parade East,Clacton On Sea,Essex,,CO15 6AD,,2021-08-13T00:00:00,False,Tendring,51.789429,1.15927,20,20,20,100.0
Seaford Pizza,Takeaway/sandwich shop,4 High Street,Seaford,East Sussex,,BN25 1PG,1.0,2021-12-23T00:00:00,False,Lewes,50.770885,0.10202,20,10,20,100.0
Golden Palace,Restaurant/Cafe/Canteen,5 South Street,Seaford,East Sussex,,BN25 1HP,1.0,2021-10-07T00:00:00,False,Lewes,50.770724,0.101446,20,10,20,100.0
Ashby's Butchers,Retailers - other,777 Southchurch Road,Southend-On-Sea,Essex,,SS1 2PP,,2022-07-21T00:00:00,False,Southend-On-Sea,51.541448,0.736349,20,20,20,100.0
South Sea Express Cuisine,Restaurant/Cafe/Canteen,33 Alexandra Street,Southend-on-sea,Essex,,SS1 1BW,,2022-08-03T00:00:00,False,Southend-On-Sea,51.535007,0.712167,20,20,20,100.0
Golden Palace,Takeaway/sandwich shop,7 London Road,Rayleigh,Essex,,SS6 9HN,,2022-03-23T00:00:00,False,Rochford,51.591515,0.602364,20,15,30,100.0
The Tulip Tree,Restaurant/Cafe/Canteen,3 The Village,Chiddingstone,KENT,,TN8 7AH,1.0,2022-03-04T00:00:00,False,Sevenoaks,51.185947,0.14645,20,5,20,100.0
F & S,Retailers - other,,81 Southernhay,Basildon,Essex,SS14 1EU,,2021-07-26T00:00:00,False,Basildon,51.57005,0.462307,20,20,20,100.0


### **Display Worst Hygiene Establishments' Locations**

In [15]:
# Plot the worst-hygiene locations over the wide bounding box.
# NOTE(review): the four positional column names presumably map to colour,
# size, x and y -- confirm against pandasx.display_dataframe_hvplot.
pandasx.display_dataframe_hvplot(
    worst_hygiene_locations_dataframe,
    'Figure 2.1.1: Worst Hygiene Establishments',
    'BusinessName',
    'MarkerSize',
    'Longitude',
    'Latitude',
    x_limit_float_tuple=(-1.0, 1.5),
    y_limit_float_tuple=(50.7, 52.2),
    hover_columns_string_list=hover_columns_string_list,
)

In [16]:
# Plot the worst-hygiene locations again, zoomed in on a smaller bounding box.
# The title now carries the 'Figure 2.1.2:' prefix and sits on a single line,
# matching the numbering scheme used by every other figure in this notebook
# (2.1.1, 2.2.1, 2.2.2, 2.3.1, 2.3.2).
# NOTE(review): the four positional column names presumably map to colour,
# size, x and y -- confirm against pandasx.display_dataframe_hvplot.
pandasx.display_dataframe_hvplot \
    (worst_hygiene_locations_dataframe, 'Figure 2.1.2: Worst Hygiene Establishments (Close-Up)',
     'BusinessName', 'MarkerSize', 'Longitude', 'Latitude',
     x_limit_float_tuple = (-0.05, 0.2), y_limit_float_tuple = (51.45, 51.6),
     hover_columns_string_list = hover_columns_string_list)

## **2.2 -- Which establishments in London have a high rating value greater than or equal to 4 (1-5)?**

### **Number of Establishments from `count_documents`**

In [17]:
# Filter: LocalAuthorityName matches the regular expression 'London' AND
# RatingValue is greater than or equal to 4.
query_dictionary = {
    'LocalAuthorityName': {'$regex': 'London'},
    'RatingValue': {'$gte': 4},
}

# Ask the server how many documents satisfy the filter.
establishments_count_integer = establishments_pymongo_collection.count_documents(query_dictionary)

# NOTE(review): the message does not mention the London restriction even though
# the count is limited to London authorities -- consider rewording.
logx.print_and_log_text(
    '\033[1mThere are {:,} establishments with a rating value greater than or equal to 4.\033[0m'
        .format(establishments_count_integer))

[1mThere are 33 establishments with a rating value greater than or equal to 4.[0m


### **Display First Document**

In [18]:
# Run the London / high-rating query and materialise every matching document
# into a real list. find() returns a lazy Cursor; indexing a Cursor issues a
# separate skip/limit query against the server, so converting once here makes
# the variable's '_list' name truthful and lets the first-document display
# below reuse the already-fetched results.
query_results_dictionary_list = list(establishments_pymongo_collection.find(query_dictionary))

# Display the first document from the query results.
pprint(query_results_dictionary_list[0])

{'AddressLine1': 'Oak Apple Farm Building 103 Sheernes Docks',
 'AddressLine2': 'Sheppy Kent',
 'AddressLine3': '',
 'AddressLine4': '',
 'BusinessName': "Charlie's",
 'BusinessType': 'Other catering premises',
 'BusinessTypeID': 7841,
 'ChangesByServerID': 0,
 'Distance': 4627.439467780196,
 'FHRSID': 621707,
 'LocalAuthorityBusinessID': 'PI/000025307',
 'LocalAuthorityCode': '508',
 'LocalAuthorityEmailAddress': 'publicprotection@cityoflondon.gov.uk',
 'LocalAuthorityName': 'City of London Corporation',
 'LocalAuthorityWebSite': 'http://www.cityoflondon.gov.uk/Corporation/homepage.htm',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'ME12',
 'RatingDate': '2021-10-18T00:00:00',
 'RatingKey': 'fhrs_4_en-gb',
 'RatingValue': 4,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('662d9800758b1011cbb28dd8'),
 'geocode': {'latitude': 51.369321, 'longitude': 0.508551},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/621707',
            'rel': 'self'}]

### **High Rating Value, London Establishments Data Set**

In [19]:
# Load the query results into a DataFrame and pass it straight through the
# project helper that rearranges the establishment columns.
high_rating_london_dataframe = (
    restaurant_analysisx.rearrange_establishments_dataframe_columns(
        pd.DataFrame(list(query_results_dictionary_list))))

# Hand the DataFrame to the project logging helper.
logx.log_write_object(high_rating_london_dataframe)

### **Number of Rows in High Rating Value, London Data Set**

In [20]:
# Report how many rows the high-rating London DataFrame holds.
logx.print_and_log_text(
    '\033[1mThe number of rows in the High Rating Value, London DataFrame is {:,}.\033[0m'
        .format(len(high_rating_london_dataframe)))

[1mThe number of rows in the High Rating Value, London DataFrame is 33.[0m


### **High Rating Value, London Establishments' Locations Data Set**

In [21]:
# Derive the locations DataFrame from the London establishments DataFrame.
# NOTE(review): 'RatingValue' and 50.0 presumably select and scale the column
# that drives MarkerSize -- confirm in restaurant_analysisx.
high_rating_london_locations_dataframe = restaurant_analysisx.return_locations_dataframe(
    high_rating_london_dataframe,
    'RatingValue',
    50.0)

# Hand the DataFrame to the project logging helper.
logx.log_write_object(high_rating_london_locations_dataframe)

### **Display High Rating Value, London Establishments**

In [22]:
# Render the captioned table and format RatingValue with no decimal places.
(
    pandasx
    .return_formatted_table(
        high_rating_london_locations_dataframe,
        'Table 2.2: High Rating Value, London Establishments')
    .format({'RatingValue': '{:,.0f}'})
)

BusinessName,BusinessType,AddressLine1,AddressLine2,AddressLine3,AddressLine4,PostCode,RatingValue,RatingDate,NewRatingPending,LocalAuthorityName,Latitude,Longitude,Hygiene,Structural,ConfidenceInManagement,MarkerSize
Charlie's,Other catering premises,Oak Apple Farm Building 103 Sheernes Docks,Sheppy Kent,,,ME12,4,2021-10-18T00:00:00,False,City of London Corporation,51.369321,0.508551,5,10,5,200.0
Mv City Cruises Erasmus,Other catering premises,Cherry Garden Pier,Cherry Garden Street Rotherhithe,Charlton,,TN4 8HR,5,2021-05-14T00:00:00,False,City of London Corporation,51.123623,0.249256,0,5,0,250.0
Benfleet Motor Yacht Club,Other catering premises,Ferry Road,South Benfleet Essex,,,SS7 1NF,4,2018-11-02T00:00:00,False,City of London Corporation,51.543831,0.561954,0,0,10,200.0
Coombs Catering t/a The Lock and Key,Restaurant/Cafe/Canteen,Leslie Ford House,Tilbury,Charlton,,RM18 7EH,5,2020-12-04T00:00:00,False,City of London Corporation,51.464066,0.346337,0,5,0,250.0
Tilbury Seafarers Centre,Restaurant/Cafe/Canteen,Tenants Row Tilbury Docks,Tilbury Essex,,,RM18 7EH,5,2018-11-02T00:00:00,False,City of London Corporation,51.464078,0.346325,0,0,0,250.0
Mv Valulla,Other catering premises,Reeds River Cruises LtdKings ReachRiver ThamesSouthwark,London,,,RM15 5QY,5,2016-08-23T00:00:00,False,City of London Corporation,51.504071,0.287555,0,0,0,250.0
Tereza Joanne,Other catering premises,Funcraft UK Ltd King George V Dock Woolwich Manor Way,London,,,E16 2NJ,5,2021-07-09T00:00:00,False,City of London Corporation,51.501121,0.069286,0,5,5,250.0
Brick Lane Brews,Restaurant/Cafe/Canteen,Air side London City Airport,London,,,E16 2PX,4,2019-01-18T00:00:00,False,City of London Corporation,51.503733,0.048006,10,5,5,200.0
The Nuance Group (UK) Limited,Retailers - other,Duty Free Shop Passenger Terminal,London City AirportRoyal DocksLondon,,,E16 2PX,5,2017-07-17T00:00:00,False,City of London Corporation,51.504112,0.047966,0,0,0,250.0
WH Smith,Retailers - other,London City Airport,Silvertown London,,,E16 2PX,5,2017-07-17T00:00:00,False,City of London Corporation,51.504112,0.047966,0,0,0,250.0


### **Display High Rating Value, London Establishments' Locations**

In [23]:
# Plot the high-rating London locations over the wide bounding box.
# NOTE(review): the four positional column names presumably map to colour,
# size, x and y -- confirm against pandasx.display_dataframe_hvplot.
pandasx.display_dataframe_hvplot(
    high_rating_london_locations_dataframe,
    'Figure 2.2.1: High Rating Value, London',
    'BusinessName',
    'MarkerSize',
    'Longitude',
    'Latitude',
    x_limit_float_tuple=(-1.0, 1.5),
    y_limit_float_tuple=(50.7, 52.2),
    hover_columns_string_list=hover_columns_string_list,
)

In [24]:
# Plot the high-rating London locations again, zoomed in on a smaller box.
# NOTE(review): the four positional column names presumably map to colour,
# size, x and y -- confirm against pandasx.display_dataframe_hvplot.
pandasx.display_dataframe_hvplot(
    high_rating_london_locations_dataframe,
    'Figure 2.2.2: High Rating Value, London (Close-Up)',
    'BusinessName',
    'MarkerSize',
    'Longitude',
    'Latitude',
    x_limit_float_tuple=(0.0, 0.1),
    y_limit_float_tuple=(51.49, 51.54),
    hover_columns_string_list=hover_columns_string_list,
)

## **2.3 -- What are the top 5 establishments with a rating value of 5, sorted by best hygiene score, nearest to the new restaurant added, "Penang Flavours"?**

### **Penang Flavours' Latitude and Longitude**

In [25]:
# Retrieve the Penang Flavours document, projecting only the nested fields,
# geocode.longitude and geocode.latitude.
penang_flavours_coordinates_dictionary \
    = establishments_pymongo_collection.find_one \
        ({'BusinessName':'Penang Flavours'}, ['geocode.longitude', 'geocode.latitude'])

# find_one returns None when no document matches. Fail here with an explicit
# message instead of an opaque "'NoneType' object is not subscriptable" error
# on the lookups below.
if penang_flavours_coordinates_dictionary is None:

    raise LookupError \
        ("No establishment named 'Penang Flavours' was found in the collection, "
         + "'establishments'; its coordinates are required for this section.")

# Extract the Penang Flavours 'longitude' field value.
longitude_float = penang_flavours_coordinates_dictionary['geocode']['longitude']

# Extract the Penang Flavours 'latitude' field value.
latitude_float = penang_flavours_coordinates_dictionary['geocode']['latitude']

### **Query Search Range Variables**

In [26]:
# Half-width of the search window, in degrees, applied on each side of the
# reference coordinates.
degree_search_float = 0.01

# Longitude bounds of the search window.
minimum_longitude_float, maximum_longitude_float = (
    longitude_float - degree_search_float,
    longitude_float + degree_search_float,
)

# Latitude bounds of the search window.
minimum_latitude_float, maximum_latitude_float = (
    latitude_float - degree_search_float,
    latitude_float + degree_search_float,
)

### **Query Results**

In [27]:
# Filter: RatingValue equals 5 and the coordinates fall inside the search
# window (inclusive bounds on both longitude and latitude).
query_dictionary = {
    'RatingValue': 5,
    'geocode.longitude': {
        '$gte': minimum_longitude_float,
        '$lte': maximum_longitude_float,
    },
    'geocode.latitude': {
        '$gte': minimum_latitude_float,
        '$lte': maximum_latitude_float,
    },
}

# Sort ascending on the hygiene score.
sort_values_tuple_list = [('scores.Hygiene', 1)]

# Keep at most this many documents.
limit_integer = 5


# Run the filtered, sorted, limited query and materialise the results.
query_results_list = list(
    establishments_pymongo_collection
    .find(query_dictionary)
    .sort(sort_values_tuple_list)
    .limit(limit_integer))

# Hand the result list to the project logging helper.
logx.log_write_object(query_results_list)

### **Number of Establishments from `count_documents`**

In [28]:
# Count the documents matching the filter, capped at limit_integer.
establishments_count_integer = establishments_pymongo_collection.count_documents(
    query_dictionary,
    limit=limit_integer)

# Report the capped count in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'Due to the query parameter limiting the number of results, '
    + 'there are {:,} establishments with a rating value of 5.'
        .format(establishments_count_integer)
    + '\033[0m')

[1mDue to the query parameter limiting the number of results, there are 5 establishments with a rating value of 5.[0m


### **Display All Documents**

In [29]:
# Pretty-print every document returned by the sorted, limited query.
pprint(query_results_list)

[{'AddressLine1': 'Restaurant And Premises 107A Plumstead High Street',
  'AddressLine2': '',
  'AddressLine3': 'Plumstead',
  'AddressLine4': 'Greenwich',
  'BusinessName': 'Howe and Co Fish and Chips - Van 17',
  'BusinessType': 'Mobile caterer',
  'BusinessTypeID': 7846,
  'ChangesByServerID': 0,
  'Distance': 4646.95593107927,
  'FHRSID': 1380578,
  'LocalAuthorityBusinessID': '14425',
  'LocalAuthorityCode': '511',
  'LocalAuthorityEmailAddress': 'health@royalgreenwich.gov.uk',
  'LocalAuthorityName': 'Greenwich',
  'LocalAuthorityWebSite': 'http://www.royalgreenwich.gov.uk',
  'NewRatingPending': False,
  'Phone': '',
  'PostCode': 'SE18 1SE',
  'RatingDate': '2021-11-11T00:00:00',
  'RatingKey': 'fhrs_5_en-gb',
  'RatingValue': 5,
  'RightToReply': '',
  'SchemeType': 'FHRS',
  '_id': ObjectId('662d9800758b1011cbb2cbff'),
  'geocode': {'latitude': 51.4875335693359, 'longitude': 0.0925370007753372},
  'links': [{'href': 'http://api.ratings.food.gov.uk/establishments/1380578',
   

### **Highest Rating, Best Hygiene Establishments Data Set**

In [30]:
# Load the query results into a DataFrame and pass it straight through the
# project helper that rearranges the establishment columns.
high_rating_best_hygiene_dataframe = (
    restaurant_analysisx.rearrange_establishments_dataframe_columns(
        pd.DataFrame(query_results_list)))

# Hand the DataFrame to the project logging helper.
logx.log_write_object(high_rating_best_hygiene_dataframe)

### **Number of Rows in Highest Rating, Best Hygiene Establishments Data Set**

In [31]:
# Report how many rows the highest-rating, best-hygiene DataFrame holds.
logx.print_and_log_text(
    '\033[1mThe number of rows in the Highest Rating, Best Hygiene DataFrame is {:,}.\033[0m'
        .format(len(high_rating_best_hygiene_dataframe)))

[1mThe number of rows in the Highest Rating, Best Hygiene DataFrame is 5.[0m


### **Highest Rating, Best Hygiene Establishments' Locations Data Set**

In [32]:
# Derive the locations DataFrame for the section 2.3 (highest rating, best
# hygiene) results.
# NOTE(review): the result is assigned to high_rating_london_locations_dataframe,
# the same name that holds the section 2.2 London results, so the 2.2 data is
# overwritten; re-running the section 2.2 table/figure cells after this cell
# would show the 2.3 data. Consider a dedicated name (this cell and the
# table/figure cells that follow would have to change together).
# NOTE(review): 'RatingValue' and 50.0 presumably select and scale the column
# that drives MarkerSize -- confirm in restaurant_analysisx.
high_rating_london_locations_dataframe \
    = restaurant_analysisx.return_locations_dataframe \
        (high_rating_best_hygiene_dataframe, 'RatingValue', 50.0)

# Hand the DataFrame to the project logging helper.
logx.log_write_object(high_rating_london_locations_dataframe)

### **Display Highest Rating, Best Hygiene Establishments**

In [33]:
# Render the captioned table and format RatingValue with no decimal places.
(
    pandasx
    .return_formatted_table(
        high_rating_london_locations_dataframe,
        'Table 2.3: Highest Rating, Best Hygiene Establishments')
    .format({'RatingValue': '{:,.0f}'})
)

BusinessName,BusinessType,AddressLine1,AddressLine2,AddressLine3,AddressLine4,PostCode,RatingValue,RatingDate,NewRatingPending,LocalAuthorityName,Latitude,Longitude,Hygiene,Structural,ConfidenceInManagement,MarkerSize
Howe and Co Fish and Chips - Van 17,Mobile caterer,Restaurant And Premises 107A Plumstead High Street,,Plumstead,Greenwich,SE18 1SE,5,2021-11-11T00:00:00,False,Greenwich,51.487534,0.092537,0,0,0,250.0
Atlantic Fish Bar,Takeaway/sandwich shop,35 Lakedale Road,,Plumstead,Greenwich,SE18 1PR,5,2021-06-16T00:00:00,False,Greenwich,51.48673,0.091216,0,0,5,250.0
Plumstead Manor Nursery,Caring Premises,Plumstead Manor School Old Mill Road,,Plumstead,Greenwich,SE18 1QG,5,2021-06-16T00:00:00,False,Greenwich,51.481518,0.085994,0,0,5,250.0
Iceland,Retailers - supermarkets/hypermarkets,144 - 146 Plumstead High Street,,Plumstead,Greenwich,SE18 1JQ,5,2019-11-13T00:00:00,False,Greenwich,51.487148,0.09242,0,5,5,250.0
Volunteer,Pub/bar/nightclub,130 - 132 Plumstead High Street,,Plumstead,Greenwich,SE18 1JQ,5,2019-08-05T00:00:00,False,Greenwich,51.487344,0.09208,0,0,0,250.0


### **Display Highest Rating, Best Hygiene Establishments' Locations**

In [34]:
# Plot the highest-rating, best-hygiene locations over the wide bounding box.
# NOTE(review): the four positional column names presumably map to colour,
# size, x and y -- confirm against pandasx.display_dataframe_hvplot.
pandasx.display_dataframe_hvplot(
    high_rating_london_locations_dataframe,
    'Figure 2.3.1: Highest Rating, Best Hygiene',
    'BusinessName',
    'MarkerSize',
    'Longitude',
    'Latitude',
    x_limit_float_tuple=(-1.0, 1.5),
    y_limit_float_tuple=(50.7, 52.2),
    hover_columns_string_list=hover_columns_string_list,
)

In [35]:
# Plot the highest-rating, best-hygiene locations again, zoomed in on a
# smaller bounding box.
# NOTE(review): the four positional column names presumably map to colour,
# size, x and y -- confirm against pandasx.display_dataframe_hvplot.
pandasx.display_dataframe_hvplot(
    high_rating_london_locations_dataframe,
    'Figure 2.3.2: Highest Rating, Best Hygiene (Close-Up)',
    'BusinessName',
    'MarkerSize',
    'Longitude',
    'Latitude',
    x_limit_float_tuple=(0.08, 0.10),
    y_limit_float_tuple=(51.48, 51.49),
    hover_columns_string_list=hover_columns_string_list,
)

## **2.4 -- How many establishments in each Local Authority area have a hygiene score of 0 (the best possible rating)? Sort the number of establishments from highest to lowest, and print out the top ten local authority areas.**

### **Aggregation Query Parameters**

In [36]:
# Aggregation pipeline, one stage per element:
#   1. $match -- keep establishments whose scores.Hygiene equals 0;
#   2. $group -- group by LocalAuthorityName and count the documents per group;
#   3. $sort  -- order the groups by count, descending (highest first).
pipeline_dictionary_list = [
    {'$match': {'scores.Hygiene': 0}},
    {'$group': {'_id': '$LocalAuthorityName', 'count': {'$sum': 1}}},
    {'$sort': {'count': -1}},
]

# Expose each stage under its own name as well.
match_query_dictionary, group_query_dictionary, sort_values_dictionary \
    = pipeline_dictionary_list

### **Aggregation Query Results**

In [37]:
# Run the aggregation pipeline and materialise its results into a list.
aggregate_query_results_dictionary_list = list(
    establishments_pymongo_collection.aggregate(pipeline_dictionary_list))

# Hand the result list to the project logging helper.
logx.log_write_object(aggregate_query_results_dictionary_list)

### **Number of Authorities and Establishments from Aggregation Query Results**

In [38]:
# Number of result entries that are dictionaries (one per local authority).
authority_count_integer = sum(
    1
    for result in aggregate_query_results_dictionary_list
    if isinstance(result, dict))

# Total number of establishments: the per-authority counts added together.
establishments_count_integer = sum(
    result['count']
    for result in aggregate_query_results_dictionary_list
    if isinstance(result, dict))

# Report both totals in bold.
logx.print_and_log_text(
    '\033[1m'
    + 'From the aggregation query results, there are {:,} establishments with a hygiene score of 0 '
        .format(establishments_count_integer)
    + 'in {:,} Local Authorities.'
        .format(authority_count_integer)
    + '\033[0m')

[1mFrom the aggregation query results, there are 16,827 establishments with a hygiene score of 0 in 55 Local Authorities.[0m


### **Number of Authorities and Establishments from `count_documents`**

In [39]:
# Cross-check the aggregation totals with direct queries on the same filter.
best_hygiene_query_dictionary = {'scores.Hygiene': 0}

# Number of distinct local authority names among the matching documents.
authority_count_integer = len(
    establishments_pymongo_collection.distinct(
        'LocalAuthorityName', best_hygiene_query_dictionary))

# Number of matching documents.
establishments_count_integer = establishments_pymongo_collection.count_documents(
    best_hygiene_query_dictionary)

# Report both totals in bold.
logx.print_and_log_text(
    '\033[1m'
    + "From the 'count_documents' method, there are {:,} "
        .format(establishments_count_integer)
    + 'establishments with a hygiene score of 0 in {:,} Local Authorities.'
        .format(authority_count_integer)
    + '\033[0m')

[1mFrom the 'count_documents' method, there are 16,827 establishments with a hygiene score of 0 in 55 Local Authorities.[0m


### **Display Documents**

In [40]:
# Pretty-print the first ten aggregation results. The pipeline sorts by count
# in descending order, so these are the ten local authorities with the most
# establishments having a hygiene score of 0.
pprint(aggregate_query_results_dictionary_list[0:10])

[{'_id': 'Thanet', 'count': 1130},
 {'_id': 'Greenwich', 'count': 882},
 {'_id': 'Maidstone', 'count': 713},
 {'_id': 'Newham', 'count': 711},
 {'_id': 'Swale', 'count': 686},
 {'_id': 'Chelmsford', 'count': 680},
 {'_id': 'Medway', 'count': 672},
 {'_id': 'Bexley', 'count': 607},
 {'_id': 'Southend-On-Sea', 'count': 586},
 {'_id': 'Tendring', 'count': 542}]


### **Highest Hygiene Establishments Data Set**

In [41]:
# Load the aggregation results into a DataFrame and give the two columns
# descriptive names in a single chained expression.
highest_hygiene_score_dataframe = (
    pd.DataFrame(aggregate_query_results_dictionary_list)
    .rename(columns={'_id': 'LocalAuthorityName', 'count': 'Count'})
)

# Hand the DataFrame to the project logging helper.
logx.log_write_object(highest_hygiene_score_dataframe)

### **Number of Rows in Highest Hygiene Establishments Data Set**

In [42]:
# Report how many rows (local authorities) the highest-hygiene DataFrame holds.
logx.print_and_log_text(
    '\033[1m'
    + 'The number of rows in the Highest Hygiene DataFrame is {:,}.'
        .format(len(highest_hygiene_score_dataframe))
    + '\033[0m')

[1mThe number of rows in the Highest Hygiene DataFrame is 55.[0m


### **Display Highest Hygiene Establishments**

In [43]:
# Render the captioned table of local authorities and their counts.
pandasx.return_formatted_table(
    highest_hygiene_score_dataframe,
    'Table 2.4: Local Authorities with Number of Establishments with Highest Hygiene Score',
)

LocalAuthorityName,Count
Thanet,1130
Greenwich,882
Maidstone,713
Newham,711
Swale,686
Chelmsford,680
Medway,672
Bexley,607
Southend-On-Sea,586
Tendring,542


In [44]:
# logx.end_program()