In [63]:
import json

In [79]:
def read_restaurant_data(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)
            return data
    except FileNotFoundError:
        print("File not found. Please provide the correct file path.")
        return []
    except json.JSONDecodeError:
        print("Error decoding JSON data. Ensure that the file is in valid JSON format.")
        return []

# Example usage:
file_path = r'C:\Users\marno\Wiley Edge\Code\Week 4\restaurant.json'  # Provide the correct file path
restaurant_data = read_restaurant_data(file_path)


In [83]:
import json
import datetime

# Define the function to convert timestamps within a single restaurant's grades
def convert_timestamp(restaurant):
    if 'grades' in restaurant:
        for grade in restaurant['grades']:
            if 'date' in grade:
                try:
                    timestamp = grade['date']['$date'] / 1000  # Convert to seconds
                    formatted_date = datetime.datetime.fromtimestamp(timestamp).strftime('%Y/%m/%d')
                    grade['date'] = formatted_date
                except (ValueError, KeyError):
                    grade['date'] = 'Invalid Date'
    return restaurant

# Load your JSON data from a file
with open('restaurant.json', 'r') as file:
    restaurant_data = json.load(file)

# Use map and lambda to apply the function to all instances in the JSON data
updated_data = list(map(convert_timestamp, restaurant_data))

# Print the updated data
for restaurant in updated_data:
    print(json.dumps(restaurant))




{"address": {"building": "87-69", "coord": [-73.8309503, 40.7001121], "street": "Lefferts Boulevard", "zipcode": "11418"}, "borough": "Queens", "cuisine": "American", "grades": [{"date": "2014/02/25", "grade": "A", "score": 7}, {"date": "2013/08/14", "grade": "A", "score": 11}, {"date": "2012/08/07", "grade": "A", "score": 7}, {"date": "2012/03/26", "grade": "A", "score": 10}, {"date": "2011/11/04", "grade": "A", "score": 0}, {"date": "2011/06/29", "grade": "A", "score": 4}], "name": "Snack Time Grill", "restaurant_id": "40363590"}
{"address": {"building": "1418", "coord": [-73.95685019999999, 40.7753401], "street": "Third Avenue", "zipcode": "10028"}, "borough": "Manhattan", "cuisine": "Continental", "grades": [{"date": "2014/06/02", "grade": "A", "score": 9}, {"date": "2013/12/27", "grade": "A", "score": 8}, {"date": "2013/03/18", "grade": "B", "score": 26}, {"date": "2012/02/01", "grade": "A", "score": 7}, {"date": "2011/07/06", "grade": "B", "score": 25}], "name": "Lorenzo & Maria'

In [84]:
# Filter restaurants with at least 5 reviews
def has_at_least_5_reviews(restaurant):
    if 'grades' in restaurant:
        return len(restaurant['grades']) >= 5
    return False

# Use the filter function to get restaurants with at least 5 reviews
restaurants_with_5_or_more_reviews = list(filter(has_at_least_5_reviews, updated_data))

# Print the filtered restaurants
for restaurant in restaurants_with_5_or_more_reviews:
    print(json.dumps(restaurant))


{"address": {"building": "87-69", "coord": [-73.8309503, 40.7001121], "street": "Lefferts Boulevard", "zipcode": "11418"}, "borough": "Queens", "cuisine": "American", "grades": [{"date": "2014/02/25", "grade": "A", "score": 7}, {"date": "2013/08/14", "grade": "A", "score": 11}, {"date": "2012/08/07", "grade": "A", "score": 7}, {"date": "2012/03/26", "grade": "A", "score": 10}, {"date": "2011/11/04", "grade": "A", "score": 0}, {"date": "2011/06/29", "grade": "A", "score": 4}], "name": "Snack Time Grill", "restaurant_id": "40363590"}
{"address": {"building": "1418", "coord": [-73.95685019999999, 40.7753401], "street": "Third Avenue", "zipcode": "10028"}, "borough": "Manhattan", "cuisine": "Continental", "grades": [{"date": "2014/06/02", "grade": "A", "score": 9}, {"date": "2013/12/27", "grade": "A", "score": 8}, {"date": "2013/03/18", "grade": "B", "score": 26}, {"date": "2012/02/01", "grade": "A", "score": 7}, {"date": "2011/07/06", "grade": "B", "score": 25}], "name": "Lorenzo & Maria'

In [148]:
from collections import defaultdict
import json

# Load your JSON data (updated_data) here

# Step 1: Find the MOST_RECENT_YEAR
most_recent_year = max(
    int(grade['date'].split(' ')[-1])  # Extract the year from the date string
    if grade['date'].count('/') == 0
    else int(grade['date'].split('/')[0])  # Handle 'yyyy/mm/dd' format
    for restaurant in updated_data
    for grade in restaurant.get('grades', [])
)


# Step 2: Use map and lambda to find the year of each review
def find_review_years(restaurant):
    if 'grades' in restaurant:
        return list(map(
            lambda grade: int(grade['date'].split(' ')[-1])  # Extract the year from the date string
            if grade['date'].count('/') == 0
            else int(grade['date'].split('/')[0]),  # Handle 'yyyy/mm/dd' format
            restaurant['grades']
        ))
    return []

years_per_restaurant = list(map(find_review_years, updated_data))

# Step 3: Group the reviews by year and count the reviews per year for each restaurant
reviews_per_year = defaultdict(int)
for years in years_per_restaurant:
    for year in years:
        reviews_per_year[year] += 1

print("reviews_per_year:", reviews_per_year)

# print number of reviews from each year for each restaurant
for year, review_count in reviews_per_year.items():
    print(year, review_count)



# Step 4: Filter restaurants with more than five reviews in the most recent year using lambda and filter functions
filtered_restaurants = list(filter(
    lambda restaurant: reviews_per_year[most_recent_year] >= 5,
    updated_data
))


print("MOST_RECENT_YEAR:", most_recent_year)
print("filtered_restaurants:", filtered_restaurants)
print("Total restaurants in filtered_data:", len(filtered_restaurants))

# Print the filtered restaurants
for restaurant in filtered_restaurants:
    print(json.dumps(restaurant))


reviews_per_year: defaultdict(<class 'int'>, {2014: 65, 2013: 73, 2012: 65, 2011: 38, 2015: 3})
2014 65
2013 73
2012 65
2011 38
2015 3
MOST_RECENT_YEAR: 2015
filtered_restaurants: []
Total restaurants in filtered_data: 0


In [141]:
from functools import reduce

# Assuming you already have 'reviews_per_year' as a dictionary of year -> review count

# Step 5: Use the reduce function to compute the average number of reviews per restaurant
review_counts = reviews_per_year.values()
total_reviews = reduce(lambda x, y: x + y, review_counts, 0)  # Calculate the total number of reviews
total_restaurants = len(updated_data)  # Calculate the total number of restaurants
average_reviews_per_restaurant = total_reviews / total_restaurants  # Calculate the average

print("Average number of reviews per restaurant:", average_reviews_per_restaurant)


6


In [157]:
from functools import reduce

# Calculate the average number of reviews per restaurant
average_reviews_per_restaurant = sum(reviews_per_year.values()) / len(reviews_per_year)

# Use the filter function to identify restaurants with more reviews than the average
filtered_restaurants_more_reviews = list(filter(
    lambda restaurant: (
        'grades' in restaurant
        and reviews_per_year.get(most_recent_year, 0) > average_reviews_per_restaurant
    ),
    updated_data
))

# Print the filtered restaurants
for restaurant in filtered_restaurants_more_reviews:
    print(json.dumps(restaurant, indent=2))

print("Total restaurants with more reviews than the average:", len(filtered_restaurants_more_reviews))


Total restaurants with more reviews than the average: 0
