In [32]:
"""A Yelp-powered Restaurant Recommendation Program"""

from abstractions import *
from data import ALL_RESTAURANTS, CATEGORIES, USER_FILES, load_user_file
from ucb import main, trace, interact
from utils import distance, mean, zip, enumerate, sample
from visualize import draw_map

##################################
# Phase 2: Unsupervised Learning #
##################################


def find_closest(location, centroids):
    """Return the element of centroids with the smallest distance to location.

    When several centroids are equally close, the one that appears first in
    centroids is returned.

    Arguments:
    location -- a position, compared against each centroid with distance
    centroids -- a non-empty sequence of positions

    >>> find_closest([3.0, 4.0], [[0.0, 0.0], [2.0, 3.0], [4.0, 3.0], [5.0, 5.0]])
    [2.0, 3.0]
    """
    # BEGIN Question 3
    def gap(centroid):
        return distance(location, centroid)

    # min() keeps the earliest item among equal keys, which gives the
    # "first one wins" tie rule promised above.
    return min(centroids, key=gap)
    # END Question 3


def group_by_first(pairs):
    """Group the values of [key, value] pairs by their key.

    Return a list with one entry per distinct key, ordered by the first
    appearance of each key in pairs. Each entry lists, in their original
    order, all values that were paired with that key. Keys are matched by
    equality, so they do not have to be hashable.

    Arguments:
    pairs -- a sequence of pairs

    >>> example = [ [1, 2], [3, 2], [2, 4], [1, 3], [3, 1], [1, 2] ]
    >>> group_by_first(example)
    [[2, 3, 2], [2, 1], [4]]
    """
    # First pass: collect the distinct keys in order of first appearance.
    seen = []
    for first, _ in pairs:
        if first in seen:
            continue
        seen.append(first)

    # Second pass: gather the values belonging to each distinct key.
    groups = []
    for wanted in seen:
        groups.append([second for first, second in pairs if first == wanted])
    return groups


def group_by_centroid(restaurants, centroids):
    """Return a list of clusters, where each cluster contains all restaurants
    nearest to a corresponding centroid in centroids. Each item in
    restaurants should appear once in the result, along with the other
    restaurants closest to the same centroid.

    Clusters are ordered by the first restaurant that was assigned to them,
    and a centroid that is nearest to no restaurant yields no cluster, so the
    result may contain fewer clusters than there are centroids.

    Arguments:
    restaurants -- a sequence of restaurants
    centroids -- a non-empty sequence of positions
    """
    # BEGIN Question 4
    # Pair every restaurant with its nearest centroid (ties go to the first
    # centroid, as in find_closest), then let group_by_first collect the
    # restaurants that share a centroid.
    assignments = [[find_closest(restaurant_location(r), centroids), r]
                   for r in restaurants]
    return group_by_first(assignments)
    # END Question 4


def find_centroid(cluster):
    """Return the centroid of the locations of the restaurants in cluster.

    The result is a two-element list: the mean of the first location
    coordinates followed by the mean of the second location coordinates.
    """
    # BEGIN Question 5
    def coordinate_mean(axis):
        return mean([restaurant_location(r)[axis] for r in cluster])

    return [coordinate_mean(axis) for axis in (0, 1)]
    # END Question 5


def k_means(restaurants, k, max_updates=100):
    """Use k-means to group restaurants by location into k clusters.

    Starting from the locations of k sampled restaurants, repeatedly assign
    every restaurant to its nearest centroid and replace each centroid with
    the centroid of its cluster. Stop when an update leaves the centroids
    unchanged or after max_updates updates, and return the centroids.

    Arguments:
    restaurants -- a sequence of at least k restaurants
    k -- the number of initial centroids
    max_updates -- upper bound on the number of update rounds
    """
    assert len(restaurants) >= k, 'Not enough restaurants to cluster'
    # Select initial centroids randomly by choosing k different restaurants
    centroids = [restaurant_location(r) for r in sample(restaurants, k)]
    previous, updates = [], 0

    while previous != centroids and updates < max_updates:
        previous = centroids
        # BEGIN Question 6
        clusters = group_by_centroid(restaurants, previous)
        centroids = [find_centroid(cluster) for cluster in clusters]
        # END Question 6
        updates += 1
    return centroids

In [34]:
k_means(restaurants1,2)

[[-0.5, -4.0], [1.0, -1.0]]

In [13]:
k=2

In [19]:
# Scratch cell: replay the setup and the first assignment step of k_means by
# hand so the intermediate clusters can be inspected. It relies on `k` and
# `restaurants1` having been defined by other cells.
old_centroids, n = [], 0
# Select initial centroids randomly by choosing k different restaurants
centroids = [restaurant_location(r) for r in sample(restaurants1, k)]
old_centroids = centroids
# Bucket every restaurant under its nearest centroid.
newclusters = group_by_centroid(restaurants1, old_centroids)

In [18]:
# Three-restaurant fixture used by the k-means scratch cells. Each restaurant
# carries a name, a two-coordinate location and a single review.
restaurants1 = [
    make_restaurant('A', [-3, -4], [], 3, [make_review('A', 2)]),
    make_restaurant('B', [1, -1],  [], 1, [make_review('B', 1)]),
    make_restaurant('C', [2, -4],  [], 1, [make_review('C', 5)])]

#k_means(restaurants1,k)

In [27]:
newclusters

[[['A', [-3, -4], [], 3, [['A', 2]]]],
 [['B', [1, -1], [], 1, [['B', 1]]], ['C', [2, -4], [], 1, [['C', 5]]]]]

In [31]:
[restaurant_location(res) for res in newclusters[1]]

[[1, -1], [2, -4]]

In [35]:
def k_means(restaurants, k, max_updates=100):
    """Use k-means to group restaurants by location into k clusters.

    Return the list of centroids reached once an update no longer changes
    them, or after max_updates updates, whichever happens first.

    Arguments:
    restaurants -- a sequence of at least k restaurants
    k -- the number of initial centroids
    max_updates -- upper bound on the number of update rounds
    """
    assert len(restaurants) >= k, 'Not enough restaurants to cluster'
    # Select initial centroids randomly by choosing k different restaurants
    seeds = sample(restaurants, k)
    centroids = [restaurant_location(seed) for seed in seeds]
    old_centroids = []
    n = 0

    while old_centroids != centroids and n < max_updates:
        old_centroids = centroids
        # BEGIN Question 6
        # Assignment step followed by the update step.
        centroids = [find_centroid(members)
                     for members in group_by_centroid(restaurants, old_centroids)]
        # END Question 6
        n += 1
    return centroids



################################
# Phase 3: Supervised Learning #
################################


def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    The fitted model is rating = a + b * feature_fn(restaurant).

    Degenerate data does not raise ZeroDivisionError: when all feature values
    are identical the slope b is taken as 0, so the predictor returns the mean
    rating, and when either the feature values or the ratings have no spread
    the R^2 value is reported as 0.0.

    Every restaurant in restaurants must have been reviewed by user;
    otherwise looking up its rating raises KeyError.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    # Map restaurant name -> rating for every review the user has written.
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]  # feature value per restaurant
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]  # user's rating per restaurant

    # BEGIN Question 7
    x_bar, y_bar = mean(xs), mean(ys)
    # Sums of squared deviations and of cross deviations from the means.
    s_xx = sum((x - x_bar) ** 2 for x in xs)
    s_yy = sum((y - y_bar) ** 2 for y in ys)
    s_xy = sum((x - x_bar) * (y - y_bar) for x, y in zip(xs, ys))

    # A feature without spread cannot explain anything: use a flat line.
    b = s_xy / s_xx if s_xx else 0.0
    a = y_bar - b * x_bar
    # The correlation is undefined when either spread is zero; report 0.0.
    r_squared = s_xy ** 2 / (s_xx * s_yy) if s_xx and s_yy else 0.0
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared


def best_predictor(user, restaurants, feature_fns):
    """Return the predictor with the highest R^2 value among those obtained
    by fitting each feature in feature_fns to the user's ratings.

    Only the restaurants the user has reviewed are used for fitting. If
    several features share the highest R^2 value, the predictor of the first
    such feature is returned.

    Arguments:
    user -- A user
    restaurants -- A list of restaurants
    feature_fns -- A sequence of functions that each takes a restaurant
    """
    reviewed = user_reviewed_restaurants(user, restaurants)
    # BEGIN Question 8
    # Each candidate is a (predictor, r_squared) pair.
    candidates = [find_predictor(user, reviewed, fn) for fn in feature_fns]
    winner = max(candidates, key=lambda candidate: candidate[1])
    return winner[0]
    # END Question 8


def rate_all(user, restaurants, feature_fns):
    """Return the predicted ratings of restaurants by user using the best
    predictor based on a function from feature_fns.

    The result is a dictionary from restaurant name to rating. A restaurant
    the user has reviewed maps to the user's own rating; any other restaurant
    maps to the value of the best predictor, which is fitted on
    ALL_RESTAURANTS.

    Arguments:
    user -- A user
    restaurants -- A list of restaurants
    feature_fns -- A sequence of feature functions
    """
    predictor = best_predictor(user, ALL_RESTAURANTS, feature_fns)
    reviewed = user_reviewed_restaurants(user, restaurants)
    # BEGIN Question 9
    ratings = {}
    for restaurant in restaurants:
        name = restaurant_name(restaurant)
        if restaurant in reviewed:
            # Look the rating up by name rather than by a running position, so
            # the result does not depend on the order or multiplicity of
            # `reviewed` matching that of `restaurants`.
            ratings[name] = user_rating(user, name)
        else:
            ratings[name] = predictor(restaurant)
    return ratings
    # END Question 9

In [45]:
# Scratch fixture for trying out rate_all; it uses the same values as the
# pasted doctest transcript further down in this file.
# The user has reviewed restaurants A, B and C.
user = make_user('Mr. Mean Rating Minus One', [
make_review('A', 3),
make_review('B', 4),
make_review('C', 1),
])
# Four restaurants; D is the only one the user has not reviewed.
cluster = [
make_restaurant('A', [1, 2], [], 4, [
make_review('A', 4),
make_review('A', 4)
]),
make_restaurant('B', [4, 2], [], 3, [
make_review('B', 5)
]),
make_restaurant('C', [-2, 6], [], 4, [
make_review('C', 2)
]),
make_restaurant('D', [4, 4], [], 3.5, [
make_review('D', 2.5),
make_review('D', 3.5),
]),
]
# Name -> restaurant lookup table for the fixture.
restaurants = {restaurant_name(r): r for r in cluster}
# Rebinds the name in this session. NOTE(review): presumably so that rate_all,
# which reads ALL_RESTAURANTS, fits its predictor on this fixture -- confirm.
ALL_RESTAURANTS = cluster
# C (reviewed by the user) and D (not reviewed).
to_rate = cluster[2:]
# Candidate features: the price, and the mean of a restaurant's ratings.
fns = [restaurant_price, lambda r: mean(restaurant_ratings(r))]
#ratings = rate_all(user, to_rate, fns)

In [69]:
predictor = best_predictor(user, ALL_RESTAURANTS, fns)
reviewed = user_reviewed_restaurants(user, to_rate)

In [72]:
predictor(cluster[0])

3.0

In [70]:
reviewed

[['C', [-2, 6], [], 4, [['C', 2]]]]

In [73]:
user

['Mr. Mean Rating Minus One', {'A': ['A', 3], 'B': ['B', 4], 'C': ['C', 1]}]

In [75]:
ratings=user_rating(user, 'A')

In [77]:
ratings

3

In [63]:
reviewed

['A', 'B', 'C']

In [66]:
reviewed_by_user['A']

['A', 3]

In [67]:
review_rating(reviewed_by_user['A'])

3

In [68]:
[user_rating(user, name) for name in reviewed]

[3, 4, 1]

In [78]:
[user_rating(user, restaurant_name(x)) for x in reviewed]

[1]

In [61]:
# Failed experiment: `reviewed` is a list, and a list cannot be used as a
# dictionary key, so the subscript below raises the TypeError recorded under
# this cell. Ratings have to be looked up one restaurant name at a time.
reviewed_by_user = user_reviews(user)
user_review = reviewed_by_user[reviewed]

TypeError: unhashable type: 'list'

In [None]:
>>> user = make_user('Mr. Mean Rating Minus One', [
          ...     make_review('A', 3),
          ...     make_review('B', 4),
          ...     make_review('C', 1),
          ... ])
          >>> cluster = [
          ...     make_restaurant('A', [1, 2], [], 4, [
          ...         make_review('A', 4),
          ...         make_review('A', 4)
          ...     ]),
          ...     make_restaurant('B', [4, 2], [], 3, [
          ...         make_review('B', 5)
          ...     ]),
          ...     make_restaurant('C', [-2, 6], [], 4, [
          ...         make_review('C', 2)
          ...     ]),
          ...     make_restaurant('D', [4, 4], [], 3.5, [
          ...         make_review('D', 2.5),
          ...         make_review('D', 3.5),
          ...     ]),
          ... ]
          >>> restaurants = {restaurant_name(r): r for r in cluster}
          >>> recommend.ALL_RESTAURANTS = cluster
          >>> to_rate = cluster[2:]
          >>> fns = [restaurant_price, lambda r: mean(restaurant_ratings(r))]
          >>> ratings = rate_all(user, to_rate, fns)
          >>> type(ratings)
          <class 'dict'>
          >>> len(ratings) # Only the restaurants passed to rate_all
          2
          >>> ratings['C'] # A restaurant rated by the user (should be an integer)
          1
          >>> round(ratings['D'], 5) # A predicted rating (should be a decimal)
          2.0
          """,

In [7]:
################################
# Phase 3: Supervised Learning #
################################


def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for a user by performing least-squares linear regression using feature_fn
    on the items in restaurants. Also, return the R^2 value of this model.

    The fitted model is rating = a + b * feature_fn(restaurant).

    Degenerate data does not raise ZeroDivisionError: when all feature values
    are identical the slope b is taken as 0, so the predictor returns the mean
    rating, and when either the feature values or the ratings have no spread
    the R^2 value is reported as 0.0.

    Every restaurant in restaurants must have been reviewed by user;
    otherwise looking up its rating raises KeyError.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants
    feature_fn -- A function that takes a restaurant and returns a number
    """
    # Map restaurant name -> rating for every review the user has written.
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    # BEGIN Question 7
    x_mean, y_mean = mean(xs), mean(ys)
    x_devs = [x - x_mean for x in xs]
    y_devs = [y - y_mean for y in ys]
    # Sums of squared deviations and of cross deviations from the means.
    s_xx = sum(dx * dx for dx in x_devs)
    s_yy = sum(dy * dy for dy in y_devs)
    s_xy = sum(dx * dy for dx, dy in zip(x_devs, y_devs))

    # A feature without spread cannot explain anything: use a flat line.
    b = s_xy / s_xx if s_xx else 0.0
    a = y_mean - b * x_mean
    # The correlation is undefined when either spread is zero; report 0.0.
    r_squared = s_xy ** 2 / (s_xx * s_yy) if s_xx and s_yy else 0.0
    # END Question 7

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared


def best_predictor(user, restaurants, feature_fns):
    """Find the feature within feature_fns that gives the highest R^2 value
    for predicting ratings by the user; return a predictor using that feature.

    Only the restaurants the user has reviewed are used for fitting. If
    several features share the highest R^2 value, the predictor of the first
    such feature is returned. An empty feature_fns raises ValueError.

    Arguments:
    user -- A user
    restaurants -- A list of restaurants
    feature_fns -- A sequence of functions that each takes a restaurant
    """
    reviewed = user_reviewed_restaurants(user, restaurants)
    # BEGIN Question 8
    # find_predictor returns (predictor, r_squared); rank by r_squared.
    fitted = [find_predictor(user, reviewed, feature_fn)
              for feature_fn in feature_fns]
    predictor, _ = max(fitted, key=lambda fit: fit[1])
    return predictor
    # END Question 8


def rate_all(user, restaurants, feature_fns):
    """Return the predicted ratings of restaurants by user using the best
    predictor based on a function from feature_fns.

    The result is a dictionary from restaurant name to rating. A restaurant
    the user has reviewed maps to the user's own rating; any other restaurant
    maps to the value of the best predictor, which is fitted on
    ALL_RESTAURANTS.

    Arguments:
    user -- A user
    restaurants -- A list of restaurants
    feature_fns -- A sequence of feature functions
    """
    predictor = best_predictor(user, ALL_RESTAURANTS, feature_fns)
    reviewed = user_reviewed_restaurants(user, restaurants)
    # BEGIN Question 9
    ratings = {}
    for restaurant in restaurants:
        name = restaurant_name(restaurant)
        # Prefer the user's actual rating; predict only for unreviewed places.
        if restaurant in reviewed:
            ratings[name] = user_rating(user, name)
        else:
            ratings[name] = predictor(restaurant)
    return ratings
    # END Question 9


def search(query, restaurants):
    """Return each restaurant in restaurants that has query as a category.

    NOTE: not implemented yet. The body below is only the template
    placeholder, so calling this function currently returns None.

    Arguments:
    query -- A string
    restaurants -- A sequence of restaurants
    """
    # BEGIN Question 10
    # TODO: filter restaurants by category. NOTE(review): this needs the
    # category selector of the restaurant abstraction, which is not visible in
    # this file -- confirm its name before implementing.
    "*** YOUR CODE HERE ***"
    # END Question 10


def feature_set():
    """Return a sequence of feature functions.

    Each feature takes a restaurant and returns a number. In order they are:
    the mean of its ratings, its price, the number of its ratings, and the
    first and the second coordinate of its location.
    """
    def mean_rating(r):
        return mean(restaurant_ratings(r))

    def rating_count(r):
        return len(restaurant_ratings(r))

    def first_coordinate(r):
        return restaurant_location(r)[0]

    def second_coordinate(r):
        return restaurant_location(r)[1]

    return [mean_rating,
            restaurant_price,
            rating_count,
            first_coordinate,
            second_coordinate]


# NOTE(review): `main` here is the decorator imported from ucb; presumably it
# invokes the decorated function when this file is run as a script -- confirm.
@main
def main(*args):
    """Parse the command line, collect ratings and draw the restaurant map.

    Options:
    -u/--user USER    user file to load (without the '.dat' suffix);
                      defaults to 'test_user'
    -k/--k K          cluster the restaurants with k-means; a missing or zero
                      value draws one centroid per restaurant instead
    -q/--query QUERY  only consider restaurants returned by search for QUERY
    -p/--predict      rate the selected restaurants with rate_all; otherwise
                      only the restaurants reviewed by the user are shown,
                      with the user's own ratings
    -r/--restaurants  print the names of all restaurants and exit
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Run Recommendations',
        formatter_class=argparse.RawTextHelpFormatter
    )
    # '{{{}}}'.format(x) renders as '{x}': the help text shows three sampled
    # choices wrapped in literal braces.
    parser.add_argument('-u', '--user', type=str, choices=USER_FILES,
                        default='test_user',
                        metavar='USER',
                        help='user file, e.g.\n' +
                        '{{{}}}'.format(','.join(sample(USER_FILES, 3))))
    parser.add_argument('-k', '--k', type=int, help='for k-means')
    # The two adjacent string literals below are joined before .format is
    # applied, so the placeholder in the second literal is still filled in.
    parser.add_argument('-q', '--query', choices=CATEGORIES,
                        metavar='QUERY',
                        help='search for restaurants by category e.g.\n'
                        '{{{}}}'.format(','.join(sample(CATEGORIES, 3))))
    parser.add_argument('-p', '--predict', action='store_true',
                        help='predict ratings for all restaurants')
    parser.add_argument('-r', '--restaurants', action='store_true',
                        help='outputs a list of restaurant names')
    # Rebinds the *args parameter; parse_args() without arguments reads the
    # process command line.
    args = parser.parse_args()

    # Output a list of restaurant names
    if args.restaurants:
        print('Restaurant names:')
        for restaurant in sorted(ALL_RESTAURANTS, key=restaurant_name):
            print(repr(restaurant_name(restaurant)))
        exit(0)

    # Select restaurants using a category query
    if args.query:
        restaurants = search(args.query, ALL_RESTAURANTS)
    else:
        restaurants = ALL_RESTAURANTS

    # Load a user
    assert args.user, 'A --user is required to draw a map'
    user = load_user_file('{}.dat'.format(args.user))

    # Collect ratings
    if args.predict:
        ratings = rate_all(user, restaurants, feature_set())
    else:
        # Without --predict only the restaurants reviewed by the user are
        # kept, each with the user's own rating.
        restaurants = user_reviewed_restaurants(user, restaurants)
        names = [restaurant_name(r) for r in restaurants]
        ratings = {name: user_rating(user, name) for name in names}

    # Draw the visualization
    if args.k:
        # Never ask for more clusters than there are restaurants.
        centroids = k_means(restaurants, min(args.k, len(restaurants)))
    else:
        centroids = [restaurant_location(r) for r in restaurants]
    draw_map(centroids, restaurants, ratings)