In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from scipy import sparse
from scipy.sparse import csr_matrix
from implicit.datasets.lastfm import get_lastfm
from implicit.nearest_neighbours import bm25_weight, BM25Recommender
from implicit.als import AlternatingLeastSquares
from IPython.core.interactiveshell import InteractiveShell
import time

InteractiveShell.ast_node_interactivity = 'all'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class DataLoader:
    def __init__(self, userData:Path , restaurantData: Path) -> None:
        self.userData = userData
        self.restaurantData = restaurantData
        self.restaurantDF = None

    def load_user_data(self) -> csr_matrix:
        df = pd.read_csv(self.userData, sep=",")

        df.set_index(['userID', 'restaurantID'], inplace=True)
        coo = sparse.coo_matrix(
            (
                df["rating"].astype(float),
                (df.index.get_level_values(0), df.index.get_level_values(1)),
            )
        )
        coo = bm25_weight(coo, K1=100, B=0.8)

        return coo.tocsr()

    def get_restaurant(self, rest_id: int) -> str:
        return self.restaurantData.iloc[rest_id]['name']

    def load_restaurant_data(self) -> None:
        df = pd.read_csv(self.restaurantData, sep=',')
        df = df.set_index('id')
        
        df['score'] = df['score'].fillna(np.random.uniform(0, 10))
        df['ratings'] = df['ratings'].fillna(np.random.randint(0, 100))
        df['price_range'] = df['price_range'].fillna(np.random.randint(1,2) * "$")

        df['zip_code'] = df['zip_code'].str.split('-', n=1, expand=True)[0]
        # df['zip_code']
        self.restaurantData = df
        return 



In [3]:
class Model:
    def __init__(self, user_items: csr_matrix):
      self.model= AlternatingLeastSquares(factors=128, iterations=10, regularization=0.01)
      self.user_items = user_items
      
    
    def fitModel(self):
      start = time.time()
      self.model.fit(self.user_items)

      print(f"Finished training the model at {time.time() - start}")
    
    def recommend_items(self, userID: int, n: int = 10):
      recommendations, scores = self.model.recommend(userID, self.user_items[n], N=n, recalculate_user=True)

      return recommendations, scores
    
    def similar_items(self, restaurant_id): 
      id, scores = self.model.similar_items(restaurant_id)

      return id, scores

In [4]:
loader = DataLoader(Path('user-data.csv'), Path('restaurants.csv'))
csr = loader.load_user_data()
loader.load_restaurant_data()

model = Model(csr)
model.fitModel()


  check_blas_config()
100%|██████████| 10/10 [00:15<00:00,  1.53s/it]

Finished training the model at 15.396453857421875





In [5]:
restaurants, scores = model.recommend_items(2140, 50) 
results = [loader.get_restaurant(rest_id) for rest_id in restaurants]

results, scores 

# restauraunts, scores = model.similar_items(2004)
# results = [loader.get_restaurant(rest_id) for rest_id in restaurants]

# restaurants , scores

(['Panda Korea Restaurant',
  'The Very Good Butchers (5832 Fairdale Ln)',
  'Dairy Queen (245 E Hampton Ave)',
  'Papa Johns (740 Volunteer Pkwy)',
  'Palette Indian Kitchen',
  'Ember Smoked BBQ (2809 W. Expwy. 83)',
  'Thai Go',
  'Crumbl Cookies (TX - Magnolia)',
  'Kolache Factory - 006',
  'Shipley Donuts (1900 W University Dr #8)',
  'Mr. Seafood',
  'TCBY (4775 W Panther Creek Dr)',
  'Duck Donuts (South Jordan)',
  'KFC (1208 50th Street)',
  'Potbelly Sandwich Shop (10921 W Broad St | 508)',
  'Safeway Express (15000 Ne 24Th St)',
  "Saccone's Pizza and Subs",
  'Outback Steakhouse (225 Fort Evans Rd NE)',
  "Ben &amp; Jerry's (Bellevue)",
  'Another Wing by DJ Khaled (POR30-1)',
  'Safeway Express (15805 Pacific Ave S)',
  'Red Robin Gourmet Burgers (11200 Broadway St #1500)',
  'Baskin-Robbins (800 N Loop 336 W)',
  'Chick-fil-A  (387 E FM 1382)',
  'Sabor Latino 2021 Inc',
  'Sake Hato Tejas',
  "It's Just Wings (1940 N Central Expy)",
  'DiveOn75 Hookah Lounge &amp; Bar',