In [94]:
import pandas as pd
import numpy as np
from pathlib import Path
from scipy import sparse
from scipy.sparse import csr_matrix
from implicit.datasets.lastfm import get_lastfm
from implicit.nearest_neighbours import bm25_weight
from implicit.als import AlternatingLeastSquares
from IPython.core.interactiveshell import InteractiveShell
from typing import List
import time

InteractiveShell.ast_node_interactivity = 'all'

In [111]:
class DataLoader:
    def __init__(self, userData:Path , restaurantData: Path) -> None:
        self.userData = userData
        self.restaurantData = restaurantData

        #for user data 
        df = pd.read_csv(self.userData, sep=',')

        self.userDataDF = df

        #for restaurant data
        df = pd.read_csv(self.restaurantData, sep= ',')
        self.restaurantDF = df

    def load_user_data(self) -> csr_matrix:
        df = self.userDataDF

        df = df.set_index(['userID', 'restaurantID'])
        coo = sparse.coo_matrix(
            (
                df["rating"].astype(float),
                ( 
                    df.index.get_level_values(0), 
                   	df.index.get_level_values(1)
                ),
            )
        )
        coo = bm25_weight(coo, K1=1000, B=0.75) 
        return coo.tocsr()

    def get_restaurant(self, rest_id: int) -> str:
        return self.restaurantDF.iloc[rest_id]['name']

    def load_restaurant_data(self) -> None:
        df = pd.read_csv(self.restaurantData, sep=',')
        df = df.set_index('id')
        
        df['score'] = df['score'].fillna(np.random.uniform(0, 10))
        df['ratings'] = df['ratings'].fillna(np.random.randint(1, 100))
        df['price_range'] = df['price_range'].fillna(np.random.randint(1,2) * "$")

        df['zip_code'] = df['zip_code'].str.split('-', n=1, expand=True)[0]
        # df['zip_code']
        self.restaurantDF = df
        return 
    
    def get_user_data(self, userID: int) -> pd.DataFrame:
        df = self.userDataDF

        return df.loc[df['userID'] == userID]



In [96]:
class Model:
    def __init__(self, user_items: csr_matrix, restaurant_data: pd.DataFrame) -> None:
      self.model= AlternatingLeastSquares(factors = 50, iterations=15, regularization=0.01)
      self.user_items = user_items
      self.recommend_data = restaurant_data
      
    
    def fitModel(self) -> None:
      start = time.time()
      self.model.fit(self.user_items)

      print(f"Finished training the model at {time.time() - start}")
    
    def recommend_items(self, userID: int, n: int = 10):
      recommendations, scores = self.model.recommend(userID, self.user_items[n], N=n, recalculate_user=True)

      return recommendations, scores
    
    def similar_items(self, restaurant_id): 
      id, scores = self.model.similar_items(restaurant_id)

      return id, scores

In [112]:
loader = DataLoader(Path('user-data.csv'), Path('restaurants.csv'))
user_data = loader.load_user_data()
resturant_data = loader.load_restaurant_data()


In [114]:
#REGULAR 
model = Model(user_data, resturant_data)
model.fitModel()

  0%|          | 0/15 [00:00<?, ?it/s]

100%|██████████| 15/15 [00:12<00:00,  1.22it/s]

Finished training the model at 12.274335622787476





In [120]:
restaurants, scores = model.recommend_items(10, 50) 
results = [loader.get_restaurant(rest_id) for rest_id in restaurants]

df = loader.userDataDF
results, scores

df[df['userID'] == 10]['restaurantID']

(['Popeyes (1436 Beltline Rd)',
  'Cooking With Flavour',
  "MJ's Gaming Cafe",
  'Sonic (3260 North Goliad Street)',
  "Hui's Cantonese &amp; American Restaurant (Milwaukee)",
  'Pressed - Southlake Market Place',
  'Safeway Express (27035 Pacific Hwy S)',
  'Lemonshark Poke',
  'Biryani Hub',
  'Bunk Sandwiches (Water Ave)',
  'Subway (400 Santa Fe Dr Ste B)',
  'Stir Fry Chef (Addison)',
  'Tataki sushi bar',
  'Cancun Mexican restaurant',
  'Angry Chicken',
  'Rite Aid (1850 SOUTH MILDRED STREET)',
  'Sonic (2564 Ironwood Drive)',
  "Carl's Jr. (9330 Dyer St)",
  'Cupbop (Draper)',
  'Wawa 8617 (10060 Jefferson Davis Highway)',
  'Karma Burger (3524 12th St)',
  'Soul Bird Chkn Shack',
  'Safeway Express (2725 Ne Sunset Blvd)',
  'Pizza express',
  'Vivalapizzadelivery21 (5610 N Interstate Hwy 35)',
  'Renew Coffee N Bakery',
  'Capital Crab Company',
  "McDonald's® (701 WILLIAMSON RD)",
  'Enliten Bakery &amp; Cafe',
  'Twelve',
  'Wild Burger (1180 W Sunset Dr.)',
  'Lawaiian’s',

922     49233
923      7887
924     21219
925     44793
926     23063
        ...  
1008    63391
1009    56624
1010    57427
1011    32320
1012    39038
Name: restaurantID, Length: 91, dtype: int64

In [100]:
artists, users , plays = get_lastfm()
np.arange(len(users))
# plays.tocsr()
# user_plays = plays.T.tocsr()

# model2 = AlternatingLeastSquares(factors=128)
# model2.fit(user_plays)
# to_generate = np.arange(len(users))



array([     0,      1,      2, ..., 358865, 358866, 358867])

In [101]:
# batch = to_generate[0: 0 + 1000]
# ids, scores = model2.recommend(batch, user_plays[batch], filter_already_liked_items=True)

# ids, scores