# Env setup

In [2]:
import datetime as dt
from datetime import date
import random

import pandas as pd

# Requirements

1. Implicit or explicit ratings.
    - Each rating needs a creation date so that time decay can be applied to it.

In [15]:
# Synthetic ratings used by the recommenders below.
# A dedicated, seeded generator makes the sample identical on every
# "Restart & Run All" (for a given run date) without touching the global
# `random` state.
RATINGS_SEED = 42
rng = random.Random(RATINGS_SEED)

today = date.today()
ratings = [{
    "user_id": rng.randint(1, 100),
    "item_id": rng.randint(1, 100),
    "rating": rng.random() * 10,  # score in [0, 10)
    "created_date": today - dt.timedelta(days=rng.randint(0, 100))
} for _ in range(1000)]

ratings_df = (
    pd.DataFrame(ratings)
    # Age of each rating in whole days, computed vectorised on datetime64
    # values instead of a Python-level lambda per row.
    .assign(
        days_since=lambda df: (
            pd.Timestamp(today) - pd.to_datetime(df["created_date"])
        ).dt.days
    )
    # Keep only the first generated rating for each (user, item) pair; the
    # original row index is preserved.
    .drop_duplicates(subset=["user_id", "item_id"])
)

# Static non-personalised recommenders

## Top 10

### All-time

In [21]:
# All-time top 10: average every rating an item has received (no time
# weighting), then show the ten items with the highest mean.
(
    ratings_df
    .groupby("item_id")["rating"]
    .mean()
    .sort_values(ascending=False)
    .reset_index()
    .head(10)
)

Unnamed: 0,item_id,rating
0,90,7.571043
1,45,7.50101
2,27,7.312686
3,26,7.279596
4,25,6.942081
5,76,6.790045
6,67,6.750636
7,41,6.731403
8,68,6.677052
9,88,6.604283


### Time-weighted

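Decay functions can take multiple forms (linear, exponential, etc.). The example below uses a simple hyperbolic decay: each rating is divided by $1 + \text{days\_since}$, so a rating created today keeps its full value and older ratings contribute progressively less.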

In [23]:
# Down-weight older ratings: a rating that is d days old is divided by (1 + d),
# so a rating from today (d = 0) is left unchanged.
ratings_df["weighted_rating"] = ratings_df["rating"].div(
    ratings_df["days_since"].add(1)
)

# Rank items by the mean of their decayed ratings and show the ten highest.
(
    ratings_df
    .groupby("item_id")["weighted_rating"]
    .mean()
    .sort_values(ascending=False)
    .reset_index()
    .head(10)
)

Unnamed: 0,item_id,weighted_rating
0,38,1.924935
1,90,1.225905
2,76,1.215119
3,69,0.85981
4,24,0.856505
5,48,0.766219
6,14,0.725356
7,58,0.724769
8,98,0.722965
9,37,0.685236


## Frequently bought together (FBT) recommendation 

Built using the [online retail dataset](https://archive.ics.uci.edu/dataset/352/online+retail) from the UCI ML repository. The spreadsheet should be saved as `data/online_retail.xlsx`; the notebook reads it through the relative path `../data/online_retail.xlsx`.


In [29]:
# Load the Online Retail workbook; the path is relative to the notebook's
# working directory.
retail_df = pd.read_excel(io="../data/online_retail.xlsx")

In [30]:
# Print a summary of the loaded frame: column names, non-null counts, dtypes
# and memory usage.
retail_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   InvoiceNo    541909 non-null  object        
 1   StockCode    541909 non-null  object        
 2   Description  540455 non-null  object        
 3   Quantity     541909 non-null  int64         
 4   InvoiceDate  541909 non-null  datetime64[ns]
 5   UnitPrice    541909 non-null  float64       
 6   CustomerID   406829 non-null  float64       
 7   Country      541909 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 33.1+ MB
