In [1]:
import numpy as np
import pandas as pd

# **Recommendation System**
---


The `recommendations_system` function recommends product categories for specific customer clusters (`RFM_segment`) and states (`customer_state`). It operates on a given DataFrame (`data`) containing purchase and review information: the rows for the selected clusters and states are grouped by `product_category_name`, the categories are ranked by purchase count, and only categories whose average review score falls within the requested rating range are kept. The number of recommendations, the minimum and maximum review ratings, and the clusters and states of interest can all be customized.

## Parameters:

- `data`: *DataFrame* (default: `df`)
  - Input data containing purchase and review information.
- `clusters`: *list/str* (default: `df['RFM_segment'].unique()`)
  - Target customer clusters for which recommendations are generated.
- `states`: *list/str* (default: `df['customer_state'].unique()`)
  - Target customer states for which recommendations are generated.
- `recommendations`: *int* (default: `10`)
  - Maximum number of product categories to return.
- `min_rating`: *float* (default: `0`)
  - Lower bound on a category's average review score for it to be recommended.
- `max_rating`: *float* (default: `5`)
  - Upper bound on a category's average review score for it to be recommended.




In [2]:
# Per-customer cluster table. 'Unnamed: 0' is presumably a leftover index column
# from an earlier to_csv() call (TODO confirm); it is dropped before merging.
df_cluster = pd.read_csv('data/clustered_data.csv').drop(columns='Unnamed: 0')

# Geolocation table; it is loaded here but not referenced by the cells below.
geo = pd.read_csv('data/olist_geolocation_cleaned_dataset.csv')

# Left join on customer_unique_id keeps every row of the cleaned dataset and
# attaches the columns of df_cluster to it (NaN where a customer has no match).
df = pd.read_csv('data/olist_cleaned_dataset.csv').merge(
    df_cluster, how='left', on='customer_unique_id'
)

In [18]:
def recommendations_system(data=None, clusters=None, states=None, recommendations=10, min_rating=0, max_rating=5):
    """Rank product categories by purchase count for the selected customers.

    Rows of ``data`` are filtered by cluster and state, grouped by
    ``product_category_name``, and ranked by how many rows each category has.
    Categories whose mean ``review_score`` lies outside
    ``[min_rating, max_rating]`` are then removed.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame with the columns ``RFM_segment``, ``customer_state``,
        ``product_category_name`` and ``review_score``. When omitted, the
        notebook-level ``df`` is used; it is looked up at call time, so
        re-running the loading cell does not leave this function working on a
        stale frame.
    clusters : str or list-like, optional
        ``RFM_segment`` value(s) to keep. ``None`` (default) applies no
        cluster filter, i.e. every segment present in ``data`` is used.
    states : str or list-like, optional
        ``customer_state`` value(s) to keep. ``None`` (default) applies no
        state filter.
    recommendations : int, default 10
        Maximum number of categories to return.
    min_rating : float, default 0
        Inclusive lower bound on a category's average review score.
    max_rating : float, default 5
        Inclusive upper bound on a category's average review score.

    Returns
    -------
    pandas.DataFrame
        Columns ``product_category_name``, ``purchase_count`` and
        ``average_review``, ordered by descending ``purchase_count``. The
        index is each category's rank by purchase count *before* the rating
        filter, so it can contain gaps.
    """
    if data is None:
        # Fall back to the notebook's merged frame, resolved when called
        # rather than frozen into the signature at definition time.
        data = df

    # A bare string would otherwise be treated by isin() as a collection of
    # single characters, so wrap it in a list.
    if isinstance(clusters, str):
        clusters = [clusters]
    if isinstance(states, str):
        states = [states]

    # Filters are derived from the frame actually passed in; None means
    # "do not filter on this column".
    filtered_df = data
    if clusters is not None:
        filtered_df = filtered_df[filtered_df['RFM_segment'].isin(clusters)]
    if states is not None:
        filtered_df = filtered_df[filtered_df['customer_state'].isin(states)]

    by_category = filtered_df.groupby('product_category_name')

    result_df = (
        by_category.size()
        .reset_index(name='purchase_count')
        .sort_values(by='purchase_count', ascending=False)
        .reset_index(drop=True)
    )
    mean_review_df = by_category['review_score'].mean().reset_index(name='average_review')

    # The left merge keeps the purchase-count ordering of result_df.
    result_df = pd.merge(result_df, mean_review_df, on='product_category_name', how='left')

    # between() is inclusive on both ends, so a category averaging exactly
    # max_rating (e.g. a perfect 5.0) is no longer dropped. NaN averages
    # compare False and stay excluded.
    in_range = result_df['average_review'].between(min_rating, max_rating)
    return result_df.loc[in_range, ['product_category_name', 'purchase_count', 'average_review']].head(recommendations)

# Call with every default: all segments and states of `df`, at most 10 categories,
# ranked by purchase count.
recommendations_system()

Unnamed: 0,product_category_name,purchase_count,average_review
0,Bed Bath Table,11988,3.903654
1,Health Beauty,10032,4.144338
2,Sports Leisure,9004,4.113616
3,Furniture Decor,8832,3.92312
4,Computers Accessories,8150,3.941963
5,Housewares,7380,4.066667
6,Watches Gifts,6213,4.025913
7,Telephony,4726,3.956411
8,Garden Tools,4590,4.030719
9,Auto,4400,4.073636


In [13]:
# Usage example: at most 5 categories bought by 'Promising' customers in SP,
# restricted to an average review between min_rating=3.5 and max_rating=5.0.
recommendations_system(data=df, clusters=['Promising'], states=['SP'], recommendations=5, min_rating=3.5, max_rating=5.0)

Unnamed: 0,product_category_name,purchase_count,average_review
0,Bed Bath Table,2172,3.966851
1,Health Beauty,1660,4.240361
2,Furniture Decor,1445,4.047751
3,Sports Leisure,1375,4.175273
4,Computers Accessories,1290,3.933333


In [15]:
# Lower max_rating to 4 to leave out the best-reviewed categories. The index of
# the result is each category's rank by purchase count from before the rating
# filter was applied, which is why it has gaps.
recommendations_system(df, min_rating=0, max_rating=4)

Unnamed: 0,product_category_name,purchase_count,average_review
0,Bed Bath Table,11988,3.903654
3,Furniture Decor,8832,3.92312
4,Computers Accessories,8150,3.941963
7,Telephony,4726,3.956411
18,Office Furniture,1788,3.53915
19,Unknown,1709,3.852545
26,Home Construction,635,3.962205
28,Furniture Living Room,532,3.934211
30,Home Comfort,509,3.903733
32,Audio,381,3.853018
