# Reciprocal Rank Fusion (RRF) and Relative Score Fusion (RSF) Demonstration

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/rrf_rsf_demo.ipynb)

You can view an article that explains concepts in this notebook: [![View Article](https://img.shields.io/badge/View%20Article-blue)](https://mdb.link/rrf_rsf_demo)

# Setup

In [None]:
!pip install pymongo

import pandas as pd
import pymongo
from google.colab import userdata

Collecting pymongo
  Downloading pymongo-4.15.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.8.0-py3-none-any.whl.metadata (5.7 kB)
Downloading pymongo-4.15.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dnspython-2.8.0-py3-none-any.whl (331 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m331.1/331.1 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dnspython, pymongo
Successfully installed dnspython-2.8.0 pymongo-4.15.4


In [None]:
def get_mongo_client(mongo_uri):
    """Connect to MongoDB and verify that the deployment is actually reachable.

    Constructing a ``MongoClient`` does not block on, or fail for, an
    unreachable server, so a ``ping`` command is issued to force a round trip
    before success is reported.

    Args:
        mongo_uri: MongoDB connection string.

    Returns:
        The connected ``pymongo.MongoClient``, or ``None`` when the connection
        attempt raises ``pymongo.errors.ConnectionFailure``.
    """
    client = None
    try:
        client = pymongo.MongoClient(mongo_uri, appname="devrel.blueprint.hybrid")
        # Force server selection now; without this the failure would only
        # surface later, on the first real operation.
        client.admin.command("ping")
    except pymongo.errors.ConnectionFailure as e:
        print(f"Connection failed: {e}")
        if client is not None:
            # Release the background monitor/pool of the unusable client.
            client.close()
        return None
    print("Connection to MongoDB successful")
    return client


# The connection string is read from the Colab secrets store (`userdata`).
mongodb_uri = userdata.get("MONGODB_URI")
if not mongodb_uri:
    # Fail fast: every later cell needs a working connection, so continuing
    # without a URI would only produce a more confusing error further down.
    raise ValueError(
        "MONGODB_URI is not set; add it to the Colab secrets before running this notebook"
    )

client = get_mongo_client(mongodb_uri)
if client is None:
    # get_mongo_client returns None on connection failure; stop here instead
    # of failing with a TypeError when subscripting None below.
    raise RuntimeError("Could not connect to MongoDB; check MONGODB_URI")

collection = client["test"]["simple_fusion"]
# Start from an empty collection so re-running this cell does not collide
# with the fixed `_id` values inserted below.
collection.delete_many({})

# Sample restaurants: `distance` (lower is better) and `rating` (higher is
# better) are the two signals fused later in the notebook.
data = [
    {"_id": 1, "name": "Yummy Grub", "distance": 2, "rating": 4.1},
    {"_id": 2, "name": "Hao Chi Fan", "distance": 15, "rating": 4.9},
    {"_id": 3, "name": "All Daysayuno", "distance": 5, "rating": 4.3},
    {"_id": 4, "name": "Soup for Supper", "distance": 3, "rating": 3.5},
    {"_id": 5, "name": "Salada Grande", "distance": 6, "rating": 4.2},
    {"_id": 6, "name": "Veggie Bites", "distance": 3, "rating": 4},
    {"_id": 7, "name": "Food Fiesta", "distance": 1, "rating": 2.5},
    {"_id": 8, "name": "Pizza & Pie", "distance": 4, "rating": 4.4},
    {"_id": 9, "name": "Burger Bazaar", "distance": 3, "rating": 4.2},
]

collection.insert_many(data)

Connection to MongoDB successful


InsertManyResult([1, 2, 3, 4, 5, 6, 7, 8, 9], acknowledged=True)

# Example documents

In [None]:
# Pull every document back out of the collection and render it as a table.
all_docs = list(collection.find({}))
pd.DataFrame(all_docs)

Unnamed: 0,_id,name,distance,rating
0,1,Yummy Grub,2,4.1
1,2,Hao Chi Fan,15,4.9
2,3,All Daysayuno,5,4.3
3,4,Soup for Supper,3,3.5
4,5,Salada Grande,6,4.2
5,6,Veggie Bites,3,4.0
6,7,Food Fiesta,1,2.5
7,8,Pizza & Pie,4,4.4
8,9,Burger Bazaar,3,4.2


## Top closest and top rated

In [None]:
# Closest first; restaurants at the same distance are ordered by higher rating.
top_closest_ranked = [
    {"$sort": {"distance": 1, "rating": -1}},
    {"$limit": 5},
]
# Best rated first; restaurants with the same rating are ordered by shorter distance.
top_rated_ranked = [
    {"$sort": {"rating": -1, "distance": 1}},
    {"$limit": 5},
]

# Run each ranking on its own so the individual top-5 lists can be inspected
# before they are fused.
top_closest_results_ranked = list(collection.aggregate(top_closest_ranked))
top_rated_results_ranked = list(collection.aggregate(top_rated_ranked))

In [None]:
# Show the five closest restaurants (ties on distance broken by higher rating).
pd.DataFrame(top_closest_results_ranked)

Unnamed: 0,_id,name,distance,rating
0,7,Food Fiesta,1,2.5
1,1,Yummy Grub,2,4.1
2,9,Burger Bazaar,3,4.2
3,6,Veggie Bites,3,4.0
4,4,Soup for Supper,3,3.5


In [None]:
# Show the five best-rated restaurants (ties on rating broken by shorter distance).
pd.DataFrame(top_rated_results_ranked)

Unnamed: 0,_id,name,distance,rating
0,2,Hao Chi Fan,15,4.9
1,8,Pizza & Pie,4,4.4
2,3,All Daysayuno,5,4.3
3,9,Burger Bazaar,3,4.2
4,5,Salada Grande,6,4.2


# RRF: Reciprocal Rank Fusion

In [None]:
# Fuse the two ranked pipelines by rank position; the distance ranking is
# weighted slightly higher (35) than the rating ranking (30).
rank_fusion_stage = {
    "$rankFusion": {
        "input": {
            "pipelines": {
                "distance_pipeline": top_closest_ranked,
                "rating_pipeline": top_rated_ranked,
            }
        },
        "combination": {
            "weights": {"distance_pipeline": 35, "rating_pipeline": 30}
        },
        "scoreDetails": True,
    }
}

# Copy the fused score and its breakdown out of the document metadata so they
# appear as regular columns in the result table.
rrf_expose_score_stage = {
    "$addFields": {
        "score": {"$meta": "score"},
        "scoreDetails": {"$meta": "scoreDetails"},
    }
}

rrf_results = list(collection.aggregate([rank_fusion_stage, rrf_expose_score_stage]))

pd.DataFrame(rrf_results)

Unnamed: 0,_id,name,distance,rating,score,scoreDetails
0,9,Burger Bazaar,3,4.2,1.024306,"{'value': 1.0243055555555556, 'description': '..."
1,7,Food Fiesta,1,2.5,0.57377,"{'value': 0.5737704918032788, 'description': '..."
2,1,Yummy Grub,2,4.1,0.564516,"{'value': 0.564516129032258, 'description': 'v..."
3,6,Veggie Bites,3,4.0,0.546875,"{'value': 0.546875, 'description': 'value outp..."
4,4,Soup for Supper,3,3.5,0.538462,"{'value': 0.5384615384615385, 'description': '..."
5,2,Hao Chi Fan,15,4.9,0.491803,"{'value': 0.49180327868852464, 'description': ..."
6,8,Pizza & Pie,4,4.4,0.483871,"{'value': 0.4838709677419355, 'description': '..."
7,3,All Daysayuno,5,4.3,0.47619,"{'value': 0.47619047619047616, 'description': ..."
8,5,Salada Grande,6,4.2,0.461538,"{'value': 0.46153846153846156, 'description': ..."


# Relative Score Fusion

In [None]:
# Distance score: 100 minus 5 points per unit of distance, so closer
# restaurants score higher. Defined once and reused below.
distance_score_expr = {"$subtract": [100, {"$multiply": [5.0, "$distance"]}]}

# Both scored pipelines end the same way: order by the score attached by the
# preceding $score stage, then keep five documents.
sort_by_score_stage = {"$sort": {"score": {"$meta": "score"}}}
keep_five_stage = {"$limit": 5}

top_closest_scored = [
    {"$score": {"score": distance_score_expr, "normalization": "none"}},
    sort_by_score_stage,
    keep_five_stage,
]
top_rated_scored = [
    {"$score": {"score": "$rating", "normalization": "none"}},
    sort_by_score_stage,
    keep_five_stage,
]

# Run each scored pipeline on its own for inspection before fusing.
top_closest_results_scored = list(collection.aggregate(top_closest_scored))
top_rated_results_scored = list(collection.aggregate(top_rated_scored))

# Fuse the two scored pipelines: "sigmoid" normalization on the inputs, then
# an equally weighted average ("avg") of the two pipeline scores.
score_fusion_stage = {
    "$scoreFusion": {
        "input": {
            "pipelines": {
                "distance_pipeline": top_closest_scored,
                "rating_pipeline": top_rated_scored,
            },
            "normalization": "sigmoid",
        },
        "combination": {
            "weights": {"distance_pipeline": 1, "rating_pipeline": 1},
            "method": "avg",
        },
        "scoreDetails": True,
    }
}

# Add the raw (un-normalized) distance score alongside the fused score and its
# breakdown so they can be compared in the result table.
rsf_annotate_stage = {
    "$addFields": {
        "computed_distance_score": distance_score_expr,
        "score": {"$meta": "score"},
        "scoreDetails": {"$meta": "scoreDetails"},
    }
}

rsf_results = list(collection.aggregate([score_fusion_stage, rsf_annotate_stage]))

In [None]:
# Show the top five documents from the distance-scored pipeline.
pd.DataFrame(top_closest_results_scored)

Unnamed: 0,_id,name,distance,rating
0,7,Food Fiesta,1,2.5
1,1,Yummy Grub,2,4.1
2,9,Burger Bazaar,3,4.2
3,4,Soup for Supper,3,3.5
4,6,Veggie Bites,3,4.0


In [None]:
# Show the top five documents from the rating-scored pipeline.
pd.DataFrame(top_rated_results_scored)

Unnamed: 0,_id,name,distance,rating
0,2,Hao Chi Fan,15,4.9
1,8,Pizza & Pie,4,4.4
2,3,All Daysayuno,5,4.3
3,9,Burger Bazaar,3,4.2
4,5,Salada Grande,6,4.2


In [None]:
# Show the fused results, including the raw distance score, the fused score
# and the score breakdown added by the $addFields stage.
pd.DataFrame(rsf_results)

Unnamed: 0,_id,name,distance,rating,computed_distance_score,score,scoreDetails
0,9,Burger Bazaar,3,4.2,85.0,0.992613,"{'value': 0.9926129841533635, 'description': '..."
1,1,Yummy Grub,2,4.1,90.0,0.5,"{'value': 0.5, 'description': 'the value calcu..."
2,4,Soup for Supper,3,3.5,85.0,0.5,"{'value': 0.5, 'description': 'the value calcu..."
3,6,Veggie Bites,3,4.0,85.0,0.5,"{'value': 0.5, 'description': 'the value calcu..."
4,7,Food Fiesta,1,2.5,95.0,0.5,"{'value': 0.5, 'description': 'the value calcu..."
5,2,Hao Chi Fan,15,4.9,25.0,0.496304,"{'value': 0.49630422932785906, 'description': ..."
6,8,Pizza & Pie,4,4.4,80.0,0.493936,"{'value': 0.49393578250786285, 'description': ..."
7,3,All Daysayuno,5,4.3,75.0,0.493307,"{'value': 0.49330654108616756, 'description': ..."
8,5,Salada Grande,6,4.2,70.0,0.492613,"{'value': 0.49261298415336346, 'description': ..."
