In [1]:
from os import getenv
from pymongo import MongoClient
from pandas import DataFrame, Series, merge
from dotenv import load_dotenv

In [2]:
filepath = '/Users/jasongersing/PycharmProjects/fastApiProject/.env'
load_dotenv(filepath)
url = getenv('MONGO_URL')
database = "thruTheGrapevine"
collection_GB = "GrapeBuyers"
collection_GS = "GrapeSellers"

In [3]:
db_GB = MongoClient(url)[database][collection_GB]
grape_buyers = DataFrame(
    db_GB.find(projection={
        "_id": False, 
        "profile_id": True, 
        "grapes_seeking": True, 
        "volume_seeking": True, 
    })
)
grape_buyers = grape_buyers.explode(column=["grapes_seeking", "volume_seeking"])
grape_buyers.rename(columns={"grapes_seeking": "variety", "volume_seeking": "tons"}, inplace=True)
grape_buyers["side"] = "Buyer"
grape_buyers["tons"] = Series([int(num) for num in grape_buyers["tons"]])
grape_buyers.head()

Unnamed: 0,profile_id,variety,tons,side
0,K1hwd7N64N668a4H,Mourvedre,25,Buyer
0,K1hwd7N64N668a4H,Pinot blanc,25,Buyer
0,K1hwd7N64N668a4H,Pinot noir,25,Buyer
0,K1hwd7N64N668a4H,Syrah,25,Buyer
1,tQ3M3s60NM7808Bv,Sauvignon blanc,25,Buyer


In [4]:
db_GS = MongoClient(url)[database][collection_GS]
grape_sellers = DataFrame(
    db_GS.find(projection={
        "_id": False, 
        "profile_id": True, 
        "grapes_selling": True, 
        "volume_selling": True,
    })
)
grape_sellers = grape_sellers.explode(column=["grapes_selling", "volume_selling"])
grape_sellers.rename(columns={"grapes_selling": "variety", "volume_selling": "tons"}, inplace=True)
grape_sellers["side"] = "Seller"
grape_sellers["tons"] = Series([int(num) for num in grape_sellers["tons"]])
grape_sellers.head()

Unnamed: 0,profile_id,variety,tons,side
0,7xcKb18t353Z7p5Y,Granache,5,Seller
0,7xcKb18t353Z7p5Y,Riesling,5,Seller
0,7xcKb18t353Z7p5Y,Pinot gris,5,Seller
0,7xcKb18t353Z7p5Y,Sangiovese,5,Seller
1,1726Z0qu1t6Zs7oT,Pinot gris,20,Seller


In [5]:
together = merge(grape_buyers, grape_sellers, how='inner', on=['variety'])
together = together.loc[(together.tons_x <= together.tons_y)]
together["tons_difference"] = together["tons_y"] - together["tons_x"]
together.head()

Unnamed: 0,profile_id_x,variety,tons_x,side_x,profile_id_y,tons_y,side_y,tons_difference
0,K1hwd7N64N668a4H,Mourvedre,25,Buyer,dtv7i758zm5n40q1,45,Seller,20
2,K1hwd7N64N668a4H,Mourvedre,25,Buyer,EBI50Rq0206h4u4k,50,Seller,25
3,K1hwd7N64N668a4H,Mourvedre,25,Buyer,o5B500L63bHv50sv,50,Seller,25
7,Xm636aK82Kc31QH8,Mourvedre,45,Buyer,dtv7i758zm5n40q1,45,Seller,0
9,Xm636aK82Kc31QH8,Mourvedre,45,Buyer,EBI50Rq0206h4u4k,50,Seller,5


In [6]:
def return_seller_profile_ids(n_matches: int, profile_id: str):
    buyer_indexes = (together[together["profile_id_x"] == profile_id]).index.tolist()
    seller_profile_id = [together.iloc[num]["profile_id_y"] for num in buyer_indexes]
    return seller_profile_id[:n_matches]

In [7]:
return_seller_profile_ids(3, 'K1hwd7N64N668a4H')

['dtv7i758zm5n40q1', 'o5B500L63bHv50sv', 'dtv7i758zm5n40q1']