# Pair People

This uses a continuous variant, inspired by the Gale-Shapley algorithm, to pair people in a way that minimizes the distance between the two people in each pair.

Since there is only one group (rather than two disjoint sides) and the features are continuous, you can build a bag of all candidate pairs and then use a greedy algorithm that picks the best remaining pair at each step. This seems to work well, at least in the first iteration.

We use cosine similarity between embeddings as the closeness measure (a higher value means the two people are more similar).

In [2]:
from angle_emb import AnglE

# Integer codes for the sex categories.
MALE, FEMALE, OTHER = range(3)

# Lookup from the lowercase label strings to their integer codes.
SEX_MAPPING = dict(male=MALE, female=FEMALE, other=OTHER)

# Embedding model, loaded once with CLS pooling and moved to the CUDA device.
model = AnglE.from_pretrained(
    'WhereIsAI/UAE-Large-V1',
    pooling_strategy='cls',
).cuda()

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  from .autonotebook import tqdm as notebook_tqdm


In [317]:
import torch

def cos_sim(a, b):
    """Return the cosine similarity of two vectors.

    Args:
        a: First vector; anything ``torch.as_tensor`` accepts (list, NumPy
            array, tensor).
        b: Second vector, with the same shape as ``a``.

    Returns:
        A ``torch.Tensor`` with the cosine similarity computed along
        dimension 0. For 1-D inputs this is a 0-dim tensor, so ``float()``
        can be applied to it.
    """
    # Use the MPS backend when this machine has it, otherwise fall back to
    # the CPU instead of crashing on the hard-coded device name.
    mps_backend = getattr(torch.backends, "mps", None)
    use_mps = mps_backend is not None and mps_backend.is_available()
    device = "mps" if use_mps else "cpu"

    # as_tensor avoids the copy (and the warning) torch.tensor() produces
    # when the input already is a tensor; float32 keeps integer or float64
    # inputs usable on every device.
    a = torch.as_tensor(a, dtype=torch.float32).to(device)
    b = torch.as_tensor(b, dtype=torch.float32).to(device)
    return torch.nn.functional.cosine_similarity(a, b, dim=0)

In [318]:
class Person:
    """A participant: profile text, sex/attraction codes and text embeddings."""

    def __init__(self,
                 id: str,
                 name: str,
                 sex: int,
                 desc: str,
                 want: str,
                 pref: list[int]
                 ):
        """Store the profile and embed its two free-text fields.

        Args:
            id: Identifier of the person's record.
            name: Display name.
            sex: Integer sex code; checked against other people's ``pref``.
            desc: Free-text description, embedded into ``embedding``.
            want: Free-text "want" field, embedded into ``want_embedding``.
            pref: Integer sex codes this person accepts in a match.
        """
        self.id = id
        self.name = name
        self.desc = desc
        self.want = want
        self.pref = pref
        self.sex = sex
        # Cache of (other_person, similarity) tuples, filled by
        # calc_similarity().
        self.similarities = []

        # Uses the module-level ``model``. encode() is given a one-element
        # batch, so the single result is taken back out with [0].
        self.embedding = model.encode([desc])[0]
        # NOTE(review): want_embedding is stored but not read by any method
        # of this class; similarity is computed from ``embedding`` only.
        self.want_embedding = model.encode([want])[0]

    def calc_similarity(self, other: 'Person'):
        """Return the similarity between this person and ``other``.

        The value is ``cos_sim`` of the two description embeddings. The
        first result for a given ``other`` is appended to
        ``self.similarities`` and reused on later calls.

        Args:
            other: The person to compare against.

        Returns:
            ``1.0`` when ``other`` is this very object (not cached),
            otherwise the cached or freshly computed ``cos_sim`` result.
        """
        # Person defines no __eq__, so identity is the comparison meant here.
        if self is other:
            return 1.0
        for cached_person, cached_sim in self.similarities:
            if cached_person is other:
                return cached_sim

        sim = cos_sim(self.embedding, other.embedding)
        self.similarities.append((other, sim))
        return sim

    def sex_match(self, other: 'Person') -> bool:
        """Return True when each person's sex is in the other's ``pref``."""
        return (self.sex in other.pref) and (other.sex in self.pref)

    def get_best_match(self):
        """Return the cached ``(person, similarity)`` tuple with the top score.

        Only entries already present in ``self.similarities`` are
        considered, and no sex-preference filtering is applied.

        Raises:
            ValueError: If ``self.similarities`` is empty.
        """
        return max(self.similarities, key=lambda entry: entry[1])

    def get_rankings(self) -> list['Person']:
        """Return cached people ordered by similarity, mutual matches only.

        Returns:
            The people from ``self.similarities`` sorted by ascending
            similarity, keeping only those for whom ``sex_match`` holds.
        """
        # NOTE(review): ascending order puts the most similar person LAST;
        # confirm that callers expect this rather than best-first.
        ordered = sorted(self.similarities, key=lambda entry: entry[1])

        # Drop everyone who is not a mutual sex match.
        return [person for person, _ in ordered if person.sex_match(self)]

In [319]:
def convert_to_sexint(l: list[str]):
    """Translate sex label strings into their integer codes.

    Args:
        l: Labels that are keys of ``SEX_MAPPING``.

    Returns:
        A new list with the integer code of each label, in input order.

    Raises:
        KeyError: If a label is not a key of ``SEX_MAPPING``.
    """
    return [SEX_MAPPING[label] for label in l]

In [3]:
from pyairtable import Api
from dotenv import load_dotenv
import os
load_dotenv()

# Airtable credentials are read from the environment (load_dotenv() is
# called just above); either value is None when its variable is unset.
api_key = os.environ.get('AIRTABLE_API_KEY')
base_id = os.environ.get('AIRTABLE_BASE')

# One API client, shared by every table handle.
api = Api(api_key)

# Handles for the 'descs' and 'people' tables of the configured base.
descrip_table = api.table(base_id, 'descs')
people_table = api.table(base_id, 'people')

In [321]:
from tqdm import tqdm

# Fetch every description record, then build one Person per record from the
# linked row in the people table.
records = descrip_table.all()

people = []
for record in tqdm(records):
    fields = record['fields']

    # The first linked people-record is looked up individually; its record
    # id is copied into the field dict so it travels with the other values.
    linked = people_table.get(fields['people'][0])
    person = linked['fields']
    person['id'] = linked['id']

    # Anyone whose record carries an "undesired" field is left out.
    if "undesired" in person:
        print(f"skipping {person['name']}")
        continue

    # Only the first converted entry of 'sex' is used, while 'attraction'
    # is converted as a whole list (presumably both hold label strings).
    new_person = Person(
        person['id'],
        person['name'],
        convert_to_sexint(person['sex'])[0],
        fields['desc'],
        fields['want'],
        convert_to_sexint(person['attraction']),
    )
    people.append(new_person)

# Warm each person's similarity cache against everyone with a different id.
for person in tqdm(people):
    for other in people:
        if person.id == other.id:
            continue
        person.calc_similarity(other)

 34%|███▍      | 27/80 [00:19<00:24,  2.15it/s]

skipping Vansh Gehlot


 94%|█████████▍| 75/80 [00:49<00:02,  2.40it/s]

skipping Arav Bhattacharya


100%|██████████| 80/80 [00:52<00:00,  1.53it/s]
100%|██████████| 78/78 [00:11<00:00,  6.55it/s]


In [322]:
def get_all_possible(people: list[Person]):
    """List every ordered pair of distinct, mutually compatible people.

    Args:
        people: The candidates to pair up.

    Returns:
        A list of ``(person, other, similarity)`` tuples, where the two
        ids differ, ``person.sex_match(other)`` holds and ``similarity`` is
        ``person.calc_similarity(other)`` converted to ``float``. The pairs
        are ordered, so a compatible pair shows up once in each direction.
    """
    return [
        (person, other, float(person.calc_similarity(other)))
        for person in people
        for other in people
        if person.id != other.id and person.sex_match(other)
    ]

In [327]:
# Build the pool of candidate (person, other, score) tuples for everyone
# that was loaded.
all_possible_matches = get_all_possible(people)

In [None]:
# Greedy pairing: repeatedly take the highest-scoring remaining candidate
# and discard every other candidate involving either of its two people.
matches = []
unmatched = list(people)

# Sort once, ascending by score, so pop() always yields the best remaining
# candidate. The filtering below keeps the relative order of the survivors,
# so the list stays sorted and needs no re-sorting per iteration.
all_possible_matches = sorted(all_possible_matches, key=lambda x: x[2])

while all_possible_matches:
    best_match = all_possible_matches.pop()
    # The two people of the chosen pair (the third tuple item is the score).
    paired = best_match[:2]

    unmatched = [x for x in unmatched if x not in paired]

    print(f"{best_match[0].name} matched with {best_match[1].name} with a score of {best_match[2]}")
    all_possible_matches = [x for x in all_possible_matches if not (x[0] in paired or x[1] in paired)]

    print(f"Removed all matches with {best_match[0].name} and {best_match[1].name}")
    matches.append(best_match)

In [None]:
# Write every match to the 'matches' table, then report who was left over.
# The table handle does not change between iterations, so it is created
# once up front instead of once per match.
match_table = api.table(base_id, 'matches')

for match in matches:
    print(f"{match[0].name} matched with {match[1].name}")
    match_table.create({
        # p1/p2 are written as one-element lists of person record ids.
        "p1": [match[0].id],
        "p2": [match[1].id],
        # NOTE(review): this is the raw similarity score, not a value
        # scaled to 0-100 -- confirm what the "percent" column expects.
        "percent": match[2]
    })

print("\n\n======================")
print("Unmatched:")    

for person in unmatched:
    print(f"{person.name} was unmatched")

In [4]:
from openai import OpenAI
import json
from tqdm import tqdm


# OpenAI client; the key is read from the environment and is None when the
# variable is unset.
openai_api_key = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key=openai_api_key)

In [6]:
# Generate an intro email body for every stored match and save it back to
# the match record.
# NOTE: this rebinds `matches` from the in-memory result tuples to the
# record dicts returned by Airtable.
matches_table = api.table(base_id, 'matches')
matches = matches_table.all()

for match in tqdm(matches):
    fields = match['fields']
    # p1/p2 hold linked record ids; the first id of each is resolved to
    # that person's field dict.
    p1 = people_table.get(fields['p1'][0])['fields']
    p2 = people_table.get(fields['p2'][0])['fields']

    # NOTE(review): the complete field dict of both people is serialized
    # into the prompt -- confirm that every field may be sent to the API.
    p1 = json.dumps(p1)
    p2 = json.dumps(p2)

    print("starting")
    response = client.chat.completions.create(
            model="gpt-4-0125-preview",  # Adjust the model if necessary
            messages=[
                {"role": "system", "content": "You write email bodies and are charismatic and have a fun sense of humor."},
                {"role": "user", "content": f"""
                Generate an introductory email body (JUST the body) for a blind date between these two people. Include a brief summary of each person's profile. Provide a couple intro starter questions for both to answer (bulleted), 1 quirky one related to their profiles, 1 slightly deeper ones, and 1 really deep question. wnrs style
                
                use lots of exclamation marks! be slightly ironic, and use typed emojis :)
                only include the body of the email, no subject line, greeting, or closing.

                use the style of a we're not really strangers type email, all lowercase and vibey
                ===
                {p1}
                ===
                {p2}
                ===
                """}
            ],
            temperature=0.3,
            max_tokens=500,  # Adjust based on how long you expect the summary to be
        )

    print("done")
    choice = response.choices[0]
    # A finish_reason of "length" means the completion stopped at
    # max_tokens, i.e. the email is cut off. Surface it instead of storing
    # a truncated body silently.
    if choice.finish_reason == "length":
        print(f"warning: email body for match {match['id']} was truncated at max_tokens")
    matches_table.update(match['id'], {
        "email_body": choice.message.content
    })
    

  0%|          | 0/39 [00:00<?, ?it/s]

starting


100%|██████████| 39/39 [00:15<00:00,  2.57it/s]

done
hey there, cosmic travelers! 🌌✨

so, the universe in its infinite wisdom (or maybe just a quirky algorithm) has decided that you two might just be the binary stars of each other's galaxies. let's dive into the brief dossiers of our interstellar candidates:

- **divik chotani**: a jedi in his own right, divik's life orbits around star wars. whether it's watching the saga for the umpteenth time or engaging in philosophical debates about the force, he's your go-to galaxy guy. his friends value his smarts and encyclopedic knowledge of all things star wars. he's on a quest to find an online princess leia to his han solo.

- **swasinya jayaraman**: a renaissance woman for the modern age, swasinya's talents span from engineering marvels to creating musical symphonies. she seeks adventures that are as fun and challenging as defeating the empire, and values humor, honesty, and growth in her companions. whether it's hiking up the mountains of endor or crafting a masterpiece, she's all about


