# The Scoring Logic

# Setup

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import os
import sys
import time
from dotenv import load_dotenv
import warnings
warnings.filterwarnings("ignore")
from sentence_transformers import SentenceTransformer

#make the parent folder importable so that the local utils module can be found
project_root = os.path.abspath(os.pardir)
if project_root not in sys.path:
    sys.path.insert(0, project_root)
from utils import build_financial_history, build_relationship_cols, get_table_from_supabase

#load the .env file, then read the supabase url and key from the environment
load_dotenv()
url = os.environ.get("SUPABASE_URL")
key = os.environ.get("SUPABASE_KEY")

----

# Retrieving Data from Supabase and Building Dataframes

As with my EDA, I will connect to Supabase and retrieve all records. I will then create one dataframe for funder information and another for grants and recipients information. This will allow me to easily access funders' giving history, plus the classifications for both funders and recipients, to be used as part of the calculation of the alignment score.

In [3]:
#names of the tables that are loaded in full
tables = [
    "funders", "causes", "areas", "beneficiaries", "grants",
    "funder_causes", "funder_areas", "funder_beneficiaries", "funder_grants",
    "financials", "funder_financials",
    "embedding_pairs", "evaluation_pairs", "logic_pairs",
    "area_hierarchy",
]

#fetch every table and expose it as a notebook-level dataframe named after the table
globals().update({table: get_table_from_supabase(url, key, table) for table in tables})

#recipients are fetched in batches of 50 with filter_recipients switched on
recipients = get_table_from_supabase(url, key, "recipients", batch_size=50, filter_recipients=True)
all_recipient_ids = set(recipients["recipient_id"].unique())

#fetch the recipient join tables and keep only the rows that belong to a retrieved recipient
recipient_join_tables = ["recipient_grants", "recipient_areas", "recipient_beneficiaries", "recipient_causes"]
for table in recipient_join_tables:
    df = get_table_from_supabase(url, key, table)
    globals()[table] = df.loc[df["recipient_id"].isin(all_recipient_ids)]

## The Funders Dataframe

### Main Table

In [4]:
#field names used in every relationship definition
rel_fields = ("join_table", "lookup_table", "key", "value_col", "result_col")

#define table relationships for funders - one tuple per classification, in rel_fields order
funder_rels = [
    dict(zip(rel_fields, rel))
    for rel in (
        (funder_causes, causes, "cause_id", "cause_name", "causes"),
        (funder_areas, areas, "area_id", "area_name", "areas"),
        (funder_beneficiaries, beneficiaries, "ben_id", "ben_name", "beneficiaries"),
    )
]

#copy the funders table, add the relationship columns and round to 2 decimal places
funders_df = build_relationship_cols(funders.copy(), "registered_num", funder_rels).round(2)

#show floats with 2 decimal places
pd.set_option("display.float_format", "{:.2f}".format)

### Financial History Table

In [5]:
#pass funders_df through build_financial_history together with the funder_financials and financials tables
#NOTE(review): presumably this adds the income_history and expenditure_history columns that are read later - confirm in utils
funders_df = build_financial_history(funders_df, "registered_num", funder_financials, financials)

### The List Entries

In [6]:
#fetch the list entries and the funder-to-list links, then join them on list_id
list_entries = get_table_from_supabase(url, key, "list_entries")
funder_list = get_table_from_supabase(url, key, "funder_list")
list_with_info = funder_list.merge(list_entries, on="list_id")

#collect the list_info values of each funder into one python list per registered_num
list_grouped = (
    list_with_info
    .groupby("registered_num")["list_info"]
    .apply(list)
    .rename("list_entries")
    .reset_index()
)

#attach to the funders; funders without any entry get an empty list instead of nan
funders_df = funders_df.merge(list_grouped, on="registered_num", how="left")
funders_df["list_entries"] = funders_df["list_entries"].map(
    lambda entries: entries if isinstance(entries, list) else []
)

In [8]:
#get checkpoint folder
checkpoint_folder = Path("./10.1_checkpoints/")

#create checkpoint - save df to pickle
#the save only runs when no checkpoint exists yet, so a full re-run never overwrites it
#(delete the file to refresh the checkpoint)
funders_checkpoint = checkpoint_folder / "funders_df.pkl"
if not funders_checkpoint.exists():
    checkpoint_folder.mkdir(parents=True, exist_ok=True)
    funders_df.to_pickle(funders_checkpoint)
    print("Saved funders_df to checkpoint")

Saved funders_df to checkpoint


## The Grants Dataframe

### Main Table

In [9]:
#recipient columns that are carried over onto each grant
recipient_cols = [
    "recipient_id", "recipient_name", "recipient_activities", "recipient_objectives",
    "recipient_name_em", "recipient_activities_em", "recipient_objectives_em", "recipient_concat_em", "is_recipient",
]

#copy the grants table, then add funder info followed by recipient info
grants_df = (
    grants.copy()
    .merge(funder_grants, on="grant_id")
    .merge(funders[["registered_num", "name"]], on="registered_num")
    .rename(columns={"name": "funder_name", "registered_num": "funder_num"})
    .merge(recipient_grants, on="grant_id")
    .merge(recipients[recipient_cols], on="recipient_id", how="left")
)

#field names used in every relationship definition
rel_fields = ("join_table", "lookup_table", "key", "value_col", "result_col")

#define relationships for recipients - one tuple per classification, in rel_fields order
recipient_rels = [
    dict(zip(rel_fields, rel))
    for rel in (
        (recipient_areas, areas, "area_id", "area_name", "recipient_areas"),
        (recipient_causes, causes, "cause_id", "cause_name", "recipient_causes"),
        (recipient_beneficiaries, beneficiaries, "ben_id", "ben_name", "recipient_beneficiaries"),
    )
]

#add relationship columns
grants_df = build_relationship_cols(grants_df, "recipient_id", recipient_rels)

#add source of grant - ids whose text starts with "2" are labelled Accounts, all others 360Giving
grants_df["source"] = grants_df["grant_id"].map(
    lambda grant_id: "Accounts" if str(grant_id).startswith("2") else "360Giving"
)

#round to 2 decimal places
grants_df = grants_df.round(2)

In [11]:
#create checkpoint - save df to pickle
#the save only runs when no checkpoint exists yet, so a full re-run never overwrites it
#(delete the file to refresh the checkpoint)
grants_checkpoint = checkpoint_folder / "grants_df.pkl"
if not grants_checkpoint.exists():
    checkpoint_folder.mkdir(parents=True, exist_ok=True)
    grants_df.to_pickle(grants_checkpoint)
    print("Saved grants_df to checkpoint")

Saved grants_df to checkpoint


## The Pairs Dataframe

In [12]:
#start the pairs dataframe from a copy of the logic_pairs table, so logic_pairs itself is not modified
pairs_df = logic_pairs.copy()

In [13]:
#recipient columns used for enrichment - one row per recipient_id
recipient_info = grants_df[[
    "recipient_id", "recipient_name", "recipient_activities", "recipient_objectives",
    "recipient_areas", "recipient_causes", "recipient_beneficiaries",
]].drop_duplicates(subset=["recipient_id"])

#enrich with funder data, drop the duplicate key column, then enrich with recipient data
pairs_enriched = (
    pairs_df
    .merge(
        funders_df,
        left_on="funder_registered_num",
        right_on="registered_num",
        how="left",
        suffixes=("", "_funder"),
    )
    .drop(columns="registered_num")
    .merge(recipient_info, on="recipient_id", how="left")
)

pairs_df = pairs_enriched.copy()

In [14]:
#create checkpoint - save df to pickle
#the save only runs when no checkpoint exists yet, so a full re-run never overwrites it
#(delete the file to refresh the checkpoint)
pairs_checkpoint = checkpoint_folder / "pairs_df.pkl"
if not pairs_checkpoint.exists():
    checkpoint_folder.mkdir(parents=True, exist_ok=True)
    pairs_df.to_pickle(pairs_checkpoint)
    print("Saved pairs_df to checkpoint")

Saved pairs_df to checkpoint


---

# Retrieving Data from Checkpoints

In [2]:
#folder that holds the pickled dataframes
checkpoint_folder = Path("./10.1_checkpoints/")

#read the three checkpointed dataframes back in - only load pickles created by this notebook
funders_df, grants_df, pairs_df = (
    pd.read_pickle(checkpoint_folder / filename)
    for filename in ("funders_df.pkl", "grants_df.pkl", "pairs_df.pkl")
)

----

# Logic Development Steps

## Preparation of Variables

In [3]:
#simulate user's input - the recipient columns of the first pair stand in for the user
(
    test_user_num,
    test_user_name,
    test_user_activities,
    test_user_objectives,
    test_user_areas,
    test_user_beneficiaries,
    test_user_causes,
) = (
    pairs_df.loc[0, column]
    for column in (
        "recipient_id",
        "recipient_name",
        "recipient_activities",
        "recipient_objectives",
        "recipient_areas",
        "recipient_beneficiaries",
        "recipient_causes",
    )
)

In [4]:
#load the sentence embedding model
model = SentenceTransformer("all-roberta-large-v1")

#embed user's input - each text is encoded in its own call
test_user_name_em, test_user_activities_em, test_user_objectives_em = (
    model.encode(text)
    for text in (test_user_name, test_user_activities, test_user_objectives)
)

In [15]:
def _first_pair_value(column):
    """
    Returns the value stored in the given column for the pair at index 0 of pairs_df.
    """
    return pairs_df.loc[0, column]

#funder basic details
test_funder_num = _first_pair_value("funder_registered_num")
test_funder_name = _first_pair_value("name")
test_funder_income = _first_pair_value("income_history")
test_funder_expenditure = _first_pair_value("expenditure_history")
test_funder_areas = _first_pair_value("areas")
test_funder_beneficiaries = _first_pair_value("beneficiaries")
test_funder_causes = _first_pair_value("causes")
test_funder_extracted_class = _first_pair_value("extracted_class")

#funder booleans
test_funder_sbf = _first_pair_value("is_potential_sbf")
test_funder_nua = _first_pair_value("is_nua")
test_funder_list = _first_pair_value("is_on_list")

#funder embeddings
test_funder_name_em = _first_pair_value("name_em")
test_funder_concat_em = _first_pair_value("concat_em")

#embeddings of the single text columns - might not be needed
test_funder_activities_em = _first_pair_value("activities_em")
test_funder_objectives_em = _first_pair_value("objectives_em")
test_funder_objacts_em = _first_pair_value("objectives_activities_em")
test_funder_achievements_em = _first_pair_value("achievements_performance_em")
test_funder_policy_em = _first_pair_value("grant_policy_em")

## Step 1 - Does a Relationship Exist?

In [None]:
def check_for_relationship(funder_num, user_num, grants_df):
    """
    Reports whether grants_df holds at least one grant from the funder to the applicant.

    A grant counts when its funder_num equals funder_num and its recipient_id
    equals user_num. Returns True if any such row exists, otherwise False.
    """
    from_funder = grants_df["funder_num"].eq(funder_num)
    to_applicant = grants_df["recipient_id"].eq(user_num)

    #a single matching row is enough
    return bool((from_funder & to_applicant).any())

#run the check for the test funder and the simulated user
has_relationship = check_for_relationship(test_funder_num, test_user_num, grants_df)