# The Scoring Logic

# Setup

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import os
import sys
import time
from dotenv import load_dotenv
import warnings
warnings.filterwarnings("ignore")
from sentence_transformers import SentenceTransformer

#put the parent of the current working directory on the import path (once) so that
#the project-level utils module can be imported from inside this notebook
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.insert(0, project_root)
#project helpers used below to fetch supabase tables and assemble the dataframes
from utils import get_table_from_supabase, build_relationship_cols, build_financial_history

#get keys from env
#os.getenv returns None when a variable is missing, so url/key can be None here
load_dotenv()
url = os.getenv("SUPABASE_URL")
key = os.getenv("SUPABASE_KEY")

----

# Retrieving Data from Supabase and Building Dataframes

As with my EDA, I will connect to Supabase and retrieve all records. I will create one dataframe for funder information and another for grants and recipients information. This will allow me to easily access funders' giving history, plus the classifications for both funders and recipients, to be used as part of the calculation of the alignment score.

In [3]:
#names of the supabase tables that are loaded in full
tables = [
    "funders", "causes", "areas", "beneficiaries", "grants",
    "funder_causes", "funder_areas", "funder_beneficiaries", "funder_grants",
    "financials", "funder_financials",
    "embedding_pairs", "evaluation_pairs", "logic_pairs",
    "area_hierarchy",
]

#expose every table as a module-level dataframe named after the table itself
globals().update({
    table_name: get_table_from_supabase(url, key, table_name)
    for table_name in tables
})

#get recipients with filter
recipients = get_table_from_supabase(url, key, "recipients", batch_size=50, filter_recipients=True)
all_recipient_ids = set(recipients["recipient_id"].unique())

#get recipient join tables, keeping only rows that point at a retrieved recipient
recipient_join_tables = ["recipient_grants", "recipient_areas", "recipient_beneficiaries", "recipient_causes"]
for table in recipient_join_tables:
    df = get_table_from_supabase(url, key, table)
    is_known_recipient = df["recipient_id"].isin(all_recipient_ids)
    globals()[table] = df[is_known_recipient]

## The Funders Dataframe

### Main Table

In [4]:
#work on a copy of the funders table
funders_df = funders.copy()

#table relationships for funders, one spec per classification:
#(join table, lookup table, key column, name column, output column)
funder_rels = [
    {
        "join_table": join_table,
        "lookup_table": lookup_table,
        "key": key_col,
        "value_col": value_col,
        "result_col": result_col,
    }
    for join_table, lookup_table, key_col, value_col, result_col in [
        (funder_causes, causes, "cause_id", "cause_name", "causes"),
        (funder_areas, areas, "area_id", "area_name", "areas"),
        (funder_beneficiaries, beneficiaries, "ben_id", "ben_name", "beneficiaries"),
    ]
]

#add relationship columns, then round to 2 decimal places
funders_df = build_relationship_cols(funders_df, "registered_num", funder_rels).round(2)

#display floats with 2 decimal places as well
pd.set_option("display.float_format", "{:.2f}".format)

### Financial History Table

In [5]:
#build_financial_history is a project helper from utils; it is given the funders frame,
#its key column, the funder_financials join table and the financials table
#NOTE(review): presumably this adds the income_history / expenditure_history columns shown
#in the funder_df output further down - confirm against utils
funders_df = build_financial_history(funders_df, "registered_num", funder_financials, financials)

### The List Entries

In [6]:
#get list tables and pair every funder link with its list entry
list_entries = get_table_from_supabase(url, key, "list_entries")
funder_list = get_table_from_supabase(url, key, "funder_list")
list_with_info = pd.merge(funder_list, list_entries, on="list_id")

#collect the list_info values into one python list per funder
list_grouped = (
    list_with_info
    .groupby("registered_num")["list_info"]
    .apply(list)
    .reset_index()
    .rename(columns={"list_info": "list_entries"})
)

#left join keeps every funder; funders without entries get an empty list instead of nan
funders_df = funders_df.merge(list_grouped, on="registered_num", how="left")
funders_df["list_entries"] = funders_df["list_entries"].map(
    lambda entries: entries if isinstance(entries, list) else []
)

In [8]:
#get checkpoint folder
#relative path, so it resolves against the notebook's working directory
checkpoint_folder = Path("./10.1_checkpoints/")

#create checkpoint - save df to pickle
#(disabled - uncomment the two lines below to regenerate the checkpoint)
# funders_df.to_pickle(checkpoint_folder / "funders_df.pkl")
# print("Saved funders_df to checkpoint")

Saved funders_df to checkpoint


## The Grants Dataframe

### Main Table

In [9]:
#recipient columns carried onto every grant row
recipient_cols = [
    "recipient_id", "recipient_name", "recipient_activities", "recipient_objectives",
    "recipient_name_em", "recipient_activities_em", "recipient_objectives_em",
    "recipient_concat_em", "is_recipient",
]

grants_df = (
    grants
    #add funder info
    .merge(funder_grants, on="grant_id")
    .merge(funders[["registered_num", "name"]], on="registered_num")
    .rename(columns={"name": "funder_name", "registered_num": "funder_num"})
    #add recipient info - left join keeps grants whose recipient details are missing
    .merge(recipient_grants, on="grant_id")
    .merge(recipients[recipient_cols], on="recipient_id", how="left")
)

#relationships for recipients, one spec per classification:
#(join table, lookup table, key column, name column, output column)
recipient_rels = [
    {
        "join_table": join_table,
        "lookup_table": lookup_table,
        "key": key_col,
        "value_col": value_col,
        "result_col": result_col,
    }
    for join_table, lookup_table, key_col, value_col, result_col in [
        (recipient_areas, areas, "area_id", "area_name", "recipient_areas"),
        (recipient_causes, causes, "cause_id", "cause_name", "recipient_causes"),
        (recipient_beneficiaries, beneficiaries, "ben_id", "ben_name", "recipient_beneficiaries"),
    ]
]

#add relationship columns
grants_df = build_relationship_cols(grants_df, "recipient_id", recipient_rels)

#add source of grant - ids starting with "2" are labelled Accounts, everything else 360Giving
grants_df["source"] = grants_df["grant_id"].map(
    lambda grant_id: "Accounts" if str(grant_id).startswith("2") else "360Giving"
)

#round to 2 decimal places
grants_df = grants_df.round(2)

In [11]:
#create checkpoint - save df to pickle
#(disabled - uncomment the two lines below to regenerate the checkpoint)
# grants_df.to_pickle(checkpoint_folder / "grants_df.pkl")
# print("Saved grants_df to checkpoint")

Saved grants_df to checkpoint


## The Pairs Dataframe

In [12]:
#work on a copy of the logic_pairs table fetched from supabase
pairs_df = logic_pairs.copy()

In [13]:
#one row of descriptive/classification data per recipient, taken from the grants frame
recipient_info_cols = [
    "recipient_id", "recipient_name", "recipient_activities", "recipient_objectives",
    "recipient_areas", "recipient_causes", "recipient_beneficiaries",
]
recipient_info = grants_df[recipient_info_cols].drop_duplicates(subset=["recipient_id"])

pairs_enriched = (
    pairs_df
    #merge to enrich with funder data; clashing funder columns get the _funder suffix
    .merge(
        funders_df,
        left_on="funder_registered_num",
        right_on="registered_num",
        how="left",
        suffixes=("", "_funder"),
    )
    #drop duplicate col (same value as funder_registered_num)
    .drop(columns="registered_num")
    #merge to enrich with recipient data
    .merge(recipient_info, on="recipient_id", how="left")
)

pairs_df = pairs_enriched.copy()

In [14]:
#create checkpoint - save df to pickle
#(disabled - uncomment the two lines below to regenerate the checkpoint)
# pairs_df.to_pickle(checkpoint_folder / "pairs_df.pkl")
# print("Saved pairs_df to checkpoint")

Saved pairs_df to checkpoint


---

# Retrieving Data from Checkpoints

In [2]:
#get checkpoint folder
checkpoint_folder = Path("./10.1_checkpoints/")

#get checkpoint - unpickle the three dataframes in a single pass
funders_df, grants_df, pairs_df = (
    pd.read_pickle(checkpoint_folder / pickle_name)
    for pickle_name in ("funders_df.pkl", "grants_df.pkl", "pairs_df.pkl")
)

----

# Preparation of User and Funder Data

## Users

I will use the data for the recipient at index 0 in `pairs_df` as a proxy for a real user's input, to simulate the functionality of the final artefact as I build the logic. 

First, the user will input information about their charity (the applicant), then embeddings will be created for the inputted text data.

In [3]:
#simulate user's input
user_input_cols = [
    "recipient_id", "recipient_name", "recipient_activities", "recipient_objectives",
    "recipient_areas", "recipient_causes", "recipient_beneficiaries",
]

#take the first pair's recipient as the stand-in applicant
#.copy() makes user_df an independent frame, so the columns added to it below are not
#written to a chained-indexing slice of pairs_df (matches funder_df / funder_grants_df)
user_df = pairs_df.iloc[[0]][user_input_cols].copy()

#registered number of the funder the simulated user is applying to
user_df["funder_num"] = "1124856"

In [10]:
#save registered numbers - user_df holds a single row, so read the first value of each column
user_num = user_df["recipient_id"].iat[0]
funder_num = user_df["funder_num"].iat[0]

In [4]:
#embed user's input
model = SentenceTransformer("all-roberta-large-v1")
user_cols = ["recipient_name", "recipient_activities", "recipient_objectives"]

#one embedding column per text field; nans are encoded as empty strings
for col in user_cols:
    texts = user_df[col].fillna("").tolist()
    embeddings = model.encode(texts)
    user_df[f"{col}_em"] = list(embeddings)

#join the text fields with single spaces and make lowercase
#kept in a local series so no temporary column has to be added to and dropped from user_df
concat_text = user_df[user_cols].fillna("").agg(" ".join, axis=1).str.lower()

#create embeddings for the concatenated text
texts = concat_text.tolist()
embeddings = model.encode(texts)
user_df["concat_em"] = list(embeddings)

In [15]:
#show every column instead of truncating wide frames
#set_option changes the pandas display setting globally, so it also applies to later cells
pd.set_option("display.max_columns", None)
user_df.head()

Unnamed: 0,recipient_id,recipient_name,recipient_activities,recipient_objectives,recipient_areas,recipient_causes,recipient_beneficiaries,funder_num,recipient_name_em,recipient_activities_em,recipient_objectives_em,concat_em
0,328729,ASYLUM AID,THE PROVISION OF LEGAL ADVICE AND REPRESENTATI...,2. OBJECTS2.1 THE CHARITY IS ESTABLISHED FOR T...,[Throughout England And Wales],"[Education/training, The Prevention Or Relief ...",[People Of A Particular Ethnic Or Racial Origi...,1124856,"[-0.008021089, 0.01503942, -0.022991765, -0.02...","[-0.022860372, 0.029296849, -0.017596042, -0.0...","[0.016410686, 0.00963383, -0.04015383, -0.0136...","[-0.0008344314, -0.01810797, -0.0055338936, -0..."


## Funders

I will next build a dataframe for the funder selected by the user, and a separate dataframe to store details of previous grants given by, and recipients of, this funder.

In [12]:
#get funder data from number inputted by user
funder_df = funders_df[funders_df["registered_num"] == funder_num].copy()

#fail early with a clear message - the cells below read funder_df with .iloc[0],
#which would otherwise raise an unexplained IndexError for an unknown funder number
if funder_df.empty:
    raise ValueError(f"No funder found with registered number {funder_num!r}")

funder_df.head()

Unnamed: 0,registered_num,name,website,activities,objectives,income_latest,expenditure_latest,objectives_activities,achievements_performance,grant_policy,is_potential_sbf,is_on_list,is_nua,name_em,activities_em,objectives_em,objectives_activities_em,achievements_performance_em,grant_policy_em,concat_em,extracted_class,causes,areas,beneficiaries,income_history,expenditure_history,list_entries
257,1124856,ROSA FUND,https://www.rosauk.org,ROSA IS THE FIRST UK-WIDE FUND FOR WOMEN'S INI...,THE OBJECTS OF THE CHARITY ARE TO FURTHER ANY ...,1407453.0,1372296.0,,,,False,False,False,"[0.036068745,0.02428467,-0.026885081,-0.001224...","[0.005698055,0.011768709,-0.015513399,0.004063...","[-0.023482107,-0.02466424,0.0036000705,-0.0259...","[-0.019817753,-0.00571729,0.022262126,-0.03666...","[-0.019817753,-0.00571729,0.022262126,-0.03666...","[-0.019817753,-0.00571729,0.022262126,-0.03666...","[0.00031545621,0.014991585,-0.003386881,0.0022...","[""Uk"",""Wales"",""Girls"",""Women"",""Charity and VCS...",[General Charitable Purposes],[Throughout England And Wales],"[Other Charities Or Voluntary Bodies, Other De...","{2020: 155612.0, 2021: 4478996.0, 2022: 237267...","{2020: 974678.0, 2021: 2118687.0, 2022: 266530...",[]


In [16]:
#get grants for selected funder
from_selected_funder = grants_df["funder_num"].eq(funder_num)
funder_grants_df = grants_df.loc[from_selected_funder].copy()
funder_grants_df.head(1)

Unnamed: 0,grant_title,grant_desc,amount,year,grant_id,source,grant_title_em,grant_desc_em,grant_concat_em,funder_num,funder_grants_id,funder_name,recipient_id,recipient_grants_id,recipient_name,recipient_activities,recipient_objectives,recipient_name_em,recipient_activities_em,recipient_objectives_em,recipient_concat_em,is_recipient,recipient_areas,recipient_causes,recipient_beneficiaries
30638,C19 R1-HGWA,FUNDING OVER 12 MONTHS TO RESPOND TO THE PRACT...,20000.0,2020,360G-RosaUK-1820-01-169601819,360Giving,"[-0.021811347,-0.014835423,-0.016724072,-0.012...","[-0.03757492,-0.0068165953,-0.025009565,0.0187...","[-0.048174538,-0.02058363,0.010783903,-0.01708...",1124856,42877,ROSA FUND,invalid_100,41687,PROJECT ONE,,,"[0.008081452,0.02795066,0.0020238636,0.0029561...","[-0.019817753,-0.00571729,0.022262126,-0.03666...","[-0.019817753,-0.00571729,0.022262126,-0.03666...","[-0.019817753,-0.00571729,0.022262126,-0.03666...",True,[],[],[]


In [7]:
#columns kept for each grant record
grant_record_cols = [
    "grant_title", "grant_desc", "amount", "year",
    "grant_title_em", "grant_desc_em",
    "recipient_id", "recipient_name", "recipient_activities", "recipient_objectives",
    "recipient_name_em", "recipient_concat_em",
    "recipient_areas", "recipient_beneficiaries", "recipient_causes",
]

#make list of grant dictionaries - one dict per grant row
funder_grants_list = funder_grants_df.loc[:, grant_record_cols].to_dict(orient="records")

----

# Binary Criteria

## Step 1 - Single-Beneficiary Funders

In [17]:
#flag read from the selected funder's single row: potential single-beneficiary funder
is_sbf = funder_df["is_potential_sbf"].iat[0]

## Step 2 - No Unsolicited Applications

In [18]:
#flag read from the selected funder's single row: no unsolicited applications
is_nua = funder_df["is_nua"].iat[0]

## Step 3 - The List

In [22]:
#flag read from the selected funder's single row: funder appears on the list
is_on_list = funder_df["is_on_list"].iat[0]

## Step 4 - Existing Relationship

In [24]:
#grants previously made by the selected funder to the user's charity
relationship = grants_df[
    (grants_df["funder_num"] == funder_num) &
    (grants_df["recipient_id"] == user_num)
]

#always assign the flag, so it is False (not undefined or stale from an earlier run)
#when the funder has never funded this recipient
existing_relationship = len(relationship) > 0

## Step 5 - Time Lapsed Since Last Grant

In [12]:
def check_time_lapsed(grant_years=None, current_year=None):
    """
    Return the number of calendar years between the most recent grant and now.

    Args:
        grant_years: optional iterable of grant years (ints, floats or
            integer-like strings), e.g. the "year" column of the grants made
            by a funder to a recipient. Missing values (None/NaN) are skipped.
        current_year: optional year to measure against; defaults to the
            current local calendar year.

    Returns:
        current_year minus the latest grant year as an int, or None when no
        usable grant year is supplied (including the no-argument call).
    """
    if grant_years is None:
        return None

    #get last year of grant, ignoring missing values
    valid_years = [int(year) for year in grant_years if not pd.isna(year)]
    if not valid_years:
        return None
    last_grant_year = max(valid_years)

    #get current year
    if current_year is None:
        current_year = time.localtime().tm_year

    #calculate difference
    return int(current_year) - last_grant_year

# Classification Criteria

## Step 6 - Areas

## Step 7 - Beneficiaries

## Step 8 - Causes