# Recipient Recommender

This notebook contains the implementation of a recommender that recommends recipients to a Grant-Writer.

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import pickle

In [20]:
# input Grant Writer preferences
inp = np.empty([18])
fields = list(pd.read_csv("kiva_loans.csv//recep_data.csv").columns)
for i in fields:
    inp[fields.index(i)] = input(i+': ')
print(inp)

loan_amount: 3500
Services: 0
Clothing: 0
Transportation: 0
Personal Use: 0
Retail: 0
Health: 1
Food: 0
Education: 1
Manufacturing: 0
Wholesale: 0
Construction: 0
Arts: 0
Agriculture: 0
Entertainment: 0
Housing: 0
male: 0
female: 1
[3.5e+03 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 1.0e+00 0.0e+00 1.0e+00
 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 1.0e+00]


In [21]:
# # normalize grant amount
# df_norm = pd.read_csv("kiva_loans.csv//kiva_loans.csv")
# df_norm = df_norm[df_norm['country_code'] == 'IN']
# inp[0] = (inp[0] - df_norm['loan_amount'].min())/(df_norm['loan_amount'].max() - df_norm['loan_amount'].min())
# print(inp[0])

In [22]:
len(fields)

18

In [23]:
# load classifier and classify input
clf = pickle.load(open("kiva_loans.csv//clf_lender.p", "rb"))
cluster = clf.predict(inp.reshape(1, -1))
print(cluster)

[8]


In [24]:
# load the cluster distances table
cluster_dist_tab = np.load(open("kiva_loans.csv//cluster_dist_tab.npy", "rb"))

In [25]:
# calculate cluster nearest to input's cluster
print(cluster_dist_tab[:, cluster])
nearest_cluster = np.where(cluster_dist_tab[:, cluster] == np.amin(cluster_dist_tab[:, cluster]))
print(nearest_cluster[0][0])

[[ 1361.97595636]
 [  966.1981303 ]
 [  466.83536336]
 [ 2787.33769977]
 [  917.63723949]
 [ 1638.23350277]
 [11317.26199272]
 [   12.67661477]
 [ 1170.0005905 ]
 [  523.51196292]]
7


In [26]:
# read recipient data
df = pd.read_csv("kiva_loans.csv//recep_data.csv")
labels = pd.read_csv("kiva_loans.csv//recep_labels.csv")
df['labels'] = labels

In [27]:
# calculate distances from input of all points within nearest cluster  
df = df[df['labels'] == nearest_cluster[0][0]].loc[:, df.columns != 'labels']
df['dists'] = np.linalg.norm(df[df.columns] - inp)

In [28]:
# sort points in ascending order of distance from input
df.sort_values(by=['dists']).loc[:, df.columns != 'dists'].iloc[:20]

Unnamed: 0,loan_amount,Services,Clothing,Transportation,Personal Use,Retail,Health,Food,Education,Manufacturing,Wholesale,Construction,Arts,Agriculture,Entertainment,Housing,male,female
52,1775.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
5017,1825.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4974,1525.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1
4968,1525.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
4966,1525.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4919,1825.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
4915,1525.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4880,1850.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
5031,1550.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4876,1525.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1
