# Grant-Writer Recommender

This notebook contains the implementation of a recommender that recommends grant-writers to a Recipient 

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import pickle

In [2]:
# input Recipient amount and sector(s)
inp = np.empty([18])
fields = list(pd.read_csv("kiva_loans.csv//recep_data.csv").columns)
for i in fields:
    inp[fields.index(i)] = input(i+': ')
print(inp)

loan_amount: 10000
Services: 1
Clothing: 0
Transportation: 0
Personal Use: 0
Retail: 0
Health: 1
Food: 0
Education: 1
Manufacturing: 0
Wholesale: 0
Construction: 0
Arts: 0
Agriculture: 0
Entertainment: 0
Housing: 0
male: 0
female: 1
[1.e+04 1.e+00 0.e+00 0.e+00 0.e+00 0.e+00 1.e+00 0.e+00 1.e+00 0.e+00
 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 1.e+00]


In [3]:
# # normalize grant amount
# df_norm = pd.read_csv("kiva_loans.csv//kiva_loans.csv")
# df_norm = df_norm[df_norm['country_code'] == 'IN']
# inp[0] = (inp[0] - df_norm['loan_amount'].min())/(df_norm['loan_amount'].max() - df_norm['loan_amount'].min())
# print(inp[0])

In [4]:
# load classifier and classify input
clf = pickle.load(open("kiva_loans.csv//clf_loaner.p", "rb"))
cluster = clf.predict(inp.reshape(1, -1))
print(cluster)

[1]


In [5]:
# load the cluster distances table
cluster_dist_tab = np.load(open("kiva_loans.csv//cluster_dist_tab.npy", "rb"))

In [6]:
# calculate cluster nearest to input's cluster
print(cluster_dist_tab[cluster, :])
nearest_cluster = np.where(cluster_dist_tab[cluster, :] == np.amin(cluster_dist_tab[cluster, :]))
print(nearest_cluster[0])

[[2369.6235084   390.98169251 2086.03300797 2159.39721747 1435.97323913
   571.30195238 2248.28491855 1840.70458451  966.1981303    37.26989533]]
[0]


In [7]:
# read grant-writer data
df = pd.read_csv("kiva_loans.csv//lender_data.csv")
labels = pd.read_csv("kiva_loans.csv//lender_labels.csv")
df['labels'] = labels

In [8]:
# calculate distances from input of all points within nearest cluster  
df = df[df['labels'] == nearest_cluster[0][0]].loc[:, df.columns != 'labels']
df['dists'] = np.linalg.norm(df[df.columns] - inp)

In [9]:
# sort points in ascending order of distance from input
df.sort_values(by=['dists']).loc[:, df.columns != 'dists'].iloc[:20]

Unnamed: 0,loan_amount,Services,Clothing,Transportation,Personal Use,Retail,Health,Food,Education,Manufacturing,Wholesale,Construction,Arts,Agriculture,Entertainment,Housing,male,female
5,200.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
673,225.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
670,200.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
664,150.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
659,225.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
655,175.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
654,175.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
651,175.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
650,250.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1
641,250.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
