# Grant-Writer Recommender

This notebook contains the implementation of a recommender that recommends grant-writers to a Recipient 

In [11]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import pickle

In [12]:
# input Recipient amount and sector(s)
inp = np.empty([18])
fields = list(pd.read_csv("kiva_loans.csv//recep_data.csv").columns)
for i in fields:
    inp[fields.index(i)] = input(i+': ')
print(inp)

loan_amount: 1500
Services: 0
Clothing: 0
Transportation: 0
Personal Use: 0
Retail: 0
Health: 1
Food: 0
Education: 1
Manufacturing: 0
Wholesale: 0
Construction: 0
Arts: 0
Agriculture: 0
Entertainment: 0
Housing: 0
male: 0
female: 1
[1.5e+03 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 1.0e+00 0.0e+00 1.0e+00
 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 0.0e+00 1.0e+00]


In [20]:
# # normalize grant amount
# df_norm = pd.read_csv("kiva_loans.csv//kiva_loans.csv")
# df_norm = df_norm[df_norm['country_code'] == 'IN']
# inp[0] = (inp[0] - df_norm['loan_amount'].min())/(df_norm['loan_amount'].max() - df_norm['loan_amount'].min())
# print(inp[0])

In [21]:
# load classifier and classify input
clf = pickle.load(open("kiva_loans.csv//clf_loaner.p", "rb"))
cluster = clf.predict(inp.reshape(1, -1))
print(cluster)

[7]


In [22]:
# load the cluster distances table
cluster_dist_tab = np.load(open("kiva_loans.csv//cluster_dist_tab.npy", "rb"))

In [23]:
# calculate cluster nearest to input's cluster
print(cluster_dist_tab[cluster, :])
nearest_cluster = np.where(cluster_dist_tab[cluster, :] == np.amin(cluster_dist_tab[cluster, :]))
print(nearest_cluster[0])

[[211.25965211 211.16031959 211.24341007 211.26244768 211.24493425
  211.24693735 211.22698601 211.17428542 211.22890068 211.23151986]]
[0]


In [24]:
# read grant-writer data
df = pd.read_csv("kiva_loans.csv//lender_data.csv")
labels = pd.read_csv("kiva_loans.csv//lender_labels.csv")
df['labels'] = labels

In [25]:
# calculate distances from input of all points within nearest cluster  
df = df[df['labels'] == nearest_cluster[0][0]].loc[:, df.columns != 'labels']
df['dists'] = np.linalg.norm(df[df.columns] - inp)

In [19]:
# sort points in ascending order of distance from input
df.sort_values(by=['dists']).loc[:, df.columns != 'dists'].iloc[:20]

Unnamed: 0,loan_amount,Services,Clothing,Transportation,Personal Use,Retail,Health,Food,Education,Manufacturing,Wholesale,Construction,Arts,Agriculture,Entertainment,Housing,male,female
2,1650.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1
743,450.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
715,375.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
701,200.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
693,225.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
684,150.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
670,200.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
662,1525.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
658,775.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
647,325.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1
