# Deal Recommendation Engine - Liquidity Digital

In [16]:
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import sklearn
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MinMaxScaler
import time


# Configure Pandas display options
pd.set_option('mode.chained_assignment', None)
pd.options.display.width = 0
    
    
cols_to_use = [0, 1, 2, 3, 5]
df = pd.read_csv('mock_deal_database_1.csv', usecols=cols_to_use)
df_old = pd.read_csv('mock_deal_database_1.csv')


# df.dropna(how="all", inplace=True)
# df.drop_duplicates(inplace=True)


# ohe = OneHotEncoder(categories='auto', handle_unknown='ignore')
# ohe.fit_transform(df[['Industry Type', 'Issuance Type', 'Location']])

def normalize(amount_text):
    d = {
        'K': 1000,
        'M': 1000000,
        'B': 1000000000
    }
    if amount_text[-1] in d:
        # separate out the K, M, or B
        num, magnitude = amount_text[:-1], amount_text[-1]
        return int(float(num[1:]) * d[magnitude])
    else:
        return float(amount_text[1:])


# scaler = MinMaxScaler()
# df['Target Raise'] = scaler.fit_transform(df[['Target Raise']])
process = 1.7

## Deal List / Before DRE

In [17]:
num_rows = df_old.shape[0]
print("# Deals: " + str(num_rows))
df_old

# Deals: 2999


Unnamed: 0,Deals,Industry Type,Issuance Type,Target Raise,Amount Raised,Location
0,EFF Ventures,Agriculture,STO,$10M,$30M,Denmark
1,YAD Capital,Private Equity,Equity,$5M,$5M,Israel
2,Intercom,Software,Convertible Note,$600K,$250K,Belgium
3,Komp,Other,Other,$500K,$1M,Thailand
4,CarbonV,Healthcare,Equity,$2M,$500K,Thailand
...,...,...,...,...,...,...
2994,"MajorWeb, LLC",Healthcare,STO,$9M,$93M,United States
2995,Makad Energy,Consumer Staples,Convertible Note,$2M,$22M,United Kingdom
2996,Makana Solutions,Healthcare,STO,$11M,$5M,Malaysia
2997,Makani Power,Energy,Debt,$3M,$83M,Canada


In [18]:
## Data Pipeline

from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.neighbors import NearestNeighbors

class Columns(BaseEstimator, TransformerMixin):
    def __init__(self, names=None):
        self.names = names

    def fit(self, X, y=None, **fit_params):
        return self

    def transform(self, X):
        return X[self.names]

numeric = ["Target Raise"]
categorical = ["Industry Type", "Issuance Type", "Location"]

model=NearestNeighbors(metric="euclidean",algorithm="brute", n_neighbors=2, n_jobs=1)

pipe = Pipeline([
    ("features", FeatureUnion([
        ('numeric', make_pipeline(Columns(names=numeric),StandardScaler())),
        ('categorical', make_pipeline(Columns(names=categorical),OneHotEncoder(sparse=False)))
    ])),
    ('model', model)
])




In [19]:
# ## Take in investor input for preferences:
print("Please Enter Your Deal Preferences Below.")
industry_pref = input("Industry: ").capitalize()
issuance_pref = input("Issuance Type: ").capitalize()
investment_amt = input("Ideal Investment Amount ($1M, $100k, etc): ").upper()
location_pref = input("Location: ").capitalize()

name = "INVESTOR"

new_row = {"Deals": name, "Industry Type": industry_pref, "Issuance Type": issuance_pref, "Target Raise": investment_amt, "Location": location_pref}
df = df.append(new_row, ignore_index=True)
df_old = df_old.append(new_row, ignore_index=True)
test_index = len(df) - 1

Please Enter Your Deal Preferences Below.
Industry: Healthcare
Issuance Type: Equity
Ideal Investment Amount ($1M, $100k, etc): $1M
Location: Singapore


## Pre-Processing Input

In [20]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler

df['Target Raise'] = df['Target Raise'].apply(lambda x: (normalize(x) if x != "n/a" else x))

X = df

currency_type = "$"

## TRANSFORMER METHOD
column_trans = make_column_transformer(
(OneHotEncoder(categories='auto'), ['Industry Type', 'Issuance Type', 'Location']),
(StandardScaler(), ['Target Raise']))


## This is what the data looks like going into the model
#print(column_trans.fit_transform(X).toarray())
T = column_trans.fit_transform(X)

## Model Fitting

In [21]:
nbrs = NearestNeighbors(metric = 'euclidean', n_neighbors=10, algorithm = 'auto').fit(T.toarray())
deal_index = test_index
distances, indices = nbrs.kneighbors([T.toarray()[deal_index]])

## Output

In [22]:
from numerize import numerize

deals = []
industries = []
issuance_types = []
amount_raised = []
locations = []
target_raises = []
euclids = []

for i in range(len(indices[0])):
    ind = indices[0][i]
    deals.append(df["Deals"][ind])
    industries.append(df["Industry Type"][ind])
    issuance_types.append(df['Issuance Type'][ind])
    amount_raised.append(df_old['Amount Raised'][ind])
    locations.append(df['Location'][ind])
    target_raises.append(df["Target Raise"][ind])
    euclids.append(str(distances[0][i]))


output_df = pd.DataFrame({'Deals': deals,
                          'Industry': industries,
                          'Issuance Type': issuance_types,
                          'Target Raise': target_raises,
                          'Amount Raised': amount_raised,
                          'Location': locations,
                         'Match': euclids})

def similar(distance):
    to_return = (1 / (1 + float(distance))) * 100
    return str(round(to_return, 3)) + "%"

def match(distance):
    to_return = (1 / (1 + float(distance))) * 100
    rounded = round(to_return, 3)
    if rounded <= 42:
        return "LOW"
    elif rounded >= 43 and rounded < 80:
        return "MEDIUM"
    else:
        return "HIGH"

output_df['Target Raise'] = output_df['Target Raise'].apply(lambda x: currency_type + (numerize.numerize(x) if x != "n/a" else x))
output_df['Match'] = output_df['Match'].apply(lambda x: match(x) if x != "n/a" else x)
output_df = output_df.drop(0)
# df_old = df_old.drop(test_index)

print("Deal Recommendations for you, based on your preferences: \n")
time.sleep(process + 1.5)
output_df

Deal Recommendations for you, based on your preferences: 



Unnamed: 0,Deals,Industry,Issuance Type,Target Raise,Amount Raised,Location,Match
1,Dealer Inspire,Healthcare,Equity,$604K,$1M,Singapore,HIGH
2,Deep Identity,Healthcare,Equity,$367K,$10M,Singapore,HIGH
3,Jobdoh,Healthcare,Equity,$7M,$3M,Singapore,MEDIUM
4,GymForLess,Healthcare,Equity,$1M,$9M,Mexico,LOW
5,CertificationPoint,Healthcare,Equity,$982K,$6M,Thailand,LOW
6,ams AG,Healthcare,Equity,$966K,$3M,Belgium,LOW
7,Delta Plant Technologies,Healthcare,Equity,$942K,$6M,Afghanistan,LOW
8,Certona,Healthcare,Equity,$871K,$6M,Denmark,LOW
9,BringIt,Healthcare,Equity,$868K,$2M,New Zealand,LOW


## More Deals Like This....

In [34]:
DEAL_TO_FIND_OTHERS = input("More Deals Like: ")
new_deal_index = df[df['Deals'].str.lower() == DEAL_TO_FIND_OTHERS.lower()].index.values.astype(int)[0]

nbrs_clone_deal = NearestNeighbors(metric = 'euclidean', n_neighbors=10, algorithm = 'auto').fit(T.toarray())
new_distances, new_indices = nbrs_clone_deal.kneighbors([T.toarray()[new_deal_index]])

#Final Step: need to remove investor input from DF
# if new_deal_index in new_indices:
#     #index = np.argwhere(==new_deal_index)
#     new_indices.remove(new_deal_index)

More Deals Like: GymForLess


In [35]:
new_distances, new_indices = nbrs.kneighbors([T.toarray()[new_deal_index]])

deals = []
industries = []
issuance_types = []
amount_raised = []
locations = []
target_raises = []
euclids = []

for i in range(len(indices[0])):
    ind = new_indices[0][i]
    deals.append(df["Deals"][ind])
    industries.append(df["Industry Type"][ind])
    issuance_types.append(df['Issuance Type'][ind])
    amount_raised.append(df_old['Amount Raised'][ind])
    locations.append(df['Location'][ind])
    target_raises.append(df["Target Raise"][ind])
    euclids.append(str(new_distances[0][i]))


new_output_df = pd.DataFrame({'Deals': deals,
                          'Industry': industries,
                          'Issuance Type': issuance_types,
                          'Target Raise': target_raises,
                          'Amount Raised': amount_raised,
                          'Location': locations,
                         'Similarity': euclids})


new_output_df['Target Raise'] = new_output_df['Target Raise'].apply(lambda x: currency_type + (numerize.numerize(x) if x != "n/a" else x))
new_output_df['Similarity'] = new_output_df['Similarity'].apply(lambda x: similar(x) if x != "n/a" else x)
new_output_df = new_output_df.drop(0)

print("Deal Recommendations Processing for " + df["Deals"][new_deal_index] + "... \n" )
time.sleep(process)
print("Here are some deals we think you'd like, based on your interest in " + df["Deals"][new_deal_index] + ":")
new_output_df

Deal Recommendations Processing for GymForLess... 

Here are some deals we think you'd like, based on your interest in GymForLess:


Unnamed: 0,Deals,Industry,Issuance Type,Target Raise,Amount Raised,Location,Similarity
1,Dealflow.com,Healthcare,Equity,$679K,$1M,Mexico,93.769%
2,USTC iFLYTEK Science and Technology,Healthcare,Equity,$196K,$3M,Mexico,85.731%
3,Extend Media,Real Estate,Equity,$1M,$89M,Mexico,41.421%
4,INVESTOR,Healthcare,Equity,$1M,,Singapore,41.421%
5,Transpond,Energy,Equity,$1M,$100M,Mexico,41.421%
6,CertificationPoint,Healthcare,Equity,$982K,$6M,Thailand,41.421%
7,ChainSync,Energy,Equity,$980K,$4M,Mexico,41.421%
8,ams AG,Healthcare,Equity,$966K,$3M,Belgium,41.421%
9,Delta Plant Technologies,Healthcare,Equity,$942K,$6M,Afghanistan,41.42%
