# Identifying VC Firms for Top Equity Pledge Companies

## Sophia Skowronski | July 13, 2020

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

SFDC Report: https://p1.lightning.force.com/lightning/r/Report/00O1Y000006uwbgUAA/view

In [2]:
# Data pulled from SFDC and Crunchbase integration
df = pd.read_csv('data.csv')

# Keep only the company name and its '; '-separated investor string
df = df[['Account Name', 'Investors']]

# Drop the irrelevant companies (THIS MIGHT NEED TO BE EDITED FOR FUTURE USE)
# NOTE: these are row labels of the current data.csv export; they must be
# reviewed whenever the file is regenerated, or the wrong rows get dropped.
df = df.drop([2,6,14,15,16,22,23,24,26,28,29,30,31,32,33,37,38,39,40], axis = 0)

# Restart the index so row labels run 0..n-1 again
df = df.reset_index(drop = True)

# Pull out the raw investor strings (one entry per company, in row order),
# then remove the column from the frame
Investors = df['Investors'].tolist()
df = df.drop('Investors', axis=1)

# Create unique, sorted list of VCs.
# Each entry is split on the semi-colon pattern and collected in a set to
# remove duplicates. Non-string entries (a missing cell is read as a float NaN)
# and empty tokens are skipped so they cannot become a bogus investor name
# such as 'nan'.
Unique_Investor_List = sorted({
    name
    for entry in Investors
    if isinstance(entry, str)
    for name in entry.split('; ')
    if name
})

# Print out first 5 investors
print(Unique_Investor_List[0:5])

['#Angels', '137 Ventures', '500 Startups', '8VC', 'A-Grade Investments']


## Create matrix

In [3]:
# Create a column for each unique VC value, filled with zeros.
# reindex() appends only the investor columns that are not present yet and
# fills just those with 0; the existing columns (e.g. 'Account Name') keep
# their values, including any missing ones, instead of being blanket-filled.
new_investor_columns = [name for name in Unique_Investor_List if name not in df.columns]
df = df.reindex(columns=list(df.columns) + new_investor_columns, fill_value=0)

# Check it out
df.head()

Unnamed: 0,Account Name,#Angels,137 Ventures,500 Startups,8VC,A-Grade Investments,A.Capital Ventures,AFSquare,AOL,Accel,...,Wells Fargo,WestRiver Group,WestSummit Capital,Wildcat Venture Partners,William Hambrecht,Y Combinator,You & Mr Jones Brandtech Ventures,Yuri Sagalov,blisce/,iGlobe Partners
0,appfire,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Atlassian,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Code42,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Coinbase,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Crunchbase,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Populate the matrix

In [4]:
# Flag every (company, investor) pair with a 1.
# Investors[i] is the raw '; '-separated investor string for the company in
# row i of df, so the position in the list doubles as the row label.
for i, investor_entry in enumerate(Investors): # By each company
    if not isinstance(investor_entry, str):
        continue # No investor string for this company (e.g. NaN) - nothing to flag
    for item in investor_entry.split('; '): # By each VC
        if item:
            # One label-based assignment; chained indexing (df[item][i] = 1)
            # may write to a temporary copy and leave df unchanged.
            df.at[i, item] = 1

# Check it out
df.tail()

Unnamed: 0,Account Name,#Angels,137 Ventures,500 Startups,8VC,A-Grade Investments,A.Capital Ventures,AFSquare,AOL,Accel,...,Wells Fargo,WestRiver Group,WestSummit Capital,Wildcat Venture Partners,William Hambrecht,Y Combinator,You & Mr Jones Brandtech Ventures,Yuri Sagalov,blisce/,iGlobe Partners
23,Unity Technologies,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1
24,Upwork,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25,Vlocity,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
26,Weebly,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
27,Zuora,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Create totals rows & columns
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sum.html
# Only the investor indicator columns take part in the sums; 'Account Name'
# holds strings and must stay out of the arithmetic.
# NOTE: run this cell once per freshly built matrix - re-running it would add
# a second "Total" row and count the existing "Total" column as an investor.
investor_cols = df.columns.drop('Account Name')

# Create "Total" row: number of companies each investor appears in.
# The assigned Series is aligned on column labels, so 'Account Name' is NaN here.
df.loc['Total', :] = df[investor_cols].sum(axis = 0)

# Replace nan in the name column with the row's label
df.loc['Total', 'Account Name'] = 'Total'

# Create "Total" column: number of investors flagged per company. It is
# computed after the "Total" row exists, so the bottom-right cell is the
# grand total of all flags.
df['Total'] = df[investor_cols].sum(axis = 1)

# Clean up, reset index (the 'Total' row label becomes the last integer position)
df = df.reset_index(drop = True)

# Check it out
df.tail()

Unnamed: 0,Account Name,#Angels,137 Ventures,500 Startups,8VC,A-Grade Investments,A.Capital Ventures,AFSquare,AOL,Accel,...,WestRiver Group,WestSummit Capital,Wildcat Venture Partners,William Hambrecht,Y Combinator,You & Mr Jones Brandtech Ventures,Yuri Sagalov,blisce/,iGlobe Partners,Total
24,Upwork,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0
25,Vlocity,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0
26,Weebly,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,9.0
27,Zuora,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0
28,Total,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,4.0,...,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,432.0


## Which VC has the highest number of hits?

In [6]:
# The last row of df is the "Total" row; drop its first cell (the name) and
# its last cell (the grand total) so only the per-investor counts remain
VC_totals = df.iloc[-1].tolist()[1:-1]

# Position of the largest count (the earliest position wins on ties)
VC_totals_max_index = max(range(len(VC_totals)), key=VC_totals.__getitem__)

print("For Equity Pledge Companies, what VC firm has the highest number of hits?\n\n", Unique_Investor_List[VC_totals_max_index])

For Equity Pledge Companies, what VC firm has the highest number of hits?

 SV Angel


## Create filter of Top VCs

In [7]:
# One-row frame: a column per investor holding its total count
totals_frame = pd.DataFrame(dict(zip(Unique_Investor_List, VC_totals)),index=[0])

# Blank out counts that are not above 2, then discard the blanked columns
frequent_vcs = totals_frame.where(totals_frame > 2).dropna(axis=1)

# Print out of top VCs with more than 2 hits in the matrix
print(frequent_vcs.T[0].astype(int))

# Save top VC list with counts: one row per firm, name first, count second
df_VC_top = frequent_vcs.T.reset_index()
df_VC_top.columns=["VC Firm", "Counts"]

Accel                                    4
All Blue Capital                         3
Altimeter Capital                        3
Andreessen Horowitz                      3
Bain Capital Ventures                    3
Bessemer Venture Partners                5
BlackRock                                3
Employee Stock Option Fund (ESO Fund)    3
EquityZen                                5
Felicis Ventures                         3
FundersClub                              3
G Squared                                4
GV                                       4
Goldman Sachs                            3
Greylock Partners                        4
Index Ventures                           4
Kleiner Perkins                          4
Light Street Capital                     3
Lowercase Capital                        3
SV Angel                                 9
Salesforce Ventures                      5
Sequoia Capital                          4
SharesPost Investment Management         4
Silicon Val

## Create top VC matrix

In [8]:
# Create smaller dataframe, filtering by the top VCs.
# .copy() makes df_top an independent frame, so adding a column below cannot
# touch (or fail to touch) a view of df.
top_vc_cols = df_VC_top['VC Firm'].to_list()
df_top = df[['Account Name'] + top_vc_cols].copy()

# Add "Total" column (the "Total" row is already carried over from df).
# Only the VC columns are summed; 'Account Name' holds strings.
df_top['Total'] = df_top[top_vc_cols].sum(axis = 1)

# Check it out
df_top.tail()

Unnamed: 0,Account Name,Accel,All Blue Capital,Altimeter Capital,Andreessen Horowitz,Bain Capital Ventures,Bessemer Venture Partners,BlackRock,Employee Stock Option Fund (ESO Fund),EquityZen,...,Sequoia Capital,SharesPost Investment Management,Silicon Valley Bank,Spark Capital,T. Rowe Price,Techstars,Webb Investment Network,Wellington Management,Y Combinator,Total
24,Upwork,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
25,Vlocity,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
26,Weebly,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0
27,Zuora,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,7.0
28,Total,4.0,3.0,3.0,3.0,3.0,5.0,3.0,3.0,5.0,...,4.0,4.0,3.0,3.0,7.0,3.0,3.0,7.0,4.0,119.0


## Save data

In [9]:
# Write each result frame to its CSV file in the working directory
csv_outputs = (
    ("top_VC_list.csv", df_VC_top),     # Top hit list of VCs
    ('top_company_matrix.csv', df_top), # Top VC company matrix
    ('company_matrix.csv', df),         # All company matrix
)
for csv_path, frame in csv_outputs:
    frame.to_csv(csv_path)