# Identifying VC Firms of Pledge Companies on CNBC List

## Sophia Skowronski | June 25, 2020

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read in from webscraped dataset
df = pd.read_csv('cnbc_50_p1_sfdc.csv')

# Create new DataFrame with just the relevant columns
df = df[['company', 'Investors']]

# Create list of all investors
Investors = df.Investors.tolist()
df.drop('Investors', axis=1, inplace=True)

# Create unique list of VCs
# Loop through each row, cast as a string, and append to string
string = '' # Empty string
for item in Investors: 
    string += '; ' + str(item)
    
# From one long string:
# Split using the semi-colon pattern, use set to remove duplicates, and transform into a list
Unique_Investor_List = sorted(list(set(string.split('; '))))[1:]

# Print out first 5 investors
print(Unique_Investor_List[0:5])

['137 Ventures', '1776 Ventures', '83North', '9Yards Capital', 'A-Grade Investments']


## Create matrix

In [3]:
# Create a row for each unique VC value, fill with zeros
df=pd.concat([df,pd.DataFrame(columns=Unique_Investor_List)]).fillna(0)

# Check it out
df.head()

Unnamed: 0,company,137 Ventures,1776 Ventures,83North,9Yards Capital,A-Grade Investments,AID Partners Capital,AME Cloud Ventures,Aaron Levie,Aboud Khaddam,...,Wayne Chang,Wellcome Trust,Wellington Management,Wellington Partners,Western Technology Investment,Wing Venture Capital,Workday Ventures,Y Combinator,Yuan Capital,Zander Lurie
0,Stripe,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Coursera,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,SoFi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,DoorDash,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Affirm,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Populate the matrix

In [4]:
for i in range(df.shape[0]): # By each company
    a_list = Investors[i].split('; ') # By each VC
    for item in a_list:
        df[item][i] = 1

# Check it out
df.tail()

Unnamed: 0,company,137 Ventures,1776 Ventures,83North,9Yards Capital,A-Grade Investments,AID Partners Capital,AME Cloud Ventures,Aaron Levie,Aboud Khaddam,...,Wayne Chang,Wellcome Trust,Wellington Management,Wellington Partners,Western Technology Investment,Wing Venture Capital,Workday Ventures,Y Combinator,Yuan Capital,Zander Lurie
11,Airbnb,1,0,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
12,Guild Education,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
13,Robinhood,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
14,Impossible Foods,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
15,UiPath,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [5]:
# Create totals rows & columns
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sum.html
df.loc['Total',:] = df.sum(axis = 0) # Create "Total" row
df.loc[:,'Total'] = df.sum(axis = 1) # Create "Total" column

# Replace nan
df.iloc[df.shape[0]-1,0] = 'Total'

# Clean up, reset index
df.reset_index(inplace = True, drop = True)

# Check it out
df.tail()

Unnamed: 0,company,137 Ventures,1776 Ventures,83North,9Yards Capital,A-Grade Investments,AID Partners Capital,AME Cloud Ventures,Aaron Levie,Aboud Khaddam,...,Wellcome Trust,Wellington Management,Wellington Partners,Western Technology Investment,Wing Venture Capital,Workday Ventures,Y Combinator,Yuan Capital,Zander Lurie,Total
12,Guild Education,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,23.0
13,Robinhood,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0
14,Impossible Foods,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0
15,UiPath,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0
16,Total,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,...,1.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,351.0


## Which VCs have the highest hit rate?

In [6]:
# Grab the "Total" row at the bottom, exclude first and last summed value
VC_totals = list(df.iloc[df.shape[0]-1,:])[1:-1]

# What VC firm has the highest number of hits?
VC_totals_max_index = VC_totals.index(max(VC_totals)) # Find index where the max value is

print("For CNBC Disrupter Companies, what VC firm has the highest number of hits?\n\n", Unique_Investor_List[VC_totals_max_index])

For CNBC Disrupter Companies, what VC firm has the highest number of hits?

 Kleiner Perkins


## Create filter of Top VCs

In [7]:
# Create new dataframe to filter by total counts
df_VC_top = pd.DataFrame(dict(zip(Unique_Investor_List, VC_totals)),index=[0])
df_VC_top = df_VC_top[df_VC_top > 2].dropna(axis=1)

# Print out of top VCs with more than 2 hits in the matrix
print(df_VC_top.T[0].astype(int))

# Save top VC list with counts
df_VC_top = df_VC_top.T
df_VC_top.reset_index(inplace=True)
df_VC_top.columns=["VC Firm", "Counts"]

Bracket Capital               3
CapitalG                      4
Coatue Management             4
DST Global                    3
Dragoneer Investment Group    5
G Squared                     3
GV                            4
General Catalyst              3
Geodesic Capital              3
ICONIQ Capital                5
Kleiner Perkins               6
Morgan Stanley                3
Redpoint                      3
SV Angel                      4
Sequoia Capital               6
T. Rowe Price                 3
Wellington Management         3
Name: 0, dtype: int32


## Create top VC matrix

In [8]:
# Create smaller dataframe, filtering by the top VCs
df_top = df[['company'] + df_VC_top['VC Firm'].to_list()]

# Add "Total" row (column not needed)
df_top.loc[:,'Total'] = df_top.sum(axis = 1)

# Check it out
df_top

Unnamed: 0,company,Bracket Capital,CapitalG,Coatue Management,DST Global,Dragoneer Investment Group,G Squared,GV,General Catalyst,Geodesic Capital,ICONIQ Capital,Kleiner Perkins,Morgan Stanley,Redpoint,SV Angel,Sequoia Capital,T. Rowe Price,Wellington Management,Total
0,Stripe,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,8.0
1,Coursera,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
2,SoFi,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0
3,DoorDash,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,7.0
4,Affirm,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,2.0
5,Kabbage,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
6,Chime,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
7,Dave,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
8,Ripple,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
9,Marqeta,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0


## Save results to csv

In [9]:
df_top.to_csv('cnbc_50_p1_investors.csv')