## This notebook explores startup funding data: startups, investors, funding types and funding amounts

In [2]:
# imports pandas, numpy and matplotlib modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import dateutil
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including any raised by pandas; consider narrowing the filter to specific
# categories so genuine problems are not hidden.
warnings.filterwarnings('ignore')

In [3]:
## Load the startup funding records from a CSV file into a DataFrame.
# The path is relative, so the file is resolved against the current working directory.
file_path = 'startup_funding.csv'
df = pd.read_csv(file_path)

## Explore the data

In [4]:
## Dimensions of the raw data as a (rows, columns) tuple
df.shape

(2372, 10)

In [5]:
## Preview the first five rows of the raw data
df.head(5)

Unnamed: 0,SNo,Date,StartupName,IndustryVertical,SubVertical,CityLocation,InvestorsName,InvestmentType,AmountInUSD,Remarks
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000.0,
1,1,02/08/2017,Ethinos,Technology,Digital Marketing Agency,Mumbai,Triton Investment Advisors,Private Equity,,
2,2,02/08/2017,Leverage Edu,Consumer Internet,Online platform for Higher Education Services,New Delhi,"Kashyap Deorah, Anand Sankeshwar, Deepak Jain,...",Seed Funding,,
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000.0,
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000.0,


In [None]:
## Data type of each column, as inferred by read_csv (no dtypes were specified on load)
df.dtypes

## Clean data

In [6]:
## Drop the Remarks column, which the author notes holds only NAs.
# errors='ignore' keeps this cell safe to re-run after the column is already gone
# (the previous inplace drop raised KeyError on a second run), and reassigning
# instead of using inplace=True leaves the previously bound frame untouched.
df = df.drop(columns='Remarks', errors='ignore')

In [7]:
## Preview the first five rows to confirm the Remarks column is gone
df.head(5)

Unnamed: 0,SNo,Date,StartupName,IndustryVertical,SubVertical,CityLocation,InvestorsName,InvestmentType,AmountInUSD
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000.0
1,1,02/08/2017,Ethinos,Technology,Digital Marketing Agency,Mumbai,Triton Investment Advisors,Private Equity,
2,2,02/08/2017,Leverage Edu,Consumer Internet,Online platform for Higher Education Services,New Delhi,"Kashyap Deorah, Anand Sankeshwar, Deepak Jain,...",Seed Funding,
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000.0
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000.0


In [8]:
## Replace the verbose CSV headers with short lowercase column names
column_renames = {
    'Date': 'date',
    'StartupName': 'name',
    'IndustryVertical': 'industry',
    'SubVertical': 'category',
    'CityLocation': 'city',
    'InvestorsName': 'investor',
    'InvestmentType': 'investtype',
    'AmountInUSD': 'amount',
}
# Keep the two parallel lists available under their original names.
old_names = list(column_renames)
new_names = list(column_renames.values())
df.rename(columns=column_renames, inplace=True)

In [9]:
## Preview the first five rows to check the renamed columns
df.head(5)

Unnamed: 0,SNo,date,name,industry,category,city,investor,investtype,amount
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000.0
1,1,02/08/2017,Ethinos,Technology,Digital Marketing Agency,Mumbai,Triton Investment Advisors,Private Equity,
2,2,02/08/2017,Leverage Edu,Consumer Internet,Online platform for Higher Education Services,New Delhi,"Kashyap Deorah, Anand Sankeshwar, Deepak Jain,...",Seed Funding,
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000.0
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000.0


In [10]:
## Keep only rows where every analysed field is present.
# A single dropna over the listed columns replaces seven successive boolean
# filters. The 'date' column is deliberately not part of the check, matching
# the original filter set.
# .copy() yields an independent frame so that later column assignments do not
# write into a filtered slice of the loaded data.
df = df.dropna(
    subset=['name', 'industry', 'category', 'city', 'investor', 'investtype', 'amount']
).copy()

In [11]:
## Preview the first five rows that survived the NA filter
df.head(5)

Unnamed: 0,SNo,date,name,industry,category,city,investor,investtype,amount
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000
5,5,01/07/2017,Billion Loans,Consumer Internet,Peer to Peer Lending platform,Bangalore,Reliance Corporate Advisory Services Ltd,Seed Funding,1000000
6,6,03/07/2017,Ecolibriumenergy,Technology,Energy management solutions provider,Ahmedabad,"Infuse Ventures, JLL",Private Equity,2600000


In [12]:
## Remove the comma separators from amount and convert the column to float
df["amount"] = (
    df["amount"]
    .astype(str)                        # uniform text form, whatever the stored type
    .str.replace(",", "", regex=False)  # literal comma removal, no regex involved
    .astype(float)
)

In [29]:
## Preview the first five rows after the amount conversion
# NOTE(review): the saved output below already contains a `yearmonth` column,
# which is only created in a later cell of this notebook, so this cell was last
# run out of order. Re-run the notebook top to bottom to refresh the output.
df.head(5)

Unnamed: 0,SNo,date,name,industry,category,city,investor,investtype,amount,yearmonth
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000.0,201708
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000.0,201708
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000.0,201708
5,5,01/07/2017,Billion Loans,Consumer Internet,Peer to Peer Lending platform,Bangalore,Reliance Corporate Advisory Services Ltd,Seed Funding,1000000.0,201707
6,6,03/07/2017,Ecolibriumenergy,Technology,Energy management solutions provider,Ahmedabad,"Infuse Ventures, JLL",Private Equity,2600000.0,201707


## Answer questions based on the data

In [46]:
## Five largest funding records: amount descending, ties broken by name (A-Z).
# The frame is sorted as-is (no aggregation per startup), so each row shown is
# a single funding record.
(
    df
    .sort_values(by=['amount', 'name'], ascending=[False, True])
    .head(5)
)

Unnamed: 0,SNo,date,name,industry,category,city,investor,investtype,amount,yearmonth
294,294,21/03/2017,Flipkart,eCommerce,ECommerce Marketplace,Bangalore,"Microsoft, eBay, Tencent Holdings",Private Equity,1400000000.0,201703
158,158,18/05/2017,Paytm,ECommerce,Mobile Wallet & ECommerce platform,Bangalore,SoftBank Group,Private Equity,1400000000.0,201705
252,252,01/03/2017,Ola,Consumer Internet,App based cab aggregator,Bangalore,SoftBank Group Corp,Private Equity,330000000.0,201703
217,217,14/04/2017,Ola Cabs,Consumer Internet,Cab Aggregator App,Bangalore,SIMI Pacific Pte,Private Equity,260000000.0,201704
234,234,24/04/2017,Oyo Rooms,Consumer Internet,Branded Budget Hotels Aggregator,Gurgaon,"SoftBank Vision Fund, Lightspeed Venture Partn...",Private Equity,250000000.0,201704


In [47]:
## Five smallest funding records: amount ascending, ties broken by name (A-Z).
# As with the largest records, rows are individual funding records, not per-startup totals.
(
    df
    .sort_values(by=['amount', 'name'], ascending=[True, True])
    .head(5)
)

Unnamed: 0,SNo,date,name,industry,category,city,investor,investtype,amount,yearmonth
439,439,30/01/2017,Maptags,Consumer Internet,Smart Online address tags,Bangalore,The Ten Minute Million,Seed Funding,18000.0,201701
437,437,30/01/2017,Cloudrino,Technology,Cloud based Virtual Servers,New Delhi,The Ten Minute Million,Seed Funding,22500.0,201701
1237,1237,02/2/2016,Gamooz,Technology,Augmented Reality Solutions,Gurgaon,Multiple investors through Ten Minute Million ...,Seed Funding,22500.0,201602
1238,1238,02/2/2016,SoundSurround,Technology,Audio Technology Solutions,Mumbai,Multiple investors through Ten Minute Million ...,Seed Funding,22500.0,201602
1235,1235,02/2/2016,Strike,Consumer Internet,Mobile Email productivity tool,Bangalore,Multiple investors through Ten Minute Million ...,Seed Funding,22500.0,201602


In [14]:
## Funding record with the smallest amount
# idxmin() gives the index label of the first minimum; .loc then fetches that row.
df.loc[df['amount'].idxmin()]

SNo                                 439
date                         30/01/2017
name                            Maptags
industry              Consumer Internet
category      Smart Online address tags
city                          Bangalore
investor         The Ten Minute Million
investtype                 Seed Funding
amount                            18000
Name: 439, dtype: object

In [15]:
## Funding record with the largest amount
# idxmax() returns only the first index label holding the maximum, so when
# several records tie for the top amount just one of them is shown here.
df.loc[df['amount'].idxmax()]

SNo                                          158
date                                  18/05/2017
name                                       Paytm
industry                               ECommerce
category      Mobile Wallet & ECommerce platform
city                                   Bangalore
investor                          SoftBank Group
investtype                        Private Equity
amount                                   1.4e+09
Name: 158, dtype: object

In [48]:
## Distinct industry labels present in the cleaned data
# NOTE(review): the comparison is case- and spelling-sensitive, so variants such
# as 'eCommerce' / 'ECommerce' / 'ecommerce' are listed as separate industries.
df['industry'].unique()

array(['Technology', 'Consumer Internet', 'eCommerce', 'Healthcare',
       'Logistics', 'Food & Beverage', 'ECommerce', 'Finance', 'Education',
       'Food & Beverages', 'ecommerce', 'Others', 'healthcare', 'FMCG',
       'Reality', 'Real Estate', 'Ecommerce'], dtype=object)

In [49]:
## Distinct category (sub-vertical) labels present in the cleaned data
# Bracket access is used so the column lookup cannot be confused with a DataFrame attribute.
df['category'].unique()

array(['Predictive Care Platform', 'DIY Ecommerce platform',
       'healthcare service aggregator', 'Peer to Peer Lending platform',
       'Energy management solutions provider',
       'Online marketplace for automobiles',
       'online marketplace for food and grocery',
       'B2B marketplace for Industrial products',
       'Hyperlocal home services provider', 'Digital Media Video platform',
       'Workshop Management Software Platform',
       'Salon & Spa Aggregation & Discovery platform',
       'Video Content Discovery Platform', 'Digital payments solutions',
       'Construction site operations and analytics platform',
       'Designer consumer products Marketplace',
       'Sales Solutions for Fashion Brands',
       'Enterprise Banking Solutions', 'Consumer Leasing Platform',
       'Gamified Learning App', 'Online payments platform',
       'Online Marketplace for Construction Material',
       'Mobile-first Enterprise communication platform',
       'Mobile Learning Ap

In [22]:
## Number of funding records per industry label, most frequent first
# NOTE(review): labels are counted exactly as written, so case and spelling
# variants (e.g. 'eCommerce' vs 'ECommerce', 'Food & Beverage' vs
# 'Food & Beverages') are split into separate rows of the result.
df['industry'].value_counts()

Consumer Internet    459
Technology           189
eCommerce            112
ECommerce             32
Logistics             16
Education             15
Healthcare            14
Food & Beverage       12
Finance                7
Others                 4
Ecommerce              2
FMCG                   2
Food & Beverages       1
healthcare             1
Real Estate            1
ecommerce              1
Reality                1
Name: industry, dtype: int64

In [23]:
## Number of funding records per city label, most frequent first
# NOTE(review): labels are counted exactly as written, so multi-city entries
# (e.g. 'Bangalore / SFO' vs 'SFO / Bangalore') and variants such as
# 'New Delhi' vs 'Delhi' are counted separately.
df['city'].value_counts()

Bangalore             267
Mumbai                181
New Delhi             130
Gurgaon                97
Pune                   37
Hyderabad              35
Noida                  33
Chennai                32
Ahmedabad              17
Jaipur                  5
Kolkata                 5
Vadodara                3
Indore                  3
Chandigarh              2
Singapore               2
Panaji                  1
Goa                     1
Trivandrum              1
Mumbai / UK             1
Kerala                  1
Bangalore/ Bangkok      1
Bangalore / SFO         1
Pune / Dubai            1
Gwalior                 1
Missourie               1
Pune/Seattle            1
Delhi                   1
Coimbatore              1
New Delhi / US          1
Varanasi                1
Lucknow                 1
Kanpur                  1
SFO / Bangalore         1
Jodhpur                 1
Bhopal                  1
Name: city, dtype: int64

In [24]:
## Number of funding records per investment type, most frequent first
df['investtype'].value_counts()

Private Equity    476
Seed Funding      392
Debt Funding        1
Name: investtype, dtype: int64

In [25]:
## Number of funding records per investor string, most frequent first
# NOTE(review): the whole investor field is counted as one label, so
# comma-separated investor lists are not split into individual investors, and
# case/plural variants ('Undisclosed Investors', 'undisclosed investors',
# 'Undisclosed investor', ...) are counted separately.
df['investor'].value_counts()

Undisclosed Investors                                                                             17
Undisclosed investors                                                                             16
undisclosed investors                                                                             11
Kalaari Capital                                                                                    9
Undisclosed investor                                                                               7
Info Edge (India) Ltd                                                                              7
Indian Angel Network                                                                               7
Brand Capital                                                                                      7
Trifecta Capital                                                                                   6
Undisclosed Investor                                                                       

In [26]:
## Funding records whose city label is exactly 'Panaji'
in_panaji = df['city'] == 'Panaji'
df[in_panaji]

Unnamed: 0,SNo,date,name,industry,category,city,investor,investtype,amount
372,372,03/01/2017,TempoGo,Technology,IoT & SAAS Solutions for Transportation Industry,Panaji,K2 Capital,Seed Funding,825000.0


In [27]:
## Funding records whose investor field is exactly 'Brand Capital'
# Exact string match only: rows where the name appears alongside other text in
# the investor field are not selected.
by_brand_capital = df['investor'] == 'Brand Capital'
df[by_brand_capital]

Unnamed: 0,SNo,date,name,industry,category,city,investor,investtype,amount
307,307,28/03/2017,DoneThing,Consumer Internet,On-demand Personal Assistant service,Gurgaon,Brand Capital,Private Equity,300000.0
433,433,26/01/2017,AirMed Labs,Consumer Internet,Health Tests Booking platform & ePharmacy,Ahmedabad,Brand Capital,Private Equity,1400000.0
749,749,03/08/2016,ExtraCarbon,Consumer Internet,Waste Recycle Management platform,Gurgaon,Brand Capital,Seed Funding,225000.0
768,768,12/08/2016,Faircent.com,Consumer Internet,peer-to-peer lending platform,Gurgaon,Brand Capital,Private Equity,1500000.0
777,777,16/08/2016,SaleBhai,eCommerce,"Sweets, Dry Fruits & Handicrafts etailer",Ahmedabad,Brand Capital,Private Equity,1500000.0
786,786,19/08/2016,Quikr,Consumer Internet,Classified Listings platform,Mumbai,Brand Capital,Private Equity,20000000.0
985,985,30/06/2016,Meru Cabs,Consumer Internet,Online/ Mobile Tax Cab booking,Mumbai,Brand Capital,Private Equity,25000000.0


In [28]:
## Derive an integer YYYYMM key (e.g. 201708) from the day/month/year date strings.
# The date column is parsed once and both the year and the month are taken from
# that single parsed Series.
funding_dates = pd.to_datetime(df['date'], format='%d/%m/%Y')
df["yearmonth"] = (funding_dates.dt.year * 100) + funding_dates.dt.month

## Number of funding records per month, most frequent first
df['yearmonth'].value_counts()

201601    57
201608    56
201602    56
201606    54
201603    52
201605    51
201610    50
201703    48
201704    47
201706    47
201611    45
201609    44
201701    41
201612    41
201607    40
201705    39
201604    38
201702    35
201707    25
201708     3
Name: yearmonth, dtype: int64

In [90]:
## Preview the first five rows including the derived yearmonth column
# NOTE(review): the saved output below still lists the pre-rename headers
# (Date, StartupName, ...) next to yearmonth, so it appears to be stale and
# does not match the renamed frame. Re-run the notebook top to bottom to refresh it.
df.head(5)

Unnamed: 0,SNo,Date,StartupName,IndustryVertical,SubVertical,CityLocation,InvestorsName,InvestmentType,AmountInUSD,yearmonth
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000.0,201708
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000.0,201708
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000.0,201708
5,5,01/07/2017,Billion Loans,Consumer Internet,Peer to Peer Lending platform,Bangalore,Reliance Corporate Advisory Services Ltd,Seed Funding,1000000.0,201707
6,6,03/07/2017,Ecolibriumenergy,Technology,Energy management solutions provider,Ahmedabad,"Infuse Ventures, JLL",Private Equity,2600000.0,201707
