In [1]:
# Supress Warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
pd.options.display.float_format = '{:.2f}'.format # Setting format options for displaying float values

## Checkpoint 1: Data Cleaning 1

In [3]:
# Read the companies.txt file
companies = pd.read_csv('companies.txt',sep='\t',encoding='ISO-8859-1')
companies

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at
0,/Organization/-Fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,
1,/Organization/-Qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014
2,/Organization/-The-One-Of-Them-Inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,
3,/Organization/0-6-Com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007
4,/Organization/004-Technologies,004 Technologies,http://004gmbh.de/en/004-interact,Software,operating,USA,IL,"Springfield, Illinois",Champaign,01-01-2010
5,/Organization/01Games-Technology,01Games Technology,http://www.01games.hk/,Games,operating,HKG,,Hong Kong,Hong Kong,
6,/Organization/0Ndine-Biomedical-Inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997
7,/Organization/0Xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011
8,/Organization/1,One Inc.,http://whatis1.com,Mobile,operating,USA,CA,SF Bay Area,San Francisco,01-08-2011
9,/Organization/1-2-3-Listo,"1,2,3 Listo",http://www.123listo.com,E-Commerce,operating,CHL,12,Santiago,Las Condes,01-01-2012


In [4]:
# Check for description
companies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66368 entries, 0 to 66367
Data columns (total 10 columns):
permalink        66368 non-null object
name             66367 non-null object
homepage_url     61310 non-null object
category_list    63220 non-null object
status           66368 non-null object
country_code     59410 non-null object
state_code       57821 non-null object
region           58338 non-null object
city             58340 non-null object
founded_at       51147 non-null object
dtypes: object(10)
memory usage: 5.1+ MB


In [5]:
# Removing special characters from the dataframe 
companies = companies.applymap(lambda x: x.encode('utf-8').decode('ascii', 'ignore') if not isinstance(x, float) else x)
companies['permalink'] = companies['permalink'].str.lower()
companies

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at
0,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,
1,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014
2,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,
3,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007
4,/organization/004-technologies,004 Technologies,http://004gmbh.de/en/004-interact,Software,operating,USA,IL,"Springfield, Illinois",Champaign,01-01-2010
5,/organization/01games-technology,01Games Technology,http://www.01games.hk/,Games,operating,HKG,,Hong Kong,Hong Kong,
6,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997
7,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011
8,/organization/1,One Inc.,http://whatis1.com,Mobile,operating,USA,CA,SF Bay Area,San Francisco,01-08-2011
9,/organization/1-2-3-listo,"1,2,3 Listo",http://www.123listo.com,E-Commerce,operating,CHL,12,Santiago,Las Condes,01-01-2012


In [6]:
# Table:1.1 
# 2. How many unique companies are present in companies?
companies['permalink'].nunique()

66368

In [7]:
# Checking for NaN values (column-wise)
companies.isnull().sum()

permalink            0
name                 1
homepage_url      5058
category_list     3148
status               0
country_code      6958
state_code        8547
region            8030
city              8028
founded_at       15221
dtype: int64

In [8]:
# Table:1.1 
# 3. In the companies data frame, which column can be used as the unique key for each company? 

num_of_rows = companies.shape[0]
for column in companies.columns:
    unique_rows = companies[column].apply(lambda x: x.lower() if not isinstance(x, float) else x).nunique()
    if num_of_rows == unique_rows:
        print("Unique Column : {}".format(column))

companies.nunique() # We can also check the count for each column which is equal to the number of rows i.e 66368

Unique Column : permalink


permalink        66368
name             66102
homepage_url     61191
category_list    27296
status               4
country_code       137
state_code         311
region            1092
city              5111
founded_at        3978
dtype: int64

In [9]:
# Read the rounds2.csv file
rounds2 = pd.read_csv('rounds2.csv',encoding='ISO-8859-1')
rounds2

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10000000.00
1,/ORGANIZATION/-QOUNTER,/funding-round/22dacff496eb7acb2b901dec1dfe5633,venture,A,14-10-2014,
2,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,,01-03-2014,700000.00
3,/ORGANIZATION/-THE-ONE-OF-THEM-INC-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3406878.00
4,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2000000.00
5,/ORGANIZATION/004-TECHNOLOGIES,/funding-round/1278dd4e6a37fa4b7d7e06c21b3c1830,venture,,24-07-2014,
6,/organization/01games-technology,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,undisclosed,,01-07-2014,41250.00
7,/ORGANIZATION/0NDINE-BIOMEDICAL-INC,/funding-round/2b9d3ac293d5cdccbecff5c8cb0f327d,seed,,11-09-2009,43360.00
8,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,,21-12-2009,719491.00
9,/ORGANIZATION/0XDATA,/funding-round/383a9bd2c04f7038bb543ccef5ba3eae,seed,,22-05-2013,3000000.00


In [10]:
rounds2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114949 entries, 0 to 114948
Data columns (total 6 columns):
company_permalink          114949 non-null object
funding_round_permalink    114949 non-null object
funding_round_type         114949 non-null object
funding_round_code         31140 non-null object
funded_at                  114949 non-null object
raised_amount_usd          94959 non-null float64
dtypes: float64(1), object(5)
memory usage: 5.3+ MB


In [11]:
# Removing special characters from the dataframe 
rounds2 = rounds2.applymap(lambda x: x.encode('utf-8').decode('ascii', 'ignore') if not isinstance(x, float) else x)
rounds2['company_permalink'] = rounds2['company_permalink'].str.lower()
rounds2

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10000000.00
1,/organization/-qounter,/funding-round/22dacff496eb7acb2b901dec1dfe5633,venture,A,14-10-2014,
2,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,,01-03-2014,700000.00
3,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3406878.00
4,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2000000.00
5,/organization/004-technologies,/funding-round/1278dd4e6a37fa4b7d7e06c21b3c1830,venture,,24-07-2014,
6,/organization/01games-technology,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,undisclosed,,01-07-2014,41250.00
7,/organization/0ndine-biomedical-inc,/funding-round/2b9d3ac293d5cdccbecff5c8cb0f327d,seed,,11-09-2009,43360.00
8,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,,21-12-2009,719491.00
9,/organization/0xdata,/funding-round/383a9bd2c04f7038bb543ccef5ba3eae,seed,,22-05-2013,3000000.00


In [12]:
# summing up the missing values (column-wise)
rounds2.isnull().sum()

company_permalink              0
funding_round_permalink        0
funding_round_type             0
funding_round_code         83809
funded_at                      0
raised_amount_usd          19990
dtype: int64

In [13]:
# Table:1.1 
# 1. How many unique companies are present in rounds2?
rounds2['company_permalink'].nunique()

66368

In [15]:
# Table:1.1 
# 4. Are there any companies in the rounds2 file which are not present in companies?
not(rounds2.loc[~rounds2['company_permalink'].isin(companies['permalink']), :].empty) # No such companies present

False

In [16]:
# Before merge
print(companies.shape)
print(rounds2.shape)

(66368, 10)
(114949, 6)


In [17]:
master_frame = pd.merge(companies,rounds2,how='inner',left_on='permalink', right_on='company_permalink')
master_frame

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd
0,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10000000.00
1,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014,/organization/-qounter,/funding-round/22dacff496eb7acb2b901dec1dfe5633,venture,A,14-10-2014,
2,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,,01-03-2014,700000.00
3,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3406878.00
4,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2000000.00
5,/organization/004-technologies,004 Technologies,http://004gmbh.de/en/004-interact,Software,operating,USA,IL,"Springfield, Illinois",Champaign,01-01-2010,/organization/004-technologies,/funding-round/1278dd4e6a37fa4b7d7e06c21b3c1830,venture,,24-07-2014,
6,/organization/01games-technology,01Games Technology,http://www.01games.hk/,Games,operating,HKG,,Hong Kong,Hong Kong,,/organization/01games-technology,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,undisclosed,,01-07-2014,41250.00
7,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997,/organization/0ndine-biomedical-inc,/funding-round/2b9d3ac293d5cdccbecff5c8cb0f327d,seed,,11-09-2009,43360.00
8,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,,21-12-2009,719491.00
9,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011,/organization/0xdata,/funding-round/383a9bd2c04f7038bb543ccef5ba3eae,seed,,22-05-2013,3000000.00


In [18]:
# Table:1.1 
# 5. How many observations are present in master_frame ?
master_frame.info() # 114949 rows and 16 columns

<class 'pandas.core.frame.DataFrame'>
Int64Index: 114949 entries, 0 to 114948
Data columns (total 16 columns):
permalink                  114949 non-null object
name                       114948 non-null object
homepage_url               108815 non-null object
category_list              111539 non-null object
status                     114949 non-null object
country_code               106271 non-null object
state_code                 104003 non-null object
region                     104782 non-null object
city                       104785 non-null object
founded_at                 94428 non-null object
company_permalink          114949 non-null object
funding_round_permalink    114949 non-null object
funding_round_type         114949 non-null object
funding_round_code         31140 non-null object
funded_at                  114949 non-null object
raised_amount_usd          94959 non-null float64
dtypes: float64(1), object(15)
memory usage: 14.9+ MB


In [19]:
master_frame.nunique()

permalink                   66368
name                        66102
homepage_url                61191
category_list               27296
status                          4
country_code                  137
state_code                    311
region                       1092
city                         5111
founded_at                   3978
company_permalink           66368
funding_round_permalink    114949
funding_round_type             14
funding_round_code              8
funded_at                    5033
raised_amount_usd           22095
dtype: int64

In [161]:
# summing up the missing values (column-wise)
master_frame.isnull().sum()

permalink                      0
name                           1
homepage_url                6134
category_list               3410
status                         0
country_code                8678
state_code                 10946
region                     10167
city                       10164
founded_at                 20521
company_permalink              0
funding_round_permalink        0
funding_round_type             0
funding_round_code         83809
funded_at                      0
raised_amount_usd          19990
dtype: int64

In [20]:
# check how many rows have all missing values
master_frame.isnull().all(axis=1).sum()

0

In [21]:
# summing up the missing values (column-wise)
round(100*(master_frame.isnull().sum()/len(master_frame.index)), 2)

permalink                  0.00
name                       0.00
homepage_url               5.34
category_list              2.97
status                     0.00
country_code               7.55
state_code                 9.52
region                     8.84
city                       8.84
founded_at                17.85
company_permalink          0.00
funding_round_permalink    0.00
funding_round_type         0.00
funding_round_code        72.91
funded_at                  0.00
raised_amount_usd         17.39
dtype: float64

In [22]:
# removing the columns having high percentage of missing values
master_frame = master_frame.drop('funding_round_code', axis=1)
master_frame = master_frame.drop('founded_at', axis=1)
round(100*(master_frame.isnull().sum()/len(master_frame.index)), 2)

permalink                  0.00
name                       0.00
homepage_url               5.34
category_list              2.97
status                     0.00
country_code               7.55
state_code                 9.52
region                     8.84
city                       8.84
company_permalink          0.00
funding_round_permalink    0.00
funding_round_type         0.00
funded_at                  0.00
raised_amount_usd         17.39
dtype: float64

In [23]:
# removing NaN raised_amount_usd rows
master_frame = master_frame[~np.isnan(master_frame['raised_amount_usd'])]
round(100*(master_frame.isnull().sum()/len(master_frame.index)), 2)

permalink                 0.00
name                      0.00
homepage_url              4.56
category_list             1.10
status                    0.00
country_code              6.16
state_code                8.01
region                    7.42
city                      7.42
company_permalink         0.00
funding_round_permalink   0.00
funding_round_type        0.00
funded_at                 0.00
raised_amount_usd         0.00
dtype: float64

In [24]:
master_frame.shape

(94959, 14)

## Checkpoint 2: Funding Type Analysis

In [25]:
#rounds_by_amt groupby object created by grouping the dataset by funding_round_type
rounds_by_amt = master_frame.groupby('funding_round_type')

#fund_avg dataframe created after finding the average of raised_amount_usd of each group by element
fund_avg = pd.DataFrame(rounds_by_amt['raised_amount_usd'].mean())
fund_avg.reset_index(inplace=True)

fund_avg

Unnamed: 0,funding_round_type,raised_amount_usd
0,angel,958694.47
1,convertible_note,1453438.54
2,debt_financing,17043526.02
3,equity_crowdfunding,538368.21
4,grant,4300576.34
5,non_equity_assistance,411203.05
6,post_ipo_debt,168704571.82
7,post_ipo_equity,82182493.87
8,private_equity,73308593.03
9,product_crowdfunding,1363131.07


In [26]:
# Finding out the type of investment suited for Spark funds
fund_avg.loc[(fund_avg['raised_amount_usd'] >= 5000000) & (fund_avg['raised_amount_usd'] <= 15000000),:]

Unnamed: 0,funding_round_type,raised_amount_usd
13,venture,11748949.13


In [27]:
# Table 2.1
# 1.  Average funding amount of venture type
print(fund_avg[fund_avg['funding_round_type'] == 'venture']['raised_amount_usd'].iloc[0])

# 2. Average funding amount of angel type
print(fund_avg[fund_avg['funding_round_type'] == 'angel']['raised_amount_usd'].iloc[0])

# 3. Average funding amount of seed type
print(fund_avg[fund_avg['funding_round_type'] == 'seed']['raised_amount_usd'].iloc[0])

# 4. Average funding amount of private equity type
print(fund_avg[fund_avg['funding_round_type'] == 'private_equity']['raised_amount_usd'].iloc[0])

# 5. Considering that Spark Funds wants to invest between 5 to 15 million USD per investment round, which investment type is the most suitable for it?
print(fund_avg[(fund_avg['raised_amount_usd'] >= 5000000) & (fund_avg['raised_amount_usd'] <= 15000000)]['funding_round_type'].iloc[0])

11748949.1295
958694.469753
719817.996907
73308593.0294
venture


## Checkpoint 3: Country Analysis

In [28]:
master_frame[master_frame['funding_round_type']=='venture']

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,company_permalink,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd
0,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,05-01-2015,10000000.00
3,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,30-01-2014,3406878.00
4,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,19-03-2008,2000000.00
8,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,21-12-2009,719491.00
10,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/3bb2ee4a2d89251a10aaa735b1180e44,venture,09-11-2015,20000000.00
11,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/ae2a174c06517c2394aed45006322a7e,venture,03-01-2013,1700000.00
12,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/e1cfcbe1bdf4c70277c5f29a3482f24e,venture,19-07-2014,8900000.00
22,/organization/1-mainstream,1 Mainstream,http://www.1mainstream.com,Apps|Cable|Distribution|Software,acquired,USA,CA,SF Bay Area,Cupertino,/organization/1-mainstream,/funding-round/b952cbaf401f310927430c97b68162ea,venture,17-03-2015,5000000.00
28,/organization/10-minutes-with,10 Minutes With,http://10minuteswith.com,Education,operating,GBR,H9,London,London,/organization/10-minutes-with,/funding-round/0faccbbcc5818dc5326469f13f5a8ac8,venture,09-10-2014,4000000.00
34,/organization/1000memories,1000memories,http://1000memories.com,Curated Web,acquired,USA,CA,SF Bay Area,San Francisco,/organization/1000memories,/funding-round/502bd0e50c27616995e4bdad24605ef8,venture,16-02-2011,2520000.00


In [29]:
# Find countries that received the highest total funding 
rounds_by_countries = master_frame.groupby('country_code')
top_countries = pd.DataFrame(rounds_by_countries['raised_amount_usd'].sum().round()).sort_values('raised_amount_usd',ascending=False)
top_countries.reset_index(inplace=True)
top9 = top_countries.head(9)
top9

Unnamed: 0,country_code,raised_amount_usd
0,USA,669482123821.0
1,CHN,75703565796.0
2,GBR,32767048060.0
3,IND,27686336560.0
4,CAN,18424675109.0
5,RUS,11279120120.0
6,DEU,10017763740.0
7,ISR,9713884650.0
8,FRA,9059770757.0


##### Table 3.1 <br>
From the pdf file shared for official language, we came to the following conclusion :<br>
Top English-speaking country : USA<br>
Second  English-speaking country : GBR<br>
Third English-speaking country : IND<br>

## Checkpoint 4: Sector Analysis 1

In [31]:
#extracting primary sector
master_frame['primary_sector']=master_frame['category_list'].apply(lambda x:x.split('|')[0] if not isinstance(x, float) else x)
master_frame

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,company_permalink,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd,primary_sector
0,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,05-01-2015,10000000.00,Media
2,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,01-03-2014,700000.00,Application Platforms
3,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,30-01-2014,3406878.00,Apps
4,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,19-03-2008,2000000.00,Curated Web
6,/organization/01games-technology,01Games Technology,http://www.01games.hk/,Games,operating,HKG,,Hong Kong,Hong Kong,/organization/01games-technology,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,undisclosed,01-07-2014,41250.00,Games
7,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,/organization/0ndine-biomedical-inc,/funding-round/2b9d3ac293d5cdccbecff5c8cb0f327d,seed,11-09-2009,43360.00,Biotechnology
8,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,21-12-2009,719491.00,Biotechnology
9,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/383a9bd2c04f7038bb543ccef5ba3eae,seed,22-05-2013,3000000.00,Analytics
10,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/3bb2ee4a2d89251a10aaa735b1180e44,venture,09-11-2015,20000000.00,Analytics
11,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/ae2a174c06517c2394aed45006322a7e,venture,03-01-2013,1700000.00,Analytics


In [32]:
master_frame.isnull().sum()

permalink                     0
name                          1
homepage_url               4332
category_list              1044
status                        0
country_code               5851
state_code                 7604
region                     7048
city                       7045
company_permalink             0
funding_round_permalink       0
funding_round_type            0
funded_at                     0
raised_amount_usd             0
primary_sector             1044
dtype: int64

In [41]:
#reading mapping.csv
mapping = pd.read_csv('mapping.csv')
mapping

Unnamed: 0,category_list,Automotive & Sports,Blanks,Cleantech / Semiconductors,Entertainment,Health,Manufacturing,"News, Search and Messaging",Others,"Social, Finance, Analytics, Advertising"
0,,0,1,0,0,0,0,0,0,0
1,3D,0,0,0,0,0,1,0,0,0
2,3D Printing,0,0,0,0,0,1,0,0,0
3,3D Technology,0,0,0,0,0,1,0,0,0
4,Accounting,0,0,0,0,0,0,0,0,1
5,Active Lifestyle,0,0,0,0,1,0,0,0,0
6,Ad Targeting,0,0,0,0,0,0,0,0,1
7,Advanced Materials,0,0,0,0,0,1,0,0,0
8,Adventure Travel,1,0,0,0,0,0,0,0,0
9,Advertising,0,0,0,0,0,0,0,0,1


In [42]:
mapping.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 688 entries, 0 to 687
Data columns (total 10 columns):
category_list                              687 non-null object
Automotive & Sports                        688 non-null int64
Blanks                                     688 non-null int64
Cleantech / Semiconductors                 688 non-null int64
Entertainment                              688 non-null int64
Health                                     688 non-null int64
Manufacturing                              688 non-null int64
News, Search and Messaging                 688 non-null int64
Others                                     688 non-null int64
Social, Finance, Analytics, Advertising    688 non-null int64
dtypes: int64(9), object(1)
memory usage: 53.8+ KB


In [43]:
mapping['category_list'] = mapping['category_list'].str.replace('0','na')
mapping

Unnamed: 0,category_list,Automotive & Sports,Blanks,Cleantech / Semiconductors,Entertainment,Health,Manufacturing,"News, Search and Messaging",Others,"Social, Finance, Analytics, Advertising"
0,,0,1,0,0,0,0,0,0,0
1,3D,0,0,0,0,0,1,0,0,0
2,3D Printing,0,0,0,0,0,1,0,0,0
3,3D Technology,0,0,0,0,0,1,0,0,0
4,Accounting,0,0,0,0,0,0,0,0,1
5,Active Lifestyle,0,0,0,0,1,0,0,0,0
6,Ad Targeting,0,0,0,0,0,0,0,0,1
7,Advanced Materials,0,0,0,0,0,1,0,0,0
8,Adventure Travel,1,0,0,0,0,0,0,0,0
9,Advertising,0,0,0,0,0,0,0,0,1


In [44]:
mapping.isnull().sum()

category_list                              1
Automotive & Sports                        0
Blanks                                     0
Cleantech / Semiconductors                 0
Entertainment                              0
Health                                     0
Manufacturing                              0
News, Search and Messaging                 0
Others                                     0
Social, Finance, Analytics, Advertising    0
dtype: int64

In [45]:
# Dropped the row containing category_list as NaN
mapping = mapping.dropna(subset=['category_list'])
mapping.isnull().sum()

category_list                              0
Automotive & Sports                        0
Blanks                                     0
Cleantech / Semiconductors                 0
Entertainment                              0
Health                                     0
Manufacturing                              0
News, Search and Messaging                 0
Others                                     0
Social, Finance, Analytics, Advertising    0
dtype: int64

In [46]:
mapping = mapping.set_index('category_list')
mapping['main_sector'] = mapping.idxmax(axis=1)
mapping

Unnamed: 0_level_0,Automotive & Sports,Blanks,Cleantech / Semiconductors,Entertainment,Health,Manufacturing,"News, Search and Messaging",Others,"Social, Finance, Analytics, Advertising",main_sector
category_list,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3D,0,0,0,0,0,1,0,0,0,Manufacturing
3D Printing,0,0,0,0,0,1,0,0,0,Manufacturing
3D Technology,0,0,0,0,0,1,0,0,0,Manufacturing
Accounting,0,0,0,0,0,0,0,0,1,"Social, Finance, Analytics, Advertising"
Active Lifestyle,0,0,0,0,1,0,0,0,0,Health
Ad Targeting,0,0,0,0,0,0,0,0,1,"Social, Finance, Analytics, Advertising"
Advanced Materials,0,0,0,0,0,1,0,0,0,Manufacturing
Adventure Travel,1,0,0,0,0,0,0,0,0,Automotive & Sports
Advertising,0,0,0,0,0,0,0,0,1,"Social, Finance, Analytics, Advertising"
Advertising Exchanges,0,0,0,0,0,0,0,0,1,"Social, Finance, Analytics, Advertising"


In [47]:
mapping = mapping.reset_index()
map_df = mapping[['category_list','main_sector']]
map_df.columns = ['primary_sector','main_sector']
map_df['primary_sector'] = map_df['primary_sector'].str.lower()
map_df

Unnamed: 0,primary_sector,main_sector
0,3d,Manufacturing
1,3d printing,Manufacturing
2,3d technology,Manufacturing
3,accounting,"Social, Finance, Analytics, Advertising"
4,active lifestyle,Health
5,ad targeting,"Social, Finance, Analytics, Advertising"
6,advanced materials,Manufacturing
7,adventure travel,Automotive & Sports
8,advertising,"Social, Finance, Analytics, Advertising"
9,advertising exchanges,"Social, Finance, Analytics, Advertising"


In [48]:
map_df.shape

(687, 2)

In [49]:
master_frame.shape

(94959, 15)

In [50]:
master_frame['primary_sector'] = master_frame['primary_sector'].str.lower()
merged_master_frame = pd.merge(master_frame,map_df,how='left',left_on='primary_sector', right_on='primary_sector')
merged_master_frame

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,company_permalink,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd,primary_sector,main_sector
0,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,05-01-2015,10000000.00,media,Entertainment
1,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,01-03-2014,700000.00,application platforms,"News, Search and Messaging"
2,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,30-01-2014,3406878.00,apps,"News, Search and Messaging"
3,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,19-03-2008,2000000.00,curated web,"News, Search and Messaging"
4,/organization/01games-technology,01Games Technology,http://www.01games.hk/,Games,operating,HKG,,Hong Kong,Hong Kong,/organization/01games-technology,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,undisclosed,01-07-2014,41250.00,games,Entertainment
5,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,/organization/0ndine-biomedical-inc,/funding-round/2b9d3ac293d5cdccbecff5c8cb0f327d,seed,11-09-2009,43360.00,biotechnology,Cleantech / Semiconductors
6,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,21-12-2009,719491.00,biotechnology,Cleantech / Semiconductors
7,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/383a9bd2c04f7038bb543ccef5ba3eae,seed,22-05-2013,3000000.00,analytics,"Social, Finance, Analytics, Advertising"
8,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/3bb2ee4a2d89251a10aaa735b1180e44,venture,09-11-2015,20000000.00,analytics,"Social, Finance, Analytics, Advertising"
9,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/ae2a174c06517c2394aed45006322a7e,venture,03-01-2013,1700000.00,analytics,"Social, Finance, Analytics, Advertising"


In [51]:
print(merged_master_frame.shape)
merged_master_frame.isnull().sum()

(94959, 16)


permalink                     0
name                          1
homepage_url               4332
category_list              1044
status                        0
country_code               5851
state_code                 7604
region                     7048
city                       7045
company_permalink             0
funding_round_permalink       0
funding_round_type            0
funded_at                     0
raised_amount_usd             0
primary_sector             1044
main_sector                1131
dtype: int64

In [52]:
# Find unique records which came as NaN but for them corresponding primary sector value is present
merged_master_frame[merged_master_frame['main_sector'].isnull() & ~merged_master_frame['primary_sector'].isnull()]['primary_sector'].unique().tolist()

['self development',
 'cause marketing',
 'real estate investors',
 'english-speaking',
 'toys',
 'generation y-z',
 'enterprise hardware',
 'social media advertising',
 'darknet',
 'natural gas uses',
 'internet technology',
 'nightlife',
 'adaptive equipment',
 'enterprise 2.0',
 'internet tv',
 'skill gaming',
 'racing',
 'specialty retail',
 'swimming',
 'registrars',
 'golf equipment',
 'biotechnology and semiconductor',
 'vacation rentals',
 'google glass',
 'rapidly expanding',
 'group email',
 'kinect',
 'spas',
 'product search',
 'sex industry',
 'psychology',
 'testing',
 'greentech',
 'retirement',
 'subscription businesses',
 'lingerie',
 'mobile emergency&health',
 'sponsorship',
 'deep information technology']

## Checkpoint 5: Sector Analysis 2

In [53]:
D1 = merged_master_frame[(merged_master_frame['country_code'] == 'USA') & (merged_master_frame['funding_round_type'] == 'venture') \
                         & (merged_master_frame['raised_amount_usd'] >= 5000000) & (merged_master_frame['raised_amount_usd'] <=15000000)]
D1['total_no_of_investment'] = D1.groupby(['main_sector'])['raised_amount_usd'].transform('count')
D1['total_amount_invested'] = D1.groupby(['main_sector'])['raised_amount_usd'].transform('sum')
D1

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,company_permalink,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd,primary_sector,main_sector,total_no_of_investment,total_amount_invested
10,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,/organization/0xdata,/funding-round/e1cfcbe1bdf4c70277c5f29a3482f24e,venture,19-07-2014,8900000.00,analytics,"Social, Finance, Analytics, Advertising",2714.00,23807376964.00
16,/organization/1-mainstream,1 Mainstream,http://www.1mainstream.com,Apps|Cable|Distribution|Software,acquired,USA,CA,SF Bay Area,Cupertino,/organization/1-mainstream,/funding-round/b952cbaf401f310927430c97b68162ea,venture,17-03-2015,5000000.00,apps,"News, Search and Messaging",1583.00,13971567428.00
78,/organization/128-technology,128 Technology,http://www.128technology.com/,Service Providers|Technology,operating,USA,MA,Boston,Burlington,/organization/128-technology,/funding-round/fb6216a30cb566ede89e0bee0623a634,venture,16-12-2014,11999347.00,service providers,Others,2950.00,26321007002.00
84,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,/organization/1366-technologies,/funding-round/424129ce1235cfab2655ee81305f7c2b,venture,15-10-2013,15000000.00,manufacturing,Manufacturing,799.00,7258553378.00
85,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,/organization/1366-technologies,/funding-round/6d3f3797371956ece035b8478c1441b2,venture,09-04-2015,5000000.00,manufacturing,Manufacturing,799.00,7258553378.00
86,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,/organization/1366-technologies,/funding-round/786f61aa9866f4471151285f5c56be36,venture,03-02-2010,5150000.00,manufacturing,Manufacturing,799.00,7258553378.00
87,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,/organization/1366-technologies,/funding-round/82ace97530965cd2be8f262836b43ff5,venture,27-03-2008,12400000.00,manufacturing,Manufacturing,799.00,7258553378.00
88,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,/organization/1366-technologies,/funding-round/ab99fc5a53717b1b53fd6aa5687c5fa9,venture,16-12-2010,6000000.00,manufacturing,Manufacturing,799.00,7258553378.00
103,/organization/170-systems,170 Systems,http://www.170systems.com,Software,acquired,USA,MA,Boston,Bedford,/organization/170-systems,/funding-round/b84bb882ca873f5fb96535671981196d,venture,16-04-2002,14000000.00,software,Others,2950.00,26321007002.00
109,/organization/17zuoye,17zuoye,http://www.17zuoye.com/,Education|Language Learning,operating,USA,VA,VA - Other,Shanghai,/organization/17zuoye,/funding-round/69690484f51e15bc27ff52bfe472cd96,venture,01-01-2011,5000000.00,education,Others,2950.00,26321007002.00


In [54]:
# Fiding out the Total number of investments(count) and Total amount of investment (USD) for each sector in descending order
D1_total_investment_sector_wise = D1[['main_sector','total_no_of_investment','total_amount_invested']].drop_duplicates().dropna().sort_values(['total_no_of_investment','total_amount_invested'],ascending=[False,False])
D1_total_investment_sector_wise

Unnamed: 0,main_sector,total_no_of_investment,total_amount_invested
78,Others,2950.0,26321007002.0
10,"Social, Finance, Analytics, Advertising",2714.0,23807376964.0
118,Cleantech / Semiconductors,2350.0,21633430822.0
16,"News, Search and Messaging",1583.0,13971567428.0
698,Health,909.0,8211859357.0
84,Manufacturing,799.0,7258553378.0
651,Entertainment,591.0,5099197982.0
349,Automotive & Sports,167.0,1454104361.0


In [55]:
# Company-wise investment for top sector('Others')
D1_total_investment_company_wise_top_sector = D1[D1.main_sector == D1_total_investment_sector_wise['main_sector'].iloc[0]].groupby('name')['raised_amount_usd'].sum().reset_index()
D1_total_investment_company_wise_top_sector = D1_total_investment_company_wise_top_sector.sort_values('raised_amount_usd',ascending=False)
D1_total_investment_company_wise_top_sector.head()

Unnamed: 0,name,raised_amount_usd
1877,Virtustream,64300000.0
313,Capella Photonics,54968051.0
57,AirTight Networks,54201907.0
1988,deCarta,52100000.0
221,Black Duck Software,51000000.0


In [56]:
# Company-wise investment for second-best sector('Social, Finance, Analytics, Advertising')
D1_total_investment_company_wise_second_best_sector = D1[D1.main_sector == D1_total_investment_sector_wise['main_sector'].iloc[1]].groupby('name')['raised_amount_usd'].sum().reset_index()
D1_total_investment_company_wise_second_best_sector = D1_total_investment_company_wise_second_best_sector.sort_values('raised_amount_usd',ascending=False)
D1_total_investment_company_wise_second_best_sector.head()

Unnamed: 0,name,raised_amount_usd
1321,SST Inc. (Formerly ShotSpotter),67933006.0
452,Demandbase,63000000.0
754,Intacct,61800000.0
1028,NetBase Solutions,60600000.0
890,Lotame,59700000.0


In [57]:
### Table 5.1
# 1. Total number of investments (count)
print(D1_total_investment_sector_wise['total_no_of_investment'].sum())

# 2. Total amount of investment (USD)
print(D1_total_investment_sector_wise['total_amount_invested'].sum())

# 3. Top sector (based on count of investments)
print(D1_total_investment_sector_wise['main_sector'].iloc[0])

# 4. Second-best sector (based on count of investments)
print(D1_total_investment_sector_wise['main_sector'].iloc[1])

# 5. Third-best sector (based on count of investments)
print(D1_total_investment_sector_wise['main_sector'].iloc[2])

# 6. Number of investments in the top sector (refer to point 3)
print(D1_total_investment_sector_wise.iloc[0,1])

# 7. Number of investments in the second-best sector (refer to point 4)
print(D1_total_investment_sector_wise.iloc[1,1])

# 8. Number of investments in the third-best sector (refer to point 5)
print(D1_total_investment_sector_wise.iloc[2,1])

# 9. For the top sector count-wise (point 3), which company received the highest investment?
print(D1_total_investment_company_wise_top_sector['name'].iloc[0])

# 10.For the second-best sector count-wise (point 4), which company received the highest investment?
print(D1_total_investment_company_wise_second_best_sector['name'].iloc[0])


12063.0
107757097294.0
Others
Social, Finance, Analytics, Advertising
Cleantech / Semiconductors
2950.0
2714.0
2350.0
Virtustream
SST Inc. (Formerly ShotSpotter)


In [58]:
D2 = merged_master_frame[(merged_master_frame['country_code'] == 'GBR') & (merged_master_frame['funding_round_type'] == 'venture') \
                         & (merged_master_frame['raised_amount_usd'] >= 5000000) & (merged_master_frame['raised_amount_usd'] <=15000000)]
D2['total_no_of_investment'] = D2.groupby(['main_sector'])['raised_amount_usd'].transform('count')
D2['total_amount_invested'] = D2.groupby(['main_sector'])['raised_amount_usd'].transform('sum')
D2

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,company_permalink,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd,primary_sector,main_sector,total_no_of_investment,total_amount_invested
309,/organization/365scores,365Scores,http://biz.365scores.com,Android|Apps|iPhone|Mobile|Sports,operating,GBR,H9,London,London,/organization/365scores,/funding-round/48212f931f542fdef78810bc87aef086,venture,29-09-2014,5500000.00,android,"Social, Finance, Analytics, Advertising",133.00,1089404014.00
671,/organization/7digital,7digital,http://about.7digital.com,Content Creators|Content Delivery|Licensing|Mu...,acquired,GBR,H9,London,London,/organization/7digital,/funding-round/b5ad7ed7baddd3974bd51403f17dd88f,venture,01-01-2008,8468328.00,content creators,Entertainment,56.00,482784687.00
672,/organization/7digital,7digital,http://about.7digital.com,Content Creators|Content Delivery|Licensing|Mu...,acquired,GBR,H9,London,London,/organization/7digital,/funding-round/eafacfcceb1fbc4fd605f641b603313e,venture,19-10-2012,10000000.00,content creators,Entertainment,56.00,482784687.00
730,/organization/90min,90min,http://www.90min.com,Media|News|Publishing|Soccer|Sports,operating,GBR,H9,London,London,/organization/90min,/funding-round/21a2cbf6f2fb2a1c2a61e04bf930dfe6,venture,06-10-2015,15000000.00,media,Entertainment,56.00,482784687.00
731,/organization/90min,90min,http://www.90min.com,Media|News|Publishing|Soccer|Sports,operating,GBR,H9,London,London,/organization/90min,/funding-round/bd626ed022f5c66574b1afe234f3c90d,venture,07-05-2013,5800000.00,media,Entertainment,56.00,482784687.00
907,/organization/abcodia,Abcodia,http://abcodia.com,Biotechnology,operating,GBR,H9,London,London,/organization/abcodia,/funding-round/3d20c23d203134ed86c0d1b2bec288b2,venture,18-05-2015,8259067.00,biotechnology,Cleantech / Semiconductors,130.00,1163990056.00
1065,/organization/acacia-pharma,Acacia Pharma,http://www.acaciapharma.com,Biotechnology,operating,GBR,C3,London,Cambridge,/organization/acacia-pharma,/funding-round/26eac8a3875e1a7bc68d36ceb7a71b05,venture,31-03-2011,10000000.00,biotechnology,Cleantech / Semiconductors,130.00,1163990056.00
1088,/organization/acal-energy,ACAL Energy,http://www.acalenergy.co.uk,Clean Technology,operating,GBR,C5,Runcorn,Runcorn,/organization/acal-energy,/funding-round/16817efd4bbef10f63cccdf158f47a41,venture,15-06-2011,9910333.00,clean technology,Cleantech / Semiconductors,130.00,1163990056.00
1091,/organization/acal-enterprise-solutions,Acal Enterprise Solutions,http://acalenterprisesolutions.com,Information Technology,operating,GBR,J8,Nottingham,Nottingham,/organization/acal-enterprise-solutions,/funding-round/0f9a693d9686330c5c2724215e0048e2,venture,03-06-2014,10720178.00,information technology,"Social, Finance, Analytics, Advertising",133.00,1089404014.00
1166,/organization/accent-media-ltd,Accent Media Limited,http://accent.media,Domains|Internet|Ticketing,operating,GBR,H9,London,London,/organization/accent-media-ltd,/funding-round/9dc643fa45031a46ffcfaa061d94e3e3,venture,01-07-2013,8300000.00,domains,"News, Search and Messaging",73.00,615746235.00


In [59]:
# Fiding out the Total number of investments(count) and Total amount of investment (USD) for each sector in descending order
D2_total_investment_sector_wise = D2[['main_sector','total_no_of_investment','total_amount_invested']].drop_duplicates().dropna().sort_values(['total_no_of_investment','total_amount_invested'],ascending=[False,False])
D2_total_investment_sector_wise

Unnamed: 0,main_sector,total_no_of_investment,total_amount_invested
1462,Others,147.0,1283624289.0
309,"Social, Finance, Analytics, Advertising",133.0,1089404014.0
907,Cleantech / Semiconductors,130.0,1163990056.0
1166,"News, Search and Messaging",73.0,615746235.0
671,Entertainment,56.0,482784687.0
4986,Manufacturing,42.0,361940335.0
1295,Health,24.0,214537510.0
7591,Automotive & Sports,16.0,167051565.0


In [60]:
# Company-wise investment for top sector('Others')
D2_total_investment_company_wise_top_sector = D2[D2.main_sector == D2_total_investment_sector_wise['main_sector'].iloc[0]].groupby('name')['raised_amount_usd'].sum().reset_index()
D2_total_investment_company_wise_top_sector = D2_total_investment_company_wise_top_sector.sort_values('raised_amount_usd',ascending=False)
D2_total_investment_company_wise_top_sector.head()

Unnamed: 0,name,raised_amount_usd
31,Electric Cloud,37000000.0
83,SenSage,36250000.0
32,Enigmatec,32500000.0
84,SilverRail Technologies,29000000.0
66,OpenCloud,27972766.0


In [61]:
# Company-wise investment for second-best sector('Social, Finance, Analytics, Advertising')
D2_total_investment_company_wise_second_best_sector = D2[D2.main_sector == D2_total_investment_sector_wise['main_sector'].iloc[1]].groupby('name')['raised_amount_usd'].sum().reset_index()
D2_total_investment_company_wise_second_best_sector = D2_total_investment_company_wise_second_best_sector.sort_values('raised_amount_usd',ascending=False)
D2_total_investment_company_wise_second_best_sector.head()

Unnamed: 0,name,raised_amount_usd
18,Celltick Technologies,37500000.0
101,myThings,34000000.0
94,Zopa,32900000.0
88,VisualDNA,28550000.0
44,MarketInvoice,25553007.0


In [62]:
### Table 5.1
# 1. Total number of investments (count)
print(D2_total_investment_sector_wise['total_no_of_investment'].sum())

# 2. Total amount of investment (USD)
print(D2_total_investment_sector_wise['total_amount_invested'].sum())

# 3. Top sector (based on count of investments)
print(D2_total_investment_sector_wise['main_sector'].iloc[0])

# 4. Second-best sector (based on count of investments)
print(D2_total_investment_sector_wise['main_sector'].iloc[1])

# 5. Third-best sector (based on count of investments)
print(D2_total_investment_sector_wise['main_sector'].iloc[2])

# 6. Number of investments in the top sector (refer to point 3)
print(D2_total_investment_sector_wise.iloc[0,1])

# 7. Number of investments in the second-best sector (refer to point 4)
print(D2_total_investment_sector_wise.iloc[1,1])

# 8. Number of investments in the third-best sector (refer to point 5)
print(D2_total_investment_sector_wise.iloc[2,1])

# 9. For the top sector count-wise (point 3), which company received the highest investment?
print(D2_total_investment_company_wise_top_sector['name'].iloc[0])

# 10.For the second-best sector count-wise (point 4), which company received the highest investment?
print(D2_total_investment_company_wise_second_best_sector['name'].iloc[0])

621.0
5379078691.0
Others
Social, Finance, Analytics, Advertising
Cleantech / Semiconductors
147.0
133.0
130.0
Electric Cloud
Celltick Technologies


In [63]:
D3 = merged_master_frame[(merged_master_frame['country_code'] == 'IND') & (merged_master_frame['funding_round_type'] == 'venture') \
                         & (merged_master_frame['raised_amount_usd'] >= 5000000) & (merged_master_frame['raised_amount_usd'] <=15000000)]
D3['total_no_of_investment'] = D3.groupby(['main_sector'])['raised_amount_usd'].transform('count')
D3['total_amount_invested'] = D3.groupby(['main_sector'])['raised_amount_usd'].transform('sum')
D3

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,company_permalink,funding_round_permalink,funding_round_type,funded_at,raised_amount_usd,primary_sector,main_sector,total_no_of_investment,total_amount_invested
0,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,05-01-2015,10000000.00,media,Entertainment,33.00,280830000.00
178,/organization/21diamonds-india,21Diamonds,http://www.21diamonds.de,E-Commerce,operating,IND,10,New Delhi,Gurgaon,/organization/21diamonds-india,/funding-round/6de7ffef8091ba9f33821f4b861f434a,venture,15-11-2012,6369507.00,e-commerce,Others,110.00,1013409507.00
810,/organization/a-little-world,A LITTLE WORLD,http://alittleworld.com,Finance,operating,IND,16,Mumbai,Mumbai,/organization/a-little-world,/funding-round/18d98f82ed392b1609975b81f3e8b3fb,venture,09-09-2008,6410000.00,finance,"Social, Finance, Analytics, Advertising",60.00,550549550.00
2051,/organization/adlabs-imagica,Adlabs Imagica,http://www.adlabsimagica.com,Entertainment|Tourism,operating,IND,16,IND - Other,Khopoli,/organization/adlabs-imagica,/funding-round/508d3c83daaae9fda3ba6f9682c78f6c,venture,28-10-2014,8180000.00,entertainment,Entertainment,33.00,280830000.00
2849,/organization/agile,Agile,http://www.agile-ft.com,Finance|Finance Technology|FinTech|Insurance,operating,IND,16,Mumbai,Mumbai,/organization/agile,/funding-round/cd3dd1c98ce9d0f632d8752163941674,venture,01-05-2011,5740000.00,finance,"Social, Finance, Analytics, Advertising",60.00,550549550.00
3376,/organization/akosha,Akosha,http://www.akosha.com,Consumer Internet|Digital Media|Enterprise Sof...,operating,IND,7,New Delhi,New Delhi,/organization/akosha,/funding-round/908a8813e2273a2bc604bf40a45c15ff,venture,01-07-2014,5000000.00,consumer internet,"Social, Finance, Analytics, Advertising",60.00,550549550.00
4205,/organization/amagi-media-labs,Amagi Media Labs,http://amagi.com,Advertising,operating,IND,19,Bangalore,Bangalore,/organization/amagi-media-labs,/funding-round/f244a91cc714317f6fbbc80dcc1d5135,venture,17-06-2013,5500000.00,advertising,"Social, Finance, Analytics, Advertising",60.00,550549550.00
4419,/organization/ameyo,Ameyo,http://www.ameyo.com/,Software,operating,IND,10,New Delhi,Gurgaon,/organization/ameyo,/funding-round/81b50a403d5d2293715fe9b0ce4db5d3,venture,03-07-2015,5000000.00,software,Others,110.00,1013409507.00
4856,/organization/ani-technologies,Ola,http://www.olacabs.com,Automotive|E-Commerce|Internet|Mobile|Mobile C...,operating,IND,28,Kolkata,Kolkata,/organization/ani-technologies,/funding-round/1e2b54335e2a41d8d7db25b7c11db399,venture,10-04-2012,5000000.00,automotive,Automotive & Sports,13.00,136900000.00
4911,/organization/annapurna-microfinace,Annapurna Microfinace,http://ampl.net.in,Finance,operating,IND,21,Bhubaneswar,Bhubaneswar,/organization/annapurna-microfinace,/funding-round/3f03bc9fea4ae59b1ce8c86a0782107e,venture,26-03-2014,5000000.00,finance,"Social, Finance, Analytics, Advertising",60.00,550549550.00


In [64]:
# Fiding out the Total number of investments(count) and Total amount of investment (USD) for each sector in descending order
D3_total_investment_sector_wise = D3[['main_sector','total_no_of_investment','total_amount_invested']].drop_duplicates().dropna().sort_values(['total_no_of_investment','total_amount_invested'],ascending=[False,False])
D3_total_investment_sector_wise

Unnamed: 0,main_sector,total_no_of_investment,total_amount_invested
178,Others,110.0,1013409507.0
810,"Social, Finance, Analytics, Advertising",60.0,550549550.0
8335,"News, Search and Messaging",52.0,433834545.0
0,Entertainment,33.0,280830000.0
8513,Manufacturing,21.0,200900000.0
7161,Cleantech / Semiconductors,20.0,165380000.0
11756,Health,19.0,167740000.0
4856,Automotive & Sports,13.0,136900000.0


In [65]:
# Company-wise investment for top sector('Others')
D3_total_investment_company_wise_top_sector = D3[D3.main_sector == D3_total_investment_sector_wise['main_sector'].iloc[0]].groupby('name')['raised_amount_usd'].sum().reset_index()
D3_total_investment_company_wise_top_sector = D3_total_investment_company_wise_top_sector.sort_values('raised_amount_usd',ascending=False)
D3_total_investment_company_wise_top_sector.head()

Unnamed: 0,name,raised_amount_usd
25,FirstCry.com,39000000.0
43,Myntra,38000000.0
15,CommonFloor,32900000.0
54,Pepperfry.com,28000000.0
35,ItzCash Card Ltd.,25000000.0


In [66]:
# Company-wise investment for second-best sector('Social, Finance, Analytics, Advertising')
D3_total_investment_company_wise_second_best_sector = D3[D3.main_sector == D3_total_investment_sector_wise['main_sector'].iloc[1]].groupby('name')['raised_amount_usd'].sum().reset_index()
D3_total_investment_company_wise_second_best_sector = D3_total_investment_company_wise_second_best_sector.sort_values('raised_amount_usd',ascending=False)
D3_total_investment_company_wise_second_best_sector.head()

Unnamed: 0,name,raised_amount_usd
28,Manthan Systems,50700000.0
25,Komli Media,28000000.0
38,ShopClues.com,25000000.0
46,inTarvo,21900000.0
21,Grameen Financial Services,21556050.0


In [250]:
### Table 5.1
# 1. Total number of investments (count)
print(D3_total_investment_sector_wise['total_no_of_investment'].sum())

# 2. Total amount of investment (USD)
print(D3_total_investment_sector_wise['total_amount_invested'].sum())

# 3. Top sector (based on count of investments)
print(D3_total_investment_sector_wise['main_sector'].iloc[0])

# 4. Second-best sector (based on count of investments)
print(D3_total_investment_sector_wise['main_sector'].iloc[1])

# 5. Third-best sector (based on count of investments)
print(D3_total_investment_sector_wise['main_sector'].iloc[2])

# 6. Number of investments in the top sector (refer to point 3)
print(D3_total_investment_sector_wise.iloc[0,1])

# 7. Number of investments in the second-best sector (refer to point 4)
print(D3_total_investment_sector_wise.iloc[1,1])

# 8. Number of investments in the third-best sector (refer to point 5)
print(D3_total_investment_sector_wise.iloc[2,1])

# 9. For the top sector count-wise (point 3), which company received the highest investment?
print(D3_total_investment_company_wise_top_sector['name'].iloc[0])

# 10.For the second-best sector count-wise (point 4), which company received the highest investment?
print(D3_total_investment_company_wise_second_best_sector['name'].iloc[0])

328.0
2949543602.0
Others
Social, Finance, Analytics, Advertising
News, Search and Messaging
110.0
60.0
52.0
FirstCry.com
Manthan Systems
