## Group Project - Investment Case Study
   #### Group Members:
    -  Vinayak Bandhu
    -  Sarathbabu Sankaran
    -  Puneet Agarwal
    -  Rakesh Gorai

# Checkpoint 1 : Data Cleaning

In [504]:
# Supress Warnings

import warnings
warnings.filterwarnings('ignore')

In [505]:
import pandas as pd
import numpy as np

###  1.1:  Importing data from companies and rounds2 datasets

In [506]:
#importing data from companies.txt
companies = pd.read_csv("companies.txt",sep='\t', engine = "python")

#remove unwanted characters
companies.loc[:,'permalink'] = companies['permalink'].str.encode('utf-8').str.decode('ascii', 'ignore').str.lower()
companies.loc[:,'name'] = companies['name'].str.encode('utf-8').str.decode('ascii', 'ignore').str.lower()

#importing data from rounds2.csv
rounds2   = pd.read_csv("rounds2.csv",engine = "python")

#remove unwanted characters
rounds2.loc[:,'company_permalink'] = rounds2['company_permalink'].str.encode('utf-8').str.decode('ascii','ignore').str.lower()


 ### 1.2:  Counting unique values and Inspecting

In [507]:
#count number of unique rows in companies and rounds2 dataframes
print(companies.loc[:,"permalink"].nunique())
print(rounds2.loc[:,"company_permalink"].nunique())

66368
66368


In [508]:
#inspect the companies dataframe 
print(companies.shape)
print(companies.info())
print(companies.describe())

(66368, 10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66368 entries, 0 to 66367
Data columns (total 10 columns):
permalink        66368 non-null object
name             66367 non-null object
homepage_url     61310 non-null object
category_list    63220 non-null object
status           66368 non-null object
country_code     59410 non-null object
state_code       57821 non-null object
region           58338 non-null object
city             58340 non-null object
founded_at       51147 non-null object
dtypes: object(10)
memory usage: 5.1+ MB
None
                                permalink   name                homepage_url  \
count                               66368  66367                       61310   
unique                              66368  66037                       61191   
top     /organization/kezar-life-sciences  spire  http://www.askforoffer.com   
freq                                    1      4                           5   

       category_list     status country_co

In [509]:
#inspect the rounds2 dataframe
print(rounds2.shape)
print(rounds2.info())
print(rounds2.describe(include = 'all'))

(114949, 6)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114949 entries, 0 to 114948
Data columns (total 6 columns):
company_permalink          114949 non-null object
funding_round_permalink    114949 non-null object
funding_round_type         114949 non-null object
funding_round_code         31140 non-null object
funded_at                  114949 non-null object
raised_amount_usd          94959 non-null float64
dtypes: float64(1), object(5)
memory usage: 5.3+ MB
None
               company_permalink  \
count                     114949   
unique                     66368   
top     /organization/solarflare   
freq                          19   
mean                         NaN   
std                          NaN   
min                          NaN   
25%                          NaN   
50%                          NaN   
75%                          NaN   
max                          NaN   

                                funding_round_permalink funding_round_type  \
count      

In [510]:
#checking for companies which are not in rounds2 dataframe
companies.loc[~companies['permalink'].isin(rounds2['company_permalink']),:]

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at


### 1.3: Merging the companies and rounds2 dataframe to create master_frame dataframe

In [511]:
# merge the companies and rounds2 dataframe. Since the name of common column in both the dataframe is not same,
# the name of company_permalink is renamed to permalink to remove the duplication of column in master_frame
master_frame = pd.merge(companies,rounds2.rename(columns={'company_permalink':'permalink'}), how = 'inner', on = 'permalink' )

In [512]:
#inspect the master frame
print(master_frame.shape)
print(master_frame.info())
print(master_frame.describe(include = 'all'))

(114949, 15)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 114949 entries, 0 to 114948
Data columns (total 15 columns):
permalink                  114949 non-null object
name                       114948 non-null object
homepage_url               108815 non-null object
category_list              111539 non-null object
status                     114949 non-null object
country_code               106271 non-null object
state_code                 104003 non-null object
region                     104782 non-null object
city                       104785 non-null object
founded_at                 94428 non-null object
funding_round_permalink    114949 non-null object
funding_round_type         114949 non-null object
funding_round_code         31140 non-null object
funded_at                  114949 non-null object
raised_amount_usd          94959 non-null float64
dtypes: float64(1), object(14)
memory usage: 14.0+ MB
None
                       permalink                       name  \
count 

In [513]:
# Checking the null count in each column
master_frame.isnull().sum()

permalink                      0
name                           1
homepage_url                6134
category_list               3410
status                         0
country_code                8678
state_code                 10946
region                     10167
city                       10164
founded_at                 20521
funding_round_permalink        0
funding_round_type             0
funding_round_code         83809
funded_at                      0
raised_amount_usd          19990
dtype: int64

In [514]:
#checking the null count in each row
master_frame.isnull().sum(axis = 1)

0         1
1         1
2         1
3         5
4         0
5         2
6         3
7         1
8         1
9         1
10        0
11        1
12        0
13        1
14        2
15        1
16        1
17        4
18        2
19        2
20        1
21        1
22        1
23        2
24        1
25        1
26        1
27        1
28        0
29        1
         ..
114919    1
114920    0
114921    1
114922    1
114923    1
114924    1
114925    1
114926    1
114927    2
114928    1
114929    1
114930    1
114931    0
114932    2
114933    2
114934    2
114935    2
114936    1
114937    0
114938    1
114939    1
114940    1
114941    1
114942    1
114943    1
114944    1
114945    1
114946    6
114947    1
114948    6
Length: 114949, dtype: int64

In [515]:
#Checking the %age of null in each column
round(100* master_frame.isnull().sum()/len(master_frame.index),2)

permalink                   0.00
name                        0.00
homepage_url                5.34
category_list               2.97
status                      0.00
country_code                7.55
state_code                  9.52
region                      8.84
city                        8.84
founded_at                 17.85
funding_round_permalink     0.00
funding_round_type          0.00
funding_round_code         72.91
funded_at                   0.00
raised_amount_usd          17.39
dtype: float64

In [516]:
#master_frame_temp = master_frame.drop(['funding_round_code','homepage_url','founded_at', 'funded_at', 'funding_round_permalink'],axis=1,inplace=False)
#round(100*(master_frame_temp.isnull().sum()/len(master_frame_temp.index)), 2)

Since raised_amount_usd has 17.39 percentage of null values and the investment analysis requires funding amount.
Hence dropping these rows.

In [517]:
#dropping the rows which have null value for raised_amount_usd
#master_frame_temp = master_frame_temp[~np.isnan(master_frame_temp['raised_amount_usd'])]
master_frame_temp = master_frame[~np.isnan(master_frame['raised_amount_usd'])]

In [518]:
round(100*(master_frame_temp.isnull().sum()/len(master_frame_temp.index)), 2)

permalink                   0.00
name                        0.00
homepage_url                4.56
category_list               1.10
status                      0.00
country_code                6.16
state_code                  8.01
region                      7.42
city                        7.42
founded_at                 16.81
funding_round_permalink     0.00
funding_round_type          0.00
funding_round_code         70.34
funded_at                   0.00
raised_amount_usd           0.00
dtype: float64

In [519]:
#master_frame_temp = master_frame_temp[master_frame_temp.isnull().sum(axis=1) <5]

In [520]:
#round(100*(master_frame_temp.isnull().sum()/len(master_frame_temp.index)), 2)

#### Change the unit of columns:
Convert the unit of the `raised_amount_usd` column from `USD` to `million USD`.

In [521]:
master_frame_temp['raised_amount_mn_usd'] = master_frame_temp['raised_amount_usd'].apply(lambda x : x/1000000)

# Checkpoint 2: Funding Type Analysis

### 2.1: Average funding in Investment Types

In [522]:
group_by_funding_round_type = master_frame_temp.groupby('funding_round_type')
average_frtype = pd.DataFrame(group_by_funding_round_type['raised_amount_mn_usd'].mean())

In [523]:
average_frtype.loc[['venture','angel','seed','private_equity']].sort_values(by = 'raised_amount_mn_usd', ascending = False)

Unnamed: 0_level_0,raised_amount_mn_usd
funding_round_type,Unnamed: 1_level_1
private_equity,73.308593
venture,11.748949
angel,0.958694
seed,0.719818


Here the `venture` is the second highest investment type with raised_amount_mn_usd and falls under the 5 to 15 M USD window of Spark Funds. Hence this is the most suitable investment type for Spark Funds.  

In [535]:
master_frame_country = master_frame_temp.loc[master_frame_temp['funding_round_type'] == 'venture', :]

In [536]:
#Check the null percentage in each column in the master_frame_country dataframe
round(100*(master_frame_country.isnull().sum()/len(master_frame_country.index)), 2)

permalink                   0.00
name                        0.00
homepage_url                5.19
category_list               1.00
status                      0.00
country_code                4.21
state_code                  5.70
region                      5.40
city                        5.40
founded_at                 19.17
funding_round_permalink     0.00
funding_round_type          0.00
funding_round_code         44.17
funded_at                   0.00
raised_amount_usd           0.00
raised_amount_mn_usd        0.00
dtype: float64

In [537]:
#Check the count null values in each column in master_frame_country dataframe
master_frame_country.isnull().sum()

permalink                      0
name                           0
homepage_url                2608
category_list                502
status                         0
country_code                2117
state_code                  2861
region                      2713
city                        2713
founded_at                  9631
funding_round_permalink        0
funding_round_type             0
funding_round_code         22185
funded_at                      0
raised_amount_usd              0
raised_amount_mn_usd           0
dtype: int64

In [538]:
#Checking the number of rows for which all the state_code, region, city and country_code columns are null
len(master_frame_country.loc[(master_frame_temp['region'].isnull() & master_frame_temp['city'].isnull() & master_frame_temp['state_code'].isnull() & master_frame_temp['country_code'].isnull())].index)

2117

Since the number of rows for which all the state_code, region, city and country_code columns are null is `2117` which is same as the number of number of rows with null value in country_code. Hence it is not possible to impute date from any of these columns to country_code. Hence dropping the country_code rows with null value rather than imputing it with the mode


# Checkpoint 3: Country Analysis

### 3.1: Cleaning the dataset for unvailable country code

In [551]:
#dropping the rows with null values in country_code column.
master_frame_country = master_frame_country[~(master_frame_country['country_code'].isnull())]

In [552]:
#Checking the number of null values in each column in master_frame_country dataframe.
master_frame_country.isnull().sum()

permalink                      0
name                           0
homepage_url                2175
category_list                302
status                         0
country_code                   0
state_code                   744
region                       596
city                         596
founded_at                  8551
funding_round_permalink        0
funding_round_type             0
funding_round_code         21294
funded_at                      0
raised_amount_usd              0
raised_amount_mn_usd           0
dtype: int64

In [553]:
#Checking the percentage of null values in each column in master_frame_country dataframe.
round(100*(master_frame_country.isnull().sum()/len(master_frame_country.index)), 2)

permalink                   0.00
name                        0.00
homepage_url                4.52
category_list               0.63
status                      0.00
country_code                0.00
state_code                  1.55
region                      1.24
city                        1.24
founded_at                 17.77
funding_round_permalink     0.00
funding_round_type          0.00
funding_round_code         44.26
funded_at                   0.00
raised_amount_usd           0.00
raised_amount_mn_usd        0.00
dtype: float64

In [554]:
#master_frame_country = master_frame_country.drop(['state_code','region', 'city'],axis=1,inplace=False)
#round(100*(master_frame_country.isnull().sum()/len(master_frame_country.index)), 2)

In [555]:
#master_frame_country.isnull().sum()


### 3.2: Grouping countries on the basis of Total funding

In [556]:
#Grouping the dataframe by country_code and calculating the total investment in each country
group_by_country = master_frame_country.groupby('country_code')

#Sorting the countries based on the total investment amount from top to down
total_amount_per_country = pd.DataFrame(group_by_country['raised_amount_mn_usd'].sum()).sort_values(by = "raised_amount_mn_usd",ascending = False)

In [557]:
#Finding the top 9 countries with highest amount of investment
top9 = total_amount_per_country.iloc[:9,:]
top9

Unnamed: 0_level_0,raised_amount_mn_usd
country_code,Unnamed: 1_level_1
USA,422510.842796
CHN,39835.418773
GBR,20245.627416
IND,14391.858718
CAN,9583.332317
FRA,7259.536732
ISR,6907.514579
DEU,6346.959822
JPN,3363.676611


# Checkpoint 4: Sector Analysis 1

In the top9 dataframe, the top 3 countries with English as an official language are `USA`, `GBR` and `IND`.

In [558]:
#Creating a new data frame sector_df with the 3 countries USA, GBR and IND
sector_df = master_frame_country.loc[(master_frame_country['country_code'] == 'USA') | (master_frame_country['country_code'] == 'GBR') | (master_frame_country['country_code'] == 'IND')]


In [559]:
sector_df.isnull().sum()

permalink                      0
name                           0
homepage_url                1857
category_list                215
status                         0
country_code                   0
state_code                    73
region                       158
city                         158
founded_at                  6326
funding_round_permalink        0
funding_round_type             0
funding_round_code         17816
funded_at                      0
raised_amount_usd              0
raised_amount_mn_usd           0
dtype: int64

In [560]:
round(100*(sector_df.isnull().sum()/len(sector_df.index)), 2)

permalink                   0.00
name                        0.00
homepage_url                4.76
category_list               0.55
status                      0.00
country_code                0.00
state_code                  0.19
region                      0.40
city                        0.40
founded_at                 16.21
funding_round_permalink     0.00
funding_round_type          0.00
funding_round_code         45.66
funded_at                   0.00
raised_amount_usd           0.00
raised_amount_mn_usd        0.00
dtype: float64

In [561]:
#Import mapping.csv file
mapping = pd.read_csv('mapping.csv')
len(mapping.index)

688

In [563]:
#Extracting primary sectors from category_list and adding a new column 'primary_sector' with these values.
sector_primary = sector_df['category_list'].str.split('|',  expand = True)
sector_df['primary_sector'] = sector_primary.iloc[:,0]


In [564]:
mapping.iloc[:,0]

0                             NaN
1                              3D
2                     3D Printing
3                   3D Technology
4                      Accounting
5                Active Lifestyle
6                    Ad Targeting
7              Advanced Materials
8                Adventure Travel
9                     Advertising
10          Advertising Exchanges
11           Advertising Networks
12          Advertising Platforms
13                         Advice
14                      Aerospace
15                    Agriculture
16          Air Pollution Control
17                     Algorithms
18                    All Markets
19                   All Students
20            Alter0tive Medicine
21                         Alumni
22                       A0lytics
23                        Android
24                         Angels
25                    Animal Feed
26     Anything Capital Intensive
27                  App Discovery
28                  App Marketing
29            

In [565]:
#Cleaning the mapping dataframe. The 'na' in each category is replaced with '0' in the dataframe.
#Hence changing the '0' to 'na' again.
mapping['category_list'] = mapping['category_list'].str.replace('0', 'na', regex=False)

In [347]:
mapping['category_list']

0                             NaN
1                              3D
2                     3D Printing
3                   3D Technology
4                      Accounting
5                Active Lifestyle
6                    Ad Targeting
7              Advanced Materials
8                Adventure Travel
9                     Advertising
10          Advertising Exchanges
11           Advertising Networks
12          Advertising Platforms
13                         Advice
14                      Aerospace
15                    Agriculture
16          Air Pollution Control
17                     Algorithms
18                    All Markets
19                   All Students
20           Alternative Medicine
21                         Alumni
22                      Analytics
23                        Android
24                         Angels
25                    Animal Feed
26     Anything Capital Intensive
27                  App Discovery
28                  App Marketing
29            

In [566]:
#normalizing the mapping dataframe so that merge operation can be done appropriately.
mapping = pd.melt(mapping, id_vars = ["category_list"], var_name = "main_sector")

In [567]:
#Removing the unwanted rows which doesn't provide ant mapping information
mapping = mapping[~(mapping['value'] == 0)]

#Dropping the value column, since it is not useful.
mapping.drop(['value'],axis=1,inplace=True)

In [568]:
len(mapping.index)

688

In [569]:
#merging the sector data with main sector mapping
sector_df = pd.merge(sector_df,mapping.rename(columns={'category_list':'primary_sector'}), how = 'inner', on = 'primary_sector' )

In [570]:
sector_df

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd,raised_amount_mn_usd,primary_sector,main_sector
0,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10000000.0,10.000000,Media,Entertainment
1,/organization/90min,90min,http://www.90min.com,Media|News|Publishing|Soccer|Sports,operating,GBR,H9,London,London,01-01-2011,/funding-round/21a2cbf6f2fb2a1c2a61e04bf930dfe6,venture,,06-10-2015,15000000.0,15.000000,Media,Entertainment
2,/organization/90min,90min,http://www.90min.com,Media|News|Publishing|Soccer|Sports,operating,GBR,H9,London,London,01-01-2011,/funding-round/bd626ed022f5c66574b1afe234f3c90d,venture,,07-05-2013,5800000.0,5.800000,Media,Entertainment
3,/organization/90min,90min,http://www.90min.com,Media|News|Publishing|Soccer|Sports,operating,GBR,H9,London,London,01-01-2011,/funding-round/fd4b15e8c97ee2ffc0acccdbe1a98810,venture,,26-03-2014,18000000.0,18.000000,Media,Entertainment
4,/organization/all-def-digital,all def digital,http://alldefdigital.com,Media,operating,USA,CA,Los Angeles,Los Angeles,,/funding-round/452a2342fe720285c3b92e9bd927d9ba,venture,A,06-08-2014,5000000.0,5.000000,Media,Entertainment
5,/organization/audiosocket,audiosocket,http://audiosocket.com,Media|Music|Technology,operating,USA,WA,Seattle,Seattle,01-05-2008,/funding-round/0959f59e3a89bc8db8c6ed9c646349fd,venture,,02-05-2013,650000.0,0.650000,Media,Entertainment
6,/organization/audiosocket,audiosocket,http://audiosocket.com,Media|Music|Technology,operating,USA,WA,Seattle,Seattle,01-05-2008,/funding-round/4e3a53eec6c609943df752206070b23f,venture,A,02-09-2009,750000.0,0.750000,Media,Entertainment
7,/organization/audiosocket,audiosocket,http://audiosocket.com,Media|Music|Technology,operating,USA,WA,Seattle,Seattle,01-05-2008,/funding-round/bc0b412cf2fa7b739e4105ad756b47a4,venture,,15-02-2012,1582300.0,1.582300,Media,Entertainment
8,/organization/babble,babble,http://www.babble.com,Media|News|Parenting,acquired,USA,NY,New York City,New York,01-12-2006,/funding-round/43cc678b39be3e684bb88501af05ff9b,venture,A,05-02-2009,2000000.0,2.000000,Media,Entertainment
9,/organization/babble,babble,http://www.babble.com,Media|News|Parenting,acquired,USA,NY,New York City,New York,01-12-2006,/funding-round/5fdb3dbeeb6eea0243702adc372130f3,venture,A,12-05-2009,1250000.0,1.250000,Media,Entertainment


# Checkpoint 5: Sector Analysis 2

In [574]:
#Data frame D1 for country USA and funding amount between 5 and 15 M USD.
#Two new columns are added Total Amount Invested and Total count of Investments
D1 = sector_df.loc[(sector_df['country_code'] == 'USA') & (sector_df['raised_amount_mn_usd'] >= 5) & (sector_df['raised_amount_mn_usd'] <= 15)]
D1['Total Amount Invested'] = D1_groupBy['raised_amount_mn_usd'].transform('sum')
D1['Total count of Investments'] = D1_groupBy['raised_amount_mn_usd'].transform('count')

In [None]:
#Data frame D2 for country GBR and funding amount between 5 and 15 M USD.
#Two new columns are added Total Amount Invested and Total count of Investments
D2 = sector_df.loc[(sector_df['country_code'] == 'GBR') & (sector_df['raised_amount_mn_usd'] >= 5) & (sector_df['raised_amount_mn_usd'] <= 15)]
D2['Total Amount Invested'] = D2_groupBy['raised_amount_mn_usd'].transform('sum')
D2['Total count of Investments'] = D2_groupBy['raised_amount_mn_usd'].transform('count')
D2

In [None]:
#Data frame D3 for country IND and funding amount between 5 and 15 M USD.
#Two new columns are added Total Amount Invested and Total count of Investments
D3 = sector_df.loc[(sector_df['country_code'] == 'IND') & (sector_df['raised_amount_mn_usd'] >= 5) & (sector_df['raised_amount_mn_usd'] <= 15)]
D3['Total Amount Invested'] = D3.groupby('main_sector')['raised_amount_mn_usd'].transform('sum')
D3['Total count of Investments'] = D3.groupby('main_sector')['raised_amount_mn_usd'].transform('count')
D3

In [598]:
#Total number of investments (count) and Total amount of investment (USD) of USA
print(D1['raised_amount_usd'].count())
print(D1['raised_amount_usd'].sum())

108083.057956
12098


main_sector
Others                                     2950
Social, Finance, Analytics, Advertising    2714
Cleantech / Semiconductors                 2300
News, Search and Messaging                 1582
Health                                      909
Manufacturing                               799
Entertainment                               591
Automotive & Sports                         167
Blanks                                       86
Name: raised_amount_mn_usd, dtype: int64

In [None]:
#Total number of investments (count) and Total amount of investment (USD) of GBR
print(D2['raised_amount_usd'].count())
print(D2['raised_amount_usd'].sum())

In [None]:
#Total number of investments (count) and Total amount of investment (USD) of IND
print(D3['raised_amount_usd'].count())
print(D3['raised_amount_usd'].sum())


In [625]:
#Grouping dataframe for USA based on main_sector and then counting based on raised_amount_mn_usd 
D1_groupBy = D1.groupby('main_sector')
D1_groupBy['raised_amount_mn_usd'].count().sort_values(ascending = False).iloc[:3]

main_sector
Others                                     2950
Social, Finance, Analytics, Advertising    2714
Cleantech / Semiconductors                 2300
Name: raised_amount_mn_usd, dtype: int64

The top 3 main_sector based on the count of investments for USA are : `Others`, `Social, Finance, Analytics, Advertising` and `Cleantech / Semiconductors`

In [627]:
#Grouping dataframe for GBR based on main_sector and then counting based on raised_amount_mn_usd 
D2_groupBy = D2.groupby('main_sector')
D2_groupBy['raised_amount_mn_usd'].count().sort_values(ascending = False).iloc[:3]

main_sector
Others                                     147
Social, Finance, Analytics, Advertising    133
Cleantech / Semiconductors                 128
Name: raised_amount_mn_usd, dtype: int64

The top 3 main_sector based on the count of investments for GBR are : `Others`, `Social, Finance, Analytics, Advertising` and `Cleantech / Semiconductors`

In [631]:
#Grouping dataframe for IND based on main_sector and then counting based on raised_amount_mn_usd 
D3_groupBy = D3.groupby('main_sector')
D3_groupBy['raised_amount_mn_usd'].count().sort_values(ascending = False).iloc[:3]

main_sector
Others                                     110
Social, Finance, Analytics, Advertising     60
News, Search and Messaging                  52
Name: raised_amount_mn_usd, dtype: int64

The top 3 main_sector based on the count of investments for IND are : `Others`, `Social, Finance, Analytics, Advertising` and `News, Search and Messaging`

In [640]:
#Name of the company in USA for Others Sector, which is top in amount invested

top_sector_D1 = D1.loc[D1['main_sector'] == 'Others'].sort_values(by = "Total Amount Invested", ascending = False).iloc[0]
top_sector_D1['name']

'17zuoye'

In [641]:
#Name of the company in GBR for Others Sector, which is top in amount invested

top_sector_D2 = D2.loc[D2['main_sector'] == 'Others'].sort_values(by = "Total Amount Invested", ascending = False).iloc[0]
top_sector_D2['name']

'peak'

In [642]:
#Name of the company in IND for Others Sector, which is top in amount invested

top_sector_D3 = D3.loc[D3['main_sector'] == 'Others'].sort_values(by = "Total Amount Invested", ascending = False).iloc[0]
top_sector_D3['name']

'treebo hotels'

In [644]:
#Name of the company in USA for "Social, Finance, Analytics, Advertising" Sector, which is top in amount invested.

second_top_sector_D1 = D1.loc[D1['main_sector'] == 'Social, Finance, Analytics, Advertising'].sort_values(by = "Total Amount Invested", ascending = False).iloc[0]
second_top_sector_D1['name']

'h2o.ai'

In [645]:
#Name of the company in GBR for "Social, Finance, Analytics, Advertising" Sector, which is top in amount invested.

second_top_sector_D2 = D2.loc[D2['main_sector'] == 'Social, Finance, Analytics, Advertising'].sort_values(by = "Total Amount Invested", ascending = False).iloc[0]
second_top_sector_D2['name']

'aihit'

In [646]:
#Name of the company in IND for "Social, Finance, Analytics, Advertising" Sector, which is top in amount invested.

second_top_sector_D3 = D3.loc[D3['main_sector'] == 'Social, Finance, Analytics, Advertising'].sort_values(by = "Total Amount Invested", ascending = False).iloc[0]
second_top_sector_D3['name']

'coverfox insurance'

# Checkpoint 6: Plots