#Set up Code

In [1]:
!pip install pandasql

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import pandasql as ps

In [3]:
prefix = '/content/drive'
from google.colab import drive
drive.mount(prefix, force_remount=True)

Mounted at /content/drive


# Companies Analysis and Preprocessing

In [4]:
def set_to_floats(x):
  x = str(x)
  if x == '':
    return np.nan
  if x.strip() == '-':
    return np.nan
  if x == 'nan':
    return np.nan
  return float(x.replace(',', ''))

In [5]:
companies = pd.read_csv('/content/drive/My Drive/Colab Notebooks/CIS 550/companies.csv')
companies[' funding_total_usd '] = companies[' funding_total_usd '].apply(lambda x: set_to_floats(x))
companies

Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
0,/organization/waywire,#waywire,http://www.waywire.com,|Entertainment|Politics|Social Media|News|,News,1750000.0,acquired,USA,NY,New York City,New York,1,2012-06-01,2012-06,2012-Q2,2012.0,2012-06-30,2012-06-30
1,/organization/tv-communications,&TV Communications,http://enjoyandtv.com,|Games|,Games,4000000.0,operating,USA,CA,Los Angeles,Los Angeles,2,,,,,2010-06-04,2010-09-23
2,/organization/rock-your-paper,'Rock' Your Paper,http://www.rockyourpaper.org,|Publishing|Education|,Publishing,40000.0,operating,EST,,Tallinn,Tallinn,1,2012-10-26,2012-10,2012-Q4,2012.0,2012-08-09,2012-08-09
3,/organization/in-touch-network,(In)Touch Network,http://www.InTouchNetwork.com,|Electronics|Guides|Coffee|Restaurants|Music|i...,Electronics,1500000.0,operating,GBR,,London,London,1,2011-04-01,2011-04,2011-Q2,2011.0,2011-04-01,2011-04-01
4,/organization/r-ranch-and-mine,-R- Ranch and Mine,,|Tourism|Entertainment|Games|,Tourism,60000.0,operating,USA,TX,Dallas,Fort Worth,2,2014-01-01,2014-01,2014-Q1,2014.0,2014-08-17,2014-09-26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49433,/organization/zzish,Zzish,http://www.zzish.com,|Analytics|Gamification|Developer APIs|iOS|And...,Education,320000.0,operating,GBR,,London,London,1,2013-01-28,2013-01,2013-Q1,2013.0,2014-03-24,2014-03-24
49434,/organization/zznode-science-and-technology-co...,ZZNode Science and Technology,http://www.zznode.com,|Enterprise Software|,Enterprise Software,1587301.0,operating,CHN,,Beijing,Beijing,1,,,,,2012-04-01,2012-04-01
49435,/organization/zzzzapp-com,Zzzzapp Wireless ltd.,http://www.zzzzapp.com,|Web Development|Advertising|Wireless|Mobile|,Web Development,97398.0,operating,HRV,,Split,Split,5,2012-05-13,2012-05,2012-Q2,2012.0,2011-11-01,2014-09-10
49436,/organization/a-list-games,[a]list games,http://www.alistgames.com,|Games|,Games,9300000.0,operating,,,,,1,,,,,2011-11-21,2011-11-21


## Summary Stats

In [6]:
companies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49438 entries, 0 to 49437
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   permalink            49438 non-null  object 
 1   name                 49437 non-null  object 
 2   homepage_url         45989 non-null  object 
 3   category_list        45477 non-null  object 
 4    market              45470 non-null  object 
 5    funding_total_usd   40978 non-null  float64
 6   status               48124 non-null  object 
 7   country_code         44165 non-null  object 
 8   state_code           30161 non-null  object 
 9   region               44165 non-null  object 
 10  city                 43322 non-null  object 
 11  funding_rounds       49438 non-null  int64  
 12  founded_at           38554 non-null  object 
 13  founded_month        38482 non-null  object 
 14  founded_quarter      38482 non-null  object 
 15  founded_year         38482 non-null 

In [7]:
companies.describe()

Unnamed: 0,funding_total_usd,funding_rounds,founded_year
count,40978.0,49438.0,38482.0
mean,15912970.0,1.696205,2007.359129
std,168562100.0,1.294213,7.579203
min,1.0,1.0,1902.0
25%,350000.0,1.0,2006.0
50%,2000000.0,1.0,2010.0
75%,10000000.0,2.0,2012.0
max,30079500000.0,18.0,2014.0


In [8]:
companies.nunique()

permalink              49436
name                   49350
homepage_url           45850
category_list          16675
 market                  753
 funding_total_usd     14629
status                     3
country_code             115
state_code                61
region                  1089
city                    4188
funding_rounds            17
founded_at              3369
founded_month            420
founded_quarter          218
founded_year             103
first_funding_at        3914
last_funding_at         3657
dtype: int64

In [9]:
companies.corr()

Unnamed: 0,funding_total_usd,funding_rounds,founded_year
funding_total_usd,1.0,0.105506,-0.071897
funding_rounds,0.105506,1.0,-0.058314
founded_year,-0.071897,-0.058314,1.0


## Analysis

In [10]:
column_names = ['permalink', 'name', 'homepage_url', 'category_list', ' market ', ' funding_total_usd ', 'status', 'country_code', 'state_code', 'region', 'city', 'funding_rounds', 'founded_at', 'founded_month', 'founded_quarter', 'founded_year', 'first_funding_at', 'last_funding_at']

for col in column_names:
  print('\n\n')
  print("==========================================================================")
  print(col.upper())
  print("VALUE COUNTS")
  print(companies[col].value_counts())
  print("\n")

  print("DUPLICATE COUNTS")
  print(companies.duplicated(subset=col, keep='first').sum())
  print("\n")

  print("NULL COUNTS AND PERCENTAGE")
  print(companies[col].isnull().sum())
  print(companies[col].isnull().sum()/(companies['permalink'].nunique())*100)
  print('\n')
  
  print("NULL ROWS")
  display(companies[companies[col].isnull()])




PERMALINK
VALUE COUNTS
/organization/treasure-valley-urology-services    2
/organization/prysm                               2
/organization/waywire                             1
/organization/polybona                            1
/organization/pollfish                            1
                                                 ..
/organization/game-ventures                       1
/organization/game9z                              1
/organization/gameaccount-network                 1
/organization/gameanalytics                       1
/organization/x                                   1
Name: permalink, Length: 49436, dtype: int64


DUPLICATE COUNTS
2


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at





NAME
VALUE COUNTS
Roost                  4
Spire                  4
Cue                    3
Compass                3
Hubbub                 3
                      ..
Game Trust             1
Game Ventures          1
Game9z                 1
GameAccount Network    1
[x+1]                  1
Name: name, Length: 49350, dtype: int64


DUPLICATE COUNTS
87


NULL COUNTS AND PERCENTAGE
1
0.0020228173800469294


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
28221,/organization/tell-it-in,,http://tellitin10.com,|Startups|,Startups,25000.0,closed,,,,,1,2011-10-01,2011-10,2011-Q4,2011.0,2012-03-01,2012-03-01





HOMEPAGE_URL
VALUE COUNTS
http://spaceport.io              2
http://shelby.tv                 2
http://www.kuwo.cn               2
http://gui.de                    2
http://primordialgenetics.com    2
                                ..
http://www.gamecooks.net         1
http://www.game-craft.com        1
http://www.gamedigitalplc.com    1
http://game-insight.com          1
http://www.xplusone.com/         1
Name: homepage_url, Length: 45850, dtype: int64


DUPLICATE COUNTS
3587


NULL COUNTS AND PERCENTAGE
3449
6.97669714378186


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
4,/organization/r-ranch-and-mine,-R- Ranch and Mine,,|Tourism|Entertainment|Games|,Tourism,60000.0,operating,USA,TX,Dallas,Fort Worth,2,2014-01-01,2014-01,2014-Q1,2014.0,2014-08-17,2014-09-26
11,/organization/1-4-all,1-4 All,,|Entertainment|Games|Software|,Software,,operating,USA,NC,NC - Other,Connellys Springs,1,,,,,2013-04-21,2013-04-21
14,/organization/1-618-technology,1.618 Technology,,|Real Estate|,Real Estate,,operating,USA,FL,Orlando,Orlando,1,2013-12-07,2013-12,2013-Q4,2013.0,2014-01-22,2014-01-22
29,/organization/10°north,10°North,,|Fashion|,Fashion,,operating,CAN,ON,Toronto,Mississauga,1,2014-08-12,2014-08,2014-Q3,2014.0,2014-08-12,2014-08-12
42,/organization/121-rentals,121 Rentals,,|Real Estate|,Real Estate,,operating,USA,TX,TX - Other,Amarillo,1,2011-04-12,2011-04,2011-Q2,2011.0,2014-09-03,2014-09-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49341,/organization/zperfectgift,zPerfectGift,,|Social Media|,Social Media,25000.0,operating,USA,UT,Salt Lake City,Midvale,1,2010-01-01,2010-01,2010-Q1,2010.0,2012-09-26,2012-09-26
49356,/organization/zuffle,Zuffle,,,,132351.0,operating,ITA,,ITA - Other,Prato,1,,,,,2013-07-26,2013-07-26
49378,/organization/zumodrive,zumodrive,,,,,operating,,,,,1,,,,,2007-09-01,2007-09-01
49390,/organization/zuse,Zuse,,|Software|,Software,,operating,USA,NY,New York City,New York,1,,,,,2013-05-01,2013-05-01





CATEGORY_LIST
VALUE COUNTS
|Software|                                                                                     3650
|Biotechnology|                                                                                3597
|E-Commerce|                                                                                   1263
|Mobile|                                                                                       1211
|Curated Web|                                                                                  1120
                                                                                               ... 
|Fashion|Digital Media|Marketplaces|E-Commerce|                                                   1
|Advertising|Web Development|App Marketing|Enterprises|Cloud Computing|Enterprise Software|       1
|Web Design|Software|Web Tools|Web Development|Enterprise Software|                               1
|3D|Web Tools|Entertainment|Curated Web|                              

Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
48,/organization/12bis,12Bis,http://12bis.com,,,130636.0,operating,FRA,,Paris,Paris,1,2008-01-01,2008-01,2008-Q1,2008.0,2012-03-15,2012-03-15
61,/organization/16lab-inc-,16Lab Inc.,http://www.16lab.net,,,,operating,,,,,1,2013-04-24,2013-04,2013-Q2,2013.0,2014-11-26,2014-11-26
72,/organization/1d4-pty,1d4 Pty,http://www.immortaloutdoors.com,,,40000.0,operating,NOR,,Aust-Agder,,1,,,,,2012-04-11,2012-04-11
85,/organization/1rp-media,1RP Media,,,,,operating,USA,CA,Los Angeles,Los Angeles,1,2011-01-01,2011-01,2011-Q1,2011.0,2012-06-29,2012-06-29
88,/organization/1st-choice-lawn-care,1st Choice Lawn Care,,,,,operating,USA,LA,Shreveport,Bossier City,1,2012-08-01,2012-08,2012-Q3,2012.0,2012-07-22,2012-07-22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49356,/organization/zuffle,Zuffle,,,,132351.0,operating,ITA,,ITA - Other,Prato,1,,,,,2013-07-26,2013-07-26
49378,/organization/zumodrive,zumodrive,,,,,operating,,,,,1,,,,,2007-09-01,2007-09-01
49389,/organization/zursh-2,Zursh,http://www.zursh.com/,,,25000.0,operating,,,,,1,,,,,2014-05-01,2014-05-01
49397,/organization/zuzher,Zuzher,http://www.zuzher.com/,,,15698.0,operating,,,,,1,2012-05-01,2012-05,2012-Q2,2012.0,2012-08-01,2012-08-01





 MARKET 
VALUE COUNTS
 Software                4620
 Biotechnology           3688
 Mobile                  1983
 E-Commerce              1805
 Curated Web             1655
                         ... 
 Contact Centers            1
 Swimming                   1
 Retirement                 1
 Musical Instruments        1
 Rural Energy               1
Name:  market , Length: 753, dtype: int64


DUPLICATE COUNTS
48684


NULL COUNTS AND PERCENTAGE
3968
8.026539364026215


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
48,/organization/12bis,12Bis,http://12bis.com,,,130636.0,operating,FRA,,Paris,Paris,1,2008-01-01,2008-01,2008-Q1,2008.0,2012-03-15,2012-03-15
61,/organization/16lab-inc-,16Lab Inc.,http://www.16lab.net,,,,operating,,,,,1,2013-04-24,2013-04,2013-Q2,2013.0,2014-11-26,2014-11-26
72,/organization/1d4-pty,1d4 Pty,http://www.immortaloutdoors.com,,,40000.0,operating,NOR,,Aust-Agder,,1,,,,,2012-04-11,2012-04-11
85,/organization/1rp-media,1RP Media,,,,,operating,USA,CA,Los Angeles,Los Angeles,1,2011-01-01,2011-01,2011-Q1,2011.0,2012-06-29,2012-06-29
88,/organization/1st-choice-lawn-care,1st Choice Lawn Care,,,,,operating,USA,LA,Shreveport,Bossier City,1,2012-08-01,2012-08,2012-Q3,2012.0,2012-07-22,2012-07-22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49356,/organization/zuffle,Zuffle,,,,132351.0,operating,ITA,,ITA - Other,Prato,1,,,,,2013-07-26,2013-07-26
49378,/organization/zumodrive,zumodrive,,,,,operating,,,,,1,,,,,2007-09-01,2007-09-01
49389,/organization/zursh-2,Zursh,http://www.zursh.com/,,,25000.0,operating,,,,,1,,,,,2014-05-01,2014-05-01
49397,/organization/zuzher,Zuzher,http://www.zuzher.com/,,,15698.0,operating,,,,,1,2012-05-01,2012-05,2012-Q2,2012.0,2012-08-01,2012-08-01





 FUNDING_TOTAL_USD 
VALUE COUNTS
1000000.0     928
500000.0      765
100000.0      750
40000.0       680
2000000.0     626
             ... 
11253082.0      1
17926365.0      1
177404.0        1
252052.0        1
97398.0         1
Name:  funding_total_usd , Length: 14629, dtype: int64


DUPLICATE COUNTS
34808


NULL COUNTS AND PERCENTAGE
8460
17.113035035197022


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
8,/organization/004-technologies,004 Technologies,http://004gmbh.de/en/004-interact,|Software|,Software,,operating,USA,IL,"Springfield, Illinois",Champaign,1,2010-01-01,2010-01,2010-Q1,2010.0,2014-07-24,2014-07-24
11,/organization/1-4-all,1-4 All,,|Entertainment|Games|Software|,Software,,operating,USA,NC,NC - Other,Connellys Springs,1,,,,,2013-04-21,2013-04-21
12,/organization/1-800-dentist,1-800-DENTIST,http://www.1800dentist.com,|Health and Wellness|,Health and Wellness,,operating,USA,CA,Los Angeles,Los Angeles,1,1986-01-01,1986-01,1986-Q1,1986.0,2010-08-19,2010-08-19
14,/organization/1-618-technology,1.618 Technology,,|Real Estate|,Real Estate,,operating,USA,FL,Orlando,Orlando,1,2013-12-07,2013-12,2013-Q4,2013.0,2014-01-22,2014-01-22
19,/organization/1000jobboersen-de,1000jobboersen.de,http://www.1000jobboersen.de,|Curated Web|,Curated Web,,operating,DEU,,Berlin,Berlin,1,,,,,2011-09-16,2011-09-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49411,/organization/zygo,Zygo Communications,http://www.zygocommunications.com/who,|SMS|Mobile|,Mobile,,closed,GBR,,London,London,1,2005-06-21,2005-06,2005-Q2,2005.0,2007-11-19,2007-11-19
49418,/organization/zymergen,Zymergen,http://www.zymergen.com,|Biotechnology|,Biotechnology,,operating,USA,CA,SF Bay Area,San Francisco,1,2013-01-01,2013-01,2013-Q1,2013.0,2014-01-01,2014-01-01
49421,/organization/zympi,Zympi,http://www.zympi.com,|Networking|Web Hosting|Mobile|,Web Hosting,,operating,,,,,1,2010-09-03,2010-09,2010-Q3,2010.0,2011-11-22,2011-11-22
49422,/organization/zymr-inc-,"Zymr, Inc.",http://www.zymr.com,|Open Source|Network Security|Networking|Virtu...,Testing,,operating,USA,CA,SF Bay Area,Santa Clara,1,,,,,2012-12-12,2012-12-12





STATUS
VALUE COUNTS
operating    41829
acquired      3692
closed        2603
Name: status, dtype: int64


DUPLICATE COUNTS
49434


NULL COUNTS AND PERCENTAGE
1314
2.6579820373816654


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
5,/organization/club-domains,.Club Domains,http://nic.club/,|Software|,Software,7000000.0,,USA,FL,Ft. Lauderdale,Oakland Park,1,2011-10-10,2011-10,2011-Q4,2011.0,2013-05-31,2013-05-31
67,/organization/1c-company,1C Company,http://1c.ru/eng,|Video Games|Games|Software|,Software,200000000.0,,RUS,,Moscow,Moscow,1,1991-01-01,1991-01,1991-Q1,1991.0,2011-10-03,2011-10-03
89,/organization/1st-merchant-funding,1st Merchant Funding,http://www.1stmerchantfunding.com/,|Financial Services|,Financial Services,10000000.0,,USA,FL,Miami,Miami,1,2007-01-01,2007-01,2007-Q1,2007.0,2014-05-07,2014-05-07
121,/organization/24h00,24h00,http://www.boosket.com/,,,,,FRA,,Paris,Paris,1,2006-01-01,2006-01,2006-Q1,2006.0,2006-08-01,2006-08-01
238,/organization/3nder,3nder,http://www.3nderapp.com,|iOS|Online Dating|Mobile|Location Based Servi...,Social Media,,,,,,,1,2014-02-18,2014-02,2014-Q1,2014.0,2014-05-09,2014-05-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49175,/organization/zingaya,Zingaya,http://www.zingaya.com,|Customer Service|E-Commerce|VoIP|Telecommunic...,Messaging,1150000.0,,USA,CA,SF Bay Area,Palo Alto,1,2010-06-01,2010-06,2010-Q2,2010.0,2011-10-26,2011-10-26
49248,/organization/tyrosine-pharmaceuticals,Zocere,http://zocere.com/,|Biotechnology|,Biotechnology,100000.0,,,,,,1,2012-01-01,2012-01,2012-Q1,2012.0,2013-09-25,2013-09-25
49288,/organization/zoojoo-be,zoojoo.BE,http://www.zoojoo.be,|Software|,Software,,,IND,,Bangalore,Bangalore,1,2012-01-01,2012-01,2012-Q1,2012.0,2014-05-15,2014-05-15
49289,/organization/zookal,Zookal,http://www.zookal.com,|Education|Textbooks|E-Commerce|,E-Commerce,2060000.0,,AUS,,Sydney,Sydney,3,2011-03-01,2011-03,2011-Q1,2011.0,2011-12-01,2013-09-30





COUNTRY_CODE
VALUE COUNTS
USA    28793
GBR     2642
CAN     1405
CHN     1239
DEU      968
       ...  
ALB        1
MOZ        1
LIE        1
BRN        1
MAF        1
Name: country_code, Length: 115, dtype: int64


DUPLICATE COUNTS
49322


NULL COUNTS AND PERCENTAGE
5273
10.66631604498746


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
7,/organization/0-6-com,0-6.com,http://www.0-6.com,|Curated Web|,Curated Web,2000000.0,operating,,,,,1,2007-01-01,2007-01,2007-Q1,2007.0,2008-03-19,2008-03-19
18,/organization/1000-markets,1000 Markets,http://www.1000markets.com,|Marketplaces|Art|E-Commerce|,Marketplaces,500000.0,acquired,,,,,1,2009-01-01,2009-01,2009-Q1,2009.0,2009-05-15,2009-05-15
24,/organization/100du-tv,100du.tv,http://www.100du.com,|Hospitality|,Hospitality,3000000.0,operating,,,,,2,,,,,2008-01-07,2010-08-13
31,/organization/10sec,10sec,http://10s.ec/,|Social Commerce|E-Commerce|Mobile Commerce|,Mobile Commerce,1600000.0,operating,,,,,2,2013-07-08,2013-07,2013-Q3,2013.0,2013-10-31,2014-05-08
36,/organization/115-network-disks,115 network disks,http://www.115.com/,|Education|,Education,,operating,,,,,1,,,,,2011-11-01,2011-11-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49389,/organization/zursh-2,Zursh,http://www.zursh.com/,,,25000.0,operating,,,,,1,,,,,2014-05-01,2014-05-01
49397,/organization/zuzher,Zuzher,http://www.zuzher.com/,,,15698.0,operating,,,,,1,2012-05-01,2012-05,2012-Q2,2012.0,2012-08-01,2012-08-01
49407,/organization/zwoor-com,zwoor.com,http://www.zwoor.com,|Meeting Software|Polling|Surveys|Android|iPho...,Polling,100000.0,operating,,,,,1,2010-01-01,2010-01,2010-Q1,2010.0,2011-01-01,2011-01-01
49421,/organization/zympi,Zympi,http://www.zympi.com,|Networking|Web Hosting|Mobile|,Web Hosting,,operating,,,,,1,2010-09-03,2010-09,2010-Q3,2010.0,2011-11-22,2011-11-22





STATE_CODE
VALUE COUNTS
CA    9917
NY    2914
MA    1969
TX    1466
WA     974
      ... 
MB      13
AK      12
NB       8
SK       4
PE       2
Name: state_code, Length: 61, dtype: int64


DUPLICATE COUNTS
49376


NULL COUNTS AND PERCENTAGE
19277
38.993850635164655


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
2,/organization/rock-your-paper,'Rock' Your Paper,http://www.rockyourpaper.org,|Publishing|Education|,Publishing,40000.0,operating,EST,,Tallinn,Tallinn,1,2012-10-26,2012-10,2012-Q4,2012.0,2012-08-09,2012-08-09
3,/organization/in-touch-network,(In)Touch Network,http://www.InTouchNetwork.com,|Electronics|Guides|Coffee|Restaurants|Music|i...,Electronics,1500000.0,operating,GBR,,London,London,1,2011-04-01,2011-04,2011-Q2,2011.0,2011-04-01,2011-04-01
6,/organization/fox-networks,.Fox Networks,http://www.dotfox.com,|Advertising|,Advertising,4912393.0,closed,ARG,,Buenos Aires,Buenos Aires,1,,,,,2007-01-16,2007-01-16
7,/organization/0-6-com,0-6.com,http://www.0-6.com,|Curated Web|,Curated Web,2000000.0,operating,,,,,1,2007-01-01,2007-01,2007-Q1,2007.0,2008-03-19,2008-03-19
9,/organization/01games-technology,01Games Technology,http://www.01games.hk/,|Games|,Games,41250.0,operating,HKG,,Hong Kong,Hong Kong,1,,,,,2014-07-01,2014-07-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49432,/organization/zytoprotec,Zytoprotec,http://www.zytoprotec.com,|Biotechnology|,Biotechnology,2686600.0,operating,AUT,,Vienna,Gerasdorf Bei Wien,1,2007-01-01,2007-01,2007-Q1,2007.0,2013-01-29,2013-01-29
49433,/organization/zzish,Zzish,http://www.zzish.com,|Analytics|Gamification|Developer APIs|iOS|And...,Education,320000.0,operating,GBR,,London,London,1,2013-01-28,2013-01,2013-Q1,2013.0,2014-03-24,2014-03-24
49434,/organization/zznode-science-and-technology-co...,ZZNode Science and Technology,http://www.zznode.com,|Enterprise Software|,Enterprise Software,1587301.0,operating,CHN,,Beijing,Beijing,1,,,,,2012-04-01,2012-04-01
49435,/organization/zzzzapp-com,Zzzzapp Wireless ltd.,http://www.zzzzapp.com,|Web Development|Advertising|Wireless|Mobile|,Web Development,97398.0,operating,HRV,,Split,Split,5,2012-05-13,2012-05,2012-Q2,2012.0,2011-11-01,2014-09-10





REGION
VALUE COUNTS
SF Bay Area        6804
New York City      2577
Boston             1837
London             1588
Los Angeles        1389
                   ... 
Palma Del Río         1
Harbin                1
Teddington            1
Borehamwood           1
Buckinghamshire       1
Name: region, Length: 1089, dtype: int64


DUPLICATE COUNTS
48348


NULL COUNTS AND PERCENTAGE
5273
10.66631604498746


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
7,/organization/0-6-com,0-6.com,http://www.0-6.com,|Curated Web|,Curated Web,2000000.0,operating,,,,,1,2007-01-01,2007-01,2007-Q1,2007.0,2008-03-19,2008-03-19
18,/organization/1000-markets,1000 Markets,http://www.1000markets.com,|Marketplaces|Art|E-Commerce|,Marketplaces,500000.0,acquired,,,,,1,2009-01-01,2009-01,2009-Q1,2009.0,2009-05-15,2009-05-15
24,/organization/100du-tv,100du.tv,http://www.100du.com,|Hospitality|,Hospitality,3000000.0,operating,,,,,2,,,,,2008-01-07,2010-08-13
31,/organization/10sec,10sec,http://10s.ec/,|Social Commerce|E-Commerce|Mobile Commerce|,Mobile Commerce,1600000.0,operating,,,,,2,2013-07-08,2013-07,2013-Q3,2013.0,2013-10-31,2014-05-08
36,/organization/115-network-disks,115 network disks,http://www.115.com/,|Education|,Education,,operating,,,,,1,,,,,2011-11-01,2011-11-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49389,/organization/zursh-2,Zursh,http://www.zursh.com/,,,25000.0,operating,,,,,1,,,,,2014-05-01,2014-05-01
49397,/organization/zuzher,Zuzher,http://www.zuzher.com/,,,15698.0,operating,,,,,1,2012-05-01,2012-05,2012-Q2,2012.0,2012-08-01,2012-08-01
49407,/organization/zwoor-com,zwoor.com,http://www.zwoor.com,|Meeting Software|Polling|Surveys|Android|iPho...,Polling,100000.0,operating,,,,,1,2010-01-01,2010-01,2010-Q1,2010.0,2011-01-01,2011-01-01
49421,/organization/zympi,Zympi,http://www.zympi.com,|Networking|Web Hosting|Mobile|,Web Hosting,,operating,,,,,1,2010-09-03,2010-09,2010-Q3,2010.0,2011-11-22,2011-11-22





CITY
VALUE COUNTS
San Francisco            2615
New York                 2334
London                   1257
Palo Alto                 597
Austin                    583
                         ... 
Richmond Upon Thames        1
Kunming                     1
Browns Mills                1
Paducah                     1
Damansara New Village       1
Name: city, Length: 4188, dtype: int64


DUPLICATE COUNTS
45249


NULL COUNTS AND PERCENTAGE
6116
12.37155109636702


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
7,/organization/0-6-com,0-6.com,http://www.0-6.com,|Curated Web|,Curated Web,2000000.0,operating,,,,,1,2007-01-01,2007-01,2007-Q1,2007.0,2008-03-19,2008-03-19
18,/organization/1000-markets,1000 Markets,http://www.1000markets.com,|Marketplaces|Art|E-Commerce|,Marketplaces,500000.0,acquired,,,,,1,2009-01-01,2009-01,2009-Q1,2009.0,2009-05-15,2009-05-15
24,/organization/100du-tv,100du.tv,http://www.100du.com,|Hospitality|,Hospitality,3000000.0,operating,,,,,2,,,,,2008-01-07,2010-08-13
31,/organization/10sec,10sec,http://10s.ec/,|Social Commerce|E-Commerce|Mobile Commerce|,Mobile Commerce,1600000.0,operating,,,,,2,2013-07-08,2013-07,2013-Q3,2013.0,2013-10-31,2014-05-08
36,/organization/115-network-disks,115 network disks,http://www.115.com/,|Education|,Education,,operating,,,,,1,,,,,2011-11-01,2011-11-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49389,/organization/zursh-2,Zursh,http://www.zursh.com/,,,25000.0,operating,,,,,1,,,,,2014-05-01,2014-05-01
49397,/organization/zuzher,Zuzher,http://www.zuzher.com/,,,15698.0,operating,,,,,1,2012-05-01,2012-05,2012-Q2,2012.0,2012-08-01,2012-08-01
49407,/organization/zwoor-com,zwoor.com,http://www.zwoor.com,|Meeting Software|Polling|Surveys|Android|iPho...,Polling,100000.0,operating,,,,,1,2010-01-01,2010-01,2010-Q1,2010.0,2011-01-01,2011-01-01
49421,/organization/zympi,Zympi,http://www.zympi.com,|Networking|Web Hosting|Mobile|,Web Hosting,,operating,,,,,1,2010-09-03,2010-09,2010-Q3,2010.0,2011-11-22,2011-11-22





FUNDING_ROUNDS
VALUE COUNTS
1     32039
2      9219
3      4026
4      1997
5      1001
6       560
7       252
8       152
9        84
10       43
11       35
12       12
13        8
15        4
14        4
16        1
18        1
Name: funding_rounds, dtype: int64


DUPLICATE COUNTS
49421


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at





FOUNDED_AT
VALUE COUNTS
2012-01-01    2181
2011-01-01    2161
2010-01-01    1855
2009-01-01    1603
2013-01-01    1575
              ... 
2002-11-20       1
2008-08-26       1
2003-05-29       1
2007-01-11       1
2012-05-13       1
Name: founded_at, Length: 3369, dtype: int64


DUPLICATE COUNTS
46068


NULL COUNTS AND PERCENTAGE
10884
22.01634436443078


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
1,/organization/tv-communications,&TV Communications,http://enjoyandtv.com,|Games|,Games,4000000.0,operating,USA,CA,Los Angeles,Los Angeles,2,,,,,2010-06-04,2010-09-23
6,/organization/fox-networks,.Fox Networks,http://www.dotfox.com,|Advertising|,Advertising,4912393.0,closed,ARG,,Buenos Aires,Buenos Aires,1,,,,,2007-01-16,2007-01-16
9,/organization/01games-technology,01Games Technology,http://www.01games.hk/,|Games|,Games,41250.0,operating,HKG,,Hong Kong,Hong Kong,1,,,,,2014-07-01,2014-07-01
11,/organization/1-4-all,1-4 All,,|Entertainment|Games|Software|,Software,,operating,USA,NC,NC - Other,Connellys Springs,1,,,,,2013-04-21,2013-04-21
19,/organization/1000jobboersen-de,1000jobboersen.de,http://www.1000jobboersen.de,|Curated Web|,Curated Web,,operating,DEU,,Berlin,Berlin,1,,,,,2011-09-16,2011-09-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49412,/organization/zygo-corporation,Zygo Corporation,http://www.zygo.com,|Manufacturing|,Manufacturing,9000000.0,,USA,CT,CT - Other,Middlefield,1,,,,,2011-10-24,2011-10-24
49422,/organization/zymr-inc-,"Zymr, Inc.",http://www.zymr.com,|Open Source|Network Security|Networking|Virtu...,Testing,,operating,USA,CA,SF Bay Area,Santa Clara,1,,,,,2012-12-12,2012-12-12
49431,/organization/zyrra,Zyrra,http://www.zyrra.com,|E-Commerce|,E-Commerce,1510500.0,operating,USA,MA,Boston,Cambridge,4,,,,,2010-10-21,2012-10-18
49434,/organization/zznode-science-and-technology-co...,ZZNode Science and Technology,http://www.zznode.com,|Enterprise Software|,Enterprise Software,1587301.0,operating,CHN,,Beijing,Beijing,1,,,,,2012-04-01,2012-04-01





FOUNDED_MONTH
VALUE COUNTS
2012-01    2327
2011-01    2286
2010-01    1952
2013-01    1722
2009-01    1655
           ... 
1918-01       1
1985-04       1
1919-06       1
1989-09       1
1994-10       1
Name: founded_month, Length: 420, dtype: int64


DUPLICATE COUNTS
49017


NULL COUNTS AND PERCENTAGE
10956
22.161987215794156


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
1,/organization/tv-communications,&TV Communications,http://enjoyandtv.com,|Games|,Games,4000000.0,operating,USA,CA,Los Angeles,Los Angeles,2,,,,,2010-06-04,2010-09-23
6,/organization/fox-networks,.Fox Networks,http://www.dotfox.com,|Advertising|,Advertising,4912393.0,closed,ARG,,Buenos Aires,Buenos Aires,1,,,,,2007-01-16,2007-01-16
9,/organization/01games-technology,01Games Technology,http://www.01games.hk/,|Games|,Games,41250.0,operating,HKG,,Hong Kong,Hong Kong,1,,,,,2014-07-01,2014-07-01
11,/organization/1-4-all,1-4 All,,|Entertainment|Games|Software|,Software,,operating,USA,NC,NC - Other,Connellys Springs,1,,,,,2013-04-21,2013-04-21
19,/organization/1000jobboersen-de,1000jobboersen.de,http://www.1000jobboersen.de,|Curated Web|,Curated Web,,operating,DEU,,Berlin,Berlin,1,,,,,2011-09-16,2011-09-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49412,/organization/zygo-corporation,Zygo Corporation,http://www.zygo.com,|Manufacturing|,Manufacturing,9000000.0,,USA,CT,CT - Other,Middlefield,1,,,,,2011-10-24,2011-10-24
49422,/organization/zymr-inc-,"Zymr, Inc.",http://www.zymr.com,|Open Source|Network Security|Networking|Virtu...,Testing,,operating,USA,CA,SF Bay Area,Santa Clara,1,,,,,2012-12-12,2012-12-12
49431,/organization/zyrra,Zyrra,http://www.zyrra.com,|E-Commerce|,E-Commerce,1510500.0,operating,USA,MA,Boston,Cambridge,4,,,,,2010-10-21,2012-10-18
49434,/organization/zznode-science-and-technology-co...,ZZNode Science and Technology,http://www.zznode.com,|Enterprise Software|,Enterprise Software,1587301.0,operating,CHN,,Beijing,Beijing,1,,,,,2012-04-01,2012-04-01





FOUNDED_QUARTER
VALUE COUNTS
2012-Q1    2904
2011-Q1    2768
2010-Q1    2259
2013-Q1    2206
2009-Q1    1852
           ... 
1991-Q3       1
1986-Q4       1
1960-Q2       1
1973-Q4       1
1907-Q1       1
Name: founded_quarter, Length: 218, dtype: int64


DUPLICATE COUNTS
49219


NULL COUNTS AND PERCENTAGE
10956
22.161987215794156


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
1,/organization/tv-communications,&TV Communications,http://enjoyandtv.com,|Games|,Games,4000000.0,operating,USA,CA,Los Angeles,Los Angeles,2,,,,,2010-06-04,2010-09-23
6,/organization/fox-networks,.Fox Networks,http://www.dotfox.com,|Advertising|,Advertising,4912393.0,closed,ARG,,Buenos Aires,Buenos Aires,1,,,,,2007-01-16,2007-01-16
9,/organization/01games-technology,01Games Technology,http://www.01games.hk/,|Games|,Games,41250.0,operating,HKG,,Hong Kong,Hong Kong,1,,,,,2014-07-01,2014-07-01
11,/organization/1-4-all,1-4 All,,|Entertainment|Games|Software|,Software,,operating,USA,NC,NC - Other,Connellys Springs,1,,,,,2013-04-21,2013-04-21
19,/organization/1000jobboersen-de,1000jobboersen.de,http://www.1000jobboersen.de,|Curated Web|,Curated Web,,operating,DEU,,Berlin,Berlin,1,,,,,2011-09-16,2011-09-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49412,/organization/zygo-corporation,Zygo Corporation,http://www.zygo.com,|Manufacturing|,Manufacturing,9000000.0,,USA,CT,CT - Other,Middlefield,1,,,,,2011-10-24,2011-10-24
49422,/organization/zymr-inc-,"Zymr, Inc.",http://www.zymr.com,|Open Source|Network Security|Networking|Virtu...,Testing,,operating,USA,CA,SF Bay Area,Santa Clara,1,,,,,2012-12-12,2012-12-12
49431,/organization/zyrra,Zyrra,http://www.zyrra.com,|E-Commerce|,E-Commerce,1510500.0,operating,USA,MA,Boston,Cambridge,4,,,,,2010-10-21,2012-10-18
49434,/organization/zznode-science-and-technology-co...,ZZNode Science and Technology,http://www.zznode.com,|Enterprise Software|,Enterprise Software,1587301.0,operating,CHN,,Beijing,Beijing,1,,,,,2012-04-01,2012-04-01





FOUNDED_YEAR
VALUE COUNTS
2012.0    5211
2011.0    4905
2013.0    4044
2010.0    3773
2009.0    2979
          ... 
1943.0       1
1946.0       1
1918.0       1
1944.0       1
1907.0       1
Name: founded_year, Length: 103, dtype: int64


DUPLICATE COUNTS
49334


NULL COUNTS AND PERCENTAGE
10956
22.161987215794156


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at
1,/organization/tv-communications,&TV Communications,http://enjoyandtv.com,|Games|,Games,4000000.0,operating,USA,CA,Los Angeles,Los Angeles,2,,,,,2010-06-04,2010-09-23
6,/organization/fox-networks,.Fox Networks,http://www.dotfox.com,|Advertising|,Advertising,4912393.0,closed,ARG,,Buenos Aires,Buenos Aires,1,,,,,2007-01-16,2007-01-16
9,/organization/01games-technology,01Games Technology,http://www.01games.hk/,|Games|,Games,41250.0,operating,HKG,,Hong Kong,Hong Kong,1,,,,,2014-07-01,2014-07-01
11,/organization/1-4-all,1-4 All,,|Entertainment|Games|Software|,Software,,operating,USA,NC,NC - Other,Connellys Springs,1,,,,,2013-04-21,2013-04-21
19,/organization/1000jobboersen-de,1000jobboersen.de,http://www.1000jobboersen.de,|Curated Web|,Curated Web,,operating,DEU,,Berlin,Berlin,1,,,,,2011-09-16,2011-09-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49412,/organization/zygo-corporation,Zygo Corporation,http://www.zygo.com,|Manufacturing|,Manufacturing,9000000.0,,USA,CT,CT - Other,Middlefield,1,,,,,2011-10-24,2011-10-24
49422,/organization/zymr-inc-,"Zymr, Inc.",http://www.zymr.com,|Open Source|Network Security|Networking|Virtu...,Testing,,operating,USA,CA,SF Bay Area,Santa Clara,1,,,,,2012-12-12,2012-12-12
49431,/organization/zyrra,Zyrra,http://www.zyrra.com,|E-Commerce|,E-Commerce,1510500.0,operating,USA,MA,Boston,Cambridge,4,,,,,2010-10-21,2012-10-18
49434,/organization/zznode-science-and-technology-co...,ZZNode Science and Technology,http://www.zznode.com,|Enterprise Software|,Enterprise Software,1587301.0,operating,CHN,,Beijing,Beijing,1,,,,,2012-04-01,2012-04-01





FIRST_FUNDING_AT
VALUE COUNTS
2012-01-01    468
2013-01-01    463
2008-01-01    422
2011-01-01    392
2007-01-01    342
             ... 
2006-12-30      1
2008-07-16      1
0019-11-20      1
2006-04-09      1
2004-10-12      1
Name: first_funding_at, Length: 3914, dtype: int64


DUPLICATE COUNTS
45524


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at





LAST_FUNDING_AT
VALUE COUNTS
2013-01-01    387
2014-01-01    364
2012-01-01    348
2008-01-01    302
2011-01-01    272
             ... 
2005-09-03      1
2009-05-24      1
2010-10-02      1
1998-06-30      1
2008-07-13      1
Name: last_funding_at, Length: 3657, dtype: int64


DUPLICATE COUNTS
45781


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,permalink,name,homepage_url,category_list,market,funding_total_usd,status,country_code,state_code,region,city,funding_rounds,founded_at,founded_month,founded_quarter,founded_year,first_funding_at,last_funding_at


# Rounds Analysis and Preprocessing

In [11]:
rounds = pd.read_csv('/content/drive/My Drive/Colab Notebooks/CIS 550/rounds.csv')
rounds[' raised_amount_usd '] = rounds[' raised_amount_usd '].apply(lambda x: set_to_floats(x))
rounds = rounds.drop(columns= 'Unnamed: 16')
rounds

Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/waywire,#waywire,|Entertainment|Politics|Social Media|News|,News,USA,NY,New York City,New York,/funding-round/cc409188fa2b63482bd9008f682c2efa,seed,,2012-06-30,2012-06,2012-Q2,2012.0,1750000.0
1,/organization/tv-communications,&TV Communications,|Games|,Games,USA,CA,Los Angeles,Los Angeles,/funding-round/59a3669a64e39360c2b939300bcda162,venture,,2010-09-23,2010-09,2010-Q3,2010.0,3000000.0
2,/organization/tv-communications,&TV Communications,|Games|,Games,USA,CA,Los Angeles,Los Angeles,/funding-round/86d22afc65107b6941e6c43c671ecbb8,venture,,2010-06-04,2010-06,2010-Q2,2010.0,1000000.0
3,/organization/rock-your-paper,'Rock' Your Paper,|Publishing|Education|,Publishing,EST,,Tallinn,Tallinn,/funding-round/f06b420775f7cb6c1541a9db526534bb,seed,,2012-08-09,2012-08,2012-Q3,2012.0,40000.0
4,/organization/in-touch-network,(In)Touch Network,|Electronics|Guides|Coffee|Restaurants|Music|i...,Electronics,GBR,,London,London,/funding-round/33c3f135f05d7b734b8d7b7c8ae82647,seed,,2011-04-01,2011-04,2011-Q2,2011.0,1500000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83865,/organization/a-list-games,[a]list games,|Games|,Games,,,,,/funding-round/57d5459df4b227527e2edb2c1f183132,seed,,2011-11-21,2011-11,2011-Q4,2011.0,9300000.0
83866,/organization/x,[x+1],|Enterprise Software|,Enterprise Software,USA,NY,New York City,New York,/funding-round/9e17c1f6579169daac605d43926e02fb,debt_financing,,2013-04-04,2013-04,2013-Q2,2013.0,17000000.0
83867,/organization/x,[x+1],|Enterprise Software|,Enterprise Software,USA,NY,New York City,New York,/funding-round/ba12fd8e3d341db1c81b16282018ff7e,venture,A,2008-06-01,2008-06,2008-Q2,2008.0,16000000.0
83868,/organization/x,[x+1],|Enterprise Software|,Enterprise Software,USA,NY,New York City,New York,/funding-round/deb408bbd9f539af78ea69fb532ffa75,venture,B,2011-01-03,2011-01,2011-Q1,2011.0,10000000.0


## Summary Stats

In [12]:
rounds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 83870 entries, 0 to 83869
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   company_permalink        83870 non-null  object 
 1   company_name             83869 non-null  object 
 2   company_category_list    79424 non-null  object 
 3   company_market           79417 non-null  object 
 4   company_country_code     77304 non-null  object 
 5   company_state_code       56497 non-null  object 
 6   company_region           77304 non-null  object 
 7   company_city             76223 non-null  object 
 8   funding_round_permalink  83870 non-null  object 
 9   funding_round_type       83870 non-null  object 
 10  funding_round_code       22870 non-null  object 
 11  funded_at                83870 non-null  object 
 12  funded_month             83860 non-null  object 
 13   funded_quarter          83860 non-null  object 
 14  funded_year           

In [13]:
rounds.describe()

Unnamed: 0,funded_year,raised_amount_usd
count,83860.0,70915.0
mean,2011.018173,9176873.0
std,2.88924,96654080.0
min,1921.0,1.0
25%,2010.0,361150.5
50%,2012.0,1700000.0
75%,2013.0,6574399.0
max,2015.0,21271940000.0


In [14]:
rounds.nunique()

company_permalink          49427
company_name               49344
company_category_list      16666
company_market               753
company_country_code         115
company_state_code            61
company_region              1089
company_city                4188
funding_round_permalink    83870
funding_round_type            13
funding_round_code             8
funded_at                   4159
funded_month                 308
 funded_quarter              130
funded_year                   39
 raised_amount_usd         16582
dtype: int64

## Analysis

In [15]:
column_names = ['company_permalink', 'company_name', 'company_category_list', 'company_market', 'company_country_code', 'company_state_code', 'company_region', 'company_city', 'funding_round_permalink', 'funding_round_type', 'funding_round_code', 'funded_at', 'funded_month', ' funded_quarter ', 'funded_year', ' raised_amount_usd ']

for col in column_names:
  print('\n\n')
  print("==========================================================================")
  print(col.upper())
  print("VALUE COUNTS")
  print(rounds[col].value_counts())
  print("\n")

  print("DUPLICATE COUNTS")
  print(rounds.duplicated(subset=col, keep='first').sum())
  print("\n")

  print("NULL COUNTS AND PERCENTAGE")
  print(rounds[col].isnull().sum())
  print(rounds[col].isnull().sum()/len(rounds)*100)
  print('\n')
  
  print("NULL ROWS")
  display(rounds[rounds[col].isnull()])




COMPANY_PERMALINK
VALUE COUNTS
/organization/solarflare               18
/organization/mobivery                 16
/organization/terascala                15
/organization/aperto-networks          15
/organization/instamed                 15
                                       ..
/organization/itsalat-international     1
/organization/itsdapper                 1
/organization/itsgoinon                 1
/organization/itsmyurls                 1
/organization/waywire                   1
Name: company_permalink, Length: 49427, dtype: int64


DUPLICATE COUNTS
34443


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





COMPANY_NAME
VALUE COUNTS
Solarflare Communications    18
Mobivery                     16
InstaMed                     15
Aperto Networks              15
Terascala                    15
                             ..
Iframe Apps                   1
iFrat Wars                    1
IfThisFits                    1
ifussss                       1
Lost Property Heaven          1
Name: company_name, Length: 49344, dtype: int64


DUPLICATE COUNTS
34525


NULL COUNTS AND PERCENTAGE
1
0.0011923214498628831


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
47686,/organization/tell-it-in,,|Startups|,Startups,,,,,/funding-round/9c987e616755a78c51a4aa67c27a2a93,seed,,2012-03-01,2012-03,2012-Q1,2012.0,25000.0





COMPANY_CATEGORY_LIST
VALUE COUNTS
|Biotechnology|                                      7429
|Software|                                           5824
|Mobile|                                             2106
|E-Commerce|                                         1978
|Clean Technology|                                   1976
                                                     ... 
|Software|Databases|Android|Mobile|Games|               1
|Health Care|Web Tools|Software|                        1
|Parenting|Social Media|Kids|Health and Wellness|       1
|Internet|Nonprofits|Software|                          1
|Entertainment|Politics|Social Media|News|              1
Name: company_category_list, Length: 16666, dtype: int64


DUPLICATE COUNTS
67203


NULL COUNTS AND PERCENTAGE
4446
5.301061166090379


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
60,/organization/12-labs,12 Labs,,,,,,,/funding-round/958803df0c565d11afba52347f7efcde,seed,,0026-11-14,,,,765000.0
76,/organization/12bis,12Bis,,,FRA,,Paris,Paris,/funding-round/19c048bab9708ca7aa465043160f59d1,seed,,2012-03-15,2012-03,2012-Q1,2012.0,130636.0
100,/organization/16lab-inc-,16Lab Inc.,,,,,,,/funding-round/2ee9967324046aa900ea572116ec9c38,seed,,2014-11-26,2014-11,2014-Q4,2014.0,
115,/organization/1d4-pty,1d4 Pty,,,NOR,,Aust-Agder,,/funding-round/3ad3787e6b62433a3e718dfe36ad133f,seed,,2012-04-11,2012-04,2012-Q2,2012.0,40000.0
132,/organization/1rp-media,1RP Media,,,USA,CA,Los Angeles,Los Angeles,/funding-round/f7400a94939ef73f70af0c52b54b31ce,seed,,2012-06-29,2012-06,2012-Q2,2012.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83705,/organization/zuffle,Zuffle,,,ITA,,ITA - Other,Prato,/funding-round/0871ea19b46655d8d109605e8b3f2330,seed,,2013-07-26,2013-07,2013-Q3,2013.0,132351.0
83746,/organization/zumodrive,zumodrive,,,,,,,/funding-round/8e802d38dafc2673e37668852603d2a4,convertible_note,,2007-09-01,2007-09,2007-Q3,2007.0,
83768,/organization/zursh-2,Zursh,,,,,,,/funding-round/c1fd05266e33d67cc540de1571ff31d3,seed,,2014-05-01,2014-05,2014-Q2,2014.0,25000.0
83777,/organization/zuzher,Zuzher,,,,,,,/funding-round/1538682f335c8aed32a7079bafa8a3a0,angel,,2012-08-01,2012-08,2012-Q3,2012.0,15698.0





COMPANY_MARKET
VALUE COUNTS
Biotechnology              7652
Software                   7632
Mobile                     3571
E-Commerce                 2959
Health Care                2778
                           ... 
New Product Development       1
Homeless Shelter              1
Insurance Companies           1
Custom Retail                 1
Rural Energy                  1
Name: company_market, Length: 753, dtype: int64


DUPLICATE COUNTS
83116


NULL COUNTS AND PERCENTAGE
4453
5.309407416239418


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
60,/organization/12-labs,12 Labs,,,,,,,/funding-round/958803df0c565d11afba52347f7efcde,seed,,0026-11-14,,,,765000.0
76,/organization/12bis,12Bis,,,FRA,,Paris,Paris,/funding-round/19c048bab9708ca7aa465043160f59d1,seed,,2012-03-15,2012-03,2012-Q1,2012.0,130636.0
100,/organization/16lab-inc-,16Lab Inc.,,,,,,,/funding-round/2ee9967324046aa900ea572116ec9c38,seed,,2014-11-26,2014-11,2014-Q4,2014.0,
115,/organization/1d4-pty,1d4 Pty,,,NOR,,Aust-Agder,,/funding-round/3ad3787e6b62433a3e718dfe36ad133f,seed,,2012-04-11,2012-04,2012-Q2,2012.0,40000.0
132,/organization/1rp-media,1RP Media,,,USA,CA,Los Angeles,Los Angeles,/funding-round/f7400a94939ef73f70af0c52b54b31ce,seed,,2012-06-29,2012-06,2012-Q2,2012.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83705,/organization/zuffle,Zuffle,,,ITA,,ITA - Other,Prato,/funding-round/0871ea19b46655d8d109605e8b3f2330,seed,,2013-07-26,2013-07,2013-Q3,2013.0,132351.0
83746,/organization/zumodrive,zumodrive,,,,,,,/funding-round/8e802d38dafc2673e37668852603d2a4,convertible_note,,2007-09-01,2007-09,2007-Q3,2007.0,
83768,/organization/zursh-2,Zursh,,,,,,,/funding-round/c1fd05266e33d67cc540de1571ff31d3,seed,,2014-05-01,2014-05,2014-Q2,2014.0,25000.0
83777,/organization/zuzher,Zuzher,,,,,,,/funding-round/1538682f335c8aed32a7079bafa8a3a0,angel,,2012-08-01,2012-08,2012-Q3,2012.0,15698.0





COMPANY_COUNTRY_CODE
VALUE COUNTS
USA    54313
GBR     4072
CAN     2229
CHN     2125
DEU     1462
       ...  
SOM        1
MOZ        1
ALB        1
SYC        1
JEY        1
Name: company_country_code, Length: 115, dtype: int64


DUPLICATE COUNTS
83754


NULL COUNTS AND PERCENTAGE
6566
7.82878263979969


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
9,/organization/0-6-com,0-6.com,|Curated Web|,Curated Web,,,,,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,2008-03-19,2008-03,2008-Q1,2008.0,2000000.0
24,/organization/1000-markets,1000 Markets,|Marketplaces|Art|E-Commerce|,Marketplaces,,,,,/funding-round/ce6749b6441f52ccaf38b6d5578779fd,seed,,2009-05-15,2009-05,2009-Q2,2009.0,500000.0
41,/organization/100du-tv,100du.tv,|Hospitality|,Hospitality,,,,,/funding-round/46a9d3228f69c036e8076f1362ac58cd,venture,C,2010-08-13,2010-08,2010-Q3,2010.0,
42,/organization/100du-tv,100du.tv,|Hospitality|,Hospitality,,,,,/funding-round/8797d60368bb0227f0d0ab4c72aef886,venture,A,2008-01-07,2008-01,2008-Q1,2008.0,3000000.0
51,/organization/10sec,10sec,|Social Commerce|E-Commerce|Mobile Commerce|,Mobile Commerce,,,,,/funding-round/3f842b5ec5236ed37e375b73e945ab7f,seed,,2014-05-08,2014-05,2014-Q2,2014.0,1600000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83768,/organization/zursh-2,Zursh,,,,,,,/funding-round/c1fd05266e33d67cc540de1571ff31d3,seed,,2014-05-01,2014-05,2014-Q2,2014.0,25000.0
83777,/organization/zuzher,Zuzher,,,,,,,/funding-round/1538682f335c8aed32a7079bafa8a3a0,angel,,2012-08-01,2012-08,2012-Q3,2012.0,15698.0
83791,/organization/zwoor-com,zwoor.com,|Meeting Software|Polling|Surveys|Android|iPho...,Polling,,,,,/funding-round/817aedacbcb0993d2a748aab3f22648f,seed,,2011-01-01,2011-01,2011-Q1,2011.0,100000.0
83825,/organization/zympi,Zympi,|Networking|Web Hosting|Mobile|,Web Hosting,,,,,/funding-round/4aaf79fabd66416d679c9830c01f31d1,equity_crowdfunding,,2011-11-22,2011-11,2011-Q4,2011.0,





COMPANY_STATE_CODE
VALUE COUNTS
CA    20173
NY     5254
MA     4421
TX     2640
WA     1928
      ...  
WY       18
MB       14
NB       11
SK        5
PE        3
Name: company_state_code, Length: 61, dtype: int64


DUPLICATE COUNTS
83808


NULL COUNTS AND PERCENTAGE
27373
32.637415047096695


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
3,/organization/rock-your-paper,'Rock' Your Paper,|Publishing|Education|,Publishing,EST,,Tallinn,Tallinn,/funding-round/f06b420775f7cb6c1541a9db526534bb,seed,,2012-08-09,2012-08,2012-Q3,2012.0,40000.0
4,/organization/in-touch-network,(In)Touch Network,|Electronics|Guides|Coffee|Restaurants|Music|i...,Electronics,GBR,,London,London,/funding-round/33c3f135f05d7b734b8d7b7c8ae82647,seed,,2011-04-01,2011-04,2011-Q2,2011.0,1500000.0
8,/organization/fox-networks,.Fox Networks,|Advertising|,Advertising,ARG,,Buenos Aires,Buenos Aires,/funding-round/69a1536a6f4506538afd7aa7241ddbe1,undisclosed,,2007-01-16,2007-01,2007-Q1,2007.0,4912393.0
9,/organization/0-6-com,0-6.com,|Curated Web|,Curated Web,,,,,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,2008-03-19,2008-03,2008-Q1,2008.0,2000000.0
11,/organization/01games-technology,01Games Technology,|Games|,Games,HKG,,Hong Kong,Hong Kong,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,seed,,2014-07-01,2014-07,2014-Q3,2014.0,41250.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83861,/organization/zzzzapp-com,Zzzzapp Wireless ltd.,|Web Development|Advertising|Wireless|Mobile|,Web Development,HRV,,Split,Split,/funding-round/22ef2fafb4d20ac3aa4b86143dbf6c8e,seed,,2011-11-01,2011-11,2011-Q4,2011.0,7000.0
83862,/organization/zzzzapp-com,Zzzzapp Wireless ltd.,|Web Development|Advertising|Wireless|Mobile|,Web Development,HRV,,Split,Split,/funding-round/6ba41360588bc6e3f77e9b50a0ebfafa,seed,,2013-08-01,2013-08,2013-Q3,2013.0,32165.0
83863,/organization/zzzzapp-com,Zzzzapp Wireless ltd.,|Web Development|Advertising|Wireless|Mobile|,Web Development,HRV,,Split,Split,/funding-round/ff1aa06ed5da186c84f101549035d4ae,seed,,2013-03-19,2013-03,2013-Q1,2013.0,32360.0
83864,/organization/zzzzapp-com,Zzzzapp Wireless ltd.,|Web Development|Advertising|Wireless|Mobile|,Web Development,HRV,,Split,Split,/funding-round/8f6d25b8ee4199e586484d817bceda05,convertible_note,,2014-01-01,2014-01,2014-Q1,2014.0,12955.0





COMPANY_REGION
VALUE COUNTS
SF Bay Area        14473
New York City       4760
Boston              4156
London              2593
Los Angeles         2451
                   ...  
Reigate                1
Andhra Pradesh         1
MOZ - Other            1
Kolbermoor             1
Buckinghamshire        1
Name: company_region, Length: 1089, dtype: int64


DUPLICATE COUNTS
82780


NULL COUNTS AND PERCENTAGE
6566
7.82878263979969


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
9,/organization/0-6-com,0-6.com,|Curated Web|,Curated Web,,,,,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,2008-03-19,2008-03,2008-Q1,2008.0,2000000.0
24,/organization/1000-markets,1000 Markets,|Marketplaces|Art|E-Commerce|,Marketplaces,,,,,/funding-round/ce6749b6441f52ccaf38b6d5578779fd,seed,,2009-05-15,2009-05,2009-Q2,2009.0,500000.0
41,/organization/100du-tv,100du.tv,|Hospitality|,Hospitality,,,,,/funding-round/46a9d3228f69c036e8076f1362ac58cd,venture,C,2010-08-13,2010-08,2010-Q3,2010.0,
42,/organization/100du-tv,100du.tv,|Hospitality|,Hospitality,,,,,/funding-round/8797d60368bb0227f0d0ab4c72aef886,venture,A,2008-01-07,2008-01,2008-Q1,2008.0,3000000.0
51,/organization/10sec,10sec,|Social Commerce|E-Commerce|Mobile Commerce|,Mobile Commerce,,,,,/funding-round/3f842b5ec5236ed37e375b73e945ab7f,seed,,2014-05-08,2014-05,2014-Q2,2014.0,1600000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83768,/organization/zursh-2,Zursh,,,,,,,/funding-round/c1fd05266e33d67cc540de1571ff31d3,seed,,2014-05-01,2014-05,2014-Q2,2014.0,25000.0
83777,/organization/zuzher,Zuzher,,,,,,,/funding-round/1538682f335c8aed32a7079bafa8a3a0,angel,,2012-08-01,2012-08,2012-Q3,2012.0,15698.0
83791,/organization/zwoor-com,zwoor.com,|Meeting Software|Polling|Surveys|Android|iPho...,Polling,,,,,/funding-round/817aedacbcb0993d2a748aab3f22648f,seed,,2011-01-01,2011-01,2011-Q1,2011.0,100000.0
83825,/organization/zympi,Zympi,|Networking|Web Hosting|Mobile|,Web Hosting,,,,,/funding-round/4aaf79fabd66416d679c9830c01f31d1,equity_crowdfunding,,2011-11-22,2011-11,2011-Q4,2011.0,





COMPANY_CITY
VALUE COUNTS
San Francisco       5397
New York            4401
London              2003
Palo Alto           1199
Cambridge           1182
                    ... 
Wylie                  1
State University       1
Coldwater              1
Skövde                 1
Moncalieri             1
Name: company_city, Length: 4188, dtype: int64


DUPLICATE COUNTS
79681


NULL COUNTS AND PERCENTAGE
7647
9.117682127101466


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
9,/organization/0-6-com,0-6.com,|Curated Web|,Curated Web,,,,,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,2008-03-19,2008-03,2008-Q1,2008.0,2000000.0
24,/organization/1000-markets,1000 Markets,|Marketplaces|Art|E-Commerce|,Marketplaces,,,,,/funding-round/ce6749b6441f52ccaf38b6d5578779fd,seed,,2009-05-15,2009-05,2009-Q2,2009.0,500000.0
41,/organization/100du-tv,100du.tv,|Hospitality|,Hospitality,,,,,/funding-round/46a9d3228f69c036e8076f1362ac58cd,venture,C,2010-08-13,2010-08,2010-Q3,2010.0,
42,/organization/100du-tv,100du.tv,|Hospitality|,Hospitality,,,,,/funding-round/8797d60368bb0227f0d0ab4c72aef886,venture,A,2008-01-07,2008-01,2008-Q1,2008.0,3000000.0
51,/organization/10sec,10sec,|Social Commerce|E-Commerce|Mobile Commerce|,Mobile Commerce,,,,,/funding-round/3f842b5ec5236ed37e375b73e945ab7f,seed,,2014-05-08,2014-05,2014-Q2,2014.0,1600000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83768,/organization/zursh-2,Zursh,,,,,,,/funding-round/c1fd05266e33d67cc540de1571ff31d3,seed,,2014-05-01,2014-05,2014-Q2,2014.0,25000.0
83777,/organization/zuzher,Zuzher,,,,,,,/funding-round/1538682f335c8aed32a7079bafa8a3a0,angel,,2012-08-01,2012-08,2012-Q3,2012.0,15698.0
83791,/organization/zwoor-com,zwoor.com,|Meeting Software|Polling|Surveys|Android|iPho...,Polling,,,,,/funding-round/817aedacbcb0993d2a748aab3f22648f,seed,,2011-01-01,2011-01,2011-Q1,2011.0,100000.0
83825,/organization/zympi,Zympi,|Networking|Web Hosting|Mobile|,Web Hosting,,,,,/funding-round/4aaf79fabd66416d679c9830c01f31d1,equity_crowdfunding,,2011-11-22,2011-11,2011-Q4,2011.0,





FUNDING_ROUND_PERMALINK
VALUE COUNTS
/funding-round/cc409188fa2b63482bd9008f682c2efa    1
/funding-round/6f07f21df5f9d1632a5664e586261217    1
/funding-round/a0960d0d9ad9f88db43f465482277d20    1
/funding-round/576658dab6661974462815fb7a0c20aa    1
/funding-round/01cba1e55eb3d82da0a7e94557a96501    1
                                                  ..
/funding-round/5c9d8fd5e6f5fa1a28e3dafd6b23fae1    1
/funding-round/37c6cac8889b15cb4db311d01afac287    1
/funding-round/702499a1e2280aee7f1ada3e9da7e05c    1
/funding-round/f05c2325ba9cc43b696357079137ff04    1
/funding-round/5c5ebcc9d85c2a5f810e319c249a36e7    1
Name: funding_round_permalink, Length: 83870, dtype: int64


DUPLICATE COUNTS
0


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





FUNDING_ROUND_TYPE
VALUE COUNTS
venture                 41742
seed                    21036
debt_financing           5692
angel                    4443
undisclosed              3871
equity_crowdfunding      2256
private_equity           1828
grant                    1476
convertible_note          759
post_ipo_equity           395
product_crowdfunding      235
post_ipo_debt              82
secondary_market           55
Name: funding_round_type, dtype: int64


DUPLICATE COUNTS
83857


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





FUNDING_ROUND_CODE
VALUE COUNTS
A    11033
B     6344
C     3223
D     1462
E      576
F      190
G       38
H        4
Name: funding_round_code, dtype: int64


DUPLICATE COUNTS
83861


NULL COUNTS AND PERCENTAGE
61000
72.73160844163587


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/waywire,#waywire,|Entertainment|Politics|Social Media|News|,News,USA,NY,New York City,New York,/funding-round/cc409188fa2b63482bd9008f682c2efa,seed,,2012-06-30,2012-06,2012-Q2,2012.0,1750000.0
1,/organization/tv-communications,&TV Communications,|Games|,Games,USA,CA,Los Angeles,Los Angeles,/funding-round/59a3669a64e39360c2b939300bcda162,venture,,2010-09-23,2010-09,2010-Q3,2010.0,3000000.0
2,/organization/tv-communications,&TV Communications,|Games|,Games,USA,CA,Los Angeles,Los Angeles,/funding-round/86d22afc65107b6941e6c43c671ecbb8,venture,,2010-06-04,2010-06,2010-Q2,2010.0,1000000.0
3,/organization/rock-your-paper,'Rock' Your Paper,|Publishing|Education|,Publishing,EST,,Tallinn,Tallinn,/funding-round/f06b420775f7cb6c1541a9db526534bb,seed,,2012-08-09,2012-08,2012-Q3,2012.0,40000.0
4,/organization/in-touch-network,(In)Touch Network,|Electronics|Guides|Coffee|Restaurants|Music|i...,Electronics,GBR,,London,London,/funding-round/33c3f135f05d7b734b8d7b7c8ae82647,seed,,2011-04-01,2011-04,2011-Q2,2011.0,1500000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83863,/organization/zzzzapp-com,Zzzzapp Wireless ltd.,|Web Development|Advertising|Wireless|Mobile|,Web Development,HRV,,Split,Split,/funding-round/ff1aa06ed5da186c84f101549035d4ae,seed,,2013-03-19,2013-03,2013-Q1,2013.0,32360.0
83864,/organization/zzzzapp-com,Zzzzapp Wireless ltd.,|Web Development|Advertising|Wireless|Mobile|,Web Development,HRV,,Split,Split,/funding-round/8f6d25b8ee4199e586484d817bceda05,convertible_note,,2014-01-01,2014-01,2014-Q1,2014.0,12955.0
83865,/organization/a-list-games,[a]list games,|Games|,Games,,,,,/funding-round/57d5459df4b227527e2edb2c1f183132,seed,,2011-11-21,2011-11,2011-Q4,2011.0,9300000.0
83866,/organization/x,[x+1],|Enterprise Software|,Enterprise Software,USA,NY,New York City,New York,/funding-round/9e17c1f6579169daac605d43926e02fb,debt_financing,,2013-04-04,2013-04,2013-Q2,2013.0,17000000.0





FUNDED_AT
VALUE COUNTS
2012-01-01    652
2013-01-01    616
2008-01-01    548
2011-01-01    511
2014-01-01    464
             ... 
2004-08-25      1
2003-12-12      1
2005-06-12      1
1990-10-31      1
2005-05-27      1
Name: funded_at, Length: 4159, dtype: int64


DUPLICATE COUNTS
79711


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





FUNDED_MONTH
VALUE COUNTS
2014-01    1889
2014-07    1592
2014-06    1590
2013-09    1571
2013-10    1570
           ... 
1996-04       1
1990-07       1
1991-10       1
1988-11       1
1992-02       1
Name: funded_month, Length: 308, dtype: int64


DUPLICATE COUNTS
83561


NULL COUNTS AND PERCENTAGE
10
0.01192321449862883


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
60,/organization/12-labs,12 Labs,,,,,,,/funding-round/958803df0c565d11afba52347f7efcde,seed,,0026-11-14,,,,765000.0
2637,/organization/agflow,AgFlow,|Software|,Software,CHE,,Geneva,Geneva,/funding-round/a4fcc1845670f82284af88690d1db2ad,seed,,0020-06-14,,,,
11563,/organization/buru-buru,Buru Buru,|Startups|Internet|Retail|Design|Art|E-Commerce|,Startups,ITA,,Firenze,Firenze,/funding-round/1e22bfd0485bd84bd78a880edd8d25a6,seed,,0019-11-20,,,,
24877,/organization/exploco,Exploco,|Adventure Travel|,Adventure Travel,AUS,,Perth,Perth,/funding-round/f5769fb890cd1ab33f70abf959d932c9,seed,,0201-01-01,,,,
50199,/organization/nubank,Nubank,|Consumer Internet|Financial Services|,Financial Services,BRA,,Sao Paulo,São Paulo,/funding-round/6aaddf226adfaa6eb7dc8497331d9a7b,seed,,0007-05-13,,,,2000000.0
54020,/organization/peoplegoal,PeopleGoal,|Enterprise Software|,Enterprise Software,,,,,/funding-round/c0990da936fc935fb4eb4dc4c9c8d129,seed,,0001-05-14,,,,
61572,/organization/rotor,Rotor,|Developer Tools|Music|Video|,Video,IRL,,Dublin,Dublin,/funding-round/4f2416bb4db59e5e585ce3d57cd906ef,convertible_note,,0029-09-14,,,,
63245,/organization/securenet-payment-systems,SecureNet Payment Systems,|Trading|Mobile Payments|Payments|E-Commerce|,Payments,USA,TX,Austin,Austin,/funding-round/d72b60bd12b826d684820c6d28957899,private_equity,,0011-11-14,,,,
64605,/organization/shopboostr,Shopboostr,|SaaS|Reviews and Recommendations|E-Commerce|,SaaS,,,,,/funding-round/52b8b119eab4ea1af360c5306526c6a9,seed,,0001-11-14,,,,
72904,/organization/the-urban-roosters,The Urban Roosters,,,,,,,/funding-round/45a194fa4e0eec9cfd60fa00a3c098cc,angel,,0001-07-14,,,,





 FUNDED_QUARTER 
VALUE COUNTS
 2014-Q1     4775
 2014-Q2     4478
 2013-Q4     4359
 2014-Q3     4354
 2013-Q3     4178
             ... 
 1960-Q4        1
 1974-Q1        1
 1995-Q3        1
 1979-Q1        1
 1921-Q3        1
Name:  funded_quarter , Length: 130, dtype: int64


DUPLICATE COUNTS
83739


NULL COUNTS AND PERCENTAGE
10
0.01192321449862883


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
60,/organization/12-labs,12 Labs,,,,,,,/funding-round/958803df0c565d11afba52347f7efcde,seed,,0026-11-14,,,,765000.0
2637,/organization/agflow,AgFlow,|Software|,Software,CHE,,Geneva,Geneva,/funding-round/a4fcc1845670f82284af88690d1db2ad,seed,,0020-06-14,,,,
11563,/organization/buru-buru,Buru Buru,|Startups|Internet|Retail|Design|Art|E-Commerce|,Startups,ITA,,Firenze,Firenze,/funding-round/1e22bfd0485bd84bd78a880edd8d25a6,seed,,0019-11-20,,,,
24877,/organization/exploco,Exploco,|Adventure Travel|,Adventure Travel,AUS,,Perth,Perth,/funding-round/f5769fb890cd1ab33f70abf959d932c9,seed,,0201-01-01,,,,
50199,/organization/nubank,Nubank,|Consumer Internet|Financial Services|,Financial Services,BRA,,Sao Paulo,São Paulo,/funding-round/6aaddf226adfaa6eb7dc8497331d9a7b,seed,,0007-05-13,,,,2000000.0
54020,/organization/peoplegoal,PeopleGoal,|Enterprise Software|,Enterprise Software,,,,,/funding-round/c0990da936fc935fb4eb4dc4c9c8d129,seed,,0001-05-14,,,,
61572,/organization/rotor,Rotor,|Developer Tools|Music|Video|,Video,IRL,,Dublin,Dublin,/funding-round/4f2416bb4db59e5e585ce3d57cd906ef,convertible_note,,0029-09-14,,,,
63245,/organization/securenet-payment-systems,SecureNet Payment Systems,|Trading|Mobile Payments|Payments|E-Commerce|,Payments,USA,TX,Austin,Austin,/funding-round/d72b60bd12b826d684820c6d28957899,private_equity,,0011-11-14,,,,
64605,/organization/shopboostr,Shopboostr,|SaaS|Reviews and Recommendations|E-Commerce|,SaaS,,,,,/funding-round/52b8b119eab4ea1af360c5306526c6a9,seed,,0001-11-14,,,,
72904,/organization/the-urban-roosters,The Urban Roosters,,,,,,,/funding-round/45a194fa4e0eec9cfd60fa00a3c098cc,angel,,0001-07-14,,,,





FUNDED_YEAR
VALUE COUNTS
2013.0    15969
2014.0    15825
2012.0    12512
2011.0    10471
2010.0     8647
2009.0     6515
2008.0     4235
2007.0     3535
2006.0     2640
2005.0     1753
2004.0      414
2000.0      266
2003.0      259
2002.0      203
2001.0      191
1999.0      176
1998.0       61
1996.0       32
1997.0       30
1995.0       21
1994.0       15
1993.0       15
1990.0       14
1992.0       12
1991.0       10
1985.0        6
1987.0        6
1986.0        4
1989.0        4
1984.0        4
1960.0        3
1982.0        3
1974.0        2
1988.0        2
1979.0        1
1973.0        1
1983.0        1
2015.0        1
1921.0        1
Name: funded_year, dtype: int64


DUPLICATE COUNTS
83830


NULL COUNTS AND PERCENTAGE
10
0.01192321449862883


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
60,/organization/12-labs,12 Labs,,,,,,,/funding-round/958803df0c565d11afba52347f7efcde,seed,,0026-11-14,,,,765000.0
2637,/organization/agflow,AgFlow,|Software|,Software,CHE,,Geneva,Geneva,/funding-round/a4fcc1845670f82284af88690d1db2ad,seed,,0020-06-14,,,,
11563,/organization/buru-buru,Buru Buru,|Startups|Internet|Retail|Design|Art|E-Commerce|,Startups,ITA,,Firenze,Firenze,/funding-round/1e22bfd0485bd84bd78a880edd8d25a6,seed,,0019-11-20,,,,
24877,/organization/exploco,Exploco,|Adventure Travel|,Adventure Travel,AUS,,Perth,Perth,/funding-round/f5769fb890cd1ab33f70abf959d932c9,seed,,0201-01-01,,,,
50199,/organization/nubank,Nubank,|Consumer Internet|Financial Services|,Financial Services,BRA,,Sao Paulo,São Paulo,/funding-round/6aaddf226adfaa6eb7dc8497331d9a7b,seed,,0007-05-13,,,,2000000.0
54020,/organization/peoplegoal,PeopleGoal,|Enterprise Software|,Enterprise Software,,,,,/funding-round/c0990da936fc935fb4eb4dc4c9c8d129,seed,,0001-05-14,,,,
61572,/organization/rotor,Rotor,|Developer Tools|Music|Video|,Video,IRL,,Dublin,Dublin,/funding-round/4f2416bb4db59e5e585ce3d57cd906ef,convertible_note,,0029-09-14,,,,
63245,/organization/securenet-payment-systems,SecureNet Payment Systems,|Trading|Mobile Payments|Payments|E-Commerce|,Payments,USA,TX,Austin,Austin,/funding-round/d72b60bd12b826d684820c6d28957899,private_equity,,0011-11-14,,,,
64605,/organization/shopboostr,Shopboostr,|SaaS|Reviews and Recommendations|E-Commerce|,SaaS,,,,,/funding-round/52b8b119eab4ea1af360c5306526c6a9,seed,,0001-11-14,,,,
72904,/organization/the-urban-roosters,The Urban Roosters,,,,,,,/funding-round/45a194fa4e0eec9cfd60fa00a3c098cc,angel,,0001-07-14,,,,





 RAISED_AMOUNT_USD 
VALUE COUNTS
1000000.0     2267
2000000.0     1691
500000.0      1660
5000000.0     1585
10000000.0    1569
              ... 
4891000.0        1
8676000.0        1
2874000.0        1
7483000.0        1
12955.0          1
Name:  raised_amount_usd , Length: 16582, dtype: int64


DUPLICATE COUNTS
67287


NULL COUNTS AND PERCENTAGE
12955
15.446524382973651


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
10,/organization/004-technologies,004 Technologies,|Software|,Software,USA,IL,"Springfield, Illinois",Champaign,/funding-round/1278dd4e6a37fa4b7d7e06c21b3c1830,venture,,2014-07-24,2014-07,2014-Q3,2014.0,
13,/organization/1-4-all,1-4 All,|Entertainment|Games|Software|,Software,USA,NC,NC - Other,Connellys Springs,/funding-round/e97a192e13ea0ee3c4f71136b4f3ec16,equity_crowdfunding,,2013-04-21,2013-04,2013-Q2,2013.0,
14,/organization/1-800-dentist,1-800-DENTIST,|Health and Wellness|,Health and Wellness,USA,CA,Los Angeles,Los Angeles,/funding-round/5274aacc211163fc7c86539ce94bbacc,undisclosed,,2010-08-19,2010-08,2010-Q3,2010.0,
16,/organization/1-618-technology,1.618 Technology,|Real Estate|,Real Estate,USA,FL,Orlando,Orlando,/funding-round/83b8f4c7d37ecef5e001a5e953bf461a,equity_crowdfunding,,2014-01-22,2014-01,2014-Q1,2014.0,
25,/organization/1000jobboersen-de,1000jobboersen.de,|Curated Web|,Curated Web,DEU,,Berlin,Berlin,/funding-round/3407d447c84490721358e5c0eb23e3a1,venture,B,2011-09-16,2011-09,2011-Q3,2011.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83816,/organization/zymergen,Zymergen,|Biotechnology|,Biotechnology,USA,CA,SF Bay Area,San Francisco,/funding-round/6b08eaf89f9a6211f1c83494c2bf8e82,seed,,2014-01-01,2014-01,2014-Q1,2014.0,
83825,/organization/zympi,Zympi,|Networking|Web Hosting|Mobile|,Web Hosting,,,,,/funding-round/4aaf79fabd66416d679c9830c01f31d1,equity_crowdfunding,,2011-11-22,2011-11,2011-Q4,2011.0,
83826,/organization/zymr-inc-,"Zymr, Inc.",|Open Source|Network Security|Networking|Virtu...,Testing,USA,CA,SF Bay Area,Santa Clara,/funding-round/9048d1a80edeae803ed1790c9f473eec,angel,,2012-12-12,2012-12,2012-Q4,2012.0,
83827,/organization/zyncd,Zyncd,|Knowledge Management|Crowdsourcing|,Crowdsourcing,GBR,,London,London,/funding-round/5a0d321abcfef61a8bae383176ac46e7,seed,,2014-02-01,2014-02,2014-Q1,2014.0,


#Investments Analysis and Preprocessing

In [16]:
investments = pd.read_csv('/content/drive/My Drive/Colab Notebooks/CIS 550/investments.csv')
investments['raised_amount_usd'] = investments['raised_amount_usd'].apply(lambda x: set_to_floats(x))
investments

Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
2,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/antonio-murroni,ANTONIO MURRONI,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
3,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/filippo-murroni,FILIPPO Murroni,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114501,/organization/helpling,Helpling,|Consumers|,Consumers,DEU,,Berlin,Berlin,/person/lukasz-gadowski,Lukasz Gadowski,...,,,/funding-round/681af1b330a13043b17aeac45d841b88,venture,A,2014-12-02,2014-12,2014-Q4,2014,17000000.0
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,
114503,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/trevor-wright,Trevor Wright,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0
114504,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/wendy-lea,Wendy Lea,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0


##Summary Stats

In [17]:
investments.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114506 entries, 0 to 114505
Data columns (total 24 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   company_permalink        114506 non-null  object 
 1   company_name             114505 non-null  object 
 2   company_category_list    111242 non-null  object 
 3   company_market           111240 non-null  object 
 4   company_country_code     107147 non-null  object 
 5   company_state_code       79158 non-null   object 
 6   company_region           107147 non-null  object 
 7   company_city             105801 non-null  object 
 8   investor_permalink       114440 non-null  object 
 9   investor_name            114440 non-null  object 
 10  investor_category_list   30507 non-null   object 
 11  investor_market          30455 non-null   object 
 12  investor_country_code    86521 non-null   object 
 13  investor_state_code      62274 non-null   object 
 14  inve

In [18]:
investments.describe()

Unnamed: 0,funded_year,raised_amount_usd
count,114506.0,101091.0
mean,2010.686174,12671400.0
std,3.065592,47254290.0
min,1921.0,14.0
25%,2009.0,1200000.0
50%,2012.0,4600000.0
75%,2013.0,13000000.0
max,2014.0,5800000000.0


In [19]:
investments.nunique()

company_permalink          32335
company_name               32284
company_category_list      11874
company_market               712
company_country_code         101
company_state_code            61
company_region               965
company_city                2901
investor_permalink         22276
investor_name              22037
investor_category_list      1160
investor_market              272
investor_country_code         85
investor_state_code           59
investor_region              589
investor_city               1381
funding_round_permalink    50280
funding_round_type            13
funding_round_code             8
funded_at                   3923
funded_month                 300
funded_quarter               123
funded_year                   36
raised_amount_usd           7452
dtype: int64

##Analysis

In [20]:
column_names = ['company_permalink', 'company_name', 'company_category_list', 'company_market', 'company_country_code', 'company_state_code', 'company_region', 'company_city', 'investor_permalink', 'investor_name', 'investor_category_list', 'investor_market', 'investor_country_code','investor_state_code', 'investor_region', 'investor_city', 'funding_round_permalink', 'funding_round_type', 'funding_round_code', 'funded_at', 'funded_month', 'funded_quarter', 'funded_year', 'raised_amount_usd']

for col in column_names:
  print('\n\n')
  print("==========================================================================")
  print(col.upper())
  print("VALUE COUNTS")
  print(investments[col].value_counts())
  print("\n")

  print("DUPLICATE COUNTS")
  print(investments.duplicated(subset=col, keep='first').sum())
  print("\n")

  print("NULL COUNTS AND PERCENTAGE")
  print(investments[col].isnull().sum())
  print(investments[col].isnull().sum()/len(investments)*100)
  print('\n')
  
  print("NULL ROWS")
  display(investments[investments[col].isnull()])




COMPANY_PERMALINK
VALUE COUNTS
/organization/fab-com            60
/organization/ecomom             59
/organization/cardiodx           57
/organization/practice-fusion    55
/organization/path               53
                                 ..
/organization/reaqua-systems      1
/organization/bindhq              1
/organization/godigex             1
/organization/dimers-lab          1
/organization/vonjour-com         1
Name: company_permalink, Length: 32335, dtype: int64


DUPLICATE COUNTS
82171


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





COMPANY_NAME
VALUE COUNTS
Fab                60
ecomom             59
CardioDx           57
Practice Fusion    55
Path               53
                   ..
Startlocal          1
Game Cooks          1
Yoink Games         1
Zeto                1
Vonjour             1
Name: company_name, Length: 32284, dtype: int64


DUPLICATE COUNTS
82221


NULL COUNTS AND PERCENTAGE
1
0.0008733166820952613


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
59891,/organization/tell-it-in,,|Startups|,Startups,,,,,/organization/portland-seed-fund,Portland Seed Fund,...,"Portland, Oregon",Portland,/funding-round/9c987e616755a78c51a4aa67c27a2a93,seed,,2012-03-01,2012-03,2012-Q1,2012,25000.0





COMPANY_CATEGORY_LIST
VALUE COUNTS
|Biotechnology|                                                      7762
|Software|                                                           7224
|Mobile|                                                             2979
|Enterprise Software|                                                2710
|E-Commerce|                                                         2616
                                                                     ... 
|Consumer Goods|Beauty|Health Care|                                     1
|Security|Lighting|Semiconductors|Clean Technology|                     1
|Discounts|Location Based Services|Mobile|Messaging|SMS|Software|       1
|Hospitality|Travel|                                                    1
|Services|Wireless|Telecommunications|                                  1
Name: company_category_list, Length: 11874, dtype: int64


DUPLICATE COUNTS
102631


NULL COUNTS AND PERCENTAGE
3264
2.850505650358933


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
5,/organization/laborat-rios-noli,Laboratórios Noli,,,BRA,,Porto Alegre,Porto Alegre,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/5bdff09243b72f7fb325146259880e93,venture,A,1982-12-01,1982-12,1982-Q4,1982,155000.0
6,/organization/indelsul,Indelsul,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/48e8db0d90f95934831603622cb3f46a,venture,A,1982-12-01,1982-12,1982-Q4,1982,165000.0
7,/organization/polimax,Polimax,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/b046f5d59bc7a5bc86ddfb69c70adf77,venture,A,1983-06-01,1983-06,1983-Q2,1983,94000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114438,/organization/ascendis-pharma,Ascendis Pharma,,,USA,CA,SF Bay Area,Palo Alto,/organization/venrock,Venrock,...,SF Bay Area,Palo Alto,/funding-round/caf8476f16c2266ac347376454ec61f5,venture,D,2014-12-01,2014-12,2014-Q4,2014,60000000.0
114439,/organization/ascendis-pharma,Ascendis Pharma,,,USA,CA,SF Bay Area,Palo Alto,/organization/vivo-capital,Vivo Capital,...,,,/funding-round/caf8476f16c2266ac347376454ec61f5,venture,D,2014-12-01,2014-12,2014-Q4,2014,60000000.0
114479,/organization/xeltis,Xeltis,,,CHE,,Zurich,Zürich,/organization/kurma-life-science-partners,Kurma Life Science Partners,...,,,/funding-round/03dd9b7cfc4a45a1bd6903143423f207,venture,B,2014-12-02,2014-12,2014-Q4,2014,34000000.0
114480,/organization/xeltis,Xeltis,,,CHE,,Zurich,Zürich,/organization/life-sciences-partners,Life Sciences Partners,...,Amsterdam,Amsterdam,/funding-round/03dd9b7cfc4a45a1bd6903143423f207,venture,B,2014-12-02,2014-12,2014-Q4,2014,34000000.0





COMPANY_MARKET
VALUE COUNTS
Software               9906
Biotechnology          8164
Mobile                 5282
E-Commerce             4292
Enterprise Software    4128
                       ... 
Direct Sales              1
Minerals                  1
Music Venues              1
Sex                       1
Civil Engineers           1
Name: company_market, Length: 712, dtype: int64


DUPLICATE COUNTS
113793


NULL COUNTS AND PERCENTAGE
3266
2.8522522837231237


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
5,/organization/laborat-rios-noli,Laboratórios Noli,,,BRA,,Porto Alegre,Porto Alegre,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/5bdff09243b72f7fb325146259880e93,venture,A,1982-12-01,1982-12,1982-Q4,1982,155000.0
6,/organization/indelsul,Indelsul,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/48e8db0d90f95934831603622cb3f46a,venture,A,1982-12-01,1982-12,1982-Q4,1982,165000.0
7,/organization/polimax,Polimax,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/b046f5d59bc7a5bc86ddfb69c70adf77,venture,A,1983-06-01,1983-06,1983-Q2,1983,94000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114438,/organization/ascendis-pharma,Ascendis Pharma,,,USA,CA,SF Bay Area,Palo Alto,/organization/venrock,Venrock,...,SF Bay Area,Palo Alto,/funding-round/caf8476f16c2266ac347376454ec61f5,venture,D,2014-12-01,2014-12,2014-Q4,2014,60000000.0
114439,/organization/ascendis-pharma,Ascendis Pharma,,,USA,CA,SF Bay Area,Palo Alto,/organization/vivo-capital,Vivo Capital,...,,,/funding-round/caf8476f16c2266ac347376454ec61f5,venture,D,2014-12-01,2014-12,2014-Q4,2014,60000000.0
114479,/organization/xeltis,Xeltis,,,CHE,,Zurich,Zürich,/organization/kurma-life-science-partners,Kurma Life Science Partners,...,,,/funding-round/03dd9b7cfc4a45a1bd6903143423f207,venture,B,2014-12-02,2014-12,2014-Q4,2014,34000000.0
114480,/organization/xeltis,Xeltis,,,CHE,,Zurich,Zürich,/organization/life-sciences-partners,Life Sciences Partners,...,Amsterdam,Amsterdam,/funding-round/03dd9b7cfc4a45a1bd6903143423f207,venture,B,2014-12-02,2014-12,2014-Q4,2014,34000000.0





COMPANY_COUNTRY_CODE
VALUE COUNTS
USA    76523
GBR     5933
CHN     2846
CAN     2667
DEU     2488
       ...  
LAO        1
OMN        1
MOZ        1
ZWE        1
BRN        1
Name: company_country_code, Length: 101, dtype: int64


DUPLICATE COUNTS
114404


NULL COUNTS AND PERCENTAGE
7359
6.426737463539029


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
6,/organization/indelsul,Indelsul,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/48e8db0d90f95934831603622cb3f46a,venture,A,1982-12-01,1982-12,1982-Q4,1982,165000.0
7,/organization/polimax,Polimax,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/b046f5d59bc7a5bc86ddfb69c70adf77,venture,A,1983-06-01,1983-06,1983-Q2,1983,94000.0
9,/organization/mixertech,Mixertech,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/4316813c5da6e06f2a15fa25c7946a73,venture,A,1984-05-01,1984-05,1984-Q2,1984,
16,/organization/medaphis-physician-services-corp...,Medaphis Physician Services Corporation,|Health Care|,Health Care,,,,,/organization/summit-partners,Summit Partners,...,Boston,Boston,/funding-round/679e8e54c0e43298f7692bd8e3caed1e,undisclosed,,1986-05-30,1986-05,1986-Q2,1986,
21,/organization/nhk-world,NHK World,|News|,News,,,,,/organization/itochu-corporation,ITOCHU Corporation,...,Tokyo,Tokyo,/funding-round/bdae954da8883f9b4dab3dce9a62c5ed,venture,D,1987-06-16,1987-06,1987-Q2,1987,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114409,/organization/appintop,AppInTop,,,,,,,/organization/run-capital-investment-fund,Run Capital Investment Fund,...,,,/funding-round/00fda78c1c7c938c4ea3a2fb262cc812,venture,A,2014-11-28,2014-11,2014-Q4,2014,6000000.0
114422,/organization/a-crowd-of-monsters,A Crowd of Monsters,|Games|,Games,,,,,/organization/caixa-capital,Caixa Capital Risc,...,Barcelona,Barcelona,/funding-round/56ef745ff01198e5e9d4ae18b808ce13,venture,,2014-12-01,2014-12,2014-Q4,2014,560248.0
114429,/organization/a-crowd-of-monsters,A Crowd of Monsters,|Games|,Games,,,,,/organization/inveready-technology-investment-...,Inveready Technology Investment Group,...,Barcelona,Barcelona,/funding-round/56ef745ff01198e5e9d4ae18b808ce13,venture,,2014-12-01,2014-12,2014-Q4,2014,560248.0
114437,/organization/a-crowd-of-monsters,A Crowd of Monsters,|Games|,Games,,,,,/organization/the-crowd-angel,The Crowd Angel,...,Barcelona,Barcelona,/funding-round/56ef745ff01198e5e9d4ae18b808ce13,venture,,2014-12-01,2014-12,2014-Q4,2014,560248.0





COMPANY_STATE_CODE
VALUE COUNTS
CA    39101
NY     8934
MA     6540
WA     2481
TX     2436
      ...  
PE        5
SK        4
SD        4
AK        2
WY        1
Name: company_state_code, Length: 61, dtype: int64


DUPLICATE COUNTS
114444


NULL COUNTS AND PERCENTAGE
35348
30.869998078703297


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
5,/organization/laborat-rios-noli,Laboratórios Noli,,,BRA,,Porto Alegre,Porto Alegre,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/5bdff09243b72f7fb325146259880e93,venture,A,1982-12-01,1982-12,1982-Q4,1982,155000.0
6,/organization/indelsul,Indelsul,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/48e8db0d90f95934831603622cb3f46a,venture,A,1982-12-01,1982-12,1982-Q4,1982,165000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114496,/organization/veniam,Veniam,|Communications Infrastructure|Wireless|,Wireless,PRT,,Porto,Porto,/organization/union-square-ventures,Union Square Ventures,...,New York City,New York,/funding-round/e5e418907f0047bdc23e4b53440abc7e,venture,A,2014-12-02,2014-12,2014-Q4,2014,4900000.0
114499,/organization/xeltis,Xeltis,,,CHE,,Zurich,Zürich,/organization/vi-partners,VI Partners,...,Zurich,Zug,/funding-round/03dd9b7cfc4a45a1bd6903143423f207,venture,B,2014-12-02,2014-12,2014-Q4,2014,34000000.0
114501,/organization/helpling,Helpling,|Consumers|,Consumers,DEU,,Berlin,Berlin,/person/lukasz-gadowski,Lukasz Gadowski,...,,,/funding-round/681af1b330a13043b17aeac45d841b88,venture,A,2014-12-02,2014-12,2014-Q4,2014,17000000.0
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,





COMPANY_REGION
VALUE COUNTS
SF Bay Area      32120
New York City     8604
Boston            6286
London            3860
Los Angeles       3674
                 ...  
Gilching             1
Tarragona            1
Borehamwood          1
Belgaum              1
Kathmandu            1
Name: company_region, Length: 965, dtype: int64


DUPLICATE COUNTS
113540


NULL COUNTS AND PERCENTAGE
7359
6.426737463539029


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
6,/organization/indelsul,Indelsul,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/48e8db0d90f95934831603622cb3f46a,venture,A,1982-12-01,1982-12,1982-Q4,1982,165000.0
7,/organization/polimax,Polimax,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/b046f5d59bc7a5bc86ddfb69c70adf77,venture,A,1983-06-01,1983-06,1983-Q2,1983,94000.0
9,/organization/mixertech,Mixertech,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/4316813c5da6e06f2a15fa25c7946a73,venture,A,1984-05-01,1984-05,1984-Q2,1984,
16,/organization/medaphis-physician-services-corp...,Medaphis Physician Services Corporation,|Health Care|,Health Care,,,,,/organization/summit-partners,Summit Partners,...,Boston,Boston,/funding-round/679e8e54c0e43298f7692bd8e3caed1e,undisclosed,,1986-05-30,1986-05,1986-Q2,1986,
21,/organization/nhk-world,NHK World,|News|,News,,,,,/organization/itochu-corporation,ITOCHU Corporation,...,Tokyo,Tokyo,/funding-round/bdae954da8883f9b4dab3dce9a62c5ed,venture,D,1987-06-16,1987-06,1987-Q2,1987,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114409,/organization/appintop,AppInTop,,,,,,,/organization/run-capital-investment-fund,Run Capital Investment Fund,...,,,/funding-round/00fda78c1c7c938c4ea3a2fb262cc812,venture,A,2014-11-28,2014-11,2014-Q4,2014,6000000.0
114422,/organization/a-crowd-of-monsters,A Crowd of Monsters,|Games|,Games,,,,,/organization/caixa-capital,Caixa Capital Risc,...,Barcelona,Barcelona,/funding-round/56ef745ff01198e5e9d4ae18b808ce13,venture,,2014-12-01,2014-12,2014-Q4,2014,560248.0
114429,/organization/a-crowd-of-monsters,A Crowd of Monsters,|Games|,Games,,,,,/organization/inveready-technology-investment-...,Inveready Technology Investment Group,...,Barcelona,Barcelona,/funding-round/56ef745ff01198e5e9d4ae18b808ce13,venture,,2014-12-01,2014-12,2014-Q4,2014,560248.0
114437,/organization/a-crowd-of-monsters,A Crowd of Monsters,|Games|,Games,,,,,/organization/the-crowd-angel,The Crowd Angel,...,Barcelona,Barcelona,/funding-round/56ef745ff01198e5e9d4ae18b808ce13,venture,,2014-12-01,2014-12,2014-Q4,2014,560248.0





COMPANY_CITY
VALUE COUNTS
San Francisco         14287
New York               8140
London                 2861
Mountain View          2601
Palo Alto              2520
                      ...  
Cuautitlán Izcalli        1
Scarborough               1
Aschheim                  1
Mapleton                  1
Asbury Park               1
Name: company_city, Length: 2901, dtype: int64


DUPLICATE COUNTS
111604


NULL COUNTS AND PERCENTAGE
8705
7.60222171763925


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
6,/organization/indelsul,Indelsul,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/48e8db0d90f95934831603622cb3f46a,venture,A,1982-12-01,1982-12,1982-Q4,1982,165000.0
7,/organization/polimax,Polimax,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/b046f5d59bc7a5bc86ddfb69c70adf77,venture,A,1983-06-01,1983-06,1983-Q2,1983,94000.0
9,/organization/mixertech,Mixertech,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/4316813c5da6e06f2a15fa25c7946a73,venture,A,1984-05-01,1984-05,1984-Q2,1984,
12,/organization/curtume-er,Curtume Erê,,,BRA,,BRA - Other,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/180c9ab38e64d3abd4bfadd291519475,venture,A,1985-06-01,1985-06,1985-Q2,1985,169000.0
16,/organization/medaphis-physician-services-corp...,Medaphis Physician Services Corporation,|Health Care|,Health Care,,,,,/organization/summit-partners,Summit Partners,...,Boston,Boston,/funding-round/679e8e54c0e43298f7692bd8e3caed1e,undisclosed,,1986-05-30,1986-05,1986-Q2,1986,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114422,/organization/a-crowd-of-monsters,A Crowd of Monsters,|Games|,Games,,,,,/organization/caixa-capital,Caixa Capital Risc,...,Barcelona,Barcelona,/funding-round/56ef745ff01198e5e9d4ae18b808ce13,venture,,2014-12-01,2014-12,2014-Q4,2014,560248.0
114429,/organization/a-crowd-of-monsters,A Crowd of Monsters,|Games|,Games,,,,,/organization/inveready-technology-investment-...,Inveready Technology Investment Group,...,Barcelona,Barcelona,/funding-round/56ef745ff01198e5e9d4ae18b808ce13,venture,,2014-12-01,2014-12,2014-Q4,2014,560248.0
114430,/organization/grid4c,Grid4C,,,ISR,,ISR - Other,,/organization/israel-cleantech-ventures,Israel Cleantech Ventures,...,Tel Aviv,Ramat Hasharon,/funding-round/5fcdc713477ba78741e770b3118f54c6,venture,,2014-12-01,2014-12,2014-Q4,2014,1500000.0
114437,/organization/a-crowd-of-monsters,A Crowd of Monsters,|Games|,Games,,,,,/organization/the-crowd-angel,The Crowd Angel,...,Barcelona,Barcelona,/funding-round/56ef745ff01198e5e9d4ae18b808ce13,venture,,2014-12-01,2014-12,2014-Q4,2014,560248.0





INVESTOR_PERMALINK
VALUE COUNTS
/organization/sequoia-capital    776
/organization/start-up-chile     702
/organization/500-startups       694
/organization/intel-capital      674
/organization/y-combinator       624
                                ... 
/organization/match                1
/person/jonas-brandon              1
/person/matt-wyndowe               1
/organization/ern-capital          1
/person/trevor-wright              1
Name: investor_permalink, Length: 22276, dtype: int64


DUPLICATE COUNTS
92229


NULL COUNTS AND PERCENTAGE
66
0.05763890101828725


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
628,/organization/goldpocket-interactive,Goldpocket Interactive,|Television|Software|,Software,USA,CA,Los Angeles,Los Angeles,,,...,,,/funding-round/fe5ce7fc6e13da8ca7883dd1482e7a97,venture,B,2000-03-30,2000-03,2000-Q1,2000,43000000.0
12727,/organization/bmeye,BMEYE,|Health Care|,Health Care,NLD,,Amsterdam,Amsterdam,,,...,,,/funding-round/7cd40465092578ef9bf1a74770db0dec,venture,A,2006-12-31,2006-12,2006-Q4,2006,
40424,/organization/moon-express-inc,"Moon Express, Inc.",|Space Travel|,Space Travel,USA,CA,CA - Other,Moffett Field,,,...,,,/funding-round/e16c20bf7a1f75115bf0d2764cb4f792,seed,,2010-09-01,2010-09,2010-Q3,2010,2500000.0
43781,/organization/koudai,Koudai,|E-Commerce|,E-Commerce,CHN,,Beijing,Beijing,,,...,,,/funding-round/60127514c25d418fed079a9b4ad75776,venture,A,2011-01-01,2011-01,2011-Q1,2011,12000000.0
45072,/organization/unbooked-ltd,Unbooked Ltd,|Enterprises|Business Services|Beauty|Health a...,Enterprises,USA,CA,SF Bay Area,San Francisco,,,...,,,/funding-round/79aa066f34d45045d21be10accda9dbb,angel,,2011-02-01,2011-02,2011-Q1,2011,500000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113281,/organization/emaze,emaze,|Technology|Design|Internet|Presentations|Soft...,Technology,ISR,,Tel Aviv,Tel Aviv,,,...,,,/funding-round/6ba23aab1c18a20700597a99f95bafe3,venture,A,2014-11-03,2014-11,2014-Q4,2014,2000000.0
113282,/organization/social-reality,Social Reality,|Social Media Advertising|Social Media Marketi...,Facebook Applications,USA,CA,Los Angeles,Los Angeles,,,...,,,/funding-round/7e452b8b63ac6a508ab30ad3b513f363,venture,,2014-11-03,2014-11,2014-Q4,2014,5000000.0
113283,/organization/visual-software-systems-ltd,Visual Software Systems Ltd.,|Software|,Software,GBR,,Camberley,Camberley,,,...,,,/funding-round/da53165180ea91afd808bcfa9ad20a41,venture,A,2014-11-03,2014-11,2014-Q4,2014,2000000.0
113328,/organization/nanolive,Nanolive,,,CHE,,Ecublens,Ecublens,,,...,,,/funding-round/e732b70e579f61fd973820ea9e348d7d,venture,,2014-11-04,2014-11,2014-Q4,2014,2800205.0





INVESTOR_NAME
VALUE COUNTS
Sequoia Capital              776
Start-Up Chile               702
500 Startups                 694
Intel Capital                674
Y Combinator                 625
                            ... 
AVC                            1
Rheinland Venture Capital      1
Haresh Chawla                  1
departure                      1
Trevor Wright                  1
Name: investor_name, Length: 22037, dtype: int64


DUPLICATE COUNTS
92468


NULL COUNTS AND PERCENTAGE
66
0.05763890101828725


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
628,/organization/goldpocket-interactive,Goldpocket Interactive,|Television|Software|,Software,USA,CA,Los Angeles,Los Angeles,,,...,,,/funding-round/fe5ce7fc6e13da8ca7883dd1482e7a97,venture,B,2000-03-30,2000-03,2000-Q1,2000,43000000.0
12727,/organization/bmeye,BMEYE,|Health Care|,Health Care,NLD,,Amsterdam,Amsterdam,,,...,,,/funding-round/7cd40465092578ef9bf1a74770db0dec,venture,A,2006-12-31,2006-12,2006-Q4,2006,
40424,/organization/moon-express-inc,"Moon Express, Inc.",|Space Travel|,Space Travel,USA,CA,CA - Other,Moffett Field,,,...,,,/funding-round/e16c20bf7a1f75115bf0d2764cb4f792,seed,,2010-09-01,2010-09,2010-Q3,2010,2500000.0
43781,/organization/koudai,Koudai,|E-Commerce|,E-Commerce,CHN,,Beijing,Beijing,,,...,,,/funding-round/60127514c25d418fed079a9b4ad75776,venture,A,2011-01-01,2011-01,2011-Q1,2011,12000000.0
45072,/organization/unbooked-ltd,Unbooked Ltd,|Enterprises|Business Services|Beauty|Health a...,Enterprises,USA,CA,SF Bay Area,San Francisco,,,...,,,/funding-round/79aa066f34d45045d21be10accda9dbb,angel,,2011-02-01,2011-02,2011-Q1,2011,500000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113281,/organization/emaze,emaze,|Technology|Design|Internet|Presentations|Soft...,Technology,ISR,,Tel Aviv,Tel Aviv,,,...,,,/funding-round/6ba23aab1c18a20700597a99f95bafe3,venture,A,2014-11-03,2014-11,2014-Q4,2014,2000000.0
113282,/organization/social-reality,Social Reality,|Social Media Advertising|Social Media Marketi...,Facebook Applications,USA,CA,Los Angeles,Los Angeles,,,...,,,/funding-round/7e452b8b63ac6a508ab30ad3b513f363,venture,,2014-11-03,2014-11,2014-Q4,2014,5000000.0
113283,/organization/visual-software-systems-ltd,Visual Software Systems Ltd.,|Software|,Software,GBR,,Camberley,Camberley,,,...,,,/funding-round/da53165180ea91afd808bcfa9ad20a41,venture,A,2014-11-03,2014-11,2014-Q4,2014,2000000.0
113328,/organization/nanolive,Nanolive,,,CHE,,Ecublens,Ecublens,,,...,,,/funding-round/e732b70e579f61fd973820ea9e348d7d,venture,,2014-11-04,2014-11,2014-Q4,2014,2800205.0





INVESTOR_CATEGORY_LIST
VALUE COUNTS
|Venture Capital|                                                       5760
|Finance|                                                               4069
|Finance|Venture Capital|                                               1598
|Investment Management|                                                 1324
|Venture Capital|Finance|                                               1112
                                                                        ... 
|Entrepreneur|Advertising|                                                 1
|Startups|Automotive|                                                      1
|Digital Media|Active Lifestyle|Sports|                                    1
|E-Commerce|Trading|Online Rental|Marketplaces|Internet|Real Estate|       1
|Finance|Angels|Startups|                                                  1
Name: investor_category_list, Length: 1160, dtype: int64


DUPLICATE COUNTS
113345


NULL COUNTS AND PERCENTAGE
83

Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
2,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/antonio-murroni,ANTONIO MURRONI,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
3,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/filippo-murroni,FILIPPO Murroni,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114501,/organization/helpling,Helpling,|Consumers|,Consumers,DEU,,Berlin,Berlin,/person/lukasz-gadowski,Lukasz Gadowski,...,,,/funding-round/681af1b330a13043b17aeac45d841b88,venture,A,2014-12-02,2014-12,2014-Q4,2014,17000000.0
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,
114503,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/trevor-wright,Trevor Wright,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0
114504,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/wendy-lea,Wendy Lea,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0





INVESTOR_MARKET
VALUE COUNTS
Venture Capital                9001
Finance                        7551
Investment Management          1790
Startups                       1537
Technology                     1091
                               ... 
Fraud Detection                   1
Productivity Software             1
Real Estate Investors             1
Natural Language Processing       1
Commercial Real Estate            1
Name: investor_market, Length: 272, dtype: int64


DUPLICATE COUNTS
114233


NULL COUNTS AND PERCENTAGE
84051
73.40314044678881


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
2,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/antonio-murroni,ANTONIO MURRONI,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
3,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/filippo-murroni,FILIPPO Murroni,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114501,/organization/helpling,Helpling,|Consumers|,Consumers,DEU,,Berlin,Berlin,/person/lukasz-gadowski,Lukasz Gadowski,...,,,/funding-round/681af1b330a13043b17aeac45d841b88,venture,A,2014-12-02,2014-12,2014-Q4,2014,17000000.0
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,
114503,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/trevor-wright,Trevor Wright,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0
114504,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/wendy-lea,Wendy Lea,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0





INVESTOR_COUNTRY_CODE
VALUE COUNTS
USA    60291
GBR     4803
DEU     2158
FRA     2005
CAN     1999
       ...  
AZE        1
IRN        1
PER        1
BHS        1
GEO        1
Name: investor_country_code, Length: 85, dtype: int64


DUPLICATE COUNTS
114420


NULL COUNTS AND PERCENTAGE
27985
24.43976734843589


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
2,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/antonio-murroni,ANTONIO MURRONI,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
3,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/filippo-murroni,FILIPPO Murroni,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114500,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/organization/vine-st-ventures,Vine St. Ventures,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0
114501,/organization/helpling,Helpling,|Consumers|,Consumers,DEU,,Berlin,Berlin,/person/lukasz-gadowski,Lukasz Gadowski,...,,,/funding-round/681af1b330a13043b17aeac45d841b88,venture,A,2014-12-02,2014-12,2014-Q4,2014,17000000.0
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,
114503,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/trevor-wright,Trevor Wright,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0





INVESTOR_STATE_CODE
VALUE COUNTS
CA    30429
NY     7398
MA     5929
IL     1657
TX     1385
PA     1374
WA     1296
CO     1046
ON      859
VA      856
CT      827
OH      792
MD      767
TN      752
NJ      637
QC      598
DC      569
NC      494
MI      448
GA      403
UT      389
BC      381
FL      368
MN      292
MO      288
OR      191
WI      191
RI      183
IN      141
NV      130
AZ      127
AL      111
NH       99
SC       88
NS       81
NE       81
AB       66
OK       55
NM       53
ID       52
KY       42
ME       42
AR       41
HI       35
KS       34
LA       32
VT       32
DE       31
IA       28
ND       17
SD       12
AK       12
MT        8
MS        6
NL        6
MB        4
WV        4
WY        3
SK        2
Name: investor_state_code, dtype: int64


DUPLICATE COUNTS
114446


NULL COUNTS AND PERCENTAGE
52232
45.615076939199696


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
2,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/antonio-murroni,ANTONIO MURRONI,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
3,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/filippo-murroni,FILIPPO Murroni,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114501,/organization/helpling,Helpling,|Consumers|,Consumers,DEU,,Berlin,Berlin,/person/lukasz-gadowski,Lukasz Gadowski,...,,,/funding-round/681af1b330a13043b17aeac45d841b88,venture,A,2014-12-02,2014-12,2014-Q4,2014,17000000.0
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,
114503,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/trevor-wright,Trevor Wright,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0
114504,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/wendy-lea,Wendy Lea,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0





INVESTOR_REGION
VALUE COUNTS
SF Bay Area      27269
New York City     7218
Boston            5819
London            3500
Los Angeles       1799
                 ...  
Boulogne             1
Fyshwick             1
Saarbrucken          1
Logrono              1
Greensboro           1
Name: investor_region, Length: 589, dtype: int64


DUPLICATE COUNTS
113916


NULL COUNTS AND PERCENTAGE
27985
24.43976734843589


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
2,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/antonio-murroni,ANTONIO MURRONI,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
3,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/filippo-murroni,FILIPPO Murroni,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114500,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/organization/vine-st-ventures,Vine St. Ventures,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0
114501,/organization/helpling,Helpling,|Consumers|,Consumers,DEU,,Berlin,Berlin,/person/lukasz-gadowski,Lukasz Gadowski,...,,,/funding-round/681af1b330a13043b17aeac45d841b88,venture,A,2014-12-02,2014-12,2014-Q4,2014,17000000.0
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,
114503,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/trevor-wright,Trevor Wright,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0





INVESTOR_CITY
VALUE COUNTS
Menlo Park       9962
New York         7030
San Francisco    5934
Palo Alto        5224
London           3146
                 ... 
Oakville            1
Farsund             1
Poznan              1
George Town         1
Allen Park          1
Name: investor_city, Length: 1381, dtype: int64


DUPLICATE COUNTS
113124


NULL COUNTS AND PERCENTAGE
28499
24.888652123032852


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
2,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/antonio-murroni,ANTONIO MURRONI,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
3,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/filippo-murroni,FILIPPO Murroni,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
4,/organization/ikro,Ikro,,,BRA,,BRA - Other,Canoas,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/46c353a8249170cc4b6ab89a522fefdc,venture,A,1982-06-01,1982-06,1982-Q2,1982,724000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114500,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/organization/vine-st-ventures,Vine St. Ventures,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0
114501,/organization/helpling,Helpling,|Consumers|,Consumers,DEU,,Berlin,Berlin,/person/lukasz-gadowski,Lukasz Gadowski,...,,,/funding-round/681af1b330a13043b17aeac45d841b88,venture,A,2014-12-02,2014-12,2014-Q4,2014,17000000.0
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,
114503,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/trevor-wright,Trevor Wright,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0





FUNDING_ROUND_PERMALINK
VALUE COUNTS
/funding-round/e7d7980d9469411c5ccdefb37c3912f1    38
/funding-round/ac1b0041e4d8afa6f4447acada922c5c    32
/funding-round/c128c985f9fe11016e14cc8fd0df0eb6    32
/funding-round/ba42cc33e618b150009534ffc39562db    32
/funding-round/466ba14ad502ee25a563eb70d0b27436    31
                                                   ..
/funding-round/353481efe5bfc4e5689fe2407d7abf66     1
/funding-round/639aac1c76c2fb4d1501ab6328dee8cc     1
/funding-round/d2b07c53039e025e8394fd3756424374     1
/funding-round/5858300b2e90da7dc703b1c072b3a9f4     1
/funding-round/3b06e2822fbd630d8baa1d696c6478fa     1
Name: funding_round_permalink, Length: 50280, dtype: int64


DUPLICATE COUNTS
64226


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





FUNDING_ROUND_TYPE
VALUE COUNTS
venture                 70615
seed                    29272
angel                    4894
undisclosed              4479
private_equity           2128
debt_financing           1541
grant                     721
convertible_note          547
post_ipo_equity           126
equity_crowdfunding        80
secondary_market           53
post_ipo_debt              32
product_crowdfunding       18
Name: funding_round_type, dtype: int64


DUPLICATE COUNTS
114493


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





FUNDING_ROUND_CODE
VALUE COUNTS
A    21708
B    15625
C     9709
D     4914
E     1932
F      673
G      101
H        7
Name: funding_round_code, dtype: int64


DUPLICATE COUNTS
114497


NULL COUNTS AND PERCENTAGE
59837
52.256650306534155


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
0,/organization/test-company-3,test company,,,ARE,,Dubai,Dubai,/person/jamessss-bondddd,jamessss bondddd,...,,,/funding-round/c308019016ead7afb2a1d117018eb6fc,seed,,1921-09-01,1921-09,1921-Q3,1921,1000.0
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
2,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/antonio-murroni,ANTONIO MURRONI,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
3,/organization/abo-data,ABO Data,|Enterprise Software|,Enterprise Software,USA,TX,TX - Other,Italy,/person/filippo-murroni,FILIPPO Murroni,...,,,/funding-round/809e211b969c3f66440fc15ffcd29385,seed,,1979-01-01,1979-01,1979-Q1,1979,1000000.0
8,/organization/lenovo,Lenovo,|Hardware|,Hardware,USA,NC,Raleigh,Morrisville,/organization/chinese-academy-of-sciences,Chinese Academy Of Sciences,...,Beijing,Beijing,/funding-round/0f444f3fdb0c1058aa70eeba9f0f5089,undisclosed,,1984-01-01,1984-01,1984-Q1,1984,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114500,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/organization/vine-st-ventures,Vine St. Ventures,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,
114503,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/trevor-wright,Trevor Wright,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0
114504,/organization/strap,Strap,|Apps|,Apps,USA,OH,Cincinnati,Cincinnati,/person/wendy-lea,Wendy Lea,...,,,/funding-round/375e6c61fec5b3854ecbe5732dfdcabe,seed,,2014-12-02,2014-12,2014-Q4,2014,1200000.0





FUNDED_AT
VALUE COUNTS
2012-01-01    795
2013-01-01    648
2011-01-01    579
2008-01-01    573
2007-01-01    540
             ... 
2004-10-30      1
2004-11-08      1
2004-11-16      1
2004-11-28      1
2014-12-20      1
Name: funded_at, Length: 3923, dtype: int64


DUPLICATE COUNTS
110583


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





FUNDED_MONTH
VALUE COUNTS
2014-01    2200
2013-09    2153
2013-10    2025
2014-06    1948
2014-07    1933
           ... 
1992-03       1
1992-02       1
1991-04       1
1991-01       1
1921-09       1
Name: funded_month, Length: 300, dtype: int64


DUPLICATE COUNTS
114206


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





FUNDED_QUARTER
VALUE COUNTS
2014-Q1    5812
2014-Q2    5703
2013-Q3    5669
2013-Q4    5512
2014-Q3    5485
           ... 
1989-Q4       1
1991-Q2       1
1991-Q4       1
1995-Q3       1
1921-Q3       1
Name: funded_quarter, Length: 123, dtype: int64


DUPLICATE COUNTS
114383


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





FUNDED_YEAR
VALUE COUNTS
2013    21041
2014    20012
2012    16583
2011    13089
2010     9708
2008     7366
2009     7031
2007     6946
2006     5735
2005     4290
2004      707
2000      455
2003      374
2002      359
2001      309
1999      254
1998       65
1997       32
1996       30
1990       19
1995       17
1994       16
1993       15
1992       12
1991       10
1987        7
1985        5
1986        4
1982        3
1989        3
1988        2
1984        2
1979        2
1974        1
1983        1
1921        1
Name: funded_year, dtype: int64


DUPLICATE COUNTS
114470


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd





RAISED_AMOUNT_USD
VALUE COUNTS
10000000.0    3385
1000000.0     3310
5000000.0     2585
2000000.0     2568
15000000.0    2229
              ... 
1118770.0        1
215049.0         1
107524.0         1
2463690.0        1
14081347.0       1
Name: raised_amount_usd, Length: 7452, dtype: int64


DUPLICATE COUNTS
107053


NULL COUNTS AND PERCENTAGE
13415
11.71554329030793


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,investor_permalink,investor_name,...,investor_region,investor_city,funding_round_permalink,funding_round_type,funding_round_code,funded_at,funded_month,funded_quarter,funded_year,raised_amount_usd
1,/organization/andrewburnett-com-ltd,AndrewBurnett.com Ltd,|Internet|SEO|Services|Public Relations|Social...,Internet,GBR,,Edinburgh,Edinburgh,/organization/ekaf,Ekaf,...,,,/funding-round/14fe2864e02d0f15ddc3ec8eacdc8e1b,seed,,1974-01-01,1974-01,1974-Q1,1974,
8,/organization/lenovo,Lenovo,|Hardware|,Hardware,USA,NC,Raleigh,Morrisville,/organization/chinese-academy-of-sciences,Chinese Academy Of Sciences,...,Beijing,Beijing,/funding-round/0f444f3fdb0c1058aa70eeba9f0f5089,undisclosed,,1984-01-01,1984-01,1984-Q1,1984,
9,/organization/mixertech,Mixertech,,,,,,,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,...,,,/funding-round/4316813c5da6e06f2a15fa25c7946a73,venture,A,1984-05-01,1984-05,1984-Q2,1984,
13,/organization/aehr-test-systems,Aehr Test Systems,|Hardware + Software|,Hardware + Software,USA,CA,SF Bay Area,Fremont,/organization/summit-partners,Summit Partners,...,Boston,Boston,/funding-round/3b02cf325be61412c2af1eb545f4d568,undisclosed,,1985-09-19,1985-09,1985-Q3,1985,
14,/organization/fsi-international,FSI International,|Semiconductors|,Semiconductors,USA,MN,Minneapolis,Chaska,/organization/summit-partners,Summit Partners,...,Boston,Boston,/funding-round/34b482e681b7e2ec7b0c7886e9f9d90d,undisclosed,,1985-10-20,1985-10,1985-Q4,1985,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114360,/organization/zendrive,Zendrive,|Automotive|,Automotive,USA,CA,SF Bay Area,San Francisco,/organization/fontinalis-partners,Fontinalis Partners,...,Detroit,Detroit,/funding-round/31204ec2abf4ddf45667d231a1b11be3,venture,,2014-11-26,2014-11,2014-Q4,2014,
114407,/organization/orori,Orori,,,IDN,,Jakarta,Jakarta,/organization/ideosource,Ideosource,...,Jakarta,Jakarta,/funding-round/d0f5ccae89755f0c35deaf80b6cfe4ac,seed,,2014-11-28,2014-11,2014-Q4,2014,
114476,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/organization/jungle-ventures,Jungle Ventures,...,Singapore,Singapore,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,
114502,/organization/appknox,Appknox,|Mobile Security|SaaS|Mobile|,Mobile Security,SGP,,Singapore,Singapore,/person/rajan-anandan,Rajan Anandan,...,,,/funding-round/bfb0b5859be2431f35882bc03370dd67,venture,,2014-12-02,2014-12,2014-Q4,2014,


#Acquisitions EDA and Preprocessing

In [21]:
acquisitions = pd.read_csv('/content/drive/My Drive/Colab Notebooks/CIS 550/acquisitions.csv')
acquisitions[' price_amount '] = acquisitions[' price_amount '].apply(lambda x: set_to_floats(x))
acquisitions

Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
0,/organization/waywire,#waywire,|Entertainment|Politics|Social Media|News|,News,USA,NY,New York City,New York,/organization/magnify,Waywire Networks,...,USA,NY,New York City,New York,2013-10-17,2013-10,2013-Q4,2013.0,,USD
1,/organization/fluff-friends,(fluff)Friends,,,,,,,/organization/social-gaming-network,SGN (Social Gaming Network),...,USA,CA,Los Angeles,Beverly Hills,2008-09-16,2008-09,2008-Q3,2008.0,,USD
2,/organization/red,(RED),|Nonprofits|,Nonprofits,USA,NY,New York City,New York,/organization/nationstar-mortgage-holdings,Nationstar Mortgage Holdings,...,USA,TX,Dallas,Lewisville,2014-05-08,2014-05,2014-Q2,2014.0,18000000.0,USD
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
4,/organization/co-internet,.CO,|Registrars|Domains|Curated Web|,Registrars,USA,FL,Miami,Miami,/organization/neustar,Neustar,...,USA,VA,"Washington, D.C.",Sterling,2014-03-20,2014-03,2014-Q1,2014.0,109000000.0,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13064,/organization/zync,Zync,|Curated Web|,Curated Web,,,,,/organization/where-com,Where,...,USA,MA,Boston,Boston,2008-02-07,2008-02,2008-Q1,2008.0,,USD
13065,/organization/zync-render,Zync Render,|Digital Media|,Digital Media,USA,MA,Boston,Boston,/organization/google,Google,...,USA,CA,SF Bay Area,Mountain View,2014-08-26,2014-08,2014-Q3,2014.0,,USD
13066,/organization/zynk-mobile,Zynk Mobile,,,,,,,/organization/zenvia,Zenvia,...,BRA,,Porto Alegre,Porto Alegre,2013-03-01,2013-03,2013-Q1,2013.0,,USD
13067,/organization/zyrion,Zyrion Inc,|Networking|Software|,Software,USA,CA,SF Bay Area,Sunnyvale,/organization/kaseya,Kaseya,...,,,,,2013-06-25,2013-06,2013-Q2,2013.0,,USD


##Summary Stats

In [22]:
acquisitions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13069 entries, 0 to 13068
Data columns (total 22 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   company_permalink       13069 non-null  object 
 1   company_name            13069 non-null  object 
 2   company_category_list   9786 non-null   object 
 3   company_market          9784 non-null   object 
 4   company_country_code    9782 non-null   object 
 5   company_state_code      7519 non-null   object 
 6   company_region          9783 non-null   object 
 7   company_city            9584 non-null   object 
 8   acquirer_permalink      13069 non-null  object 
 9   acquirer_name           13069 non-null  object 
 10  acquirer_category_list  11565 non-null  object 
 11  acquirer_market         11559 non-null  object 
 12  acquirer_country_code   12228 non-null  object 
 13  acquirer_state_code     9782 non-null   object 
 14  acquirer_region         12228 non-null

In [23]:
acquisitions.describe()

Unnamed: 0,acquired_year,price_amount
count,13067.0,3711.0
mean,2010.366037,742282400.0
std,3.69535,4048504000.0
min,1960.0,1.0
25%,2009.0,18450000.0
50%,2011.0,83000000.0
75%,2013.0,342750000.0
max,2014.0,150000000000.0


In [24]:
acquisitions.nunique()

company_permalink         12790
company_name              12783
company_category_list      3649
company_market              536
company_country_code         73
company_state_code           59
company_region              596
company_city               1816
acquirer_permalink         6632
acquirer_name              6629
acquirer_category_list     2371
acquirer_market             451
acquirer_country_code        67
acquirer_state_code          57
acquirer_region             495
acquirer_city              1410
acquired_at                3078
acquired_month              279
acquired_quarter            111
acquired_year                37
 price_amount              1311
price_currency_code          11
dtype: int64

##Analysis

In [25]:
column_names = ['company_permalink', 'company_name', 'company_category_list', 'company_market', 'company_country_code', 'company_state_code', 'company_region', 'company_city', 'acquirer_permalink', 'acquirer_name', 'acquirer_category_list', 'acquirer_market', 'acquirer_country_code','acquirer_state_code', 'acquirer_region', 'acquirer_city', 'acquired_at', 'acquired_month', 'acquired_quarter', 'acquired_year', ' price_amount ', 'price_currency_code']

for col in column_names:
  print('\n\n')
  print("==========================================================================")
  print(col.upper())
  print("VALUE COUNTS")
  print(acquisitions[col].value_counts())
  print("\n")

  print("DUPLICATE COUNTS")
  print(acquisitions.duplicated(subset=col, keep='first').sum())
  print("\n")

  print("NULL COUNTS AND PERCENTAGE")
  print(acquisitions[col].isnull().sum())
  print(acquisitions[col].isnull().sum()/len(acquisitions)*100)
  print('\n')
  
  print("NULL ROWS")
  display(acquisitions[acquisitions[col].isnull()])




COMPANY_PERMALINK
VALUE COUNTS
/organization/pando-networks                18
/organization/insieme                        5
/organization/travel-channel                 4
/organization/udcast                         4
/organization/webshots                       4
                                            ..
/organization/fortis-healthcare              1
/organization/fortress-solutions             1
/organization/fortress-technologies          1
/organization/fortsum-business-solutions     1
/organization/zystor                         1
Name: company_permalink, Length: 12790, dtype: int64


DUPLICATE COUNTS
279


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code





COMPANY_NAME
VALUE COUNTS
Pando Networks                18
Insieme                        5
Webshots                       4
UDcast                         4
Travel Channel                 4
                              ..
Fortis Healthcare              1
Fortress Solutions             1
Fortress Technologies          1
Fortsum Business Solutions     1
Zystor                         1
Name: company_name, Length: 12783, dtype: int64


DUPLICATE COUNTS
286


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code





COMPANY_CATEGORY_LIST
VALUE COUNTS
|Software|                                                                    1254
|Curated Web|                                                                  578
|Biotechnology|                                                                475
|Enterprise Software|                                                          351
|Mobile|                                                                       293
                                                                              ... 
|Internet|Identity|Media|Personalization|Finance|Fraud Detection|Software|       1
|Publishing|News|Media|Video|Advertising|                                        1
|Mobile|Crowdsourcing|Gas|Travel|Oil|Curated Web|                                1
|Media|Hardware|Computers|Hardware + Software|                                   1
|Design|Health and Wellness|Hardware + Software|                                 1
Name: company_category_list, Length: 3649, dtype:

Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
1,/organization/fluff-friends,(fluff)Friends,,,,,,,/organization/social-gaming-network,SGN (Social Gaming Network),...,USA,CA,Los Angeles,Beverly Hills,2008-09-16,2008-09,2008-Q3,2008.0,,USD
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
6,/organization/003-ru,003.RU,,,RUS,,Moscow,Moscow,/organization/media-saturn,Media Saturn,...,ESP,,Barcelona,Barcelona,2012-07-23,2012-07,2012-Q3,2012.0,,USD
7,/organization/0958572-b-c-ltd,0958572 B.C. Ltd.,,,,,,,/organization/atlas-intellectual-property-mana...,ATLAS Intellectual Property Management Co.,...,USA,WA,Seattle,Seattle,2012-02-02,2012-02,2012-Q1,2012.0,9000000.0,USD
8,/organization/1-nation-technology,1 Nation Technology,,,,,,,/organization/vology,Vology,...,USA,FL,Tampa,Oldsmar,2006-01-01,2006-01,2006-Q1,2006.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13051,/organization/zui,Zui,,,,,,,/organization/saban-brands,Saban Brands,...,USA,CA,Los Angeles,Los Angeles,2012-09-20,2012-09,2012-Q3,2012.0,,USD
13052,/organization/zuji,Zuji,,,,,,,/organization/webjet-com-au,Webjet.com.au,...,AUS,,Melbourne,Melbourne,2013-03-01,2013-03,2013-Q1,2013.0,25000000.0,AUD
13053,/organization/zukes,Zuke’s,,,USA,CO,Grand Junction,Durango,/organization/nestle-purina-petcare,Nestle Purina Petcare,...,USA,MO,St. Louis,St Louis,2014-01-15,2014-01,2014-Q1,2014.0,,USD
13061,/organization/zylom-media-group,Zylom Media Group,,,NLD,,Eindhoven,Eindhoven,/organization/realnetworks,Real Networks,...,USA,WA,Seattle,Seattle,2006-02-06,2006-02,2006-Q1,2006.0,,USD





COMPANY_MARKET
VALUE COUNTS
Software                 1489
Curated Web               714
Biotechnology             505
Enterprise Software       448
Mobile                    433
                         ... 
Digital Entertainment       1
Textiles                    1
Licensing                   1
UV LEDs                     1
Racing                      1
Name: company_market, Length: 536, dtype: int64


DUPLICATE COUNTS
12532


NULL COUNTS AND PERCENTAGE
3285
25.135817583594765


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
1,/organization/fluff-friends,(fluff)Friends,,,,,,,/organization/social-gaming-network,SGN (Social Gaming Network),...,USA,CA,Los Angeles,Beverly Hills,2008-09-16,2008-09,2008-Q3,2008.0,,USD
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
6,/organization/003-ru,003.RU,,,RUS,,Moscow,Moscow,/organization/media-saturn,Media Saturn,...,ESP,,Barcelona,Barcelona,2012-07-23,2012-07,2012-Q3,2012.0,,USD
7,/organization/0958572-b-c-ltd,0958572 B.C. Ltd.,,,,,,,/organization/atlas-intellectual-property-mana...,ATLAS Intellectual Property Management Co.,...,USA,WA,Seattle,Seattle,2012-02-02,2012-02,2012-Q1,2012.0,9000000.0,USD
8,/organization/1-nation-technology,1 Nation Technology,,,,,,,/organization/vology,Vology,...,USA,FL,Tampa,Oldsmar,2006-01-01,2006-01,2006-Q1,2006.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13051,/organization/zui,Zui,,,,,,,/organization/saban-brands,Saban Brands,...,USA,CA,Los Angeles,Los Angeles,2012-09-20,2012-09,2012-Q3,2012.0,,USD
13052,/organization/zuji,Zuji,,,,,,,/organization/webjet-com-au,Webjet.com.au,...,AUS,,Melbourne,Melbourne,2013-03-01,2013-03,2013-Q1,2013.0,25000000.0,AUD
13053,/organization/zukes,Zuke’s,,,USA,CO,Grand Junction,Durango,/organization/nestle-purina-petcare,Nestle Purina Petcare,...,USA,MO,St. Louis,St Louis,2014-01-15,2014-01,2014-Q1,2014.0,,USD
13061,/organization/zylom-media-group,Zylom Media Group,,,NLD,,Eindhoven,Eindhoven,/organization/realnetworks,Real Networks,...,USA,WA,Seattle,Seattle,2006-02-06,2006-02,2006-Q1,2006.0,,USD





COMPANY_COUNTRY_CODE
VALUE COUNTS
USA    7177
GBR     646
CAN     348
DEU     206
FRA     138
       ... 
BLR       1
GIB       1
COL       1
CYM       1
MAF       1
Name: company_country_code, Length: 73, dtype: int64


DUPLICATE COUNTS
12995


NULL COUNTS AND PERCENTAGE
3287
25.151120973295587


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
1,/organization/fluff-friends,(fluff)Friends,,,,,,,/organization/social-gaming-network,SGN (Social Gaming Network),...,USA,CA,Los Angeles,Beverly Hills,2008-09-16,2008-09,2008-Q3,2008.0,,USD
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
7,/organization/0958572-b-c-ltd,0958572 B.C. Ltd.,,,,,,,/organization/atlas-intellectual-property-mana...,ATLAS Intellectual Property Management Co.,...,USA,WA,Seattle,Seattle,2012-02-02,2012-02,2012-Q1,2012.0,9000000.0,USD
8,/organization/1-nation-technology,1 Nation Technology,,,,,,,/organization/vology,Vology,...,USA,FL,Tampa,Oldsmar,2006-01-01,2006-01,2006-Q1,2006.0,,USD
10,/organization/1000-markets,1000 Markets,|Marketplaces|Art|E-Commerce|,Marketplaces,,,,,/organization/bonanzle,Bonanza,...,USA,WA,Seattle,Seattle,2012-09-22,2012-09,2012-Q3,2012.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13052,/organization/zuji,Zuji,,,,,,,/organization/webjet-com-au,Webjet.com.au,...,AUS,,Melbourne,Melbourne,2013-03-01,2013-03,2013-Q1,2013.0,25000000.0,AUD
13054,/organization/zulip,Zulip,|Enterprise Software|,Enterprise Software,,,,,/organization/dropbox,Dropbox,...,USA,CA,SF Bay Area,San Francisco,2014-03-17,2014-03,2014-Q1,2014.0,,USD
13055,/organization/zuneboards,ZuneBoards,|Hardware + Software|,Hardware + Software,,,,,/organization/crowdgather,CrowdGather,...,USA,CA,Los Angeles,Woodland Hills,2008-08-01,2008-08,2008-Q3,2008.0,62000.0,USD
13064,/organization/zync,Zync,|Curated Web|,Curated Web,,,,,/organization/where-com,Where,...,USA,MA,Boston,Boston,2008-02-07,2008-02,2008-Q1,2008.0,,USD





COMPANY_STATE_CODE
VALUE COUNTS
CA    2520
NY     641
MA     510
TX     416
IL     279
WA     266
CO     212
FL     202
PA     193
NJ     190
ON     183
VA     180
GA     155
MD     129
NC     124
OH      99
MN      88
AZ      80
UT      77
CT      72
OR      71
MI      66
MO      64
QC      61
WI      58
BC      57
TN      56
KS      37
IN      35
NH      31
DC      29
AL      28
AB      28
NV      27
LA      22
KY      22
SC      22
OK      22
IA      17
NE      16
RI      15
ID      13
VT      12
AR      11
DE      11
ND      10
MT       9
NS       8
ME       8
MS       6
NM       6
HI       6
WV       5
MB       5
AK       4
WY       2
NB       1
NL       1
SK       1
Name: company_state_code, dtype: int64


DUPLICATE COUNTS
13009


NULL COUNTS AND PERCENTAGE
5550
42.46690641977197


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
1,/organization/fluff-friends,(fluff)Friends,,,,,,,/organization/social-gaming-network,SGN (Social Gaming Network),...,USA,CA,Los Angeles,Beverly Hills,2008-09-16,2008-09,2008-Q3,2008.0,,USD
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
5,/organization/vantronix,.vantronix,|Cloud Security|Network Security|Security|,Cloud Security,DEU,,Hanover,Hanover,/organization/compumatica,Compumatica,...,,,,,2011-01-01,2011-01,2011-Q1,2011.0,,USD
6,/organization/003-ru,003.RU,,,RUS,,Moscow,Moscow,/organization/media-saturn,Media Saturn,...,ESP,,Barcelona,Barcelona,2012-07-23,2012-07,2012-Q3,2012.0,,USD
7,/organization/0958572-b-c-ltd,0958572 B.C. Ltd.,,,,,,,/organization/atlas-intellectual-property-mana...,ATLAS Intellectual Property Management Co.,...,USA,WA,Seattle,Seattle,2012-02-02,2012-02,2012-Q1,2012.0,9000000.0,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13061,/organization/zylom-media-group,Zylom Media Group,,,NLD,,Eindhoven,Eindhoven,/organization/realnetworks,Real Networks,...,USA,WA,Seattle,Seattle,2006-02-06,2006-02,2006-Q1,2006.0,,USD
13062,/organization/zymenex,Zymenex,|Biotechnology|,Biotechnology,DNK,,DNK - Other,Hillerød,/organization/chiesi-pharmaceuticals,Chiesi Pharmaceuticals,...,ITA,,Parma,Parma,2013-08-26,2013-08,2013-Q3,2013.0,,USD
13063,/organization/zynamics,zynamics,|Security|,Security,DEU,,Bochum,Bochum,/organization/google,Google,...,USA,CA,SF Bay Area,Mountain View,2011-03-01,2011-03,2011-Q1,2011.0,,USD
13064,/organization/zync,Zync,|Curated Web|,Curated Web,,,,,/organization/where-com,Where,...,USA,MA,Boston,Boston,2008-02-07,2008-02,2008-Q1,2008.0,,USD





COMPANY_REGION
VALUE COUNTS
SF Bay Area        1794
New York City       554
Boston              477
London              380
Los Angeles         331
                   ... 
Appenzell             1
Nottinghamshire       1
Itá                   1
Weybridge             1
Cheadle Hulme         1
Name: company_region, Length: 596, dtype: int64


DUPLICATE COUNTS
12472


NULL COUNTS AND PERCENTAGE
3286
25.143469278445174


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
1,/organization/fluff-friends,(fluff)Friends,,,,,,,/organization/social-gaming-network,SGN (Social Gaming Network),...,USA,CA,Los Angeles,Beverly Hills,2008-09-16,2008-09,2008-Q3,2008.0,,USD
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
7,/organization/0958572-b-c-ltd,0958572 B.C. Ltd.,,,,,,,/organization/atlas-intellectual-property-mana...,ATLAS Intellectual Property Management Co.,...,USA,WA,Seattle,Seattle,2012-02-02,2012-02,2012-Q1,2012.0,9000000.0,USD
8,/organization/1-nation-technology,1 Nation Technology,,,,,,,/organization/vology,Vology,...,USA,FL,Tampa,Oldsmar,2006-01-01,2006-01,2006-Q1,2006.0,,USD
10,/organization/1000-markets,1000 Markets,|Marketplaces|Art|E-Commerce|,Marketplaces,,,,,/organization/bonanzle,Bonanza,...,USA,WA,Seattle,Seattle,2012-09-22,2012-09,2012-Q3,2012.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13052,/organization/zuji,Zuji,,,,,,,/organization/webjet-com-au,Webjet.com.au,...,AUS,,Melbourne,Melbourne,2013-03-01,2013-03,2013-Q1,2013.0,25000000.0,AUD
13054,/organization/zulip,Zulip,|Enterprise Software|,Enterprise Software,,,,,/organization/dropbox,Dropbox,...,USA,CA,SF Bay Area,San Francisco,2014-03-17,2014-03,2014-Q1,2014.0,,USD
13055,/organization/zuneboards,ZuneBoards,|Hardware + Software|,Hardware + Software,,,,,/organization/crowdgather,CrowdGather,...,USA,CA,Los Angeles,Woodland Hills,2008-08-01,2008-08,2008-Q3,2008.0,62000.0,USD
13064,/organization/zync,Zync,|Curated Web|,Curated Web,,,,,/organization/where-com,Where,...,USA,MA,Boston,Boston,2008-02-07,2008-02,2008-Q1,2008.0,,USD





COMPANY_CITY
VALUE COUNTS
San Francisco           584
New York                516
London                  274
Mountain View           161
Seattle                 153
                       ... 
Ft Mitchell               1
Niantic                   1
Hendersonville            1
Weinstadt-endersbach      1
Hillerød                  1
Name: company_city, Length: 1816, dtype: int64


DUPLICATE COUNTS
11252


NULL COUNTS AND PERCENTAGE
3485
26.66615655367664


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
1,/organization/fluff-friends,(fluff)Friends,,,,,,,/organization/social-gaming-network,SGN (Social Gaming Network),...,USA,CA,Los Angeles,Beverly Hills,2008-09-16,2008-09,2008-Q3,2008.0,,USD
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
7,/organization/0958572-b-c-ltd,0958572 B.C. Ltd.,,,,,,,/organization/atlas-intellectual-property-mana...,ATLAS Intellectual Property Management Co.,...,USA,WA,Seattle,Seattle,2012-02-02,2012-02,2012-Q1,2012.0,9000000.0,USD
8,/organization/1-nation-technology,1 Nation Technology,,,,,,,/organization/vology,Vology,...,USA,FL,Tampa,Oldsmar,2006-01-01,2006-01,2006-Q1,2006.0,,USD
10,/organization/1000-markets,1000 Markets,|Marketplaces|Art|E-Commerce|,Marketplaces,,,,,/organization/bonanzle,Bonanza,...,USA,WA,Seattle,Seattle,2012-09-22,2012-09,2012-Q3,2012.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13052,/organization/zuji,Zuji,,,,,,,/organization/webjet-com-au,Webjet.com.au,...,AUS,,Melbourne,Melbourne,2013-03-01,2013-03,2013-Q1,2013.0,25000000.0,AUD
13054,/organization/zulip,Zulip,|Enterprise Software|,Enterprise Software,,,,,/organization/dropbox,Dropbox,...,USA,CA,SF Bay Area,San Francisco,2014-03-17,2014-03,2014-Q1,2014.0,,USD
13055,/organization/zuneboards,ZuneBoards,|Hardware + Software|,Hardware + Software,,,,,/organization/crowdgather,CrowdGather,...,USA,CA,Los Angeles,Woodland Hills,2008-08-01,2008-08,2008-Q3,2008.0,62000.0,USD
13064,/organization/zync,Zync,|Curated Web|,Curated Web,,,,,/organization/where-com,Where,...,USA,MA,Boston,Boston,2008-02-07,2008-02,2008-Q1,2008.0,,USD





ACQUIRER_PERMALINK
VALUE COUNTS
/organization/cisco                     168
/organization/google                    165
/organization/microsoft                 156
/organization/ibm                       122
/organization/yahoo                     116
                                       ... 
/organization/wunderman                   1
/organization/savvis                      1
/organization/nuvotv                      1
/organization/diigo                       1
/organization/chiesi-pharmaceuticals      1
Name: acquirer_permalink, Length: 6632, dtype: int64


DUPLICATE COUNTS
6437


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code





ACQUIRER_NAME
VALUE COUNTS
Cisco                               168
Google                              165
Microsoft                           156
IBM                                 122
Yahoo!                              116
                                   ... 
CenturyLink Technology Solutions      1
nuvoTV                                1
Diigo                                 1
LookSmart                             1
Chiesi Pharmaceuticals                1
Name: acquirer_name, Length: 6629, dtype: int64


DUPLICATE COUNTS
6440


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code





ACQUIRER_CATEGORY_LIST
VALUE COUNTS
|Software|                                                                                                              1137
|Biotechnology|                                                                                                          459
|Enterprise Software|                                                                                                    304
|Curated Web|                                                                                                            303
|Hardware + Software|                                                                                                    297
                                                                                                                        ... 
|Collaboration|Marketplaces|Textbooks|Publishing|E-Commerce|                                                               1
|Startups|Software|Business Services|Consulting|                                      

Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
5,/organization/vantronix,.vantronix,|Cloud Security|Network Security|Security|,Cloud Security,DEU,,Hanover,Hanover,/organization/compumatica,Compumatica,...,,,,,2011-01-01,2011-01,2011-Q1,2011.0,,USD
8,/organization/1-nation-technology,1 Nation Technology,,,,,,,/organization/vology,Vology,...,USA,FL,Tampa,Oldsmar,2006-01-01,2006-01,2006-Q1,2006.0,,USD
23,/organization/1st-choice-security-solutions,1st Choice Security Solutions,|Security|,Security,USA,GA,Atlanta,Atlanta,/organization/comcam,ComCam,...,USA,PA,Philadelphia,West Chester,2011-04-13,2011-04,2011-Q2,2011.0,,USD
35,/organization/24-7-solutions,24/7 Solutions,|Software|,Software,,,,,/organization/sentel,Sentel,...,,,,,2009-04-16,2009-04,2009-Q2,2009.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13018,/organization/zipcar,Zipcar,|Public Transportation|,Public Transportation,USA,MA,Boston,Boston,/organization/avis,Avis,...,,,,,2013-01-02,2013-01,2013-Q1,2013.0,,USD
13029,/organization/zitra-com,Zitra.com,|Fashion|Sales and Marketing|Marketplaces|B2B|...,Fashion,DEU,,Cologne,Cologne,/organization/keenon,Keenon,...,DEU,,Hamburg,Hamburg,2013-06-01,2013-06,2013-Q2,2013.0,,USD
13034,/organization/zoidu,zoidu,|Curated Web|,Curated Web,CAN,ON,Ottawa,Ottawa,/organization/stellar-learning-strategies,Stellar Learning Strategies,...,CAN,NS,Fredericton,Fredericton,2012-01-01,2012-01,2012-Q1,2012.0,,USD
13051,/organization/zui,Zui,,,,,,,/organization/saban-brands,Saban Brands,...,USA,CA,Los Angeles,Los Angeles,2012-09-20,2012-09,2012-Q3,2012.0,,USD





ACQUIRER_MARKET
VALUE COUNTS
Software                        1826
Enterprise Software              512
Biotechnology                    489
Curated Web                      488
Advertising                      438
                                ... 
Film                               1
All Students                       1
Business Information Systems       1
Transaction Processing             1
Tech Field Support                 1
Name: acquirer_market, Length: 451, dtype: int64


DUPLICATE COUNTS
12617


NULL COUNTS AND PERCENTAGE
1510
11.554059224118141


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
5,/organization/vantronix,.vantronix,|Cloud Security|Network Security|Security|,Cloud Security,DEU,,Hanover,Hanover,/organization/compumatica,Compumatica,...,,,,,2011-01-01,2011-01,2011-Q1,2011.0,,USD
8,/organization/1-nation-technology,1 Nation Technology,,,,,,,/organization/vology,Vology,...,USA,FL,Tampa,Oldsmar,2006-01-01,2006-01,2006-Q1,2006.0,,USD
23,/organization/1st-choice-security-solutions,1st Choice Security Solutions,|Security|,Security,USA,GA,Atlanta,Atlanta,/organization/comcam,ComCam,...,USA,PA,Philadelphia,West Chester,2011-04-13,2011-04,2011-Q2,2011.0,,USD
35,/organization/24-7-solutions,24/7 Solutions,|Software|,Software,,,,,/organization/sentel,Sentel,...,,,,,2009-04-16,2009-04,2009-Q2,2009.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13018,/organization/zipcar,Zipcar,|Public Transportation|,Public Transportation,USA,MA,Boston,Boston,/organization/avis,Avis,...,,,,,2013-01-02,2013-01,2013-Q1,2013.0,,USD
13029,/organization/zitra-com,Zitra.com,|Fashion|Sales and Marketing|Marketplaces|B2B|...,Fashion,DEU,,Cologne,Cologne,/organization/keenon,Keenon,...,DEU,,Hamburg,Hamburg,2013-06-01,2013-06,2013-Q2,2013.0,,USD
13034,/organization/zoidu,zoidu,|Curated Web|,Curated Web,CAN,ON,Ottawa,Ottawa,/organization/stellar-learning-strategies,Stellar Learning Strategies,...,CAN,NS,Fredericton,Fredericton,2012-01-01,2012-01,2012-Q1,2012.0,,USD
13051,/organization/zui,Zui,,,,,,,/organization/saban-brands,Saban Brands,...,USA,CA,Los Angeles,Los Angeles,2012-09-20,2012-09,2012-Q3,2012.0,,USD





ACQUIRER_COUNTRY_CODE
VALUE COUNTS
USA    9453
GBR     561
CAN     337
DEU     205
FRA     184
       ... 
JOR       1
SWZ       1
MUS       1
URY       1
KEN       1
Name: acquirer_country_code, Length: 67, dtype: int64


DUPLICATE COUNTS
13001


NULL COUNTS AND PERCENTAGE
841
6.435075369194276


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
5,/organization/vantronix,.vantronix,|Cloud Security|Network Security|Security|,Cloud Security,DEU,,Hanover,Hanover,/organization/compumatica,Compumatica,...,,,,,2011-01-01,2011-01,2011-Q1,2011.0,,USD
14,/organization/10east,10East,|Web Hosting|,Web Hosting,USA,FL,Jacksonville,Jacksonville,/organization/railcar-management,Railcar Management,...,,,,,2009-04-14,2009-04,2009-Q2,2009.0,,USD
35,/organization/24-7-solutions,24/7 Solutions,|Software|,Software,,,,,/organization/sentel,Sentel,...,,,,,2009-04-16,2009-04,2009-Q2,2009.0,,USD
47,/organization/3-day-blinds,3 day Blinds,,,USA,CA,Anaheim,Irvine,/organization/rosewood-capital,Rosewood Capital,...,,,,,2006-06-20,2006-06,2006-Q2,2006.0,,USD
49,/organization/360-scheduling,360 Scheduling,|Software|,Software,GBR,,Nottingham,Nottingham,/organization/ifs,IFS,...,,,,,2010-09-30,2010-09,2010-Q3,2010.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12924,/organization/yolk,Yolk,|Digital Media|User Experience Design|Curated ...,Curated Web,,,,,/organization/grey,Grey,...,,,,,2010-12-20,2010-12,2010-Q4,2010.0,,USD
12929,/organization/yostro,Yostro,|Public Relations|,Public Relations,CAN,ON,Ottawa,Ottawa,/organization/dataware,Dataware,...,,,,,2011-04-01,2011-04,2011-Q2,2011.0,,USD
12930,/organization/yosun-industrial-corp,Yosun Industrial Corp.,,,,,,,/organization/wpg-holdings,WPG Holdings,...,,,,,2010-03-20,2010-03,2010-Q1,2010.0,,USD
13018,/organization/zipcar,Zipcar,|Public Transportation|,Public Transportation,USA,MA,Boston,Boston,/organization/avis,Avis,...,,,,,2013-01-02,2013-01,2013-Q1,2013.0,,USD





ACQUIRER_STATE_CODE
VALUE COUNTS
CA    3492
NY    1163
MA     590
TX     476
WA     410
IL     334
FL     250
NJ     233
GA     232
PA     225
VA     216
ON     179
CO     153
MD     131
MN     129
OH     129
CT     115
AZ     111
NC     108
DC      87
MO      87
MI      78
UT      75
QC      64
OR      63
WI      61
TN      54
BC      53
SC      47
AL      40
NH      40
NE      39
IN      32
KS      32
KY      31
AB      30
NV      29
DE      21
RI      20
IA      20
AR      19
OK      17
LA      13
MT       8
ME       8
ID       8
VT       7
NS       5
NM       5
MS       2
WV       2
NL       2
ND       2
SD       2
AK       1
MB       1
HI       1
Name: acquirer_state_code, dtype: int64


DUPLICATE COUNTS
13011


NULL COUNTS AND PERCENTAGE
3287
25.151120973295587


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
5,/organization/vantronix,.vantronix,|Cloud Security|Network Security|Security|,Cloud Security,DEU,,Hanover,Hanover,/organization/compumatica,Compumatica,...,,,,,2011-01-01,2011-01,2011-Q1,2011.0,,USD
6,/organization/003-ru,003.RU,,,RUS,,Moscow,Moscow,/organization/media-saturn,Media Saturn,...,ESP,,Barcelona,Barcelona,2012-07-23,2012-07,2012-Q3,2012.0,,USD
14,/organization/10east,10East,|Web Hosting|,Web Hosting,USA,FL,Jacksonville,Jacksonville,/organization/railcar-management,Railcar Management,...,,,,,2009-04-14,2009-04,2009-Q2,2009.0,,USD
20,/organization/160by2,160by2,,,,,,,/organization/way2sms-com,Way2SMS.com,...,IND,,Hyderabad,Hyderabad,2012-01-04,2012-01,2012-Q1,2012.0,,USD
22,/organization/1form-com,1Form.com,,,NOR,,Aust-Agder,,/organization/rea-group,REA Group,...,AUS,,Melbourne,Melbourne,2014-01-14,2014-01,2014-Q1,2014.0,15000000.0,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13058,/organization/zyb,ZYB,|Social Network Media|Networking|Mobile|,Social Network Media,DNK,,Copenhagen,Copenhagen,/organization/vodafone,Vodafone,...,GBR,,West Berkshire,,2008-05-16,2008-05,2008-Q2,2008.0,31500000.0,EUR
13060,/organization/zyken-nightcove,Zyken - NightCove,|Design|Health and Wellness|Hardware + Software|,Hardware + Software,FRA,,Paris,Paris,/organization/withings,Withings,...,FRA,,Paris,Issy-les-moulineaux,2013-09-01,2013-09,2013-Q3,2013.0,,USD
13062,/organization/zymenex,Zymenex,|Biotechnology|,Biotechnology,DNK,,DNK - Other,Hillerød,/organization/chiesi-pharmaceuticals,Chiesi Pharmaceuticals,...,ITA,,Parma,Parma,2013-08-26,2013-08,2013-Q3,2013.0,,USD
13066,/organization/zynk-mobile,Zynk Mobile,,,,,,,/organization/zenvia,Zenvia,...,BRA,,Porto Alegre,Porto Alegre,2013-03-01,2013-03,2013-Q1,2013.0,,USD





ACQUIRER_REGION
VALUE COUNTS
SF Bay Area      2609
New York City    1028
Boston            565
Los Angeles       441
London            409
                 ... 
ARG - Other         1
Kampala             1
UKR - Other         1
Eschborn            1
Parma               1
Name: acquirer_region, Length: 495, dtype: int64


DUPLICATE COUNTS
12573


NULL COUNTS AND PERCENTAGE
841
6.435075369194276


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
5,/organization/vantronix,.vantronix,|Cloud Security|Network Security|Security|,Cloud Security,DEU,,Hanover,Hanover,/organization/compumatica,Compumatica,...,,,,,2011-01-01,2011-01,2011-Q1,2011.0,,USD
14,/organization/10east,10East,|Web Hosting|,Web Hosting,USA,FL,Jacksonville,Jacksonville,/organization/railcar-management,Railcar Management,...,,,,,2009-04-14,2009-04,2009-Q2,2009.0,,USD
35,/organization/24-7-solutions,24/7 Solutions,|Software|,Software,,,,,/organization/sentel,Sentel,...,,,,,2009-04-16,2009-04,2009-Q2,2009.0,,USD
47,/organization/3-day-blinds,3 day Blinds,,,USA,CA,Anaheim,Irvine,/organization/rosewood-capital,Rosewood Capital,...,,,,,2006-06-20,2006-06,2006-Q2,2006.0,,USD
49,/organization/360-scheduling,360 Scheduling,|Software|,Software,GBR,,Nottingham,Nottingham,/organization/ifs,IFS,...,,,,,2010-09-30,2010-09,2010-Q3,2010.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12924,/organization/yolk,Yolk,|Digital Media|User Experience Design|Curated ...,Curated Web,,,,,/organization/grey,Grey,...,,,,,2010-12-20,2010-12,2010-Q4,2010.0,,USD
12929,/organization/yostro,Yostro,|Public Relations|,Public Relations,CAN,ON,Ottawa,Ottawa,/organization/dataware,Dataware,...,,,,,2011-04-01,2011-04,2011-Q2,2011.0,,USD
12930,/organization/yosun-industrial-corp,Yosun Industrial Corp.,,,,,,,/organization/wpg-holdings,WPG Holdings,...,,,,,2010-03-20,2010-03,2010-Q1,2010.0,,USD
13018,/organization/zipcar,Zipcar,|Public Transportation|,Public Transportation,USA,MA,Boston,Boston,/organization/avis,Avis,...,,,,,2013-01-02,2013-01,2013-Q1,2013.0,,USD





ACQUIRER_CITY
VALUE COUNTS
New York               859
San Francisco          630
San Jose               358
London                 323
Mountain View          295
                      ... 
Carlotta                 1
Muiden                   1
Nicosia                  1
Bergamo                  1
Issy-les-moulineaux      1
Name: acquirer_city, Length: 1410, dtype: int64


DUPLICATE COUNTS
11658


NULL COUNTS AND PERCENTAGE
960
7.345627056392991


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
5,/organization/vantronix,.vantronix,|Cloud Security|Network Security|Security|,Cloud Security,DEU,,Hanover,Hanover,/organization/compumatica,Compumatica,...,,,,,2011-01-01,2011-01,2011-Q1,2011.0,,USD
14,/organization/10east,10East,|Web Hosting|,Web Hosting,USA,FL,Jacksonville,Jacksonville,/organization/railcar-management,Railcar Management,...,,,,,2009-04-14,2009-04,2009-Q2,2009.0,,USD
35,/organization/24-7-solutions,24/7 Solutions,|Software|,Software,,,,,/organization/sentel,Sentel,...,,,,,2009-04-16,2009-04,2009-Q2,2009.0,,USD
47,/organization/3-day-blinds,3 day Blinds,,,USA,CA,Anaheim,Irvine,/organization/rosewood-capital,Rosewood Capital,...,,,,,2006-06-20,2006-06,2006-Q2,2006.0,,USD
49,/organization/360-scheduling,360 Scheduling,|Software|,Software,GBR,,Nottingham,Nottingham,/organization/ifs,IFS,...,,,,,2010-09-30,2010-09,2010-Q3,2010.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12981,/organization/zenithsolar,ZenithSolar,,,ISR,,ISR - Other,Qiryat Gat,/organization/suncore-photovoltaic-technology,Suncore Photovoltaic Technology,...,CHN,,CHN - Other,,2013-10-18,2013-10,2013-Q4,2013.0,,USD
13006,/organization/zim-plant-technology-gmbh,ZIM Plant Technology GmbH,,,,,,,/organization/yara,Yara,...,NOR,,NOR - Other,,2013-11-18,2013-11,2013-Q4,2013.0,,USD
13018,/organization/zipcar,Zipcar,|Public Transportation|,Public Transportation,USA,MA,Boston,Boston,/organization/avis,Avis,...,,,,,2013-01-02,2013-01,2013-Q1,2013.0,,USD
13058,/organization/zyb,ZYB,|Social Network Media|Networking|Mobile|,Social Network Media,DNK,,Copenhagen,Copenhagen,/organization/vodafone,Vodafone,...,GBR,,West Berkshire,,2008-05-16,2008-05,2008-Q2,2008.0,31500000.0,EUR





ACQUIRED_AT
VALUE COUNTS
2011-01-01    96
2012-01-01    73
2008-05-01    59
2010-01-01    42
2013-01-01    42
              ..
2004-10-10     1
2009-10-10     1
1999-01-04     1
2005-10-17     1
1999-07-07     1
Name: acquired_at, Length: 3078, dtype: int64


DUPLICATE COUNTS
9991


NULL COUNTS AND PERCENTAGE
0
0.0


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code





ACQUIRED_MONTH
VALUE COUNTS
2014-06    260
2014-07    246
2011-01    244
2014-10    242
2014-09    230
          ... 
1991-12      1
1989-04      1
1998-05      1
1995-04      1
1998-02      1
Name: acquired_month, Length: 279, dtype: int64


DUPLICATE COUNTS
12789


NULL COUNTS AND PERCENTAGE
2
0.01530338970081873


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
3093,/organization/datapad-inc,DataPad,|Big Data Analytics|Business Intelligence|Big ...,Analytics,USA,CA,SF Bay Area,San Francisco,/organization/cloudera,Cloudera,...,USA,CA,SF Bay Area,Palo Alto,0030-09-14,,,,,USD
11206,/organization/tastykhana,TastyKhana,|E-Commerce|,E-Commerce,IND,,Pune,Pune,/organization/foodpanda,foodpanda / hellofood,...,DEU,,Berlin,Berlin,0017-11-14,,,,,USD





ACQUIRED_QUARTER
VALUE COUNTS
2014-Q3    681
2014-Q2    661
2014-Q1    561
2013-Q4    527
2011-Q1    520
          ... 
1980-Q1      1
1984-Q3      1
1989-Q2      1
1977-Q1      1
1967-Q2      1
Name: acquired_quarter, Length: 111, dtype: int64


DUPLICATE COUNTS
12957


NULL COUNTS AND PERCENTAGE
2
0.01530338970081873


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
3093,/organization/datapad-inc,DataPad,|Big Data Analytics|Business Intelligence|Big ...,Analytics,USA,CA,SF Bay Area,San Francisco,/organization/cloudera,Cloudera,...,USA,CA,SF Bay Area,Palo Alto,0030-09-14,,,,,USD
11206,/organization/tastykhana,TastyKhana,|E-Commerce|,E-Commerce,IND,,Pune,Pune,/organization/foodpanda,foodpanda / hellofood,...,DEU,,Berlin,Berlin,0017-11-14,,,,,USD





ACQUIRED_YEAR
VALUE COUNTS
2014.0    2351
2011.0    1814
2013.0    1771
2010.0    1583
2012.0    1553
2009.0    1172
2008.0    1100
2007.0     473
2006.0     289
2005.0     203
2004.0     139
1999.0     102
2000.0      95
2003.0      76
2002.0      71
2001.0      67
1998.0      48
1997.0      40
1996.0      33
1995.0      24
1994.0      11
1993.0      11
1988.0       7
1991.0       6
1992.0       5
1987.0       4
1990.0       3
1989.0       3
1981.0       2
1984.0       2
1960.0       2
1986.0       2
1977.0       1
1980.0       1
1973.0       1
1982.0       1
1967.0       1
Name: acquired_year, dtype: int64


DUPLICATE COUNTS
13031


NULL COUNTS AND PERCENTAGE
2
0.01530338970081873


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
3093,/organization/datapad-inc,DataPad,|Big Data Analytics|Business Intelligence|Big ...,Analytics,USA,CA,SF Bay Area,San Francisco,/organization/cloudera,Cloudera,...,USA,CA,SF Bay Area,Palo Alto,0030-09-14,,,,,USD
11206,/organization/tastykhana,TastyKhana,|E-Commerce|,E-Commerce,IND,,Pune,Pune,/organization/foodpanda,foodpanda / hellofood,...,DEU,,Berlin,Berlin,0017-11-14,,,,,USD





 PRICE_AMOUNT 
VALUE COUNTS
1.000000e+07    56
3.000000e+07    52
1.000000e+08    51
5.000000e+07    49
2.000000e+07    47
                ..
5.460000e+08     1
2.260000e+07     1
1.170000e+09     1
5.240000e+07     1
6.200000e+04     1
Name:  price_amount , Length: 1311, dtype: int64


DUPLICATE COUNTS
11757


NULL COUNTS AND PERCENTAGE
9358
71.60456041013084


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
0,/organization/waywire,#waywire,|Entertainment|Politics|Social Media|News|,News,USA,NY,New York City,New York,/organization/magnify,Waywire Networks,...,USA,NY,New York City,New York,2013-10-17,2013-10,2013-Q4,2013.0,,USD
1,/organization/fluff-friends,(fluff)Friends,,,,,,,/organization/social-gaming-network,SGN (Social Gaming Network),...,USA,CA,Los Angeles,Beverly Hills,2008-09-16,2008-09,2008-Q3,2008.0,,USD
3,/organization/vandaele-holdings,.,,,,,,,/organization/hi7e,HI7E,...,USA,FL,Palm Beaches,West Palm Beach,2011-01-01,2011-01,2011-Q1,2011.0,,USD
5,/organization/vantronix,.vantronix,|Cloud Security|Network Security|Security|,Cloud Security,DEU,,Hanover,Hanover,/organization/compumatica,Compumatica,...,,,,,2011-01-01,2011-01,2011-Q1,2011.0,,USD
6,/organization/003-ru,003.RU,,,RUS,,Moscow,Moscow,/organization/media-saturn,Media Saturn,...,ESP,,Barcelona,Barcelona,2012-07-23,2012-07,2012-Q3,2012.0,,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13063,/organization/zynamics,zynamics,|Security|,Security,DEU,,Bochum,Bochum,/organization/google,Google,...,USA,CA,SF Bay Area,Mountain View,2011-03-01,2011-03,2011-Q1,2011.0,,USD
13064,/organization/zync,Zync,|Curated Web|,Curated Web,,,,,/organization/where-com,Where,...,USA,MA,Boston,Boston,2008-02-07,2008-02,2008-Q1,2008.0,,USD
13065,/organization/zync-render,Zync Render,|Digital Media|,Digital Media,USA,MA,Boston,Boston,/organization/google,Google,...,USA,CA,SF Bay Area,Mountain View,2014-08-26,2014-08,2014-Q3,2014.0,,USD
13066,/organization/zynk-mobile,Zynk Mobile,,,,,,,/organization/zenvia,Zenvia,...,BRA,,Porto Alegre,Porto Alegre,2013-03-01,2013-03,2013-Q1,2013.0,,USD





PRICE_CURRENCY_CODE
VALUE COUNTS
USD    12800
EUR      115
GBP      108
CAD       16
AUD       10
JPY        7
SEK        4
NZD        2
NOK        1
AZN        1
SAR        1
Name: price_currency_code, dtype: int64


DUPLICATE COUNTS
13057


NULL COUNTS AND PERCENTAGE
4
0.03060677940163746


NULL ROWS


Unnamed: 0,company_permalink,company_name,company_category_list,company_market,company_country_code,company_state_code,company_region,company_city,acquirer_permalink,acquirer_name,...,acquirer_country_code,acquirer_state_code,acquirer_region,acquirer_city,acquired_at,acquired_month,acquired_quarter,acquired_year,price_amount,price_currency_code
784,/organization/anywhere-fm,Anywhere.FM,|Music|,Music,,,,,/organization/imeem,imeem,...,USA,CA,SF Bay Area,San Francisco,2008-01-28,2008-01,2008-Q1,2008.0,,
6615,/organization/local2me,Local2Me,|Local|Advertising|,Local,USA,CA,SF Bay Area,San Mateo,/organization/smalltown,Smalltown,...,USA,CA,SF Bay Area,San Mateo,2007-12-01,2007-12,2007-Q4,2007.0,,
10504,/organization/snocap,Snocap,|Content|Social Network Media|Peer-to-Peer|Games|,Games,USA,CA,SF Bay Area,San Francisco,/organization/imeem,imeem,...,USA,CA,SF Bay Area,San Francisco,2008-02-01,2008-02,2008-Q1,2008.0,,
12842,/organization/xiaonei,Xiaonei,|Networking|Facebook Applications|Curated Web|,Facebook Applications,CHN,,CHN - Other,,/organization/renren-inc,Renren Inc.,...,CHN,,CHN - Other,Chaoyang,2006-10-24,2006-10,2006-Q4,2006.0,,


#Additional Tests for Data Analysis and Entity Resolution Understanding

In [26]:
mergeI = pd.merge(companies[['permalink']], investments[['investor_permalink']], left_on='permalink', right_on='investor_permalink', how='outer', indicator=True)
mergeI = mergeI[mergeI['_merge'] == 'both']
mergeI


Unnamed: 0,permalink,investor_permalink,_merge
67,/organization/1c-company,/organization/1c-company,both
107,/organization/21vianet,/organization/21vianet,both
136,/organization/2b-angels,/organization/2b-angels,both
137,/organization/2b-angels,/organization/2b-angels,both
138,/organization/2b-angels,/organization/2b-angels,both
...,...,...,...
52606,/organization/zipcar,/organization/zipcar,both
52607,/organization/zipcar,/organization/zipcar,both
52717,/organization/zoominfo,/organization/zoominfo,both
52778,/organization/zulily,/organization/zulily,both


In [27]:
mergeA = pd.merge(companies[['permalink']], acquisitions[['acquirer_permalink']], left_on='permalink', right_on='acquirer_permalink', how='outer', indicator=True)
mergeA = mergeA[mergeA['_merge'] == 'both']
mergeA

Unnamed: 0,permalink,acquirer_permalink,_merge
38,/organization/11i-solutions,/organization/11i-solutions,both
39,/organization/11i-solutions,/organization/11i-solutions,both
68,/organization/1c-company,/organization/1c-company,both
91,/organization/1stdibs,/organization/1stdibs,both
94,/organization/1world-online,/organization/1world-online,both
...,...,...,...
52170,/organization/zynga,/organization/zynga,both
52171,/organization/zynga,/organization/zynga,both
52175,/organization/zyraz-technology,/organization/zyraz-technology,both
52182,/organization/x,/organization/x,both


In [28]:
mergeA = pd.merge(investments[['investor_permalink']], acquisitions[['acquirer_permalink']], left_on='investor_permalink', right_on='acquirer_permalink', how='outer', indicator=True)
mergeA = mergeA[mergeA['_merge'] != 'both']
mergeA

Unnamed: 0,investor_permalink,acquirer_permalink,_merge
0,/person/jamessss-bondddd,,left_only
1,/organization/ekaf,,left_only
2,/person/antonio-murroni,,left_only
3,/person/filippo-murroni,,left_only
4,/organization/crp-companhia-de-participacoes,,left_only
...,...,...,...
172772,,/organization/webjet-com-au,right_only
172773,,/organization/nestle-purina-petcare,right_only
172774,,/organization/cupcake-digital,right_only
172775,,/organization/withings,right_only


In [29]:
agg = rounds.groupby('company_permalink').sum().reset_index(drop=False)[['company_permalink', ' raised_amount_usd ']]
agg = pd.merge(companies[['permalink', ' funding_total_usd ']], agg, left_on='permalink', right_on='company_permalink', how='outer', indicator=True)
agg

check = agg[agg['_merge'] != 'both']
print(len(check))
check

def replace_null(x):
  if x == 0.0:
    return np.nan
  else:
    return x
# found out that prysm is duplicate
both = agg[agg['_merge'] == 'both']
#both[' raised_amount_usd '] = both[' raised_amount_usd '].apply(lambda x: replace_null(x))
both = both[both[' funding_total_usd '] != both[' raised_amount_usd ']]
both = both.sort_values(by=' raised_amount_usd ', ascending=False)
both = both[both[' raised_amount_usd '] != 0.0]
both

35


Unnamed: 0,permalink,funding_total_usd,company_permalink,raised_amount_usd,_merge
34074,/organization/pure-storage,534856468.0,/organization/pure-storage,474856468.0,both
33939,/organization/prysm,145521193.0,/organization/prysm,293080123.0,both
33940,/organization/prysm,147558930.0,/organization/prysm,293080123.0,both
31086,/organization/oxford-nanopore-technologies,270725149.0,/organization/oxford-nanopore-technologies,211725149.0,both
40911,/organization/stripe,120000000.0,/organization/stripe,190000000.0,both
...,...,...,...,...,...
29330,/organization/niwa,200000.0,/organization/niwa,150000.0,both
37387,/organization/seesearch,27181.0,/organization/seesearch,54362.0,both
45238,/organization/urbita,675000.0,/organization/urbita,25000.0,both
4228,/organization/babbaco,4525000.0,/organization/babbaco,25000.0,both


#Creation of Tables according to our Schema

## Plan for cleaning

1.   Preliminary cleaning of companies, investments, rounds 
and acquisitions.
2.   Next, using the companies table, I create a company_entity for the financial_entity
3. I create investor_entity using the investments table
4. I create the acquirer entity from the acquisitions table
5. Original Rounds table contains after being merged on funding_round_permalink has three cases (post checking with original companies) - 
  *   companies in round but not in original companies - need to add to company_entity 
  *   Companies in original but not in rounds - nothing to be done
  *   investor as null for the round post merging - needs to be imputed to unknown in both the final rounds table and investor_entity
6. Acquisitions after checking merge with original companies has companies that are in acquisitions table that are not in original companies - needs to be added to company_entity

7. Finally need to combine all three (and update is_a accordingly if an entity is more than one of the three)

8. Give them a unique integer ID




##Cleaning up Companies

###Fixing category_list

In [30]:
def reformat_lists(x):
  if x is np.nan:
    return 'unknown'
  x = str(x)
  x = x.replace("|","   ")
  x = x.strip()
  x = x.replace("   ",", ")
  return x

companies['category_list'] = companies['category_list'].apply(lambda x: reformat_lists(x))

###Fixing Permalink duplicates

In [31]:
companies.at[33940, 'funding_rounds'] = 4
companies.at[33940, 'last_funding_at'] = '2014-11-25'
companies.at[33940, ' funding_total_usd '] = 100000000.0 + 44808930.0 + 2750000.0 + 145521193
companies[companies.duplicated(subset='permalink', keep='first')]

companies.at[44033, 'funding_rounds'] = 5
companies.at[44033, 'last_funding_at'] = '2014-07-18'
companies.at[44033, ' funding_total_usd '] = 286873.0 + 45321.0

companies = companies.drop(33939)
companies = companies.drop(44034)



In [32]:
companies = companies.reset_index(drop=False)

###Filling name null

In [33]:
companies.at[28221, 'name'] = 'Tell-IT'

###Filling Status nulls

In [34]:
def replace_nan_status(x):
  if x is np.nan:
    return 'unknown'
  else:
    return x

companies['status'] = companies['status'].apply(lambda x: replace_nan_status(x))

###Changing date to Datetime dtype and formatted

In [35]:
companies['founded_at'] = pd.to_datetime(companies['founded_at'],format='%Y-%m-%d', errors = 'coerce')

##Cleaning Investments

###Fixing investor_category_list

In [36]:
investments['investor_category_list'] = investments['investor_category_list'].apply(lambda x: reformat_lists(x))

###Resolving null investor_permalink

66 investors were imputed to as unknowns because they were nulls, and investor permalink cannot be null

In [37]:
nulls = investments[investments['investor_permalink'].isna()]
index = nulls.index

investor_impute_count = 1
for i in index:
  investments.at[i, 'investor_permalink'] = 'unknown' + str(investor_impute_count)
  investments.at[i, 'investor_name'] = 'unknown'
  investor_impute_count+=1



###Creating isPerson column

In [38]:
def create_is_person(x):
  if 'organization' in x:
    return "False"
  elif 'person' in x:
    return "True"
  else:
    return np.nan

investments['is_person'] = investments['investor_permalink'].apply(lambda x: create_is_person(x))

##Cleaning Acquisitions

###Fixing acquirer_category_list

In [39]:
acquisitions['acquirer_category_list'] = acquisitions['acquirer_category_list'].apply(lambda x: reformat_lists(x))

###Fixing null price_currency_code

In [40]:
nulls = acquisitions[acquisitions['price_currency_code'].isna()]
index = nulls.index

for i in index:
  acquisitions.at[i, 'price_currency_code'] = 'USD'

acquisitions.at[12842, 'price_currency_code'] = 'CNY'

###Converting acquired_at to datetime

In [41]:
acquisitions['acquired_at'] = pd.to_datetime(acquisitions['acquired_at'],format='%Y-%m-%d', errors = 'coerce')

##Cleaning Rounds

###Converting funded_at to datetime

In [42]:
rounds['funded_at'] = pd.to_datetime(rounds['funded_at'],format='%Y-%m-%d', errors = 'coerce')

###Creating round_number

In [43]:
rounds['round_number'] = np.arange(len(rounds))

## Creating Financial_Entity Components

###Restructuring companies

In [44]:
company_entity = companies[['permalink', 'name', 'category_list', ' market ',  'country_code', 'state_code', 'city']]
company_entity.insert(7, 'is_a', 'company')
company_entity = company_entity.rename(columns={'category_list': 'categories', ' market ': 'market', 'country_code': 'country', 'state_code': 'state'})


company_entity 

Unnamed: 0,permalink,name,categories,market,country,state,city,is_a
0,/organization/waywire,#waywire,"Entertainment, Politics, Social Media, News",News,USA,NY,New York,company
1,/organization/tv-communications,&TV Communications,Games,Games,USA,CA,Los Angeles,company
2,/organization/rock-your-paper,'Rock' Your Paper,"Publishing, Education",Publishing,EST,,Tallinn,company
3,/organization/in-touch-network,(In)Touch Network,"Electronics, Guides, Coffee, Restaurants, Musi...",Electronics,GBR,,London,company
4,/organization/r-ranch-and-mine,-R- Ranch and Mine,"Tourism, Entertainment, Games",Tourism,USA,TX,Fort Worth,company
...,...,...,...,...,...,...,...,...
49431,/organization/zzish,Zzish,"Analytics, Gamification, Developer APIs, iOS, ...",Education,GBR,,London,company
49432,/organization/zznode-science-and-technology-co...,ZZNode Science and Technology,Enterprise Software,Enterprise Software,CHN,,Beijing,company
49433,/organization/zzzzapp-com,Zzzzapp Wireless ltd.,"Web Development, Advertising, Wireless, Mobile",Web Development,HRV,,Split,company
49434,/organization/a-list-games,[a]list games,Games,Games,,,,company


###Restructuring Investments

In [45]:
investor_entity = investments[['investor_permalink', 'investor_name', 'investor_category_list', 'investor_market',  'investor_country_code', 'investor_state_code', 'investor_city']]

#To see which were duplicated
#investor_entity[investor_entity.duplicated(subset='investor_permalink', keep='first')]

investor_entity = investor_entity.drop_duplicates("investor_permalink")
investor_entity.insert(7, 'is_a', 'investor')
investor_entity = investor_entity.rename(columns={'investor_permalink': 'permalink', 'investor_name': 'name','investor_category_list': 'categories', 'investor_market': 'market', 'investor_country_code': 'country', 'investor_state_code': 'state', 'investor_city': 'city'})
investor_entity

Unnamed: 0,permalink,name,categories,market,country,state,city,is_a
0,/person/jamessss-bondddd,jamessss bondddd,unknown,,,,,investor
1,/organization/ekaf,Ekaf,unknown,,,,,investor
2,/person/antonio-murroni,ANTONIO MURRONI,unknown,,,,,investor
3,/person/filippo-murroni,FILIPPO Murroni,unknown,,,,,investor
4,/organization/crp-companhia-de-participacoes,CRP Companhia de Participações,unknown,,,,,investor
...,...,...,...,...,...,...,...,...
114446,/person/doug-cobb,Doug Cobb,unknown,,,,,investor
114452,/person/s-kris-gopalakrishnan,"S. ""Kris"" Gopalakrishnan",unknown,,,,,investor
114466,/organization/dcl-ventures-inc,"DCL Ventures, Inc.",Pets,Pets,USA,NY,New York,investor
114482,/organization/math-venture-partners,Math Venture Partners,unknown,,USA,IL,Chicago,investor


###Restructuring Acquisitions

In [46]:
acquirer_entity = acquisitions[['acquirer_permalink', 'acquirer_name', 'acquirer_category_list', 'acquirer_market',  'acquirer_country_code', 'acquirer_state_code', 'acquirer_city']]

#To see which were duplicated
#acquirer_entity[acquirer_entity.duplicated(subset='acquirer_permalink', keep='first')]

acquirer_entity.insert(7, 'is_a', 'acquirer')
acquirer_entity = acquirer_entity.drop_duplicates("acquirer_permalink")


acquirer_entity = acquirer_entity.rename(columns={'acquirer_permalink': 'permalink', 'acquirer_name': 'name','acquirer_category_list': 'categories', 'acquirer_market': 'market', 'acquirer_country_code': 'country', 'acquirer_state_code': 'state', 'acquirer_city': 'city'})
acquirer_entity

Unnamed: 0,permalink,name,categories,market,country,state,city,is_a
0,/organization/magnify,Waywire Networks,"Curated Web, Video",Curated Web,USA,NY,New York,acquirer
1,/organization/social-gaming-network,SGN (Social Gaming Network),Games,Games,USA,CA,Beverly Hills,acquirer
2,/organization/nationstar-mortgage-holdings,Nationstar Mortgage Holdings,Finance,Finance,USA,TX,Lewisville,acquirer
3,/organization/hi7e,HI7E,unknown,,USA,FL,West Palm Beach,acquirer
4,/organization/neustar,Neustar,"Telecommunications, Analytics, Information Tec...",Analytics,USA,VA,Sterling,acquirer
...,...,...,...,...,...,...,...,...
13052,/organization/webjet-com-au,Webjet.com.au,"Travel & Tourism, Hotels, Travel",Travel & Tourism,AUS,,Melbourne,acquirer
13053,/organization/nestle-purina-petcare,Nestle Purina Petcare,Pets,Pets,USA,MO,St Louis,acquirer
13056,/organization/cupcake-digital,Cupcake Digital,Games,Games,USA,NY,New York,acquirer
13060,/organization/withings,Withings,Hardware + Software,Hardware + Software,FRA,,Issy-les-moulineaux,acquirer


##Round

In [47]:
round_schema = pd.merge(rounds[['company_permalink','funding_round_permalink', 'funding_round_type', ' raised_amount_usd ', 'funded_at', 'round_number']], investments[['investor_permalink','funding_round_permalink']], left_on='funding_round_permalink', right_on='funding_round_permalink', how='outer', indicator=True)

###Fixing Investor Nulls

Imputing investor permalinks in cases where a funding round does not have a investor associated with it --> setting investor to be unknown. Adding newly created values to investor entity as well.

In [48]:
# Get a list of all funding_round_permalinks that have this problem
problematic_funding_rounds = round_schema[round_schema['_merge'] == 'left_only'].index
print(problematic_funding_rounds)
count = 67
for i in problematic_funding_rounds:
  round_schema.at[i, 'investor_permalink'] = 'unknown' + str(count)
  investor_entity = investor_entity.append({'permalink': ('unknown' + str(count)), 'name': 'unknown', 'is_a': 'investor' }, ignore_index=True)
  count += 1



round_schema[round_schema['_merge'] == 'left_only']


Int64Index([     6,      7,      9,     10,     11,     12,     18,     21,
                22,     23,
            ...
            148061, 148062, 148063, 148064, 148068, 148074, 148075, 148081,
            148082, 148095],
           dtype='int64', length=33590)


Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,raised_amount_usd,funded_at,round_number,investor_permalink,_merge
6,/organization/tv-communications,/funding-round/59a3669a64e39360c2b939300bcda162,venture,3000000.0,2010-09-23,1,unknown67,left_only
7,/organization/tv-communications,/funding-round/86d22afc65107b6941e6c43c671ecbb8,venture,1000000.0,2010-06-04,2,unknown68,left_only
9,/organization/in-touch-network,/funding-round/33c3f135f05d7b734b8d7b7c8ae82647,seed,1500000.0,2011-04-01,4,unknown69,left_only
10,/organization/r-ranch-and-mine,/funding-round/029720f7eeb218f51c43df5155671472,equity_crowdfunding,10000.0,2014-08-17,5,unknown70,left_only
11,/organization/r-ranch-and-mine,/funding-round/766b0bcead9ca8560af5e1ade579fb7f,equity_crowdfunding,50000.0,2014-09-26,6,unknown71,left_only
...,...,...,...,...,...,...,...,...
148074,/organization/zyrra,/funding-round/cc8cd96a500332d5b4f4bcbddb306099,angel,875000.0,2010-12-15,83854,unknown33652,left_only
148075,/organization/zyrra,/funding-round/7a0fa791c96c9c447ee54662395beab9,venture,140500.0,2012-10-18,83855,unknown33653,left_only
148081,/organization/zzzzapp-com,/funding-round/87b5f54b90a09a05ed3d1b8ee509ed11,convertible_note,12918.0,2014-09-10,83860,unknown33654,left_only
148082,/organization/zzzzapp-com,/funding-round/22ef2fafb4d20ac3aa4b86143dbf6c8e,seed,7000.0,2011-11-01,83861,unknown33655,left_only


Checking to see if finanicial entity was correctly updated

In [49]:
investor_entity[investor_entity['permalink'].str.contains('unknown')]

Unnamed: 0,permalink,name,categories,market,country,state,city,is_a
312,unknown1,unknown,unknown,,,,,investor
3388,unknown2,unknown,unknown,,,,,investor
8336,unknown3,unknown,unknown,,,,,investor
8952,unknown4,unknown,unknown,,,,,investor
9268,unknown5,unknown,unknown,,,,,investor
...,...,...,...,...,...,...,...,...
55927,unknown33652,unknown,,,,,,investor
55928,unknown33653,unknown,,,,,,investor
55929,unknown33654,unknown,,,,,,investor
55930,unknown33655,unknown,,,,,,investor


###Adding companies from Rounds

There are certain companies found in Rounds table that were not present in the original companies table. I add these to the company entity.

In [50]:
mergeCR = pd.merge(rounds, company_entity[['permalink']], left_on='company_permalink', right_on='permalink', how='outer', indicator=True)
missing_companies = mergeCR[mergeCR['_merge'] == 'left_only'][['company_permalink', 'company_name', 'company_category_list', 'company_market',  'company_country_code', 'company_state_code', 'company_city']].drop_duplicates()
missing_companies.insert(7, 'is_a', 'company')
missing_companies = missing_companies.rename(columns={'company_permalink': 'permalink', 'company_name': 'name','company_category_list': 'categories', 'company_market': 'market', 'company_country_code': 'country', 'company_state_code': 'state', 'company_city': 'city'})

company_entity = pd.concat([company_entity, missing_companies], axis=0)
company_entity

Unnamed: 0,permalink,name,categories,market,country,state,city,is_a
0,/organization/waywire,#waywire,"Entertainment, Politics, Social Media, News",News,USA,NY,New York,company
1,/organization/tv-communications,&TV Communications,Games,Games,USA,CA,Los Angeles,company
2,/organization/rock-your-paper,'Rock' Your Paper,"Publishing, Education",Publishing,EST,,Tallinn,company
3,/organization/in-touch-network,(In)Touch Network,"Electronics, Guides, Coffee, Restaurants, Musi...",Electronics,GBR,,London,company
4,/organization/r-ranch-and-mine,-R- Ranch and Mine,"Tourism, Entertainment, Games",Tourism,USA,TX,Fort Worth,company
...,...,...,...,...,...,...,...,...
64465,/organization/shift-payments,Shift Payments,|Finance Technology|Personal Finance|Finance|,Finance,USA,CA,San Francisco,company
67874,/organization/sparkow,Sparkow,,,,,,company
74118,/organization/totems,TOTEMS,,,,,,company
80113,/organization/wedpics,WedPics,,,,,,company


##Adding companies from acquisitions

In [51]:
mergeCA = pd.merge(acquisitions, company_entity[['permalink']], left_on='company_permalink', right_on='permalink', how='outer', indicator=True)
missing_companiesA = mergeCA[mergeCA['_merge'] == 'left_only'][['company_permalink', 'company_name', 'company_category_list', 'company_market',  'company_country_code', 'company_state_code', 'company_city']].drop_duplicates()
missing_companiesA.insert(7, 'is_a', 'company')
missing_companiesA = missing_companiesA.rename(columns={'company_permalink': 'permalink', 'company_name': 'name','company_category_list': 'categories', 'company_market': 'market', 'company_country_code': 'country', 'company_state_code': 'state', 'company_city': 'city'})

company_entity = pd.concat([company_entity, missing_companiesA], axis=0)

##Combining all three entities to form Financial Entity

####Starting with merging Company and Investor

In [52]:
commonCI = pd.merge(company_entity[['permalink']], investor_entity[['permalink']], left_on='permalink', right_on='permalink', how='outer', indicator=True)
commonCIBoth = commonCI[commonCI['_merge'] == 'both']
commonCIBoth = commonCIBoth['permalink']
commonCIRight = commonCI[commonCI['_merge'] == 'right_only']
commonCIRight = commonCIRight['permalink']

financial_entity_schema = company_entity.copy()

for i in commonCIBoth:
  financial_entity_schema.loc[financial_entity_schema['permalink'] == i, 'is_a'] = financial_entity_schema.loc[financial_entity_schema['permalink'] == i, 'is_a'] + ', investor'

for i in commonCIRight:
  financial_entity_schema = financial_entity_schema.append(investor_entity[investor_entity['permalink'] == i], ignore_index=True)


financial_entity_schema


Unnamed: 0,permalink,name,categories,market,country,state,city,is_a
0,/organization/waywire,#waywire,"Entertainment, Politics, Social Media, News",News,USA,NY,New York,company
1,/organization/tv-communications,&TV Communications,Games,Games,USA,CA,Los Angeles,company
2,/organization/rock-your-paper,'Rock' Your Paper,"Publishing, Education",Publishing,EST,,Tallinn,company
3,/organization/in-touch-network,(In)Touch Network,"Electronics, Guides, Coffee, Restaurants, Musi...",Electronics,GBR,,London,company
4,/organization/r-ranch-and-mine,-R- Ranch and Mine,"Tourism, Entertainment, Games",Tourism,USA,TX,Fort Worth,company
...,...,...,...,...,...,...,...,...
113716,unknown33652,unknown,,,,,,investor
113717,unknown33653,unknown,,,,,,investor
113718,unknown33654,unknown,,,,,,investor
113719,unknown33655,unknown,,,,,,investor


In [53]:
#financial_entity_schema[financial_entity_schema['is_a'] == 'investor']
financial_entity_schema[financial_entity_schema.duplicated(subset='permalink', keep='first')]

Unnamed: 0,permalink,name,categories,market,country,state,city,is_a


####Merging Result with Acquirers

In [54]:
commonCA = pd.merge(financial_entity_schema[['permalink']], acquirer_entity[['permalink']], left_on='permalink', right_on='permalink', how='outer', indicator=True)
commonCABoth = commonCA[commonCA['_merge'] == 'both']
commonCABoth = commonCABoth['permalink']
commonCARight = commonCA[commonCA['_merge'] == 'right_only']
commonCARight = commonCARight['permalink']

for i in commonCABoth:
  financial_entity_schema.loc[financial_entity_schema['permalink'] == i, 'is_a'] = financial_entity_schema.loc[financial_entity_schema['permalink'] == i, 'is_a'] + ', acquirer'

for i in commonCARight:
  financial_entity_schema = financial_entity_schema.append(acquirer_entity[acquirer_entity['permalink'] == i], ignore_index=True)


financial_entity_schema

Unnamed: 0,permalink,name,categories,market,country,state,city,is_a
0,/organization/waywire,#waywire,"Entertainment, Politics, Social Media, News",News,USA,NY,New York,company
1,/organization/tv-communications,&TV Communications,Games,Games,USA,CA,Los Angeles,company
2,/organization/rock-your-paper,'Rock' Your Paper,"Publishing, Education",Publishing,EST,,Tallinn,company
3,/organization/in-touch-network,(In)Touch Network,"Electronics, Guides, Coffee, Restaurants, Musi...",Electronics,GBR,,London,company
4,/organization/r-ranch-and-mine,-R- Ranch and Mine,"Tourism, Entertainment, Games",Tourism,USA,TX,Fort Worth,company
...,...,...,...,...,...,...,...,...
117200,/organization/apsis-sweden,Apsis Sweden,Advertising,Advertising,SWE,,Malma,acquirer
117201,/organization/webjet-com-au,Webjet.com.au,"Travel & Tourism, Hotels, Travel",Travel & Tourism,AUS,,Melbourne,acquirer
117202,/organization/nestle-purina-petcare,Nestle Purina Petcare,Pets,Pets,USA,MO,St Louis,acquirer
117203,/organization/cupcake-digital,Cupcake Digital,Games,Games,USA,NY,New York,acquirer


In [55]:
#financial_entity_schema[financial_entity_schema['is_a'] == 'company, investor, acquirer']
financial_entity_schema[financial_entity_schema.duplicated(subset='permalink', keep='first')]

Unnamed: 0,permalink,name,categories,market,country,state,city,is_a


###Creating ID

In [56]:
financial_entity_schema['ID'] = np.arange(len(financial_entity_schema))
financial_entity_schema

Unnamed: 0,permalink,name,categories,market,country,state,city,is_a,ID
0,/organization/waywire,#waywire,"Entertainment, Politics, Social Media, News",News,USA,NY,New York,company,0
1,/organization/tv-communications,&TV Communications,Games,Games,USA,CA,Los Angeles,company,1
2,/organization/rock-your-paper,'Rock' Your Paper,"Publishing, Education",Publishing,EST,,Tallinn,company,2
3,/organization/in-touch-network,(In)Touch Network,"Electronics, Guides, Coffee, Restaurants, Musi...",Electronics,GBR,,London,company,3
4,/organization/r-ranch-and-mine,-R- Ranch and Mine,"Tourism, Entertainment, Games",Tourism,USA,TX,Fort Worth,company,4
...,...,...,...,...,...,...,...,...,...
117200,/organization/apsis-sweden,Apsis Sweden,Advertising,Advertising,SWE,,Malma,acquirer,117200
117201,/organization/webjet-com-au,Webjet.com.au,"Travel & Tourism, Hotels, Travel",Travel & Tourism,AUS,,Melbourne,acquirer,117201
117202,/organization/nestle-purina-petcare,Nestle Purina Petcare,Pets,Pets,USA,MO,St Louis,acquirer,117202
117203,/organization/cupcake-digital,Cupcake Digital,Games,Games,USA,NY,New York,acquirer,117203


##Company

In [57]:
company_schema = financial_entity_schema[financial_entity_schema['is_a'].str.contains('company')][['permalink', 'ID']]
company_schema = pd.merge(company_schema, companies[['permalink', 'founded_at', 'homepage_url', 'status']], left_on='permalink', right_on='permalink', how='left')
company_schema = company_schema.sort_values(by='ID', ascending=True)
company_schema = company_schema[['ID', 'founded_at', 'homepage_url', 'status']]
company_schema = company_schema.rename(columns={'ID': 'company_ID', 'founded_at': 'founding_date', 'homepage_url': 'homepage_URL'})

In [58]:
company_schema['status'] = company_schema['status'].apply(lambda x: replace_nan_status(x))
company_schema

Unnamed: 0,company_ID,founding_date,homepage_URL,status
0,0,2012-06-01,http://www.waywire.com,acquired
1,1,NaT,http://enjoyandtv.com,operating
2,2,2012-10-26,http://www.rockyourpaper.org,operating
3,3,2011-04-01,http://www.InTouchNetwork.com,operating
4,4,2014-01-01,,operating
...,...,...,...,...
58447,58447,NaT,,unknown
58448,58448,NaT,,unknown
58449,58449,NaT,,unknown
58450,58450,NaT,,unknown


##Investor

In [59]:
investor_schema = financial_entity_schema[financial_entity_schema['is_a'].str.contains('investor')][['permalink', 'ID']]
investor_schema = pd.merge(investor_schema, investments[['investor_permalink', 'is_person']], left_on='permalink', right_on='investor_permalink', how='left')
investor_schema = investor_schema.sort_values(by='ID', ascending=True)
investor_schema = investor_schema[['ID', 'is_person']]
investor_schema = investor_schema.rename(columns={'ID': 'investor_ID'})
investor_schema = investor_schema.drop_duplicates("investor_ID")
investor_schema

Unnamed: 0,investor_ID,is_person
0,67,False
1,107,False
13,136,False
14,185,False
15,190,False
...,...,...
148091,113716,
148092,113717,
148093,113718,
148094,113719,


##Acquisition

In [60]:
acquisition_schema = financial_entity_schema[financial_entity_schema['is_a'].str.contains('acquirer')][['permalink', 'ID']]
acquisition_schema = pd.merge(acquisition_schema, acquisitions[['acquirer_permalink', 'company_permalink', 'acquired_at', ' price_amount ', 'price_currency_code']], left_on='permalink', right_on='acquirer_permalink', how='left')
acquisition_schema = acquisition_schema.rename(columns={'ID': 'acquirer_ID', ' price_amount ': 'price', 'price_currency_code': 'currency', 'acquired_at': 'date'})
acquisition_schema = pd.merge(acquisition_schema, financial_entity_schema[['permalink', 'ID']], left_on='company_permalink', right_on='permalink', how='left')
acquisition_schema = acquisition_schema[['acquirer_ID', 'ID', 'date', 'price', 'currency']]
acquisition_schema = acquisition_schema.rename(columns={'ID': 'acquired_ID'})
acquisition_schema

Unnamed: 0,acquirer_ID,acquired_ID,date,price,currency
0,38,53372,2011-03-28,,USD
1,38,54909,2011-04-05,,USD
2,67,6603,2008-07-18,80000000.0,USD
3,90,55198,2012-09-18,,USD
4,93,15577,2014-09-15,,USD
...,...,...,...,...,...
13064,117200,49304,2012-06-17,,USD
13065,117201,58440,2013-03-01,25000000.0,AUD
13066,117202,58441,2014-01-15,,USD
13067,117203,49393,2014-03-12,,USD


##Round Creation

In [61]:
round_schema = pd.merge(round_schema, financial_entity_schema[['permalink','ID']], left_on='company_permalink', right_on='permalink', how='left')
round_schema = round_schema.rename(columns={'ID': 'company_ID', 'funding_round_type': 'round_type', ' raised_amount_usd ': 'amount_USD', 'funded_at': 'date'})
round_schema = pd.merge(round_schema, financial_entity_schema[['permalink','ID']], left_on='investor_permalink', right_on='permalink', how='left')
round_schema = round_schema[['company_ID', 'ID', 'round_number', 'round_type', 'amount_USD', 'date']]

In [62]:
round_schema = round_schema.rename(columns={'ID': 'investor_ID'})
round_schema

Unnamed: 0,company_ID,investor_ID,round_number,round_type,amount_USD,date
0,0,66814,0,seed,1750000.0,2012-06-30
1,0,59532,0,seed,1750000.0,2012-06-30
2,0,65705,0,seed,1750000.0,2012-06-30
3,0,64316,0,seed,1750000.0,2012-06-30
4,0,71038,0,seed,1750000.0,2012-06-30
...,...,...,...,...,...,...
148091,49435,59224,83868,venture,10000000.0,2011-01-03
148092,49435,59037,83868,venture,10000000.0,2011-01-03
148093,49435,60929,83868,venture,10000000.0,2011-01-03
148094,49435,58520,83868,venture,10000000.0,2011-01-03




##Final Financial Entity

In [63]:
financial_entity_schema = financial_entity_schema[['ID', 'name', 'categories', 'market', 'country', 'state', 'city', 'is_a']]
financial_entity_schema

Unnamed: 0,ID,name,categories,market,country,state,city,is_a
0,0,#waywire,"Entertainment, Politics, Social Media, News",News,USA,NY,New York,company
1,1,&TV Communications,Games,Games,USA,CA,Los Angeles,company
2,2,'Rock' Your Paper,"Publishing, Education",Publishing,EST,,Tallinn,company
3,3,(In)Touch Network,"Electronics, Guides, Coffee, Restaurants, Musi...",Electronics,GBR,,London,company
4,4,-R- Ranch and Mine,"Tourism, Entertainment, Games",Tourism,USA,TX,Fort Worth,company
...,...,...,...,...,...,...,...,...
117200,117200,Apsis Sweden,Advertising,Advertising,SWE,,Malma,acquirer
117201,117201,Webjet.com.au,"Travel & Tourism, Hotels, Travel",Travel & Tourism,AUS,,Melbourne,acquirer
117202,117202,Nestle Purina Petcare,Pets,Pets,USA,MO,St Louis,acquirer
117203,117203,Cupcake Digital,Games,Games,USA,NY,New York,acquirer




#Convert to CSV files

In [65]:
financial_entity_schema.to_csv('Financial_Entity_Schema.csv', index=False)

In [66]:
company_schema.to_csv('Company_Schema.csv', index=False)

In [67]:
investor_schema.to_csv('Investor_Schema.csv', index=False)

In [68]:
round_schema.to_csv('Round_Schema.csv', index=False)

In [69]:
acquisition_schema.to_csv('Acquisition_Schema.csv', index=False)