# Crunchbase Snapshot © 2013 Data Analysis Notebook

## Import required modules.

In [48]:
%matplotlib inline
# import matplotlib.pyplot as plt
import os

import numpy as np
from pandas import DataFrame as df
from pandas import read_csv
from py2neo import authenticate, Graph, Node, Relationship

from scripts.vis import draw
# import plotly as py
# from plotly.graph_objs import *

# CSS files for more aesthetically pleasing inline tables.
from IPython.core.display import HTML
# Read both stylesheets inside a `with` block so the file handles are closed
# as soon as their text has been read.
with open('css/style-table.css') as table_css, open('css/style-notebook.css') as notebook_css:
    css = table_css.read() + notebook_css.read()
# Last expression of the cell: wraps the combined CSS in a <style> element.
HTML('<style>{}</style>'.format(css))

## Import Crunchbase Snapshot © 2013 CSV files.

In [9]:
def load_frame(path, key = 'id'):
    """Load one snapshot CSV into a data frame.

    The first CSV column is read as the index and must be named `key`;
    `reset_index(key)` then moves it back into an ordinary column, leaving a
    default 0..n-1 index that the row loops below rely on. Missing values are
    replaced by empty strings.

    `read_csv` is used instead of `DataFrame.from_csv`, which is deprecated
    and absent from current pandas releases. Date parsing of the index is
    left off because the first column holds identifiers, not timestamps.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    key : str
        Name of the first (key) column in the file.

    Returns
    -------
    DataFrame
        The loaded frame with `key` as a regular column.
    """
    return read_csv(path, index_col = 0).reset_index(key).fillna('')


cb_objects = load_frame('csv/cb_objects.csv')
cb_acquisitions = load_frame('csv/cb_acquisitions.csv')
cb_funding_rounds = load_frame('csv/cb_funding_rounds.csv')
cb_funds = load_frame('csv/cb_funds.csv')
cb_investments = load_frame('csv/cb_investments.csv')
cb_ipos = load_frame('csv/cb_ipos.csv')
cb_milestones = load_frame('csv/cb_milestones.csv')
cb_offices = load_frame('csv/cb_offices.csv')
cb_people = load_frame('csv/cb_people.csv')
cb_relationships = load_frame('csv/cb_relationships.csv')

odm_organizations = load_frame('csv/odm_organizations.csv', key = 'crunchbase_uuid')
odm_people = load_frame('csv/odm_people.csv', key = 'crunchbase_uuid')

## Visualize data frames.

In [52]:
# Preview one frame at a time: every line but one is commented out, and the
# remaining head() call is the expression this cell evaluates and displays.
# Swap which line is commented to inspect a different frame.
# cb_acquisitions.head()
# cb_funding_rounds.head()
# cb_funds.head()
# cb_investments.head()
# cb_ipos.head()
# cb_milestones.head()
# cb_objects.head()
# cb_offices.head()
# cb_people.head()
# cb_relationships.head()
odm_organizations.head()
# odm_people.head()

Unnamed: 0,crunchbase_uuid,type,primary_role,name,crunchbase_url,homepage_domain,homepage_url,profile_image_url,facebook_url,twitter_url,linkedin_url,stock_symbol,location_city,location_region,location_country_code,short_description
0,e1393508-30ea-8a36-3f96-dd3226033abd,Organization,company,Wetpaint,https://www.crunchbase.com/organization/wetpai...,wetpaint-inc.com,http://wetpaint-inc.com,https://www.crunchbase.com/organization/wetpai...,,http://twitter.com/BachelrWetpaint,,:,Seattle,Washington,USA,Wetpaint offers an online social publishing pl...
1,bf4d7b0e-b34d-2fd8-d292-6049c4f7efc7,Organization,company,Zoho,https://www.crunchbase.com/organization/zoho?u...,zoho.com,https://www.zoho.com/,https://www.crunchbase.com/organization/zoho/p...,http://www.facebook.com/zoho,http://twitter.com/zoho,http://www.linkedin.com/company/zoho-corporati...,:,Pleasanton,California,USA,Run your entire business with Zoho's suite of ...
2,5f2b40b8-d1b3-d323-d81a-b7a8e89553d0,Organization,company,Digg,https://www.crunchbase.com/organization/digg?u...,digg.com,http://www.digg.com,https://www.crunchbase.com/organization/digg/p...,http://www.facebook.com/digg,http://twitter.com/digg,http://www.linkedin.com/company/digg,:,,,,Digg Inc. operates a website that enables its ...
3,f4d5ab44-058b-298b-ea81-380e6e9a8eec,Organization,investor,Omidyar Network,https://www.crunchbase.com/organization/omidya...,omidyar.com,http://www.omidyar.com,https://www.crunchbase.com/organization/omidya...,http://www.facebook.com/OmidyarNetwork,http://twitter.com/OmidyarNetwork,http://www.linkedin.com/company/22806,:,Redwood City,California,USA,"Pierre Omidyar, the founder of eBay, and his w..."
4,df662812-7f97-0b43-9d3e-12f64f504fbb,Organization,company,Facebook,https://www.crunchbase.com/organization/facebo...,facebook.com,http://www.facebook.com,https://www.crunchbase.com/organization/facebo...,https://www.facebook.com/,https://twitter.com/facebook,http://www.linkedin.com/company/facebook,NASDAQ:FB,Menlo Park,California,USA,Facebook is an online social networking servic...


## Begin building the neo4j graph.

In [11]:
# Connect to the neo4j server. The password comes from the NEO4J_PASSWORD
# environment variable when it is set, so real credentials do not have to be
# saved in the notebook; 'hello' is kept only as the backward-compatible
# fallback for the existing local setup.
objects = Graph(user = 'neo4j', password = os.environ.get('NEO4J_PASSWORD', 'hello'))
# WARNING: delete_all() clears whatever the connected graph already holds so
# the cells below rebuild it from scratch. Do not point this at a database
# whose contents must be kept.
objects.delete_all()

## Add organizations first.

In [12]:
# Populate the graph with one 'Organization' node per row of the
# odm_organizations data frame.
for i in range(len(odm_organizations)):
    # Instantiate the organization node.
    company = Node('Organization', name = odm_organizations.at[i, 'name'])
    # Copy every column of the row onto the node as a property.
    for column in odm_organizations.columns:
        value = odm_organizations.at[i, column]
        # Unwrap any numpy scalar (int32, int64, bool_, ...) into the plain
        # Python value; checking for np.int64 alone would let other numpy
        # scalar types through unconverted.
        if isinstance(value, np.generic):
            value = value.item()
        company[column] = value

    # Add the node to the objects graph.
    objects.create(company)

## Draw the graph of organizations.

In [13]:
# Mapping from node label to a property name, handed to draw(); presumably the
# property is what gets shown as each node's caption (TODO confirm against
# scripts.vis.draw).
options = {'Organization': 'name'}
# draw() comes from the project's scripts.vis module; physics = True is passed
# through to it unchanged.
draw(objects, options, physics = True)

## Add people to the graph.

In [21]:
# Create one 'Person' node per row of odm_people. Rows that cannot be turned
# into a node are skipped (best effort), but they are counted and reported
# instead of being silently discarded.
people_failures = []
for i in range(len(odm_people)):
    try:
        # Get the person's attributes.
        first_name = odm_people.at[i, 'first_name']
        last_name = odm_people.at[i, 'last_name']
        city = odm_people.at[i, 'location_city']
        region = odm_people.at[i, 'location_region']
        country = odm_people.at[i, 'location_country_code']
        organization = odm_people.at[i, 'organization']

        person = Node('Person',
                      name = first_name + ' ' + last_name,
                      last_name = last_name,
                      first_name = first_name,
                      city = city,
                      region = region,
                      country = country,
                      organization = organization)

        objects.create(person)
    except Exception as error:
        # `Exception` rather than a bare `except`, so interrupting the kernel
        # still stops the loop.
        people_failures.append((i, repr(error)))

if people_failures:
    print('Skipped {} of {} people; first failure at row {}: {}'.format(
        len(people_failures), len(odm_people),
        people_failures[0][0], people_failures[0][1]))

## Draw the graph of people and organizations.

In [22]:
# Label -> property mapping for the two node kinds created so far; presumably
# the property is used as the node caption (TODO confirm against
# scripts.vis.draw).
options = {'Organization': 'name', 'Person': 'name'}
draw(objects, options, physics = True)

## Build relationships between people and organizations.

In [32]:
# Link each person to the organization named in their odm_people row with a
# WORKS_FOR relationship. Both ends are looked up by name, so when several
# nodes share a name the first match returned by find_one is used.
unlinked_people = 0
for i in range(len(odm_people)):
    # Parse each row of odm_people frame.
    try:
        name = odm_people.at[i, 'first_name'] + ' ' + odm_people.at[i, 'last_name']
        employer_name = odm_people.at[i, 'organization']

        # A blank organization (missing values were filled with '') means
        # there is no employer to link; searching for '' could otherwise
        # match an organization node whose own name is blank.
        if not employer_name:
            unlinked_people += 1
            continue

        person = objects.find_one('Person',
                                  property_key = 'name',
                                  property_value = name)

        organization = objects.find_one('Organization',
                                        property_key = 'name',
                                        property_value = employer_name)

        # Only build the edge when both endpoints were actually found.
        if person is None or organization is None:
            unlinked_people += 1
            continue

        r = Relationship(person, "WORKS_FOR", organization)

        objects.create(r)
    except Exception:
        # Best effort: count the row as unlinked and keep going.
        unlinked_people += 1

if unlinked_people:
    print('{} of {} people could not be linked to an organization'.format(
        unlinked_people, len(odm_people)))

18776

## Draw the graph with org/person relationships.

In [49]:
# Same label -> property mapping as before the relationships were added;
# presumably the property is used as the node caption (TODO confirm against
# scripts.vis.draw).
options = {'Organization': 'name', 'Person': 'name'}
draw(objects, options, physics = True)

## Add funds to the network.

In [41]:
# Show the first rows of the funds frame to see which columns are available.
cb_funds.head()

Unnamed: 0,id,fund_id,object_id,name,funded_at,raised_amount,raised_currency_code,source_url,source_description,created_at,updated_at
0,1,1,f:371,Second Fund,2008-12-16,300000000.0,USD,http://www.pehub.com/26194/dfj-dragon-raising-...,peHub,2008-12-17 03:07:16,2008-12-17 03:07:16
1,4,4,f:17,Sequoia Israel Fourth Fund,2008-12-17,200750000.0,USD,http://www.pehub.com/26725/sequoia-israel-rais...,Sequoia Israel Raises Fourth Fund,2008-12-18 22:04:42,2008-12-18 22:04:42
2,5,5,f:951,Tenth fund,2008-08-11,650000000.0,USD,http://venturebeat.com/2008/08/11/interwest-cl...,Venture Beat,2008-12-31 09:47:51,2008-12-31 09:47:51
3,6,6,f:192,New funds acquire,,625000000.0,USD,http://venturebeat.com/2008/07/28/us-venture-p...,U.S. Venture Partners raises $625M fund for ne...,2009-01-01 18:13:44,2009-01-01 18:16:27
4,7,7,f:519,Third fund,2008-05-20,200000000.0,USD,http://venturebeat.com/2008/05/20/disneys-stea...,Venture Beat,2009-01-03 09:51:58,2013-09-03 16:34:54


In [53]:
# Create one 'Fund' node per row of cb_funds. Rows that fail are skipped
# (best effort) but counted and reported rather than silently dropped.
fund_failures = []
for i in range(len(cb_funds)):
    try:
        # Get the fund's attributes.
        object_id = cb_funds.at[i, 'object_id']
        name = cb_funds.at[i, 'name']
        raised_amount = cb_funds.at[i, 'raised_amount']
        source_url = cb_funds.at[i, 'source_url']
        funded_at = cb_funds.at[i, 'funded_at']

        fund = Node('Fund',
                    name = name,
                    object_id = object_id,
                    raised_amount = raised_amount,
                    source_url = source_url,
                    funded_at = funded_at)

        objects.create(fund)
    except Exception as error:
        # `Exception` rather than a bare `except`, so interrupting the kernel
        # still stops the loop.
        fund_failures.append((i, repr(error)))

if fund_failures:
    print('Skipped {} of {} funds; first failure at row {}: {}'.format(
        len(fund_failures), len(cb_funds),
        fund_failures[0][0], fund_failures[0][1]))

## Draw the graph, now with companies, people, and funds.

In [59]:
# Label -> property mapping handed to draw(); presumably the property is used
# as each node's caption (TODO confirm against scripts.vis.draw).
# Organization nodes are created in this notebook with the 'Organization'
# label, so that label (not 'Company') is the key that matches them.
options = {'Organization': 'name', 'Person': 'name', 'Fund': 'name'}
draw(objects, options, physics = True)

## Add offices.

In [40]:
# Show the first rows of the offices frame to see which columns are available.
cb_offices.head()

Unnamed: 0,id,object_id,office_id,description,region,address1,address2,city,zip_code,state_code,country_code,latitude,longitude,created_at,updated_at
0,1,c:1,1,,Seattle,710 - 2nd Avenue,Suite 1100,Seattle,98104,WA,USA,47.6031,-122.333,,
1,2,c:3,3,Headquarters,SF Bay,4900 Hopyard Rd,Suite 310,Pleasanton,94588,CA,USA,37.6929,-121.905,,
2,3,c:4,4,,SF Bay,135 Mississippi St,,San Francisco,94107,CA,USA,37.7647,-122.395,,
3,4,c:5,5,Headquarters,SF Bay,1601 Willow Road,,Menlo Park,94025,CA,USA,37.416,-122.152,,
4,5,c:7,7,,SF Bay,Suite 200,654 High Street,Palo Alto,94301,CA,ISR,,,,


In [None]:
# Create one 'Office' node per row of cb_offices. Rows that fail are skipped
# (best effort) but counted and reported rather than silently dropped.
office_failures = []
for i in range(len(cb_offices)):
    try:
        # Get the office's attributes.
        object_id = cb_offices.at[i, 'object_id']
        # Stored as a plain int so it can be matched by value later.
        office_id = int(cb_offices.at[i, 'office_id'])
        description = cb_offices.at[i, 'description']
        region = cb_offices.at[i, 'region']
        zip_code = cb_offices.at[i, 'zip_code']
        city = cb_offices.at[i, 'city']
        state = cb_offices.at[i, 'state_code']
        country = cb_offices.at[i, 'country_code']

        office = Node('Office',
                      object_id = object_id,
                      office_id = office_id,
                      description = description,
                      region = region,
                      zip_code = zip_code,
                      city = city,
                      state = state,
                      country = country)

        objects.create(office)
    except Exception as error:
        # `Exception` rather than a bare `except`, so interrupting the kernel
        # still stops the loop.
        office_failures.append((i, repr(error)))

if office_failures:
    print('Skipped {} of {} offices; first failure at row {}: {}'.format(
        len(office_failures), len(cb_offices),
        office_failures[0][0], office_failures[0][1]))

## Draw the graph, including offices.

In [None]:
# Label -> property mapping handed to draw(); presumably the property is used
# as each node's caption (TODO confirm against scripts.vis.draw).
# Organization nodes are created in this notebook with the 'Organization'
# label, so that label (not 'Company') is the key that matches them.
options = {'Organization': 'name', 'Person': 'name', 'Fund': 'name', 'Office': 'description'}
draw(objects, options, physics = True)

## Build edges between companies and offices.

In [39]:
# Show the offices frame again: object_id and office_id are the two columns
# read by the edge-building cell that follows.
cb_offices.head()

Unnamed: 0,id,object_id,office_id,description,region,address1,address2,city,zip_code,state_code,country_code,latitude,longitude,created_at,updated_at
0,1,c:1,1,,Seattle,710 - 2nd Avenue,Suite 1100,Seattle,98104,WA,USA,47.6031,-122.333,,
1,2,c:3,3,Headquarters,SF Bay,4900 Hopyard Rd,Suite 310,Pleasanton,94588,CA,USA,37.6929,-121.905,,
2,3,c:4,4,,SF Bay,135 Mississippi St,,San Francisco,94107,CA,USA,37.7647,-122.395,,
3,4,c:5,5,Headquarters,SF Bay,1601 Willow Road,,Menlo Park,94025,CA,USA,37.416,-122.152,,
4,5,c:7,7,,SF Bay,Suite 200,654 High Street,Palo Alto,94301,CA,ISR,,,,


In [None]:
# Link each office node to its owning organization with a HAS_OFFICE
# relationship. Rows whose endpoints cannot be found are counted and reported.
unlinked_offices = 0
for i in range(len(cb_offices)):
    # Parse each row of cb_offices frame.
    try:
        office_id = int(cb_offices.at[i, 'office_id'])
        # Look up the office node itself (label 'Office', matched on the
        # office_id property it was created with). Searching 'Person' nodes
        # here and then using a leftover `office` variable from another cell
        # would attach every edge to the wrong node.
        office = objects.find_one('Office',
                                  property_key = 'office_id',
                                  property_value = office_id)

        company_id = cb_offices.at[i, 'object_id']
        # Organization nodes are created in this notebook with the
        # 'Organization' label, so that is the label searched.
        # NOTE(review): those nodes are populated from odm_organizations,
        # whose displayed columns include no 'id' holding values like 'c:1';
        # this lookup will find nothing until such an id is stored on the
        # nodes or mapped from another frame -- confirm the intended join key.
        company = objects.find_one('Organization',
                                   property_key = 'id',
                                   property_value = company_id)

        # Only build the edge when both endpoints were actually found.
        if office is None or company is None:
            unlinked_offices += 1
            continue

        r = Relationship(company, "HAS_OFFICE", office)

        objects.create(r)
    except Exception:
        # Best effort: count the row as unlinked and keep going.
        unlinked_offices += 1

if unlinked_offices:
    print('{} of {} offices could not be linked to an organization'.format(
        unlinked_offices, len(cb_offices)))

## Draw the company/office edges.

In [None]:
# Label -> property mapping handed to draw(); presumably the property is used
# as each node's caption (TODO confirm against scripts.vis.draw).
# Organization nodes are created in this notebook with the 'Organization'
# label, so that label (not 'Company') is the key that matches them.
options = {'Organization': 'name', 'Person': 'name', 'Fund': 'name', 'Office': 'description'}
draw(objects, options, physics = True)