# Crunchbase Snapshot © 2013 Data Analysis Notebook

## Import required modules.

In [53]:
%matplotlib inline
# import matplotlib.pyplot as plt
import os

import numpy as np
import pandas as pd
from pandas import DataFrame as df
from py2neo import authenticate, Graph, Node, Relationship
# import plotly as py
# from plotly.graph_objs import *

from scripts.vis import draw

# CSS files for more aesthetically pleasing inline tables.
from IPython.display import HTML

css_parts = []
for css_path in ('css/style-table.css', 'css/style-notebook.css'):
    # The context manager closes each stylesheet as soon as it has been read,
    # instead of leaving the handle open until garbage collection.
    with open(css_path) as css_file:
        css_parts.append(css_file.read())

# Concatenate in the order listed above: table styles first, then notebook styles.
css = ''.join(css_parts)

# Last expression of the cell: the rendered <style> element is the cell output.
HTML('<style>{}</style>'.format(css))

## Import Crunchbase 2013 Snapshot CSV files.

In [5]:
def _load_snapshot_table(table_name):
    """Read csv/<table_name>.csv into a data frame with 'id' as a regular column.

    DataFrame.from_csv was deprecated and later removed from pandas, so the
    file is read with read_csv instead. Column 0 is still used as the index
    and then moved back with reset_index('id'), which keeps the original
    check that the first CSV column is named 'id' (a KeyError is raised
    otherwise) and leaves a 0..n-1 row index behind.

    Date parsing of the index (which from_csv enabled by default) is
    deliberately left off: column 0 holds ids, not dates.
    """
    csv_path = 'csv/{}.csv'.format(table_name)
    return pd.read_csv(csv_path, index_col=0).reset_index('id')


cb_objects = _load_snapshot_table('cb_objects')
cb_acquisitions = _load_snapshot_table('cb_acquisitions')
cb_funding_rounds = _load_snapshot_table('cb_funding_rounds')
cb_funds = _load_snapshot_table('cb_funds')
cb_investments = _load_snapshot_table('cb_investments')
cb_ipos = _load_snapshot_table('cb_ipos')
cb_milestones = _load_snapshot_table('cb_milestones')
cb_offices = _load_snapshot_table('cb_offices')
cb_people = _load_snapshot_table('cb_people')
cb_relationships = _load_snapshot_table('cb_relationships')

## Visualize data frames.

In [257]:
# Preview the first rows of one frame at a time. To inspect another frame,
# replace cb_objects with any of: cb_acquisitions, cb_funding_rounds, cb_funds,
# cb_investments, cb_ipos, cb_milestones, cb_offices, cb_people,
# cb_relationships.
cb_objects.head(n=5)

Unnamed: 0,id,entity_type,entity_id,parent_id,name,normalized_name,permalink,category_code,status,founded_at,...,last_funding_at,funding_rounds,funding_total_usd,first_milestone_at,last_milestone_at,milestones,relationships,created_by,created_at,updated_at
0,c:1,Company,1,,Wetpaint,wetpaint,/company/wetpaint,web,operating,2005-10-17,...,2008-05-19,3.0,39750000.0,2010-09-05,2013-09-18,5.0,17.0,initial-importer,2007-05-25 06:51:27,2013-04-13 03:29:00
1,c:10,Company,10,,Flektor,flektor,/company/flektor,games_video,acquired,,...,,,,,,,6.0,initial-importer,2007-05-31 21:11:51,2008-05-23 23:23:14
2,c:100,Company,100,,There,there,/company/there,games_video,acquired,,...,,,,2003-02-01,2011-09-23,4.0,12.0,initial-importer,2007-08-06 23:52:45,2013-11-04 02:09:48
3,c:10000,Company,10000,,MYWEBBO,mywebbo,/company/mywebbo,network_hosting,operating,2008-07-26,...,,,,,,,,,2008-08-24 16:51:57,2008-09-06 14:19:18
4,c:10001,Company,10001,,THE Movie Streamer,the movie streamer,/company/the-movie-streamer,games_video,operating,2008-07-26,...,,,,,,,,,2008-08-24 17:10:34,2008-09-06 14:19:18


## Begin building the neo4j graph.

In [239]:
# Build a graph for the company objects.
# Credentials are taken from the NEO4J_USER / NEO4J_PASSWORD environment
# variables when they are set, so a real password never has to be saved in the
# notebook. The fallbacks are the values this notebook used previously.
objects = Graph(user=os.environ.get('NEO4J_USER', 'neo4j'),
                password=os.environ.get('NEO4J_PASSWORD', 'hello'))

# WARNING: delete_all() removes every node and relationship in the connected
# database. Only run this cell against a database dedicated to this notebook.
objects.delete_all()

## Add companies first.

In [240]:
def _to_native(value):
    """Return the built-in Python equivalent of a numpy scalar.

    Values that are not numpy scalars are returned unchanged. This covers
    every numpy scalar type (np.int32, np.bool_, ...) rather than np.int64
    alone.
    """
    if isinstance(value, np.generic):
        return value.item()
    return value


# Populate the graph with nodes from cb_objects data frame.
company_columns = list(cb_objects.columns.values)

# Walk the frame row by row by zipping its columns together; this avoids a
# separate Series lookup for every single cell.
for row_values in zip(*(cb_objects[column] for column in company_columns)):
    properties = {column: _to_native(value)
                  for column, value in zip(company_columns, row_values)}

    # Instantiate company node.
    company = Node('Company', name=properties['name'])

    # Add node attributes: every column of the row becomes a node property.
    for column, value in properties.items():
        company[column] = value

    # Add the node to the objects graph.
    objects.create(company)

## Draw the (lonely) graph of companies.

In [241]:
# Node labels to pass to draw(), each paired with a property name.
options = dict(Company='name')
draw(objects, options, physics=True)

## Add people to the graph.

In [255]:
# Show the first five rows of the people frame.
cb_people.head(n=5)

Unnamed: 0,id,object_id,first_name,last_name,birthplace,affiliation_name
0,1,p:2,Ben,Elowitz,,Blue Nile
1,2,p:3,Kevin,Flaherty,,Wetpaint
2,3,p:4,Raju,Vegesna,,Zoho
3,4,p:5,Ian,Wenig,,Zoho
4,5,p:6,Kevin,Rose,"Redding, CA",i/o Ventures


In [242]:
# Create one Person node per row of cb_people.
person_columns = ['object_id', 'first_name', 'last_name',
                  'birthplace', 'affiliation_name']
skipped_people = 0

for object_id, first_name, last_name, birthplace, affiliation_name in zip(
        *(cb_people[column] for column in person_columns)):
    # Rows without a usable first or last name cannot form the display name
    # (a missing CSV value is not a string), so they are skipped explicitly
    # instead of relying on a TypeError from the concatenation below.
    if not (isinstance(first_name, str) and isinstance(last_name, str)):
        skipped_people += 1
        continue

    try:
        person = Node('Person',
                      name=first_name + ' ' + last_name,
                      last_name=last_name,
                      first_name=first_name,
                      birthplace=birthplace,
                      object_id=object_id,
                      affiliation_name=affiliation_name)

        objects.create(person)
    except Exception:
        # Best effort: one bad row must not abort the import. Unlike a bare
        # `except:`, this still lets KeyboardInterrupt stop a long run.
        skipped_people += 1

# Make skipped rows visible instead of dropping them silently.
print('Skipped {} of {} people rows.'.format(skipped_people, len(cb_people)))

## Draw the graph of people and companies.

In [243]:
# Node labels to pass to draw(), each paired with a property name.
options = dict(Company='name', Person='name')
draw(objects, options, physics=True)

## Build relationships between people and companies.

In [244]:
# Build the relationships between people and companies.
linked_companies = 0
skipped_companies = 0

# Each cb_relationships row pairs a person's object id with the object id of
# the entity that person is related to.
for person_id, company_id in zip(cb_relationships['person_object_id'],
                                 cb_relationships['relationship_object_id']):
    try:
        person = objects.find_one('Person',
                                  property_key='object_id',
                                  property_value=person_id)
        # No matching Person node: nothing to link, and no need to query for
        # the company at all.
        if person is None:
            skipped_companies += 1
            continue

        company = objects.find_one('Company',
                                   property_key='id',
                                   property_value=company_id)
        # The related object is not a Company node in this graph.
        if company is None:
            skipped_companies += 1
            continue

        objects.create(Relationship(person, "WORKS_FOR", company))
        linked_companies += 1
    except Exception:
        # Best effort: one bad row must not abort the run. Unlike a bare
        # `except:`, this still lets KeyboardInterrupt stop a long loop.
        skipped_companies += 1

# Report the outcome so silently skipped rows do not go unnoticed.
print('Created {} person-company relationships; skipped {} rows.'.format(
    linked_companies, skipped_companies))

## Draw the graph, including new edges between people and companies.

In [245]:
# Node labels to pass to draw(), each paired with a property name.
options = dict(Company='name', Person='name')
draw(objects, options, physics=True)

## Add funds to the network.

In [256]:
# Show the first five rows of the funds frame.
cb_funds.head(n=5)

Unnamed: 0,id,fund_id,object_id,name,funded_at,raised_amount,raised_currency_code,source_url,source_description,created_at,updated_at
0,1,1,f:371,Second Fund,2008-12-16,300000000.0,USD,http://www.pehub.com/26194/dfj-dragon-raising-...,peHub,2008-12-17 03:07:16,2008-12-17 03:07:16
1,4,4,f:17,Sequoia Israel Fourth Fund,2008-12-17,200750000.0,USD,http://www.pehub.com/26725/sequoia-israel-rais...,Sequoia Israel Raises Fourth Fund,2008-12-18 22:04:42,2008-12-18 22:04:42
2,5,5,f:951,Tenth fund,2008-08-11,650000000.0,USD,http://venturebeat.com/2008/08/11/interwest-cl...,Venture Beat,2008-12-31 09:47:51,2008-12-31 09:47:51
3,6,6,f:192,New funds acquire,,625000000.0,USD,http://venturebeat.com/2008/07/28/us-venture-p...,U.S. Venture Partners raises $625M fund for ne...,2009-01-01 18:13:44,2009-01-01 18:16:27
4,7,7,f:519,Third fund,2008-05-20,200000000.0,USD,http://venturebeat.com/2008/05/20/disneys-stea...,Venture Beat,2009-01-03 09:51:58,2013-09-03 16:34:54


In [250]:
# Create one Fund node per row of cb_funds.
fund_columns = ['object_id', 'name', 'raised_amount', 'source_url']
skipped_funds = 0

for object_id, name, raised_amount, source_url in zip(
        *(cb_funds[column] for column in fund_columns)):
    try:
        fund = Node('Fund',
                    name=name,
                    object_id=object_id,
                    raised_amount=raised_amount,
                    source_url=source_url)

        objects.create(fund)
    except Exception:
        # Best effort: one bad row must not abort the import. Unlike a bare
        # `except:`, this still lets KeyboardInterrupt stop a long run.
        skipped_funds += 1

# Make skipped rows visible instead of dropping them silently.
print('Skipped {} of {} fund rows.'.format(skipped_funds, len(cb_funds)))

## Draw the graph, now with companies, people, and funds.

In [251]:
# Node labels to pass to draw(), each paired with a property name.
options = dict(Company='name', Person='name', Fund='name')
draw(objects, options, physics=True)

## Build relationships between people and funds.

In [252]:
# Build the relationships between people and funds.
linked_funds = 0
skipped_funds_links = 0

# Each cb_relationships row pairs a person's object id with the object id of
# the entity that person is related to.
for person_id, fund_id in zip(cb_relationships['person_object_id'],
                              cb_relationships['relationship_object_id']):
    try:
        person = objects.find_one('Person',
                                  property_key='object_id',
                                  property_value=person_id)
        # No matching Person node: nothing to link, and no need to query for
        # the fund at all.
        if person is None:
            skipped_funds_links += 1
            continue

        # Fund nodes are created with an 'object_id' property and no 'id'
        # property, so the lookup must use 'object_id'; matching on 'id'
        # never finds a fund and no edge would be created.
        fund = objects.find_one('Fund',
                                property_key='object_id',
                                property_value=fund_id)
        # The related object is not a Fund node in this graph.
        if fund is None:
            skipped_funds_links += 1
            continue

        objects.create(Relationship(person, "WORKS_FOR", fund))
        linked_funds += 1
    except Exception:
        # Best effort: one bad row must not abort the run. Unlike a bare
        # `except:`, this still lets KeyboardInterrupt stop a long loop.
        skipped_funds_links += 1

# Report the outcome so silently skipped rows do not go unnoticed.
print('Created {} person-fund relationships; skipped {} rows.'.format(
    linked_funds, skipped_funds_links))

## Draw the graph, including new edges between people and funds.

In [254]:
# Node labels to pass to draw(), each paired with a property name.
options = dict(Company='name', Person='name', Fund='name')
draw(objects, options, physics=True)