In [1]:
import pandas as pd
import requests
from pathlib import Path
from zipfile import ZipFile
from neo4j import GraphDatabase
import getpass

In [2]:
def download_data(network_name, data_directory, timeout=60):
    """Download one animal-network archive and unzip it.

    The archive is fetched from the nrvis.com download host (the file store
    behind the http://networkrepository.com/ listing), saved as
    ``<data_directory>/<network_name>/<network_name>.zip`` and extracted
    into that same directory.

    Args:
        network_name (str): the name of the animal network to get
        data_directory (Path or str): the parent directory in which to save
            downloads; it is created if it does not exist yet
        timeout (float): seconds to wait for the server before giving up

    Returns:
        Path: the child directory where the data was saved

    Raises:
        requests.HTTPError: if the server answers with an error status
        zipfile.BadZipFile: if the downloaded payload is not a zip archive
    """
    download_url = 'http://nrvis.com/download/data/asn/{}.zip'
    download_dir = Path(data_directory).joinpath(network_name)

    response = requests.get(download_url.format(network_name), timeout=timeout)
    # Fail here on 404/500 rather than writing an HTML error page to disk
    # and hitting a confusing BadZipFile further down.
    response.raise_for_status()

    # parents=True so a fresh checkout (no Data directory yet) still works.
    download_dir.mkdir(parents=True, exist_ok=True)
    zip_path = download_dir.joinpath(network_name + '.zip')
    # write_bytes opens, writes and closes the file in one step.
    zip_path.write_bytes(response.content)

    with ZipFile(zip_path) as zip_obj:
        zip_obj.extractall(download_dir)
    print(f'Downloaded {network_name} to {download_dir}')
    return download_dir

In [3]:
def load_neo4j(graph_name, source_dir, uri, user, pwd):
    """Parse an edge-list file and upload its nodes and relationships to Neo4j.

    The first ``*.edges`` file found in ``source_dir`` is read as a
    space-separated table without a header. Columns 0 and 1 are the two
    endpoints of each edge; when the file has exactly three columns, the
    third is stored as the ``weight`` property of the relationship.
    Everything is written in a single transaction, which is rolled back if
    any statement fails.

    Args:
        graph_name (str): name of the graph being loaded
        source_dir (Path or str): directory containing the ``.edges`` file
        uri (str): URI for Neo4j server
        user (str): user to load data
        pwd (str): password for neo4j

    Raises:
        FileNotFoundError: if ``source_dir`` contains no ``*.edges`` file
    """
    # sorted() makes the choice deterministic if several files are present.
    edge_files = sorted(Path(source_dir).glob('*.edges'))
    if not edge_files:
        raise FileNotFoundError(f'No .edges file found in {source_dir}')
    edges_df = pd.read_csv(edge_files[0], sep=' ', header=None)
    nodes = pd.concat([edges_df[0], edges_df[1]]).unique()

    node_query = '''MERGE (n:Animal {sourceGraph:$graph_name, animalNumber:$value})'''
    no_weight_query = '''MATCH (n1:Animal {sourceGraph: $graph_name, animalNumber:$col1}), 
        (n2:Animal {sourceGraph: $graph_name, animalNumber:$col2})
        WITH n1, n2
        MERGE (n1)-[iw:INTERACTS_WITH]->(n2)
    '''
    add_weight = ' SET iw.weight = $weight'

    # Every row has the same number of columns, so decide once whether the
    # file carries weights instead of re-checking inside the loop.
    has_weight = edges_df.shape[1] == 3
    edge_query = no_weight_query + add_weight if has_weight else no_weight_query

    driver = GraphDatabase.driver(uri, auth=(user, pwd))
    try:
        with driver.session() as session:
            tx = session.begin_transaction()
            try:
                for node in nodes:
                    tx.run(node_query,
                           parameters={'graph_name': graph_name, 'value': float(node)})
                for row in edges_df.itertuples(index=False, name=None):
                    params = {'graph_name': graph_name,
                              'col1': float(row[0]),
                              'col2': float(row[1])}
                    if has_weight:
                        params['weight'] = float(row[2])
                    tx.run(edge_query, parameters=params)
                tx.commit()
            except Exception:
                # Leave the database untouched when the load fails part-way.
                tx.rollback()
                raise
    finally:
        # Release the connection pool; the loop cell opens one driver per graph.
        driver.close()
    # Report the directory that was actually passed in, not a notebook global.
    print(f'Loaded {edges_df.shape[0]} rows from {source_dir}')

### Get the list of all graph names from http://networkrepository.com/asn.php

In [4]:
# read_html returns one DataFrame per HTML table on the page; keep the first.
# NOTE(review): this fetches the live page on every run — presumably the
# network listing is the first table; verify if the site layout changes.
repos_df = pd.read_html('http://networkrepository.com/asn.php')[0]

In [5]:
# Display the 'Graph Name' column; these values are later passed to
# download_data / load_neo4j as the network names.
repos_df['Graph Name']

0      aves-barn-swallow-contact-network
1         aves-barn-swallow-non-physical
2             aves-geese-female-foraging
3               aves-geese-male-foraging
4                aves-hens-pecking-order
                     ...                
811    reptilia-tortoise-network-sg-2000
812         reptilia-tortoise-network-sg
813    reptilia-tortoise-network-sl-2012
814    reptilia-tortoise-network-sl-2013
815         reptilia-tortoise-network-sl
Name: Graph Name, Length: 816, dtype: object

### Set constants for the environment

Data will be downloaded to a directory called Data in the same location as this notebook.

In [6]:
# Parent directory for all downloads: a 'Data' folder under the current
# working directory of the kernel.
data_dir = Path.cwd().joinpath('Data')

In [7]:
# Prompt for the Neo4j password interactively so it is never written into
# the notebook source.
pwd = getpass.getpass("Neo4j password")

Neo4j password········


### Load a single network

Load the vampire bats foodsharing graph as an example

In [8]:
# Name of the example network, used both as the download name and as the
# graph name when loading into Neo4j.
vampire_bats = 'mammalia-vampire-bats-foodsharing'

In [9]:
# Download the example network, then load the extracted edge list into Neo4j.
# NOTE(review): the URI below contains a backslash where a Neo4j URI
# normally has '//' and uses port 7678 (the usual Bolt port is 7687) —
# confirm against the target server; 'neo4j://localhost:7687' may be intended.
download_dir = download_data(vampire_bats, data_dir)
load_neo4j(vampire_bats, download_dir, 'neo4j:\\localhost:7678', 'neo4j', pwd)

Downloaded mammalia-vampire-bats-foodsharing to C:\Users\smithnathana\Documents\Animal_Networks\Data\mammalia-vampire-bats-foodsharing
Loaded 72 rows from C:\Users\smithnathana\Documents\Animal_Networks\Data\mammalia-vampire-bats-foodsharing


### Load the first 20 animal networks

In [10]:
# Download and load the first 20 networks from the repository listing, one
# after another.
# NOTE(review): the URI below contains a backslash where a Neo4j URI
# normally has '//' and uses port 7678 (the usual Bolt port is 7687) —
# confirm against the target server; 'neo4j://localhost:7687' may be intended.
for graph_name in repos_df['Graph Name'][:20]:
    download_dir = download_data(graph_name, data_dir)
    load_neo4j(graph_name, download_dir, 'neo4j:\\localhost:7678', 'neo4j', pwd)

Downloaded aves-barn-swallow-contact-network to C:\Users\smithnathana\Documents\Animal_Networks\Data\aves-barn-swallow-contact-network
Loaded 53 rows from C:\Users\smithnathana\Documents\Animal_Networks\Data\aves-barn-swallow-contact-network
Downloaded aves-barn-swallow-non-physical to C:\Users\smithnathana\Documents\Animal_Networks\Data\aves-barn-swallow-non-physical
Loaded 122 rows from C:\Users\smithnathana\Documents\Animal_Networks\Data\aves-barn-swallow-non-physical
Downloaded aves-geese-female-foraging to C:\Users\smithnathana\Documents\Animal_Networks\Data\aves-geese-female-foraging
Loaded 190 rows from C:\Users\smithnathana\Documents\Animal_Networks\Data\aves-geese-female-foraging
Downloaded aves-geese-male-foraging to C:\Users\smithnathana\Documents\Animal_Networks\Data\aves-geese-male-foraging
Loaded 253 rows from C:\Users\smithnathana\Documents\Animal_Networks\Data\aves-geese-male-foraging
Downloaded aves-hens-pecking-order to C:\Users\smithnathana\Documents\Animal_Networks\