Some pre-loading of library steps below

In [6]:
%matplotlib inline

import numpy as np
import pandas as pd
import networkx as nx
import requests
import matplotlib.pyplot as plt

# set some nicer defaults for matplotlib
from matplotlib import rcParams

#these colors come from colorbrewer2.org. Each is an RGB triplet
dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
                (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
                (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
                (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
                (0.4, 0.6509803921568628, 0.11764705882352941),
                (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
                (0.6509803921568628, 0.4627450980392157, 0.11372549019607843),
                (0.4, 0.4, 0.4)]

rcParams['figure.figsize'] = (10, 6)
rcParams['figure.dpi'] = 150
rcParams['axes.color_cycle'] = dark2_colors
rcParams['lines.linewidth'] = 2
rcParams['axes.grid'] = False
rcParams['axes.facecolor'] = 'white'
rcParams['font.size'] = 14
rcParams['patch.edgecolor'] = 'none'

def remove_border(axes=None, top=False, right=False, left=True, bottom=True):
    """
    Minimize chartjunk by stripping out unnecessary plot borders and axis ticks
    
    The top/right/left/bottom keywords toggle whether the corresponding plot border is drawn
    """
    ax = axes or plt.gca()
    ax.spines['top'].set_visible(top)
    ax.spines['right'].set_visible(right)
    ax.spines['left'].set_visible(left)
    ax.spines['bottom'].set_visible(bottom)
    
    #turn off all ticks
    ax.yaxis.set_ticks_position('none')
    ax.xaxis.set_ticks_position('none')
    
    #now re-enable visibles
    if top:
        ax.xaxis.tick_top()
    if bottom:
        ax.xaxis.tick_bottom()
    if left:
        ax.yaxis.tick_left()
    if right:
        ax.yaxis.tick_right()

In [7]:
url = 'http://opsahl.co.uk/tnet/datasets/OF_longitudinal_weightedchar.txt'
r = requests.get(url)

u'"2004-05-14 20:53:16" 201 3 23\n"2004-05-14 20:54:08" 187 3 63\n"2004-05-14 20:55:40" 138 2 6\n"2004-05-14 21:00:04" 345 3 38\n"2004-05-14 21:02:17" 233 3 60\n"2004-05-14 21:02:21" 140 4 7\n"2004-05-14 21:03:35" 307 3 33\n"2004-05-14 21:06:14" 352 3 12\n"2004-05-14 21:09:11" 269 3 42\n"2004-05-14 21:14:15" 187 5 61\n"2004-05-14 21:14:29" 269 5 75\n"2004-05-14 21:15:37" 201 5 101\n"2004-05-14 21:18:57" 233 5 28\n"2004-05-14 21:39:16" 281 3 11\n"2004-05-14 21:44:30" 44 7 9\n"2004-05-14 21:50:12" 239 3 36\n"2004-05-14 21:55:20" 239 5 102\n"2004-05-14 21:55:30" 213 9 7\n"2004-05-14 21:55:52" 402 9 4\n"2004-05-14 21:56:02" 402 9 33\n"2004-05-14 21:56:40" 402 9 4\n"2004-05-14 21:56:47" 356 9 15\n"2004-05-14 21:56:51" 402 9 14\n"2004-05-14 21:56:59" 402 9 33\n"2004-05-14 21:57:05" 402 9 16\n"2004-05-14 22:00:53" 216 10 5\n"2004-05-14 22:01:47" 187 10 5\n"2004-05-14 22:02:50" 201 10 25\n"2004-05-14 22:06:35" 356 10 2\n"2004-05-14 22:18:06" 269 10 38\n"2004-05-14 22:26:31" 345 10 3\n"2004-05-

In [11]:
# data_time = pd.read_csv(r.text, delim_whitespace=True, header=None)
# head(data_time)

In [12]:
# Create a dataframe from requests 
# requests_df = pd.DataFrame(r.text)
from bs4 import BeautifulSoup
soup = BeautifulSoup(r.text)

In [19]:
import string
answer = string.split(r.text, '\n')
type(answer)
answer[:10]

[u'"2004-05-14 20:53:16" 201 3 23',
 u'"2004-05-14 20:54:08" 187 3 63',
 u'"2004-05-14 20:55:40" 138 2 6',
 u'"2004-05-14 21:00:04" 345 3 38',
 u'"2004-05-14 21:02:17" 233 3 60',
 u'"2004-05-14 21:02:21" 140 4 7',
 u'"2004-05-14 21:03:35" 307 3 33',
 u'"2004-05-14 21:06:14" 352 3 12',
 u'"2004-05-14 21:09:11" 269 3 42',
 u'"2004-05-14 21:14:15" 187 5 61']

In [27]:
data_array = []
for lines in answer:
    line = lines.split(" ")
    data_array.append(line)
data_array

[[u'"2004-05-14', u'20:53:16"', u'201', u'3', u'23'],
 [u'"2004-05-14', u'20:54:08"', u'187', u'3', u'63'],
 [u'"2004-05-14', u'20:55:40"', u'138', u'2', u'6'],
 [u'"2004-05-14', u'21:00:04"', u'345', u'3', u'38'],
 [u'"2004-05-14', u'21:02:17"', u'233', u'3', u'60'],
 [u'"2004-05-14', u'21:02:21"', u'140', u'4', u'7'],
 [u'"2004-05-14', u'21:03:35"', u'307', u'3', u'33'],
 [u'"2004-05-14', u'21:06:14"', u'352', u'3', u'12'],
 [u'"2004-05-14', u'21:09:11"', u'269', u'3', u'42'],
 [u'"2004-05-14', u'21:14:15"', u'187', u'5', u'61'],
 [u'"2004-05-14', u'21:14:29"', u'269', u'5', u'75'],
 [u'"2004-05-14', u'21:15:37"', u'201', u'5', u'101'],
 [u'"2004-05-14', u'21:18:57"', u'233', u'5', u'28'],
 [u'"2004-05-14', u'21:39:16"', u'281', u'3', u'11'],
 [u'"2004-05-14', u'21:44:30"', u'44', u'7', u'9'],
 [u'"2004-05-14', u'21:50:12"', u'239', u'3', u'36'],
 [u'"2004-05-14', u'21:55:20"', u'239', u'5', u'102'],
 [u'"2004-05-14', u'21:55:30"', u'213', u'9', u'7'],
 [u'"2004-05-14', u'21:55:52"',

In [30]:
#Creating an empty dataframe to populate in python
index = np.arange(len(answer)) #Number of indexes needed
columns = ['date','time', 'person', 'forum', 'unknown']
df = pd.DataFrame(columns=columns, index = index)

#create data frame from array
for val, item in enumerate(data_array):
    df.ix[val] = item

In [31]:
df.head()

Unnamed: 0,date,time,person,forum,unknown
0,"""2004-05-14","20:53:16""",201,3,23
1,"""2004-05-14","20:54:08""",187,3,63
2,"""2004-05-14","20:55:40""",138,2,6
3,"""2004-05-14","21:00:04""",345,3,38
4,"""2004-05-14","21:02:17""",233,3,60


In [47]:
#Test writing to database in cypher language
from py2neo import neo4j, authenticate, Graph, Node, Relationship

# set up authentication parameters
authenticate("localhost:7474", "user", "pass")

#Creates the connection to the graph which defaults to the localhost
remote_graph = Graph("http://localhost:7474/db/data/")

# graph = Graph("http://user:password@localhost:7474/db/data/")

# #This begins a batch process of writing to the Neo4j database
# tx = remote_graph.cypher.begin()

# statement = "CREATE  (A:Person {time: {B}})" #Cypher statement to create nodes
# for line in data_array:
#     for date_a, time_a, person_A, forum_a, unknown_a in line:
#         tx.append(statement, {"A": person_A, "B": time_a})
# tx.commit()


NameError: name 'authenticate' is not defined

In [46]:
alice = Node("Person", name="Alice")
bob = Node("Person", name="Bob")
alice_knows_bob = Relationship(alice, "KNOWS", bob)
remote_graph.create(alice_knows_bob)

Unauthorized: http://localhost:7474/db/data/