In [6]:
import os

import pandas as pd
from py2neo import Graph, authenticate, Relationship
# Connection credentials are taken from the NEO4J_USER / NEO4J_PASSWORD
# environment variables so they do not have to live in the notebook.
# The fallbacks are the values this notebook originally hardcoded, kept only
# so existing local setups keep working.
graph = Graph(
    user=os.environ.get("NEO4J_USER", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "admin"),
)

# Load the data

In [4]:

def loadData(filePath):
    """Parse a whitespace-delimited access-point log file into a DataFrame.

    Every line is split on whitespace. Field 0 becomes ``timestamp``, field 4
    becomes ``AP`` and fields 7 onward (re-joined with single spaces) become
    ``message``. Lines with fewer than five fields are skipped, and exact
    duplicate records are dropped (the index is not reset).

    Derived columns, all extracted with regular expressions:

    * ``Station`` / ``Action`` -- taken from ``Station <id> <action>`` in the
      message. If the message contains ``Disassociating <id>`` or
      ``Deauthenticating <id>``, that id becomes the station and the keyword
      becomes the action; ``Deauthenticating`` wins when both are present.
    * ``BuildingType``, ``BuildingNumber``, ``APnumber`` -- taken from the
      ``AP`` field via ``(\\w+)Bldg``, ``Bldg(\\d+)`` and ``AP(\\d+)``.
    * ``BuildingId`` -- ``BuildingType`` concatenated with ``BuildingNumber``.

    Parameters
    ----------
    filePath : str
        Path of the log file to read.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``timestamp``, ``AP``, ``message``, ``Station``,
        ``Action``, ``BuildingType``, ``BuildingNumber``, ``APnumber``,
        ``BuildingId``. Values that could not be extracted are null.
    """
    records = []
    with open(filePath) as f:
        for line in f:
            fields = line.split()
            # Field 4 (the AP) is the highest index that must exist; shorter
            # lines (including blank ones) carry no usable record.
            if len(fields) < 5:
                continue
            records.append([fields[0], fields[4], ' '.join(fields[7:])])

    # Build Dataframe
    df = pd.DataFrame(records, columns=['timestamp', 'AP', 'message'])
    df.drop_duplicates(inplace=True)

    df['Station'] = df.message.str.extract(r'Station (\w+)', expand=False)
    df['Action'] = df.message.str.extract(r'Station \w+ (\w+)', expand=False)

    # These messages name the station right after the keyword instead of after
    # "Station". Order matters: the later keyword overrides the earlier one.
    for keyword in ('Disassociating', 'Deauthenticating'):
        station_id = df.message.str.extract(keyword + r' (\w+)', expand=False)
        matched = station_id.notnull()
        df.loc[matched, 'Station'] = station_id[matched]
        df.loc[matched, 'Action'] = keyword

    df['BuildingType'] = df.AP.str.extract(r'(\w+)Bldg', expand=False)
    df['BuildingNumber'] = df.AP.str.extract(r'Bldg(\d+)', expand=False)
    df['APnumber'] = df.AP.str.extract(r'AP(\d+)', expand=False)
    df['BuildingId'] = df.BuildingType + df.BuildingNumber
    return df

In [5]:
# NOTE(review): absolute, machine-specific path -- point it at the location of
# the log file on the machine running the notebook.
fname = "/home/michael/Documents/syslog-v3.3/20010411.log"
# Parse the log into the DataFrame used by the cells below.
df = loadData(fname)

# Write the data into neo4j

In [40]:
# Erase the database: match every node and delete it together with its
# relationships. Destructive -- this empties whatever database `graph` is
# connected to.
graph.run('''
MATCH (n)
DETACH DELETE n
''')

<py2neo.database.Cursor at 0x7ff9a302eed0>

In [41]:
def write_df(df):
    """Write the parsed log DataFrame into the graph database.

    Uses the module-level ``graph`` connection. Every write is a Cypher
    ``MERGE``, so existing nodes and relationships are reused, not duplicated.

    Creates, in this order:

    1. ``DEPARTEMENT`` nodes (``Type`` = distinct ``BuildingType`` values),
       ``BUILDING`` nodes (``Number`` = distinct ``BuildingId`` values) and
       ``AP`` nodes (``Number`` = distinct ``AP`` values).
    2. ``(DEPARTEMENT)-[:HAS_BUILDING]->(BUILDING)`` and
       ``(BUILDING)-[:HAS_AP]->(AP)``, one ``MERGE`` per distinct pair.
    3. ``STATION`` nodes (``Number`` = distinct ``Station`` values) and an
       index on ``:STATION(Number)``.
    4. ``(STATION)-[:INTERACT {Action, Timestamp}]->(AP)``, one per row that
       has a station.

    Null identifiers are skipped everywhere: they can never match a node, and
    merging a node on a null/NaN property would only create junk nodes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``BuildingType``, ``BuildingId``, ``AP``,
        ``Station``, ``Action`` and ``timestamp``.

    Returns
    -------
    None
        Progress and node counts are printed; the counts come from the
        database and therefore include nodes that already existed.
    """
    def distinct(column):
        # Non-null distinct values of a column, in order of first appearance.
        return list(df[column].dropna().unique())

    def merge_nodes(query, values):
        # Run a single-parameter MERGE query once per value.
        for value in values:
            graph.run(query, {'N': value})

    def first_record(query):
        # Run a query and return its first result row as a dict.
        return graph.run(query).data()[0]

    merge_nodes('''
        MERGE (:DEPARTEMENT {Type: {N}})
        ''', distinct('BuildingType'))

    x = first_record('''
        MATCH (d:DEPARTEMENT)
        RETURN COUNT(d) AS DEPARTEMENT
    ''')
    print("Writing %d departments" % x['DEPARTEMENT'])

    print("writing buildings")
    merge_nodes('''
        MERGE (:BUILDING {Number: {N}})
        ''', distinct('BuildingId'))

    x = first_record('''
        MATCH (d:BUILDING)
        RETURN COUNT(d) AS BUILDING
    ''')
    print("Writing %d buildings" % x['BUILDING'])

    print("writing AP")
    merge_nodes('''
        MERGE (:AP {Number: {N}})
        ''', distinct('AP'))

    x = first_record('''
        MATCH (d:AP)
        RETURN COUNT(d) AS AP
    ''')
    print("Writing %d APs" % x['AP'])

    print("Create relationships...")
    # The structural relationships depend only on the (parent, child) pair, so
    # one MERGE per distinct pair is enough; repeating it per log row gives
    # the same graph with far more round-trips.
    building_links = df[['BuildingType', 'BuildingId']].dropna().drop_duplicates()
    for dep, build in building_links.itertuples(index=False, name=None):
        graph.run('''

        MATCH (d:DEPARTEMENT), (b:BUILDING)
        WHERE d.Type = {DEP} AND b.Number = {BUILD}
        MERGE (d)-[:HAS_BUILDING]->(b)

        ''', {'DEP': dep, 'BUILD': build})

    ap_links = df[['BuildingId', 'AP']].dropna().drop_duplicates()
    for build, ap_name in ap_links.itertuples(index=False, name=None):
        graph.run('''

        MATCH (b:BUILDING), (a:AP)
        WHERE b.Number = {BUILD} AND a.Number = {AP}
        MERGE (b)-[:HAS_AP]->(a)

        ''', {'BUILD': build, 'AP': ap_name})

    print("writing stations")
    merge_nodes('''
        MERGE (:STATION {Number: {N}})
        ''', distinct('Station'))

    # create index
    graph.run('''
    CREATE INDEX ON :STATION(Number)
    ''')

    print("Create relationships...")
    # Rows without a station cannot match any STATION node, so skip them
    # instead of paying for a query that does nothing.
    interactions = df.loc[df['Station'].notnull(),
                          ['Station', 'AP', 'Action', 'timestamp']]
    for station, ap_name, action, timestamp in interactions.itertuples(index=False, name=None):
        graph.run('''

        MATCH (s:STATION), (a:AP)
        USING INDEX s:STATION(Number)
        WHERE s.Number = {NUM} AND a.Number = {AP}
        MERGE (s)-[:INTERACT {Action: {R}, Timestamp: toInteger({T})}]->(a)

        ''', {'NUM': station, 'AP': ap_name, 'R': action, 'T': timestamp})
    


In [42]:
# Populate the graph from the parsed log DataFrame.
write_df(df)

Writing 4 departments
writing buildings
Writing 9 buildings
writing AP
Writing 28 APs
Create relationships...
writing stations
Create relationships...


In [44]:
# Count the relationships that point from a STATION node to an AP node and
# show the first (only) result row.
graph.run('''
MATCH (s:STATION)-[r]->(:AP)
RETURN COUNT(r) AS ACTIONS
''').data()[0]

{u'ACTIONS': 9078}