In [25]:
import pandas as pd
import numpy as np
import torch
from torch_geometric.data import Data

import torch
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.nn import GCNConv
import torch.utils.data

from sklearn.mixture import GaussianMixture 

import networkx as nx
import os

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def loadData(file):
    """Read the edge-list CSV and parse its ``date`` column.

    Parameters
    ----------
    file : str or file-like
        Anything ``pd.read_csv`` accepts; the table must contain a ``date`` column.

    Returns
    -------
    pandas.DataFrame
        The loaded table with ``date`` converted by ``pd.to_datetime``.

    Prints the raw table shape and the number of distinct dates as a quick sanity check.
    """
    frame = pd.read_csv(file)
    print('Raw shape: ', frame.shape)

    frame['date'] = pd.to_datetime(frame['date'])
    distinctDays = set(frame['date'])
    print('Days: ', len(distinctDays))
    return frame

In [3]:
def getTimeSeries(df):
    """Pivot the long edge table into one row per date and one column per edge.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date``, ``start_id``, ``end_id`` and ``amount`` columns.

    Returns
    -------
    (pandas.DataFrame, numpy.ndarray)
        The pivot table (index: ``date``; columns: ``(start_id, end_id)`` pairs;
        cells: summed ``amount``, 0 where a pair has no rows on a date) and the
        array of index values (the dates).
    """
    # The string 'sum' selects pandas' built-in groupby sum. Passing the callable
    # np.sum is deprecated (FutureWarning in pandas >= 2.1).
    table = pd.pivot_table(
        df,
        values='amount',
        index=['date'],
        columns=['start_id', 'end_id'],
        aggfunc='sum',
        fill_value=0,
    )
    return table, table.index.values

In [9]:
def getMatrixData():
    """Load the configured CSV and return it as a dense date-by-edge matrix.

    Reads the module-level ``dataDir`` and ``dataFile`` (joined by plain string
    concatenation), loads the file with ``loadData`` and pivots it with
    ``getTimeSeries``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The pivot table values (rows: dates, columns: edge pairs) and the dates.
    """
    rawFrame = loadData(dataDir + dataFile)
    pivoted, dates = getTimeSeries(rawFrame)
    matrix = pivoted.values
    print('Matrix Shape: ', matrix.shape)
    return matrix, dates

In [21]:
def getComboPartition(G,maxcom,comboDir):
    """Partition the nodes of ``G`` into communities with the external ``comboCPP`` binary.

    The graph is written as a Pajek-style arc list to ``<comboDir>/temp/temp.net``,
    ``<comboDir>/comboCPP`` is run on that file, and the result file it leaves next
    to the input is read back (one community label per line, in node-id order).

    Parameters
    ----------
    G : graph
        Object exposing ``nodes()`` and ``edges(data=True)``; every edge must carry
        a ``'weight'`` attribute.
    maxcom : number
        Upper bound on the number of communities. It is passed to the binary as an
        extra argument only when it is finite (``maxcom < np.inf``).
    comboDir : str
        Directory containing the ``comboCPP`` executable. A trailing slash is optional.

    Returns
    -------
    dict
        Maps each node of ``G`` to its integer community label.

    Raises
    ------
    FileNotFoundError
        If the executable is missing or produced no result file.
    ValueError
        If the result file does not hold exactly one label per node.
    """
    # Local import: subprocess is not among the notebook's top-level imports.
    import subprocess

    # The .net file uses consecutive integer ids; keep the id -> node mapping so
    # labels can be translated back, and the inverse for writing edges.
    nodes = dict(enumerate(G.nodes()))
    nodenum = {node: idx for idx, node in nodes.items()}

    tempDir = os.path.join(comboDir, 'temp')
    os.makedirs(tempDir, exist_ok=True)
    tempNetFile = os.path.join(tempDir, 'temp.net')
    # splitext strips only the final extension. The previous split('.')/join also
    # dropped any dot occurring in a directory name, yielding a wrong path.
    # NOTE(review): presumably comboCPP names its output "<input without extension>
    # _comm_comboC++.txt" -- confirm against the binary.
    partitionFile = os.path.splitext(tempNetFile)[0] + '_comm_comboC++.txt'

    print("Nodes: ",len(G.nodes()))
    print("Edges: ",len(G.edges()))
    with open(tempNetFile, 'w') as netFile:
        netFile.write('*Arcs\n')
        for source, target, attrs in G.edges(data=True):
            netFile.write('{0} {1} {2}\n'.format(nodenum[source],nodenum[target],attrs['weight']))

    # Remove any result left by an earlier run, so a failed run cannot be mistaken
    # for a fresh partition.
    if os.path.exists(partitionFile):
        os.remove(partitionFile)

    # An argument list avoids shell parsing, so paths containing spaces or shell
    # metacharacters are passed through intact.
    command = [os.path.join(comboDir, 'comboCPP'), tempNetFile]
    if maxcom<np.inf:
        command.append('{0}'.format(maxcom))
    subprocess.run(command)

    # Line i of the result holds the community of node id i.
    with open(partitionFile, 'r') as resultFile:
        labels = [int(line) for line in resultFile]
    if len(labels) != len(nodes):
        raise ValueError(
            'combo returned {0} labels for {1} nodes'.format(len(labels), len(nodes)))

    partition = {nodes[idx]: label for idx, label in enumerate(labels)}
    print("Communities: ",len(set(partition.values())))
    return partition

In [22]:
def makeGraphfromDf(df):
    """Build a weighted directed graph from an edge table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``start_id``, ``end_id`` and ``amount`` columns; each row
        becomes one edge ``start_id -> end_id`` with ``weight=amount``.
        If a (start, end) pair occurs more than once, the last row wins
        (``DiGraph.add_edge`` overwrites the attribute).

    Returns
    -------
    networkx.DiGraph
    """
    G = nx.DiGraph()
    # The former nx.set_edge_attributes(G, 'weight', 0) call was dropped: it ran
    # on a graph with no edges and therefore set nothing.
    # Iterate the columns positionally rather than by index label, so filtered
    # frames and frames with a non-unique index are handled correctly.
    for start, end, amount in zip(df['start_id'], df['end_id'], df['amount']):
        G.add_edge(start, end, weight=amount)
    return G

In [19]:
def getCommunityPartition(aggregatedData,maxcom):
    """Detect communities in the graph described by an edge table.

    Builds a directed graph from ``aggregatedData`` with ``makeGraphfromDf`` and
    partitions it with ``getComboPartition``, using the module-level ``comboDir``.

    Returns the node -> community-label dict produced by ``getComboPartition``.
    """
    return getComboPartition(makeGraphfromDf(aggregatedData), maxcom, comboDir)

In [10]:
# Input locations.
# NOTE(review): dataDir is an absolute path on one machine; adjust before running elsewhere.
# Keep the trailing '/': getMatrixData builds the file path as dataDir + dataFile.
dataDir = '/home/urwa/Documents/Projects/AnomalyDetection/Pipeline/data/'
# Edge-list CSV loaded by getMatrixData (via loadData).
dataFile = '20190402_TaipeiEdgesDatewise.csv'
# Path to the events table; not referenced by any other cell visible in this notebook.
events_data =dataDir+'TaipeiEvents.csv'

In [11]:
# Load the date-by-edge flow matrix and the matching dates, then convert the
# matrix to float so the later per-community accumulation works on floats.
matrix, dates = getMatrixData()
matrix = np.array(matrix, dtype=float)
matrix.shape

Raw shape:  (7374816, 5)
Days:  638
Matrix Shape:  (638, 11664)


(638, 11664)

In [12]:
# One row per matrix column (edge pair), with the total flow over all dates.
# NOTE(review): this relies on the rows of ../columnInfo.csv being in the same
# order as the columns of `matrix` -- nothing here verifies that.
columnInfo = (
    pd.read_csv('../columnInfo.csv')
    .assign(amount=matrix.sum(axis=0))
    [['start_id', 'end_id', 'amount']]
)
columnInfo.head(3)

Unnamed: 0,start_id,end_id,amount
0,BL01,BL01,26603.0
1,BL01,BL02,110416.0
2,BL01,BL03,90774.0


In [17]:
# Upper bound on the number of communities. With np.inf, getComboPartition
# passes no bound to the comboCPP binary.
maxcom = np.inf
# Directory holding the comboCPP executable; getComboPartition also writes its
# temporary network file under <comboDir>temp/.
# NOTE(review): absolute path on one machine; adjust before running elsewhere.
comboDir = '/home/urwa/Documents/Projects/AnomalyDetection/Pipeline/combo/'

In [26]:
# Top-level community detection on the graph built from columnInfo (one edge per
# start_id/end_id pair, weighted by total amount). Result: node id -> community label.
partition = getCommunityPartition(columnInfo,maxcom)

Nodes:  108
Edges:  11664
Communities:  4


In [27]:
# Tag both endpoints of every edge with their top-level community label.
# __getitem__ (rather than passing the dict itself) keeps the KeyError for a
# node that is missing from the partition.
lookupCommunity = partition.__getitem__
columnInfo['start_community'] = columnInfo['start_id'].map(lookupCommunity)
columnInfo['end_community'] = columnInfo['end_id'].map(lookupCommunity)
columnInfo.head(3)

Unnamed: 0,start_id,end_id,amount,start_community,end_community
0,BL01,BL01,26603.0,3,3
1,BL01,BL02,110416.0,3,3
2,BL01,BL03,90774.0,3,3


In [28]:
# Distinct top-level community labels, in ascending order.
# sorted() fixes the order explicitly: the sub-partitions, their label offsets
# and the final nested labels are all derived by iterating this list, so they
# should not depend on set iteration order.
communities = sorted(set(partition.values()))
communities

[0, 1, 2, 3]

In [29]:
# Second level: re-run community detection inside each top-level community,
# using only the edges whose two endpoints both belong to that community.
pList = []
for community in communities:
    startsInside = columnInfo['start_community'] == community
    endsInside = columnInfo['end_community'] == community
    subgraph = makeGraphfromDf(columnInfo[startsInside & endsInside])
    pList.append(getComboPartition(subgraph, maxcom, comboDir))

Nodes:  25
Edges:  625
Communities:  2
Nodes:  22
Edges:  484
Communities:  2
Nodes:  39
Edges:  1521
Communities:  4
Nodes:  22
Edges:  484
Communities:  2


In [30]:
# Label offset for each sub-partition, so labels from different top-level
# communities do not collide: offset k is the running total of (max label + 1)
# over the sub-partitions before k. The last sub-partition needs no successor.
add = [0]
for subPartition in pList[:-1]:
    add.append(add[-1] + max(subPartition.values()) + 1)
add

[0, 2, 4, 8]

In [31]:
# Merge the per-community sub-partitions into one node -> label dict, shifting
# each sub-partition's labels by its offset so they are globally unique.
# pList is deliberately left untouched: shifting it in place made this cell
# non-idempotent (a second run, or re-running the offsets cell afterwards,
# would shift the labels again).
nestedPartition = {}
for offset, subPartition in zip(add, pList):
    for node, label in subPartition.items():
        nestedPartition[node] = label + offset

In [32]:
# Replace the community columns with the nested (second-level) labels.
# NOTE: this overwrites the top-level labels assigned earlier, so the cell that
# builds pList must not be re-run after this one without re-deriving them.
lookupNested = nestedPartition.__getitem__
columnInfo['start_community'] = columnInfo['start_id'].map(lookupNested)
columnInfo['end_community'] = columnInfo['end_id'].map(lookupNested)
columnInfo.head(3)

Unnamed: 0,start_id,end_id,amount,start_community,end_community
0,BL01,BL01,26603.0,8,8
1,BL01,BL02,110416.0,8,8
2,BL01,BL03,90774.0,8,8


In [33]:
# Aggregate the date-by-edge matrix into a date-by-(community pair) matrix.
# Column n_com * s + e holds the summed flow from community s to community e.

# Map every label to a dense position 0..n_com-1. When the labels are already
# exactly 0..n_com-1 this is the identity. Gaps in the labels, or a label that
# appears only as an end community, would otherwise put flows in the wrong column.
labels = np.unique(columnInfo[['start_community', 'end_community']].to_numpy())
n_com = len(labels)
startIdx = np.searchsorted(labels, columnInfo['start_community'].to_numpy())
endIdx = np.searchsorted(labels, columnInfo['end_community'].to_numpy())
targetCols = n_com * startIdx + endIdx

# Row i of columnInfo must describe column i of matrix.
if len(targetCols) != matrix.shape[1]:
    raise ValueError('columnInfo has {0} rows but matrix has {1} columns'.format(
        len(targetCols), matrix.shape[1]))

newmatrix = np.zeros((matrix.shape[0], n_com**2))
# Explicit loop: several edges map to the same target column, which a single
# fancy-indexed += would not accumulate.
for i, col in enumerate(targetCols):
    newmatrix[:, col] += matrix[:, i]

newmatrix.shape

(638, 100)

In [35]:
# Persist the community-aggregated matrix. np.save appends '.npy', so this writes
# communityAggregatedMatrix.npy relative to the current working directory.
np.save('communityAggregatedMatrix',newmatrix)