In [1]:
import pandas as pd
import numpy as np
import torch
from torch_geometric.data import Data

In [2]:
def loadData(file):
    """Load the edge-level CSV at ``file`` and parse its ``date`` column.

    Prints the raw frame shape and the number of distinct dates as a quick
    sanity check, then returns the frame with ``date`` converted by
    ``pd.to_datetime``.
    """
    frame = pd.read_csv(file)
    print('Raw shape: ', frame.shape)

    frame['date'] = pd.to_datetime(frame['date'])
    distinct_days = set(frame['date'])
    print('Days: ', len(distinct_days))

    return frame

In [3]:
def getTimeSeries(df):
    """Pivot the long edge table into one time series per (start_id, end_id).

    Args:
        df: frame with ``date``, ``start_id``, ``end_id`` and ``amount`` columns.

    Returns:
        A frame indexed by ``date`` whose columns are a two-level
        ``(start_id, end_id)`` index; each cell is the summed ``amount`` for
        that pair on that date, with 0 where the pair has no rows.
    """
    # The string alias selects the same groupby sum that ``np.sum`` was mapped
    # to, without the FutureWarning newer pandas raises for numpy callables.
    table = pd.pivot_table(df, values='amount', index=['date'],
                           columns=['start_id', 'end_id'],
                           aggfunc='sum', fill_value=0)
    return table

In [4]:
import os

# Data location. Set ANOMALY_DATA_DIR to run on another machine; the default
# is the original author's local directory. os.path.join(..., '') guarantees a
# trailing separator, so the plain string concatenations below keep working.
dataDir = os.path.join(
    os.environ.get('ANOMALY_DATA_DIR',
                   '/home/urwa/Documents/Projects/AnomalyDetection/Pipeline/data/'),
    '')
comboDir = 'combo/'
dataFile = '20190402_TaipeiEdgesDatewise.csv'
events_data = dataDir + 'TaipeiEvents.csv'

In [5]:
# Build the full path of the edge CSV and load it; loadData prints the raw
# shape and the number of distinct dates as it goes.
file = dataDir + dataFile
data = loadData(file)

Raw shape:  (7374816, 5)
Days:  638


In [6]:
# Pivot the long edge table into a date-indexed frame with one column per
# (start_id, end_id) pair.
dataTs = getTimeSeries(data)

In [7]:
# Preview the first rows of the pivoted time series.
dataTs.head()

start_id,BL01,BL01,BL01,BL01,BL01,BL01,BL01,BL01,BL01,BL01,...,R28,R28,R28,R28,R28,R28,R28,R28,R28,R28
end_id,BL01,BL02,BL03,BL04,BL05,BL06,BL07,BL08,BL09,BL10,...,R20,R21,R22,R22A,R23,R24,R25,R26,R27,R28
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-01-01,70,111,97,174,147,420,991,249,96,213,...,559,510,1610,1143,532,469,1729,2019,1577,811
2017-01-02,40,125,91,180,227,344,838,232,76,191,...,331,398,1242,733,309,360,1233,1270,705,440
2017-01-03,49,140,134,436,334,388,610,480,159,214,...,299,481,1408,502,210,219,1499,947,396,244
2017-01-04,32,159,150,454,351,393,592,436,155,230,...,343,516,1448,492,277,223,1563,1011,380,205
2017-01-05,37,138,175,420,303,395,593,431,159,203,...,311,508,1380,522,241,226,1569,992,406,195


In [8]:
# Preview the first rows of the raw edge table returned by loadData.
data.head()

Unnamed: 0.1,Unnamed: 0,start_id,end_id,date,amount
0,0,BL01,BL01,2017-01-01,70.0
1,1,BL01,BL01,2017-01-02,40.0
2,2,BL01,BL01,2017-01-03,49.0
3,3,BL01,BL01,2017-01-04,32.0
4,4,BL01,BL01,2017-01-05,37.0


In [9]:
# Take the dates straight from the pivot's index (already one entry per date)
# so they stay in index order and dates[i] matches DOW[i]; going through
# set() would leave them in arbitrary order.
dates = list(dataTs.index)
# Day of week for each row of dataTs, as given by pandas' `dayofweek`.
DOW = list(pd.to_datetime(dataTs.index.values).dayofweek)

In [10]:
# Sorted so every run gets the same station order (and hence the same
# station -> index mapping); a bare set of strings is ordered by randomised
# hashes and changes between kernel restarts.
stations = sorted(set(data.start_id))

In [11]:
# Map each station id to its position in `stations`.
stationToIdx = {station: position for position, station in enumerate(stations)}

In [12]:
# Fully connected directed graph over the n stations, self-loops included:
# one [source, target] row per ordered pair, source-major order.
n = len(stations)
pairs = [[source, target] for source in range(n) for target in range(n)]
edge_index = torch.tensor(pairs, dtype=torch.long)

In [44]:
# Two synthetic graphs that share the same edges: all-ones node features
# labelled 0, and all-zeros node features labelled 1.
# NOTE: `data` is rebound here from the raw DataFrame to a graph sample.
dataList = []
for label, make_features in ((0, np.ones), (1, np.zeros)):
    x = torch.tensor(make_features((n, 2)), dtype=torch.float)
    y = torch.tensor(np.array([label]), dtype=torch.long)
    data = Data(x=x, edge_index=edge_index.t().contiguous(), y=y)
    dataList.append(data)

data

In [46]:
# Size check on the most recently built graph sample: node count, edge count
# and number of features per node.
data.num_nodes, data.num_edges, data.num_node_features

(108, 11664, 2)

In [47]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.nn import GCNConv

In [51]:
class Net(torch.nn.Module):
    """Two GCN layers followed by a linear read-out over the flattened nodes.

    The node embeddings from the second GCN layer are reshaped into rows of
    ``num_nodes * node_channels`` values, so the number of nodes fed to
    ``forward`` must be a multiple of ``num_nodes``.

    Args:
        num_nodes: nodes per graph (default 108, the previously hard-coded size).
        in_channels: input features per node.
        hidden_channels: output width of the first GCN layer.
        node_channels: per-node output width of the second GCN layer.
        num_classes: number of classes scored by the final linear layer.
    """

    def __init__(self, num_nodes=108, in_channels=2, hidden_channels=16,
                 node_channels=2, num_classes=2):
        super().__init__()
        # Width of one flattened row; shared by the linear layer and forward().
        self.flat_dim = num_nodes * node_channels
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, node_channels)
        self.linear1 = nn.Linear(self.flat_dim, num_classes)

    def forward(self, data):
        """Return log-probabilities, one row per group of ``num_nodes`` nodes.

        Args:
            data: object exposing ``x`` (node features) and ``edge_index``.
        """
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        # Concatenate all node embeddings into one feature vector per row.
        x = x.view(-1, self.flat_dim)
        x = self.linear1(x)
        return F.log_softmax(x, dim=1)

In [52]:
# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# Net.forward already returns log-probabilities (F.log_softmax), so the
# matching loss is NLLLoss; CrossEntropyLoss would apply log-softmax again.
criterion = nn.NLLLoss()

In [53]:
# Number of passes over dataList; also used in the progress line below.
num_epochs = 200

model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    num_correct = 0
    for data in dataList:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Predicted class is the arg-max over the class dimension.
        num_correct += int((out.argmax(dim=1) == data.y).sum())
    # ===================log========================
    # Both figures are averaged over the samples seen this epoch and are
    # measured in train mode, i.e. with dropout active.
    num_samples = max(len(dataList), 1)
    epoch_loss = running_loss / num_samples
    epoch_accuracy = num_correct / num_samples
    print('epoch [{}/{}], loss:{:.4f}, Accuracy:{:.4f}'
        .format(epoch + 1, num_epochs, epoch_loss, epoch_accuracy))