In [247]:
import pandas as pd
import numpy as np
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from pymongo import MongoClient
from datetime import datetime

def _connect_mongo(host, port, username, password, db):
    if username and password:
        mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (username, password, host, port, db)
        conn = MongoClient(mongo_uri)
    else:
        conn = MongoClient(host, port)
        
    return conn[db]

def read_mongo(db, collection, query={}, host='localhost', port=27017, username=None, password=None, no_id=True):
    db =_connect_mongo(db=db, host=host, port=port, username=username, password=password)
    cursor = db[collection].find(query)
        
    return list(cursor)

def _build_vector(event):
    # vector_len = 290
    # f_vector = np.zeros(vectror_len)
    
    m_country = assign_country_mapper()
    m_event = assign_event_type_mapper()
    m_agent_os = assign_agent_os_mapper()
    m_agent_name = assign_agent_name_mapper()
    
    f_vector_evty = np.zeros(4)
    f_vector_evty[m_event[event['type']]] = 1
    
    
    dt = datetime.utcfromtimestamp(event['timestamp'])
    minutes = (dt.hour * 60) + dt.minute
    day = dt.weekday()
    f_vector_dt = np.zeros(2)
    f_vector_dt[0] = round(minutes / 1439, 5)
    f_vector_dt[1] = round(day / 6, 5)
    
    latitude = event['geoip']['latitude'][0]
    longitude = event['geoip']['longitude'][0]
    f_vector_geo = np.zeros(2)
    f_vector_geo[0]= round((latitude + 90) / 180,5)
    f_vector_geo[1]= round((longitude + 180) / 360, 5)
    
    f_vector_agos = np.zeros(8)
    os = event['agent']['os'][0]
    if os in m_agent_os:
        f_vector_agos[m_agent_os[os]] = 1
    else:
        f_vector_agos[-1] = 1
        
    f_vector_agna = np.zeros(26)
    name = event['agent']['name'][0]
    if name in m_agent_name:
        f_vector_agna[m_agent_name[name]] = 1
    else:
        f_vector_agna[-1] = 1
        
    f_vector_country = np.zeros(248)
    country = event['geoip']['country'][0]
    if country in m_country:
        f_vector_country[m_country[country]] = 1
    else:
        f_vector_country[-1] = 1
        
    
    return np.concatenate([f_vector_evty, f_vector_dt, f_vector_geo, f_vector_agos, f_vector_agna, f_vector_country])

def vectorize(json):
    events = sorted(np.concatenate([json['connects'], json['plays'], json['h5liveStats'],json['closes']]), key=lambda d: d['timestamp'])
    vector_list = []
    for event in events:
        vector_list.append(_build_vector(event))
    
    return np.stack(vector_list)

def add_event_types(json):
    if 'rtmpStats' in json:
        rtmpStats = json['rtmpStats']
        for x in rtmpStats:
            x['type']='rtmp'
    
    if 'connects' in json:
        connects = json['connects'] 
        for x in connects:
            x['type']='connect'
        
    if 'plays' in json:
        plays = json['plays']
        for x in plays:
            x['type']='play'
            
    if 'h5liveStats' in json:
        h5liveStats = json['h5liveStats']
        for x in h5liveStats:
            x['type']='h5live'
    
    if 'closes' in json:
        closes = json['closes']
        for x in closes:
            x['type']='close'  
    
    return json

In [100]:
import json

def assign_country_mapper():
    with open('../../src/util/country_mapper.json') as f:
        d = json.load(f)
        return d

def assign_event_type_mapper():
    with open('../../src/util/event_type_mapper.json') as f:
        d = json.load(f)
        return d
    
def assign_agent_os_mapper():
    with open('../../src/util/agent_os_mapper.json') as f:
        d = json.load(f)
        return d
    
def assign_agent_name_mapper():
    with open('../../src/util/agent_name_mapper.json') as f:
        d = json.load(f)
        return d

In [227]:
# load data from MongoDB
misuses = read_mongo('dataset1', 'misuse_data')
regulars = read_mongo('dataset1', 'regular_data')

In [249]:
VList = []
memory = 0
for x in misuses:
    x = add_event_types(x)
    y = vectorize(x)
    print(y.shape)
    VList.append(y)

    
print(memory)
# for x in regulars:
    # x = add_event_types(x)
    # vectorize(x)

(629, 290)
(101, 290)
(163, 290)
(1233, 290)
(1419, 290)
(1042, 290)
(779, 290)
(3057, 290)
(4000, 290)
(213, 290)
(110, 290)
(68, 290)
(334, 290)
(202, 290)
(115, 290)
(77, 290)
(613, 290)
(95, 290)
(2334, 290)
(38, 290)
(1449, 290)
(54, 290)
(2044, 290)
(52, 290)
(177, 290)
(234, 290)
(1647, 290)
(822, 290)
(420, 290)
(175, 290)
(421, 290)
(184, 290)
(2673, 290)
(212, 290)
(501, 290)
(528, 290)
(134, 290)
(367, 290)
(577, 290)
(1436, 290)
(544, 290)
(99, 290)
(1639, 290)
(53, 290)
(2034, 290)
(784, 290)
(180, 290)
(141, 290)
(3260, 290)
(284, 290)
(3300, 290)
(627, 290)
(418, 290)
(575, 290)
(168, 290)
(1863, 290)
(177, 290)
(3213, 290)
(52, 290)
(70, 290)
(67, 290)
(2072, 290)
(1730, 290)
(65, 290)
(723, 290)
(1763, 290)
(413, 290)
(3300, 290)
(1070, 290)
(2368, 290)
(3300, 290)
(2511, 290)
(55, 290)
(183, 290)
162005600


ValueError: all input arrays must have the same shape

In [242]:
VList.shape

AttributeError: 'list' object has no attribute 'shape'