In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
import gzip
import random

In [2]:
# Load the whole gzipped log into memory: the bytes are decoded as cp1251
# (undecodable bytes are dropped) and split on '\n' into a list of lines.
with gzip.open('../rtkit/radius.log.2.gz', 'rb') as f:
    s = f.read().decode('cp1251', 'ignore').split('\n')

In [3]:
# Sort the raw log lines into record kinds using case-insensitive substring
# markers. The three tests are independent, so a single line can end up in
# more than one list.
start, stop, failed = [], [], []
for line in s:
    lowered = line.lower()
    if 'start,' in lowered:
        start.append(line)
    if 'stop,' in lowered:
        stop.append(line)
    if ('failed' in lowered
            and 'no username' not in lowered
            and 'authenticate-only' not in lowered):
        failed.append(line)
# Other filters tried during exploration; currently not extracted:
# alive = [line for line in s if 'alive,' in line.lower()]
# added = [line for line in s if 'added' in line.lower()]
# skipped = [line for line in s if 'skiped' in line.lower()]
# auth_only = [line for line in s if 'authenticate-only' in line.lower()]
# empty = [line for line in s if 'ppp,,' in line.lower() and 'failed' not in line.lower()]
# disconnect = [line for line in s if 'nak' in line.lower()]

In [4]:
# Column names for the parsed records, in the same order as the list
# returned by parse_stop_start.
columns = [
    'time',
    'session_id',
    'type',
    'login',
    'switch',
    'bras_ip',
    'client_mac',
    'client_ip',
    'traffic_in',
    'traffic_out',
    'delay',
    'error_code',
]

In [13]:
def parse_stop_start(line, year=2020):
    """Parse one Start / Stop / FAILED line of the log into a flat record.

    The line is read as three parts: the whitespace-separated header that
    precedes the parenthesised section, the comma-separated fields inside the
    parentheses, and the last whitespace-separated token of the whole line
    (the delay).

    Parameters
    ----------
    line : str
        A single raw log line.
    year : int or str, default 2020
        Year used to build the timestamp; the header itself only supplies a
        month abbreviation, a day and a time (header tokens 0-2).

    Returns
    -------
    list
        ``[time, session_id, type, login, switch, bras_ip, client_mac,
        client_ip, traffic_in, traffic_out, delay, error_code]``.
        ``traffic_in`` / ``traffic_out`` are non-zero only for 'Stop' records,
        ``error_code`` only for FAILED lines, and ``client_ip`` is left empty
        for FAILED lines parsed with the failed layout.

    Raises
    ------
    ValueError
        If the header has fewer than 13 tokens (token 12 is always inspected),
        or if a numeric field cannot be converted.
    IndexError
        If the parenthesised section is missing or has too few fields for the
        layout being parsed.
    """
    header = line.rsplit('(', 1)[0].strip().split()
    if len(header) < 13:
        # Fail with the offending line attached instead of printing it and
        # then crashing on a bare IndexError a few statements later.
        raise ValueError(
            'cannot parse log line, expected at least 13 header tokens, got {}: {!r}'
            .format(len(header), line))

    # Token 8 is the quoted user name: the part before '@' is kept as the
    # login, and the part of the login before the first '-' as the switch.
    login = header[8].replace("'", '').split('@')[0]
    switch = login.split('-')[0]

    # An explicit '%H:%M:%S' is used rather than '%X', whose meaning depends
    # on the active locale.
    timestamp = pd.to_datetime(
        '{} {} {} {}'.format(year, header[0], header[1], header[2]),
        format='%Y %b %d %H:%M:%S')

    error_code = 0
    session_type = ''
    if header[12] == 'FAILED':
        session_type = 'Failed'
        # The numeric code follows the word 'code' near the end of the header
        # and may carry a trailing comma.
        if header[-3] == 'code':
            error_code = int(header[-2].split(',')[0])
        if header[-4] == 'code':
            error_code = int(header[-3].split(',')[0])

    # Everything between the first '(' and the last ')'.
    fields = line.split('(', 1)[1].strip().rsplit(')', 1)[0].split(',')

    traffic_in = 0
    traffic_out = 0
    client_mac = ''
    client_ip = ''

    if session_type == 'Failed' and error_code != -42:
        # Failed layout: first field is '<bras_ip>:<...>', the last two
        # fields are the session id and the client identifier.
        session_id = fields[-2]
        bras_ip = fields[0].split(':')[0]
        client_mac = fields[-1]
    else:
        # Start/Stop layout.
        # NOTE(review): FAILED lines with code -42 are parsed with this layout
        # too; the reason is not visible in this notebook - confirm.
        session_type = fields[5]
        session_id = fields[6]
        if session_type == 'Stop':
            traffic_in = int(fields[13])
            traffic_out = int(fields[14])

        bras_ip = fields[1].split(':')[0]

        if len(fields) >= 11:
            client_mac = fields[9]
            client_ip = fields[10]

    # The delay is always the last whitespace-separated token of the line.
    delay = float(line.split()[-1])

    return [timestamp, session_id, session_type, login, switch, bras_ip, client_mac, client_ip,
            traffic_in, traffic_out, delay, error_code]

In [6]:
# One row per 'start,' log line, parsed into the shared column layout.
df_start = pd.DataFrame(list(map(parse_stop_start, start)), columns=columns)

In [9]:
# One row per 'stop,' log line, parsed into the shared column layout.
df_stop = pd.DataFrame(list(map(parse_stop_start, stop)), columns=columns)

In [14]:
# One row per retained 'failed' log line, parsed into the shared column layout.
df_failed = pd.DataFrame(list(map(parse_stop_start, failed)), columns=columns)

In [17]:
# Preview the first five parsed Start records.
df_start.head(n=5)

Unnamed: 0,time,session_id,type,login,switch,bras_ip,client_mac,client_ip,traffic_in,traffic_out,delay,error_code
0,2020-04-29 00:11:45,ULSK-BR0223011120000005f6ca096297,Start,6EdbVr-sLW99,6EdbVr,89.239.189.1,558401-HDSLAM56-PRIG atm 0/2/0/25:0.33,89.239.158.89,0,0,0.078896,0
1,2020-04-29 00:11:45,ULSK-BR0223011120000005f6ca096297,Start,6EdbVr-sLW99,6EdbVr,89.239.189.1,558401-HDSLAM56-PRIG atm 0/2/0/25:0.33,89.239.158.89,0,0,2.2e-05,0
2,2020-04-29 00:11:45,ULSK-BR0223011120000005f6ca096297,Start,6EdbVr-sLW99,6EdbVr,89.239.189.1,558401-HDSLAM56-PRIG atm 0/2/0/25:0.33,89.239.158.89,0,0,1.9e-05,0
3,2020-04-29 00:11:45,ULSK-BR0223011120000005f6ca096297,Start,6EdbVr-sLW99,6EdbVr,89.239.189.1,558401-HDSLAM56-PRIG atm 0/2/0/25:0.33,89.239.158.89,0,0,2.3e-05,0
4,2020-04-29 00:11:45,ULSK-BR0223011120000005f6ca096297,Start,6EdbVr-sLW99,6EdbVr,89.239.189.1,558401-HDSLAM56-PRIG atm 0/2/0/25:0.33,89.239.158.89,0,0,3.8e-05,0


In [18]:
# Preview the first five parsed Stop records.
df_stop.head(n=5)

Unnamed: 0,time,session_id,type,login,switch,bras_ip,client_mac,client_ip,traffic_in,traffic_out,delay,error_code
0,2020-04-29 00:11:45,ULSK-BR092320337000004e22cc155066,Stop,9usLJuaM-9v,9usLJuaM,89.239.189.2,e8:de:27:88:83:41,95.68.195.215,617400,24383973,0.537162,0
1,2020-04-29 00:11:45,ULSK-BR092320337000004e22cc155066,Stop,9usLJuaM-9v,9usLJuaM,89.239.189.2,e8:de:27:88:83:41,95.68.195.215,7557218,451203577,0.588429,0
2,2020-04-29 00:11:44,ULSK-BR092320337000004e22cc155066,Stop,9usLJuaM-9v,9usLJuaM,89.239.189.2,e8:de:27:88:83:41,95.68.195.215,5983289,183669222,0.486763,0
3,2020-04-29 00:11:44,ULSK-BR092320337000004e22cc155066,Stop,9usLJuaM-9v,9usLJuaM,89.239.189.2,e8:de:27:88:83:41,95.68.195.215,105701857,2289581967,0.527544,0
4,2020-04-29 00:11:45,ULSK-BR092320337000004e22cc155066,Stop,9usLJuaM-9v,9usLJuaM,89.239.189.2,e8:de:27:88:83:41,95.68.195.215,4493883,18189948,0.626556,0


In [15]:
# Preview the first five parsed Failed records.
df_failed.head(n=5)

Unnamed: 0,time,session_id,type,login,switch,bras_ip,client_mac,client_ip,traffic_in,traffic_out,delay,error_code
0,2020-04-29 00:11:46,ULSK-BR092321100000006e7aca216332,Failed,8H89A99AAHvL,8H89A99AAHvL,89.239.189.2,d4:76:ea:01:68:bc,,0,0,0.016439,-3
1,2020-04-29 00:11:46,ULSK-BR09232123500000672cad053087,Failed,BJF0tlbs,BJF0tlbs,89.239.189.2,ec:4c:4d:87:28:62,,0,0,0.016162,-3
2,2020-04-29 00:11:46,ULSK-BR152330713000007fb4bc169088,Failed,9usLavHv-Ws,9usLavHv,89.239.189.2,50:ff:20:0e:f6:7d,,0,0,0.016054,-3
3,2020-04-29 00:11:47,ULSK-BR0923213750000054420d169216,Failed,sWHsA9-W,sWHsA9,89.239.189.2,88:d7:f6:64:28:30,,0,0,0.015139,-52
4,2020-04-29 00:11:49,ULSK-BR0223004400000057debf216759,Failed,9usLJMAv-Wg,9usLJMAv,89.239.189.2,00:24:21:59:c1:86,,0,0,0.01471,-3
