In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
import gzip
import random
import string

In [2]:
# Load the whole gzipped RADIUS log into memory as a list of text lines.
# The bytes are decoded as cp1251 and undecodable bytes are silently dropped.
with gzip.open('../rtkit/radius.log.1.gz') as f:
    s = f.read().decode(encoding='cp1251', errors='ignore').split('\n')

In [3]:
def is_login_correct(line):
    """Tell whether the login found in a log line has the form ``<hex>-<hex>``.

    The login is taken as the last whitespace-separated token that precedes
    the first ``'@dhcp'`` in the line (or the last token of the whole line
    when ``'@dhcp'`` is absent), with single quotes removed.

    Parameters
    ----------
    line : str
        Raw log line.

    Returns
    -------
    bool
        True only when the login consists of exactly two non-empty parts
        separated by a single ``-`` and every character of both parts is a
        hexadecimal digit (either case). False otherwise, including when the
        line has no token before ``'@dhcp'``.
    """
    tokens = line.split('@dhcp')[0].split()
    if not tokens:
        # Empty or whitespace-only prefix: there is no login to validate.
        return False

    login = tokens[-1].replace("'", '')
    parts = login.split('-')
    if len(parts) != 2:
        return False

    # string.hexdigits already contains both upper- and lower-case digits, so
    # no case folding is needed. Each part must be non-empty: all() over an
    # empty string is vacuously True and would accept logins such as '-07'.
    return all(
        bool(part) and all(ch in string.hexdigits for ch in part)
        for part in parts
    )

In [11]:
# Bucket the raw log lines by event kind (case-insensitive substring match).
start = [entry for entry in s if 'start,' in entry.lower()]
stop = [entry for entry in s if 'stop,' in entry.lower()]
# A line counts as a failure when it mentions 'failed' and contains none of
# the excluded markers; the lower-cased text is computed once per line.
failed = [
    entry
    for entry, lowered in ((raw, raw.lower()) for raw in s)
    if 'failed' in lowered
    and not any(marker in lowered
                for marker in ('no username', 'authenticate-only', ')@pppoe'))
]
# Other event categories, currently disabled:
# alive = [line for line in s if 'alive,' in line.lower()]
# added = [line for line in s if 'added' in line.lower()]
# skipped = [line for line in s if 'skiped' in line.lower()]
# auth_only = [line for line in s if 'authenticate-only' in line.lower()]
# empty = [line for line in s if 'ppp,,' in line.lower() and 'failed' not in line.lower()]
# disconnect = [line for line in s if 'nak' in line.lower()]

In [15]:
# Column names for the parsed-record DataFrames, in the same order as the
# values returned by parse_stop_start.
columns = [
    'time',
    'session_id',
    'type',
    'login',
    'switch',
    'bras_ip',
    'client_mac',
    'client_ip',
    'traffic_in',
    'traffic_out',
    'delay',
    'error_code',
    'correct_login',
]

In [16]:
def parse_stop_start(line, year=2020):
    """Parse one Start, Stop or FAILED log line into a flat record.

    The line is cut at its first ``(`` and at the last ``)`` after it. The
    whitespace-separated tokens before the parenthesis provide the timestamp,
    the quoted login and, for failures, the error code. The comma-separated
    fields inside the parentheses provide the session attributes.

    Parameters
    ----------
    line : str
        Raw log line. It must contain a parenthesised field list and end
        with a numeric token, which is read as the delay.
    year : int, optional
        Year prepended to the month/day/time tokens read from the line when
        building the timestamp. Defaults to 2020, the value that was
        previously hard-coded.

    Returns
    -------
    list
        ``[time, session_id, type, login, switch, bras_ip, client_mac,
        client_ip, traffic_in, traffic_out, delay, error_code,
        correct_login]`` - the same order as the notebook's ``columns`` list.

    Raises
    ------
    IndexError
        If the line has fewer tokens or fields than the positions read here
        (for example when it contains no ``(``).
    ValueError
        If the timestamp, error code, traffic counters or delay cannot be
        converted.
    """
    head_and_rest = line.split('(', 1)
    left = head_and_rest[0].strip().split()
    center = head_and_rest[1].strip().rsplit(')', 1)[0].split(',')
    correct_login = is_login_correct(line)

    # Token 8 is the quoted "login@..." token. The quotes are stripped in
    # place so the negative-index lookups below see the cleaned token too.
    left[8] = left[8].replace("'", '')
    login = left[8].split('@')[0]
    switch = login.split('-')[0]
    timestamp = pd.to_datetime(
        ' '.join([str(year), left[0], left[1], left[2]]),
        format='%Y %b %d %X',
    )

    error_code = 0
    session_type = ''
    if left[12] == 'FAILED':
        # The error code is the token right after the literal 'code', which
        # is looked for 2 to 5 tokens from the end. Offsets are scanned from
        # nearest to farthest, so a match farther from the end wins.
        for offset in range(2, 6):
            if left[-offset] == 'code':
                error_code = int(left[1 - offset].split(',')[0])
        session_type = 'Failed'
        if error_code == -42:
            # Failures with code -42 are read with the regular field layout
            # below; force their type field so they are still reported as
            # 'Failed'.
            center[5] = 'Failed'

    traffic_in = 0
    traffic_out = 0
    client_mac = ''
    client_ip = ''

    if session_type == 'Failed' and error_code != -42:
        # Failure layout: session id and MAC are the last two fields.
        session_id = center[-2]
        bras_ip = center[0].split(':')[0]
        client_mac = center[-1]
    else:
        # Regular layout (Start / Stop / code -42 failures).
        session_type = center[5]
        session_id = center[6]
        if session_type == 'Stop':
            # Traffic counters are only read for Stop records.
            traffic_in = int(center[13])
            traffic_out = int(center[14])

        bras_ip = center[1].split(':')[0]

        if len(center) >= 11:
            client_mac = center[9]
            client_ip = center[10]

    # The last whitespace-separated token of the whole line is the delay.
    delay = float(line.split()[-1])

    return [timestamp, session_id, session_type, login, switch, bras_ip, client_mac, client_ip,
            traffic_in, traffic_out, delay, error_code, correct_login]

In [20]:
# One parsed record per Start line.
df_start = pd.DataFrame(list(map(parse_stop_start, start)), columns=columns)

In [21]:
# One parsed record per Stop line.
df_stop = pd.DataFrame(list(map(parse_stop_start, stop)), columns=columns)

In [22]:
# One parsed record per failed-authentication line.
df_failed = pd.DataFrame(list(map(parse_stop_start, failed)), columns=columns)

In [24]:
# Preview the first rows parsed from the Start lines.
df_start.head()

Unnamed: 0,time,session_id,type,login,switch,bras_ip,client_mac,client_ip,traffic_in,traffic_out,delay,error_code,correct_login
0,2020-05-02 00:11:28,ULSK-BR022301509000002b4f78182637,Start,0A49D519-07,0A49D519,89.239.189.1,00:1f:ce:a3:7f:e8,89.239.169.40,0,0,0.079219,0,True
1,2020-05-02 00:11:28,ULSK-BR022301509000002b4f78182637,Start,0A49D519-07,0A49D519,89.239.189.1,00:1f:ce:a3:7f:e8,89.239.169.40,0,0,2.6e-05,0,True
2,2020-05-02 00:11:28,ULSK-BR022301509000002b4f78182637,Start,0A49D519-07,0A49D519,89.239.189.1,00:1f:ce:a3:7f:e8,89.239.169.40,0,0,2.1e-05,0,True
3,2020-05-02 00:11:28,ULSK-BR022301509000002b4f78182637,Start,0A49D519-07,0A49D519,89.239.189.1,00:1f:ce:a3:7f:e8,89.239.169.40,0,0,1.5e-05,0,True
4,2020-05-02 00:11:28,ULSK-BR022301509000002b4f78182637,Start,0A49D519-07,0A49D519,89.239.189.1,00:1f:ce:a3:7f:e8,89.239.169.40,0,0,3.9e-05,0,True


In [25]:
# Preview the first rows parsed from the Stop lines.
df_stop.head()

Unnamed: 0,time,session_id,type,login,switch,bras_ip,client_mac,client_ip,traffic_in,traffic_out,delay,error_code,correct_login
0,2020-05-02 00:11:16,ULSK-BR02230062600000f2648e049180,Stop,0A499368-18,0A499368,89.239.189.2,b8:a3:86:1f:6e:20,95.68.149.105,2900056,30611982,0.179366,0,True
1,2020-05-02 00:11:16,ULSK-BR02230062600000f2648e049180,Stop,0A499368-18,0A499368,89.239.189.2,b8:a3:86:1f:6e:20,95.68.149.105,336400676,9126176420,0.233075,0,True
2,2020-05-02 00:11:16,ULSK-BR02230062600000f2648e049180,Stop,0A499368-18,0A499368,89.239.189.2,b8:a3:86:1f:6e:20,95.68.149.105,471668,2365636,0.292533,0,True
3,2020-05-02 00:11:16,ULSK-BR02230062600000f2648e049180,Stop,0A499368-18,0A499368,89.239.189.2,b8:a3:86:1f:6e:20,95.68.149.105,23738947,1233557443,0.30368,0,True
4,2020-05-02 00:11:16,ULSK-BR02230062600000f2648e049180,Stop,0A499368-18,0A499368,89.239.189.2,b8:a3:86:1f:6e:20,95.68.149.105,16608205,253690395,0.324687,0,True


In [27]:
# Preview the first rows parsed from the failed-authentication lines.
df_failed.head()

Unnamed: 0,time,session_id,type,login,switch,bras_ip,client_mac,client_ip,traffic_in,traffic_out,delay,error_code,correct_login
0,2020-05-02 00:11:18,ULSK-BR09232112900000bbb97e152271,Failed,p13ay4Zl,p13ay4Zl,89.239.189.2,ec:4c:4d:c1:73:b9,,0,0,0.016178,1011,False
1,2020-05-02 00:11:18,ULSK-BR02230109000000955fb7226507,Failed,6EdbVr-gXXXs,6EdbVr,89.239.189.1,bc:f6:85:01:9a:e7,,0,0,0.016163,-52,False
2,2020-05-02 00:11:20,ULSK-BR02230044000000859ea7139589,Failed,9usLJMAv-Wg,9usLJMAv,89.239.189.2,00:24:21:59:c1:86,,0,0,0.016723,-3,False
3,2020-05-02 00:11:20,ULSK-BR02230108600000e317fd018937,Failed,sdAv4VXQ,sdAv4VXQ,89.239.189.2,ec:4c:4d:bf:7c:d2,,0,0,0.018275,-3,False
4,2020-05-02 00:11:21,ULSK-BR1523310210000079affe161206,Failed,XsA999AAHAH,XsA999AAHAH,89.239.189.1,08:60:6e:bd:b6:c0,,0,0,0.017471,-3,False
