<a href="https://colab.research.google.com/github/manhdqhe153129/Regex_Generator_for_WAF_Genetic/blob/master/rule_based_waf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import packages

In [18]:
import json
import re

import numpy as np
import pandas as pd

## Convert the access log to a readable JSON file

In [37]:
def convert_to_json(log_file, save_file):
  """Convert a JSON-lines access log into a single JSON array file.

  Each non-blank line of ``log_file`` is parsed as one JSON document; the
  parsed entries are written to ``save_file`` as one JSON list, in file order.

  Args:
    log_file: Path of the log to read (one JSON document per line).
    save_file: Path of the JSON file to create or overwrite.

  Raises:
    json.JSONDecodeError: If a non-blank line is not valid JSON.
  """
  requests_list = []

  with open(log_file) as f:
    for line in f:
      # Blank or whitespace-only lines carry no entry and would make
      # json.loads raise, so they are skipped.
      if not line.strip():
        continue
      requests_list.append(json.loads(line))

  with open(save_file, 'w') as f:
    json.dump(requests_list, f)

# Build access_log.json (a single JSON array) from the line-by-line access_fxh.log.
convert_to_json("access_fxh.log", "access_log.json")

## Load the JSON access log

In [38]:
# Read the converted log back into memory as a list of log entries.
with open('access_log.json') as access_log_file:
    requests_list = json.load(access_log_file)

In [39]:
# Total number of log entries loaded.
len(requests_list)

445

In [22]:
# Keep only the entries whose 'status' field, read as an integer, is below 400.
accepted_request = [
    entry for entry in requests_list
    if int(entry['status']) < 400
]

len(accepted_request)


439

In [23]:
# Reduce every accepted entry to the request string stored at
# entry['request']['request'].
request_lines = []
for entry in accepted_request:
    request_lines.append(entry['request']['request'])
accepted_request = request_lines
len(accepted_request)

439

In [24]:
# Collapse duplicate request strings into a set.
accepted_request = {request_line for request_line in accepted_request}
len(accepted_request)

191

In [None]:
## Parse request lines into method, URI and parameters

In [25]:
# Parsed requests grouped by method:
#   {method: [{'uri': ..., 'params': {name: value_or_None}}, ...]}
# The 'params' key is only present when the request has a query string.
requests = {}

for req in accepted_request:
  # req_content includes method, uri and parameters
  req_content = req.split(" ")
  if len(req_content) < 2:
    # Malformed request line without a URI token: nothing to parse.
    continue
  method = req_content[0]

  # Split on the first '?' only, so a literal '?' inside the query string
  # does not truncate the parameters.
  uri, has_query, query_string = req_content[1].partition('?')

  request = {'uri': uri}
  if has_query:
    p = {}
    for param in query_string.split('&'):
      # Split on the first '=' only: a value may itself contain '='.
      # A name given without '=' is recorded with the value None.
      key, has_value, val = param.partition('=')
      p[key] = val if has_value else None
    request['params'] = p
  requests.setdefault(method, []).append(request)



## Shorten URI

In [26]:
# Drop a trailing file name (a last path segment containing '.') from URIs
# that have more than two '/'-separated parts, keeping only the directory.
for method_requests in requests.values():
  for entry in method_requests:
    uri = entry['uri']
    segments = uri.split('/')
    last_segment = segments[-1]
    if len(segments) > 2 and '.' in last_segment:
      # last_segment is non-empty here, so the negative slice is safe.
      entry['uri'] = uri[:-len(last_segment)]


In [27]:
# Inspect the parsed requests, grouped by method.
requests

{'GET': [{'params': {'1635100342': None},
   'uri': '/media/vendor/bootstrap/js/'},
  {'params': {'0.2.0': None},
   'uri': '/media/vendor/joomla-custom-elements/css/'},
  {'params': {'3739c4e9aad6e1d60e964c728efa6c17': '1',
    'client_id': '0',
    'function': 'jSelectMenu_jform_params_customRegLinkMenu',
    'layout': 'modal',
    'option': 'com_menus',
    'tmpl': 'component',
    'view': 'items'},
   'uri': '/index.php'},
  {'params': {'Itemid': '101',
    'id': '16',
    'option': 'com_config',
    'return': 'aHR0cDovL2xvY2FsaG9zdC9pbmRleC5waHAvY29tcG9uZW50L3VzZXJzL2xvZ2luP0l0ZW1pZD0xMDE%3D',
    'view': 'modules'},
   'uri': '/index.php'},
  {'params': {'cf6e1c019dfc542b9b8a481f01d53f09': None},
   'uri': '/installation/template/js/'},
  {'params': {'37c5f2d68d971312b4cd5bd42cc0c18e': None},
   'uri': '/installation/template/css/'},
  {'params': {'ce6270fd318f821b946b987d392416a6': None},
   'uri': '/media/system/js/fields/'},
  {'params': {'Itemid': '101',
    'id': '16',
    '

## Merge requests that have the same URI


In [28]:
def merge_requests(requests):
  """Merge a group of parsed requests into one entry.

  Args:
    requests: Non-empty list of request dicts. Each has a 'uri' key and may
      have a 'params' dict mapping a parameter name to its value (or None).

  Returns:
    A dict with the 'uri' of the first request and a 'params' dict that maps
    every parameter name seen in the group to the list of its distinct
    values, in first-seen order.

  Raises:
    IndexError: If ``requests`` is empty.
  """
  # Each inner dict acts as an insertion-ordered set. This removes duplicates
  # in one pass and keeps the output order stable between runs, which
  # rebuilding list(set(...)) on every value did not.
  seen = {}
  for request in requests:
    for name, value in request.get('params', {}).items():
      seen.setdefault(name, {})[value] = None

  return {
    'uri': requests[0]['uri'],
    'params': {name: list(values) for name, values in seen.items()},
  }


In [29]:
def refine_request_list(requests):
  """Group each method's requests by URI prefix and merge every group.

  For each method the requests are ordered by URI. A request then absorbs the
  requests that follow it whose URI starts with its own URI, and each group is
  collapsed with ``merge_requests``. The root URI '/' never absorbs others.

  Args:
    requests: Dict mapping a method to a list of request dicts, each with a
      'uri' key. The dict and its lists are not modified.

  Returns:
    Dict mapping each method to the list of merged entries, ordered by URI.
  """
  group_requests = {}
  for method, rq_list in requests.items():
    # Sort a copy: sorting and removing from the caller's list in place made
    # a second call on the same data return different groups.
    ordered = sorted(rq_list, key=lambda x: x['uri'])
    merged = []

    i = 0
    while i < len(ordered):
      head_uri = ordered[i]['uri']
      same_uri = [ordered[i]]
      i += 1
      # '/' is a prefix of every URI, so it must not swallow everything.
      if head_uri != '/':
        # Prefix test rather than substring test: '/a/' must not absorb
        # '/b/a/'. After sorting, all URIs sharing a prefix are contiguous.
        while i < len(ordered) and ordered[i]['uri'].startswith(head_uri):
          same_uri.append(ordered[i])
          i += 1
      merged.append(merge_requests(same_uri))
    group_requests[method] = merged
  return group_requests

In [30]:
# Group and merge the parsed requests for every method.
group_requests = refine_request_list(requests)

In [31]:
# Inspect the merged groups.
group_requests

{'GET': [{'params': {}, 'uri': '/'},
  {'params': {'30d60475f546daa706ad6d1915d82554': ['1'],
    '3739c4e9aad6e1d60e964c728efa6c17': ['1'],
    'Itemid': ['101'],
    'a8e69811d5b82f84ca0384ee19e93e02': ['1'],
    'a969c55130034e00a01d7a61cf17ff60': ['1'],
    'ae479c3ec9ebdd2d4ce697834f8000e5': ['1'],
    'client_id': ['0'],
    'function': ['jSelectMenu_jform_params_logout',
     'jSelectMenu_jform_params_login',
     'jSelectMenu_jform_params_customRegLinkMenu',
     'jSelectMenu_jform_params_base'],
    'id': ['17', '16', '1'],
    'layout': ['modal', 'edit'],
    'menutype': ['mainmenu'],
    'module_id': ['17', '16', '1'],
    'option': ['com_config', 'com_menus', 'com_modules'],
    'position': ['top-a',
     'below-top',
     'breadcrumbs',
     'banner',
     'footer',
     'search',
     'sidebar-right'],
    'return': ['aHR0cDovL2xvY2FsaG9zdC9pbmRleC5waHA%2Fb3B0aW9uPWNvbV9jb25maWcmdmlldz1tb2R1bGVzJmlkPTEmSXRlbWlkPTEwMSZyZXR1cm49YUhSMGNEb3ZMMnh2WTJGc2FHOXpkQzlwYm1SbGVDNXdhSE

## Generalize parameter values

In [32]:
# Replace a parameter's value list with a single digit pattern when every
# observed value consists only of digits.
# The loop variables are deliberately not called `requests`/`request`:
# rebinding the global `requests` dict to a list here broke re-running the
# earlier cells that read it.
for method_groups in group_requests.values():
  for group in method_groups:
    group_params = group['params']
    for param, list_value in group_params.items():
      # An empty (or missing) list gives no evidence that the parameter is
      # numeric; a None entry marks a parameter that was sent without a value.
      if list_value and all(
          value is not None and value.isdigit() for value in list_value):
        # Assigning to an existing key does not resize the dict, so this is
        # safe while iterating over it.
        group_params[param] = ["^[0-9]*$"]


## Format rules

In [33]:
# Flatten the per-method groups into one list of rules, tagging each rule
# with its method. Every rule is a shallow copy of its group, so the entries
# of group_requests are not modified, and the comprehension does not rebind
# the global `requests` dict the way the previous loop variable did.
rules = [
  {**group, 'method': method}
  for method, method_groups in group_requests.items()
  for group in method_groups
]

In [34]:
# Persist the generated rules as JSON.
with open('rules.json', 'w') as rules_file:
  json.dump(rules, rules_file)

## Update rules based on new requests

In [40]:
# Convert the new log into a single JSON array file, new_log.json.
convert_to_json('new_log.log', 'new_log.json')

In [None]:
def _param_value_allowed(value, allowed):
  """Return True if value is listed in allowed or fully matches one of its '^...$' patterns."""
  if value in allowed:
    return True
  if not isinstance(value, str):
    # e.g. None for a parameter sent without a value: only a literal match counts.
    return False
  for candidate in allowed:
    if (isinstance(candidate, str) and candidate.startswith('^')
        and candidate.endswith('$')):
      try:
        # fullmatch so a trailing newline cannot slip past the '$' anchor.
        if re.fullmatch(candidate, value):
          return True
      except re.error:
        # Not a valid pattern after all: it only counts as a literal.
        continue
  return False


def check_request(request, rules):
  """Tell whether a parsed request is allowed by at least one rule.

  A rule applies when its method equals the request's method and its URI is
  contained in the request's URI. An applicable rule allows the request when
  every request parameter is known to the rule and its value is permitted:
  either listed literally, or fully matched by a listed '^...$' pattern such
  as "^[0-9]*$". A request without parameters is allowed by any applicable
  rule.

  Args:
    request: Dict with 'method', 'uri' and optionally 'params'
      (parameter name -> value or None).
    rules: List of rule dicts with 'method', 'uri' and 'params'
      (parameter name -> list of permitted values or patterns).

  Returns:
    True if some applicable rule allows the request; False otherwise,
    including when no rule applies.
  """
  # Parsed requests without a query string have no 'params' key at all.
  request_params = request.get('params') or {}
  for rule in rules:
    # NOTE(review): the URI test is a substring match, kept from the original
    # code. A rule for '/' therefore applies to every URI - confirm whether a
    # prefix or exact match is intended.
    if request['method'] != rule['method'] or rule['uri'] not in request['uri']:
      continue
    rule_params = rule['params']
    if all(
        name in rule_params and _param_value_allowed(value, rule_params[name])
        for name, value in request_params.items()):
      return True
  # Only reject once every rule has been tried; returning inside the loop
  # would judge the request on the first rule alone.
  return False
        

In [None]:
# Load the saved rules and the converted new log.
with open('rules.json', 'r') as rules_file:
  rules = json.load(rules_file)
with open('new_log.json', 'r') as new_log_file:
  new_log = json.load(new_log_file)
