In [1]:
# Base exceptions
import re
from abc import ABC, abstractmethod
from functools import wraps
from pathlib import Path
from typing import Optional

from tqdm import trange, tqdm, tqdm_notebook


class FileNotFound(Exception):
    """Raised when an expected file does not exist.

    Construct it with a message followed by exactly one path, e.g.
    ``FileNotFound("read file error", path)``.

    Attributes:
        filepath: The ``str`` or ``Path`` that was passed after the message.
    """

    def __init__(self, message: str = "File not found", *args: object) -> None:
        """Validate the single path argument, then store it and the message.

        Args:
            message: Text shown on the first line of ``str(exc)``.
            *args: Exactly one ``str`` or ``Path`` naming the missing file.

        Raises:
            TypeError: If no path is given, more than one is given, or the
                path is neither ``str`` nor ``Path``.
        """
        if len(args) == 0:
            raise TypeError("FileNotFound exception requires exactly one argument.")
        if len(args) > 1:
            raise TypeError("FileNotFound exception requires exactly one argument, but multiple were given.")
        if not isinstance(args[0], (str, Path)):
            raise TypeError("FileNotFound exception requires a string or Path argument representing the file path.")
        self.filepath = args[0]
        super().__init__(message)

    def __reduce__(self):
        """Support ``copy`` and ``pickle``.

        The inherited implementation rebuilds the exception from ``self.args``,
        which holds only the message; the constructor would then reject the
        call because the path is missing. Passing the path explicitly keeps
        the round trip working.
        """
        return (self.__class__, (*self.args, self.filepath), dict(vars(self)))

    def __str__(self) -> str:
        """Return the message followed by a hint that names the missing file."""
        return super().__str__() + f"\nFile '{self.filepath}' not found - please check the filepath and try again."


class FileFormatIsWrong(Exception):
    """Raised when file content does not have the expected request format.

    Takes exactly one positional argument, a ``str`` or ``Path``, which is
    kept on ``self.message`` and shown on the first line of ``str(exc)``.
    """

    def __init__(self, *args: object) -> None:
        """Check that a single ``str``/``Path`` was supplied and store it.

        Raises:
            TypeError: If the argument count is not one, or the argument is
                neither ``str`` nor ``Path``.
        """
        cls_name = self.__class__.__name__
        if not args:
            raise TypeError(f"{cls_name} exception requires exactly one argument.")
        if len(args) != 1:
            raise TypeError(f"{cls_name} requires exactly one argument, but multiple were given.")
        (candidate,) = args
        if not isinstance(candidate, (str, Path)):
            raise TypeError(f"{cls_name} exception requires a string or Path argument representing the file path.")
        self.message = candidate
        super().__init__(candidate)

    def __str__(self) -> str:
        """Return the stored message followed by the fixed format hint."""
        hint = "\nFile Format Is Wrong. Request must need start from GET|POST|PUT"
        return super().__str__() + hint


In [2]:
from typing import List, Tuple, overload
from collections.abc import Sequence

In [None]:
# Charset used when reading the traffic files (chosen for Spanish letters).
ENCODING = 'windows-1252'

# Relative directory from which the three traffic file paths below are built.
SHIFT = Path('..', '..')
TRAIN_DATA_PATH = SHIFT.joinpath("normalTrafficTraining.txt")
TEST_DATA_NORMAL_PATH = SHIFT.joinpath("normalTrafficTest.txt")
TEST_DATA_ANOMALY_PATH = SHIFT.joinpath("anomalousTrafficTest.txt")
def read_file(file_path: Path) -> str:
    """Return the whole content of ``file_path`` decoded with ``ENCODING``.

    Args:
        file_path: Location of the text file to load.

    Returns:
        The file content as a single string.

    Raises:
        FileNotFound: If ``file_path.exists()`` is false.
    """
    if file_path.exists():
        return file_path.read_text(encoding=ENCODING)
    raise FileNotFound("read file error", file_path)

def starts_with_method(line: str) -> bool:
    """Tell whether ``line`` begins with one of ``GET``, ``POST`` or ``PUT``."""
    for method in ('GET', 'POST', 'PUT'):
        if line.startswith(method):
            return True
    return False
   
def parse(path: Path, request_reg: Optional[str] = None) -> List[str]:
    """Read a traffic file and split its text with a regular expression.

    Args:
        path: File to load; it is read through ``read_file``.
        request_reg: Pattern handed to ``re.split``. When omitted, the
            module-level ``SPLIT_REQUEST`` is looked up at call time.

    Returns:
        Every chunk produced by ``re.split``, in file order.

    Raises:
        FileNotFound: Propagated from ``read_file`` when ``path`` does not exist.
    """
    if request_reg is None:
        # Resolved here rather than in the signature: a default value is
        # evaluated when the ``def`` statement runs, and SPLIT_REQUEST is only
        # assigned in a later cell, so a fresh kernel raised NameError.
        request_reg = SPLIT_REQUEST

    # NOTE(review): the previous version also collected the indexes of chunks
    # that fail ``starts_with_method`` but never used them, so every chunk was
    # returned regardless. That unused pass is dropped; decide whether such
    # chunks should be filtered out or reported (e.g. via FileFormatIsWrong).
    return re.split(request_reg, read_file(path))

In [None]:
# Delimiter between requests: two consecutive newlines immediately followed by
# a request method. PUT is listed so the split agrees with starts_with_method,
# which accepts GET, POST and PUT.
SPLIT_REQUEST = r'\n\n(?=GET|POST|PUT)'
def parse(path, request_reg: str) -> List[str]:
    """Return the text of every match of ``request_reg`` found in a file.

    Args:
        path: File to open with the built-in ``open``.
        request_reg: Pattern searched with ``re.finditer`` under ``re.MULTILINE``.

    Returns:
        ``group(0)`` of each match, in order of appearance.
    """
    # NOTE(review): this redefines ``parse`` from the previous cell with a
    # different signature (no default for ``request_reg``) and different
    # behaviour (matches instead of split chunks); whichever cell runs last
    # wins. Consider renaming or removing one of them.
    # NOTE(review): no ``encoding=`` is passed here, so the platform default is
    # used, unlike ``read_file`` which decodes with ``ENCODING``.
    with open(path) as file:
        data = file.read()

    return [match.group(0) for match in re.finditer(request_reg, data, re.MULTILINE)]