## Clase TSTR


In [1]:
import inspect
from typing import *

### Métodos proxy definidos a *mano*

In [12]:
# The base of this code comes from The Fuzzing Book.
class tstr(str):
    """Wrapper for strings, saving taint information.

    Besides the taint, every instance keeps a `trace`: a list of
    `[function, relative_line]` entries recording where the string (or a
    string derived from it through `create()`) was operated on.
    `relative_line` is the offset of the operation from the first source
    line of `function`.
    """

    def __new__(cls, value, *args, **kw):
        """Create a tstr() instance. Used internally."""
        # `str` is immutable, so the text itself has to be set here;
        # the extra arguments are consumed by `__init__`.
        return str.__new__(cls, value)

    def __init__(self, value: Any, taint: Any = None, **kwargs) -> None:
        """Constructor.
        `value` is the string value the `tstr` object is to be constructed from.
        `taint` is an (optional) taint to be propagated to derived strings."""
        self.taint: Any = taint
        # Locations at which this string, or one derived from it, was used.
        self.trace = []
        # The `tstr` this one was derived from (set by `create()`), if any.
        self.parent = None

    def __repr__(self):
        """Return the `str` representation as a `tstr` with the same taint."""
        return tstr(str.__repr__(self), taint=self.taint)

    def __str__(self) -> str:
        """Convert to string"""
        return str.__str__(self)

    def clear_taint(self):
        """Remove the taint and the recorded trace; return `self`."""
        self.taint = None
        self.trace = []
        return self

    def has_taint(self):
        """Check if taint is present"""
        return self.taint is not None

    def create(self, s):
        """Return `s` as a new `tstr` with this taint and `self` as parent."""
        new_tstr = tstr(s, taint=self.taint)
        new_tstr.parent = self
        return new_tstr

    def __radd__(self, value):
        """Return value + self, as a `tstr` object"""
        result = self.create(value + str(self))
        self.__log_trace()
        return result

    def __log_trace(self):
        """Record the location of the code that is operating on this string."""
        frame = inspect.currentframe()
        if frame is None:
            # Not every Python implementation supports stack frames.
            return
        try:
            # Start at the caller: this frame is the logger itself.
            self.__log_trace_recursive(frame.f_back)
        finally:
            # Drop the frame reference so it does not keep the stack alive.
            del frame

    def __log_trace_recursive(self, frame):
        """Walk outwards from `frame` and log the first resolvable function.

        A frame is resolvable when a truthy object with the frame's code name
        is found in the frame's locals or globals and `inspect` can retrieve
        its source. Nothing is logged if no frame qualifies.
        """
        # Iterative on purpose: recursing once per frame could overflow the
        # recursion limit when the traced program is itself deeply nested.
        while frame is not None:
            try:
                function_name = frame.f_code.co_name
                function_ref = (frame.f_locals.get(function_name)
                                or frame.f_globals.get(function_name))
                if function_ref:
                    function_starting_line = inspect.getsourcelines(function_ref)[1]
                    relative_line = frame.f_lineno - function_starting_line
                    self.append_to_original([function_ref, relative_line])
                    return
            except Exception:
                # Best effort: tracing must never break the traced program
                # (e.g. no source available, or the name is not a function).
                pass
            frame = frame.f_back

    def append_to_original(self, value):
        """Append `value` to the trace of this string and of all its ancestors."""
        # Compare against None: a parent that is an empty string is falsy
        # but must still receive the entry.
        if self.parent is not None:
            self.parent.append_to_original(value)
        self.trace.append(value)

    @staticmethod
    def make_str_wrapper(fun, name):
        """Make `fun` (a `str` method) a method in `tstr`.

        The returned proxy calls `fun`, logs the call site in the trace and
        returns a `str` result as a `tstr` derived from `self`. Results that
        are not strings (e.g. the `bytes` returned by `encode`) are returned
        unchanged. `name` becomes the proxy's `__name__`.
        """
        def proxy(self, *args, **kwargs):
            res = fun(self, *args, **kwargs)
            self.__log_trace()
            if not isinstance(res, str):
                # Wrapping would replace the value by its textual repr.
                return res
            return self.create(res)

        proxy.__name__ = name
        if hasattr(fun, '__doc__'):
            # Copy docstring
            proxy.__doc__ = fun.__doc__

        return proxy

### Métodos Proxy Autogenerados

In [13]:
def informationflow_init_1():
    """Install a proxy on `tstr` for each of the `str` methods listed below.

    Every proxy is built by `tstr.make_str_wrapper` from the original
    `str` method, so running this function again rebuilds the same proxies.
    """
    proxied_methods = (
        '__format__', '__mod__', '__rmod__', '__getitem__',
        '__add__', '__mul__', '__rmul__',
        'capitalize', 'casefold', 'center', 'encode',
        'expandtabs', 'format', 'format_map', 'join',
        'ljust', 'lower', 'lstrip', 'replace',
        'rjust', 'rstrip', 'strip', 'swapcase', 'title', 'translate', 'upper',
    )
    for method_name in proxied_methods:
        original = getattr(str, method_name)
        wrapper = tstr.make_str_wrapper(original, method_name)
        setattr(tstr, method_name, wrapper)
# Install the proxies right away, then register the installer so that
# `initialize()` can run it again on demand.
informationflow_init_1()
INITIALIZER_LIST = [informationflow_init_1]


def initialize():
    """Call every function in `INITIALIZER_LIST`, in list order."""
    for setup in INITIALIZER_LIST:
        setup()


initialize()

### Tests

In [14]:
# Code: test1
# A single concatenation on a tainted string inside `doo`.
# The trace stores line numbers relative to the `def` line, so the layout
# of `doo` (including the `# line N` marker comments) must stay as it is.
def doo(a):
  # line 1
  return a +"a" # line 2


# NOTE(review): `input` shadows the builtin of the same name from here on.
input = tstr("hello", taint ='LOW')
doo(input)
# Expected: a single entry pointing at relative line 2 of `doo`.
input.trace

[[<function __main__.doo(a)>, 2]]

In [15]:
# Code: test2
# A call chain foo -> bar -> imprimir operating on the string and on a
# slice derived from it. Operations on the derived slice are also recorded
# in the trace of the original string. Line numbers in the trace are
# relative to each `def` line, so the layout of these functions (including
# the `# line N` marker comments) must stay as it is.

# Prints its argument after a prefix; the concatenation is the traced operation.
def imprimir(a):
  print("value: " + a) # line 1

# Slices the string (line 1) and concatenates it with the slice (line 4).
def bar(a):
  c = a[:1]  # line 1
  imprimir(c)     # line 2
  # line 3
  imprimir(a + c) # line 4

# Only forwards to `bar`; performs no string operation itself.
def foo(a):
  # line 1
  return bar(a) # line 2


input2 = tstr("hello,")
foo(input2)
# Expected entries, in order: bar line 1, imprimir line 1, bar line 4, imprimir line 1.
input2.trace

value: h
value: hello,h


[[<function __main__.bar(a)>, 1],
 [<function __main__.imprimir(a)>, 1],
 [<function __main__.bar(a)>, 4],
 [<function __main__.imprimir(a)>, 1]]

In [16]:
# Code: test3
# Passes a tstr to a standard-library function to see which library code
# operates on the string.
from urllib.parse import urlparse

url_string = tstr("http://www.google.com/search?q=fuzzing")
urlparse(url_string)

# The recorded entries point into `urllib.parse` (see the output below).
url_string.trace

[[<function urllib.parse.urlsplit(url, scheme='', allow_fragments=True)>, 24],
 [<function urllib.parse.urlsplit(url, scheme='', allow_fragments=True)>, 28],
 [<function urllib.parse.urlsplit(url, scheme='', allow_fragments=True)>, 28],
 [<function urllib.parse.urlsplit(url, scheme='', allow_fragments=True)>, 28],
 [<function urllib.parse.urlsplit(url, scheme='', allow_fragments=True)>, 41],
 [<function urllib.parse.urlsplit(url, scheme='', allow_fragments=True)>, 45],
 [<function urllib.parse.urlsplit(url, scheme='', allow_fragments=True)>, 45],
 [<function urllib.parse.urlsplit(url, scheme='', allow_fragments=True)>, 45],
 [<function urllib.parse.urlsplit(url, scheme='', allow_fragments=True)>, 47],
 [<function urllib.parse._splitnetloc(url, start=0)>, 6],
 [<function urllib.parse._splitnetloc(url, start=0)>, 6]]

In [17]:
import re


# Uses a tstr as the regular-expression pattern: the operations the regex
# parser performs on the pattern end up in the pattern's trace.
email_pattern = tstr(r'\S+@\S+\.\S+')
text = "Contact us at support@example.com or info@example.org"

re.findall(email_pattern, text)

# The recorded entries point into the regex parser (see the output below).
email_pattern.trace

[[<function sre_parse.parse(str, flags=0, state=None)>, 3],
 [<function sre_parse.parse(str, flags=0, state=None)>, 3],
 [<function sre_parse.parse(str, flags=0, state=None)>, 3],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)>,
  18],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)>,
  31],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)>,
  18],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)>,
  31],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)>,
  18],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)>,
  18],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)>,
  18],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)>,
  31],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)>,
  18],
 [<function sre_parse._parse(source, state, verbose, nested, first=False)

## Testing the HTML report

In [45]:
from collections import defaultdict
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import *
import inspect

def to_html(trace):
    """Render the functions referenced by a trace as annotated HTML.

    `trace` is an iterable of `(function, relative_line)` pairs, where
    `relative_line` is a 0-based offset into the source lines of `function`.

    Each function whose source can be retrieved is rendered once, in order
    of first appearance, as a `<code>` block followed by `<br>`. Every
    traced line is prefixed with `[k]`, where `k` is the 1-based position of
    the first interaction with that line among the interactions with that
    function. Functions without retrievable source (e.g. built-ins) are
    skipped, as are offsets outside the function's source.

    Returns the HTML as a string; an empty trace yields an empty string.
    """
    # Local import so the block does not depend on the file's import cell.
    from html import escape

    # function -> {"source_lines": [...], "interactions": [(k, offset), ...]}
    functions_code = {}
    for function, relative_line in trace:
        entry = functions_code.get(function)
        if entry is None:
            try:
                source_lines, _ = inspect.getsourcelines(function)
            except (OSError, TypeError):
                # No source available (built-in, or source file not found).
                continue
            entry = functions_code[function] = {
                "source_lines": source_lines,
                "interactions": [],
            }
        interactions = entry["interactions"]
        interactions.append((len(interactions) + 1, relative_line))

    blocks = []
    for data in functions_code.values():
        # Escape first so that `<`, `>` and `&` in the source are shown
        # literally; only then insert the markup used for layout.
        lines = [
            escape(line, quote=False).replace("\n", "<br>").replace("  ", "&emsp;")
            for line in data["source_lines"]
        ]
        marked = set()
        for interaction_number, line_number in data["interactions"]:
            if not 0 <= line_number < len(lines):
                # Offset does not fall inside this function's source.
                continue
            if line_number in marked:
                # Keep only the first interaction number on a line.
                continue
            marked.add(line_number)
            lines[line_number] = f"[{interaction_number}] {lines[line_number]}"
        blocks.append("<code>" + "".join(lines) + "</code><br>")

    return "".join(blocks)

In [46]:
from IPython.display import HTML, display

# Same call chain as in test2, used here to feed the HTML report.
# The report marks lines by their offset from each `def` line, so the
# layout of these functions (including the `# line N` marker comments)
# must stay as it is.

# Prints its argument after a prefix; the concatenation is the traced operation.
def imprimir(a):
  print("value: " + a) # line 1

# Slices the string (line 1) and concatenates it with the slice (line 4).
def bar(a):
  c = a[:1]  # line 1
  imprimir(c)     # line 2
  # line 3
  imprimir(a + c) # line 4

# Only forwards to `bar`; performs no string operation itself.
def foo(a):
  # line 1
  return bar(a) # line 2


html_input = tstr("hello sdfgsdf")
foo(html_input)


# Render the collected trace as HTML inside the notebook.
display(HTML(to_html(html_input.trace)))

value: h
value: hello sdfgsdfh
