In [104]:
#Code from: https://github.com/lsiddiqsunny/IoT-Security-Improper-Authentication/tree/main/CWE-369:%20Divide%20By%20Zero
#AST analysis of CWE 369 in C/C++ code examples from SO posts related to IoT

import os, re, json,sys
from pycparser import parse_file, c_ast
from pycparser.plyparser import Coord

In [105]:
folderPath = './Testdata/'
fileList = os.listdir(folderPath)
for i in fileList:
    print(i)
# This is not required if you've installed pycparser into
# your site-packages/ with setup.py
#
sys.path.extend(['.', '..'])

RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
RE_INTERNAL_ATTR = re.compile('__.*__')


class CJsonError(Exception):
    pass


def memodict(fn):
    """ Fast memoization decorator for a function taking a single argument """
    class memodict(dict):
        def __missing__(self, key):
            ret = self[key] = fn(key)
            return ret
    return memodict().__getitem__


@memodict
def child_attrs_of(klass):
    """
    Given a Node class, get a set of child attrs.
    Memoized to avoid highly repetitive string manipulation

    """
    non_child_attrs = set(klass.attr_names)
    all_attrs = set([i for i in klass.__slots__ if not RE_INTERNAL_ATTR.match(i)])
    return all_attrs - non_child_attrs


def to_dict(node):
    """ Recursively convert an ast into dict representation. """
    klass = node.__class__

    result = {}

    # Metadata
    result['_nodetype'] = klass.__name__

    # Local node attributes
    for attr in klass.attr_names:
        result[attr] = getattr(node, attr)

    # Coord object
    if node.coord:
        result['coord'] = str(node.coord)
    else:
        result['coord'] = None

    # Child attributes
    for child_name, child in node.children():
        # Child strings are either simple (e.g. 'value') or arrays (e.g. 'block_items[1]')
        match = RE_CHILD_ARRAY.match(child_name)
        if match:
            array_name, array_index = match.groups()
            array_index = int(array_index)
            # arrays come in order, so we verify and append.
            result[array_name] = result.get(array_name, [])
            if array_index != len(result[array_name]):
                raise CJsonError('Internal ast error. Array {} out of order. '
                    'Expected index {}, got {}'.format(
                    array_name, len(result[array_name]), array_index))
            result[array_name].append(to_dict(child))
        else:
            result[child_name] = to_dict(child)

    # Any child attributes that were missing need "None" values in the json.
    for child_attr in child_attrs_of(klass):
        if child_attr not in result:
            result[child_attr] = None

    return result


def to_json(node, **kwargs):
    """ Convert ast node to json string """
    return json.dumps(to_dict(node), **kwargs)


def file_to_dict(filename):
    ast = parse_file(filename, use_cpp=True,
            cpp_path='clang',
            cpp_args=['-E', r'-Iutils/fake_libc_include'])
    return to_dict(ast)


def file_to_json(filename, **kwargs):
    """ Load C file into json string representation of ast """
    ast = parse_file(filename, use_cpp=True)
    return to_json(ast, **kwargs)


def _parse_coord(coord_str):
    """ Parse coord string (file:line[:column]) into Coord object. """
    if coord_str is None:
        return None

    vals = coord_str.split(':')
    vals.extend([None] * 3)
    filename, line, column = vals[:3]
    return Coord(filename, line, column)


def _convert_to_obj(value):
    """
    Convert an object in the dict representation into an object.
    Note: Mutually recursive with from_dict.

    """
    value_type = type(value)
    if value_type == dict:
        return from_dict(value)
    elif value_type == list:
        return [_convert_to_obj(item) for item in value]
    else:
        # String
        return value


def from_dict(node_dict):
    """ Recursively build an ast from dict representation """
    class_name = node_dict.pop('_nodetype')

    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Use keyword parameters, which works thanks to beautifully consistent
    # ast Node initializers.
    return klass(**objs)


def from_json(ast_json):
    """ Build an ast from json string representation """
    return from_dict(json.loads(ast_json))

post-1671941.c
post-1787118.c
post-1787123.c
post-1851406.cpp
post-1851473.cpp
post-1851526.cpp
post-1994885.cpp
post-2018284.c
post-2527455.cpp
post-2527458.cpp
post-2527460.cpp
post-2527463.cpp
post-2527556.cpp
post-2695372.c
post-2889764-2.c
post-2889764-3.c
post-2889764.c
post-3031433.cpp
post-3032588.cpp
post-3069960.cpp
post-3198143.cpp
post-3270981.cpp
post-3271018.cpp
post-3454897.cpp
post-3463862.cpp
post-3497735.c
post-3497740-2.c
post-3497740-3.c
post-3497740.c
post-3497787.c
post-3497863-2.c
post-3497863.c
post-3497868.c
post-3543533-2.cpp
post-3543533.cpp
post-3839812.c
post-824083.c
post-899226.c
post-949915-2.cpp
post-949915.cpp
post-949916.cpp
post-949981.cpp
post-950307.cpp
post-951046.cpp
post-982213.c


In [106]:
def traverse(json_object):
    if type(json_object) is dict and json_object:
        checkOp = False
        Left = {}
        Right = {}
        Coord = ""
        for key in json_object:
            if key == "op" and json_object[key] == "/":
                checkOp = True
            if checkOp and key == "coord":
                Coord = json_object[key]
            if checkOp and key == "left":
                Left = json_object[key]
            if checkOp and key == "right":
                Right = json_object[key]
            traverse(json_object[key])
        if checkOp:                
            if Right['_nodetype'] == 'Constant' and Right['value'] == '0':
                print('Possible division by zero error at',Coord)
            else:
                print('Possible division by zero error at',Coord)
    elif type(json_object) is list and json_object:
        for item in json_object:
            traverse(item)

In [107]:
def comment_remover(text):
    def replacer(match):
        s = match.group(0)
        if s.startswith('/'):
            return ""
        else:
            return s
    pattern = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.DOTALL | re.MULTILINE
    )
    return re.sub(pattern, replacer, text)

In [108]:
for i in fileList:
    print(i)
    with open('./Testdata/'+i) as f:
        contents = f.read()
        with open('./Testdata/'+i, 'w') as f:
            f.write(comment_remover(contents))
    ast_dict = file_to_dict('./Testdata/'+i)
    ast = from_dict(ast_dict)
    jsonString = to_json(ast,indent=4)
    jsonObject = json.loads(jsonString)
    with open("./Output/"+i+".json", "w") as outfile:
        json.dump(jsonString, outfile)
    traverse(jsonObject)

post-1671941.c
post-1787118.c
Possible division by zero error at ./Testdata/post-1787118.c:3:22
post-1787123.c
post-1851406.cpp


ParseError: ./Testdata/post-1851406.cpp:4:11: before: =