In [None]:
from swagger_parser import SwaggerParser

# Exploratory cell: parse the Swagger file with swagger_parser and peek at an example.
parser = SwaggerParser(swagger_path='swagger.json')  # Init with file

# Get an example dict for the definition 'Pet'
parser.definitions_example.get('Pet')

In [150]:
import csv
import json
import os
import re
from collections import defaultdict

# Load the raw Swagger document into a plain dict.
# The "utf-8-sig" codec skips a leading UTF-8 byte-order mark if the file has one.
with open("swagger.json","r",encoding="utf-8-sig") as infile:
    swagger = json.load(infile)

In [184]:
from faker import Faker
from chance import chance
import random
# Single shared Faker instance; the property_generators lambdas call it for fake values.
fake = Faker()


In [244]:
def normalize_parameter_name(param_name):
    """Turn a camelCase identifier into lower-case, space-separated words.

    A space is inserted in front of every run of upper-case ASCII letters and
    the result is lower-cased, e.g. ``"photoUrls"`` -> ``"photo urls"``.
    A name that starts with a capital letter comes back with a leading space.

    Args:
        param_name: Any object; it is converted with ``str()`` first.

    Returns:
        The normalized name as a ``str``.
    """
    # Raw strings: "\g" is not a valid Python string escape, so the non-raw
    # literal triggers an invalid-escape warning on current interpreters.
    return re.sub(r"([A-Z]+)", r" \g<1>", str(param_name)).lower()

def normalize_parameter_value(param_value):
    """Return the printable form of a generated value.

    The value is stringified. If the text contains an underscore, only the
    segment between the first and second underscore is returned
    (``"my_org"`` -> ``"org"``, ``"a_b_c"`` -> ``"b"``); otherwise the text is
    returned unchanged.
    """
    text = str(param_value)
    segments = text.split("_")
    # A single segment means there was no underscore at all.
    return segments[1] if len(segments) > 1 else text

def random_prop(obj, obj_name):
    """Render a random, non-empty subset of a mapping's entries as text.

    Between 1 and ``len(obj)`` keys are drawn with replacement and then
    de-duplicated, so the number of rendered entries may be smaller than the
    number of draws. Each chosen entry is rendered as ``"<name> <value>,"``
    using ``normalize_parameter_name`` and ``normalize_parameter_value``.

    Args:
        obj: Mapping of property name to generated value.
        obj_name: Unused; kept so existing callers keep working.

    Returns:
        The concatenated entries, or ``""`` when ``obj`` is empty or ``None``.
    """
    if not obj:
        # random.randint(1, 0) raises ValueError, so bail out on empty input.
        return ""

    candidates = list(obj.keys())
    draw_count = random.randint(1, len(candidates))
    chosen_keys = {random.choice(candidates) for _ in range(draw_count)}
    return "".join(
        "{0} {1},".format(normalize_parameter_name(key), normalize_parameter_value(obj[key]))
        for key in chosen_keys
    )

def construct_param_string(parameter_val, parameter_name=None):
    """Join the items of a generated list into one comma-separated phrase.

    ``None`` items are skipped, dict items are rendered through
    ``random_prop``, and every other item is stringified. Duplicates are
    removed while keeping first-seen order. The last item is introduced with
    "and": ``["a", "b", "c"]`` -> ``"a, b, and c"``, ``["a", "b"]`` ->
    ``"a, and b"``.

    Args:
        parameter_val: Iterable of generated items.
        parameter_name: Optional name forwarded to ``random_prop`` for dict
            items. Defaults to ``None``.

    Returns:
        The joined phrase, or ``""`` when nothing is left to render.
    """
    rendered = []
    for list_item in parameter_val:
        if isinstance(list_item, dict):
            rendered.append(random_prop(list_item, parameter_name))
        elif list_item is not None:
            # Items may be ints or other scalars; they must be text to be joined.
            rendered.append(str(list_item))

    # dict.fromkeys de-duplicates and, unlike set(), keeps a stable order.
    rendered = list(dict.fromkeys(rendered))
    if not rendered:
        return ""
    if len(rendered) == 1:
        return rendered[0]
    return ", ".join(rendered[:-1]) + ", and " + rendered[-1]
    
def construct_sentence_from_generated_method_parameters(parameter_name, parameter_val, sentence):
    """Append the textual form of one generated parameter to ``sentence``.

    Handling depends on the value:

    * ``None`` or an empty list: nothing is appended.
    * list: ``"<name> <joined items>,"`` using ``construct_param_string``.
    * dict: one fragment per entry, each followed by ``", "``. Nested dicts
      go through ``random_prop``, nested lists through
      ``construct_param_string``, and anything else is rendered as
      ``" <name> <value>"``. Empty nested lists are skipped.
    * any other value: ``", <name> <value>"``.

    Args:
        parameter_name: Name of the parameter being rendered.
        parameter_val: The generated value.
        sentence: List of string fragments; mutated in place.

    Returns:
        The same ``sentence`` list.
    """
    if parameter_val is None:
        return sentence

    if isinstance(parameter_val, list):
        if len(parameter_val) == 0:
            return sentence

        param_string = construct_param_string(parameter_val)

        sentence.append(normalize_parameter_name(parameter_name))
        sentence.append(" ")
        sentence.append(normalize_parameter_value(param_string))
        sentence.append(",")
    elif isinstance(parameter_val, dict):
        for subparam_name, subparam_val in parameter_val.items():
            if isinstance(subparam_val, dict):
                sentence.append(random_prop(subparam_val, subparam_name))
            elif isinstance(subparam_val, list):
                if not subparam_val:
                    # Nothing to say about an empty list; mirror the top-level case.
                    continue
                # The joined string used to be computed and then thrown away.
                sentence.append(" {0} {1}".format(normalize_parameter_name(subparam_name),
                                                  construct_param_string(subparam_val)))
            else:
                sentence.append(" {0} {1}".format(normalize_parameter_name(subparam_name),
                                                  normalize_parameter_value(subparam_val)))
            sentence.append(", ")
    else:
        # Plain scalar (str, bool, int, or anything else that can be stringified).
        sentence.append(", ")
        sentence.append(normalize_parameter_name(parameter_name))
        sentence.append(" ")
        sentence.append(normalize_parameter_value(parameter_val))
    return sentence

class Command:
    """One API operation plus the value generators for its parameters."""

    def __init__(self, command_invocation):
        """Store the invocation text and its first sentence (text before the first ".")."""
        self.raw_command_invocation = command_invocation
        self.cleaned_command_invocation = command_invocation.split(".")[0]
        self.parameters = {}

    def set(self, parameter_name, generated_value):
        """Register the value generator (a zero-argument callable or None) for a parameter."""
        self.parameters[parameter_name] = generated_value

    def generate_utterance(self):
        """Yield ``(utterance, parameter_name, parameter_value)`` for each included parameter.

        A value is generated for every registered parameter, and each parameter
        is then kept with probability 0.5. The kept parameters are rendered into
        one utterance, which is yielded once per kept parameter. Nothing is
        yielded when no parameter is kept.
        """
        chosen = []
        for name, value_generator in self.parameters.items():
            value = None if value_generator is None else value_generator()
            if random.random() < 0.5:
                chosen.append((name, value))

        fragments = [self.cleaned_command_invocation, " with "]
        for name, value in chosen:
            fragments = construct_sentence_from_generated_method_parameters(name, value, fragments)

        text = "".join(fragments)
        # Tidy up separators left behind by the fragment-based construction.
        for old, new in (("  ", " "), (" , ", " "), (", ,", ",")):
            text = text.replace(old, new)

        for name, value in chosen:
            yield text, name, value


In [245]:
# Quick check of the camelCase-splitting regex used by normalize_parameter_name.
# Raw strings avoid the invalid "\g" string escape.
re.sub(r"([A-Z]+)", r" \g<1>", "photoUrls").lower()

'photo urls'

In [246]:
# Lookup table of zero-argument callables that each return one fake value.
# It is indexed by property name (generate_prop_from_name), by Swagger type
# (generate_prop_from_type / generate_array_from_type) and by string format
# (generate_prop_from_format).
# Keys such as "category_name" and "tags_name" presumably match prefixed nested
# property names (prefix + "_" + property) -- TODO confirm against the spec.
property_generators = {
    "category_name": lambda: "foo",
    "name": lambda: fake.name(),
    "organization": lambda: "my_org",
    "date-time": lambda: fake.date_time(),
    "username":lambda: chance.email(),
    "email": lambda: chance.email(),
    "id":lambda: fake.uuid4(),
    "quantity":lambda: random.randint(0,100),
    "int":lambda: random.randint(0,100),
    "integer":lambda: random.randint(0,100),
    "bool": lambda: chance.boolean(),
    "boolean": lambda: chance.boolean(),
    "firstName": lambda: fake.name().split(" ")[0],
    "lastName": lambda: fake.name().split(" ")[-1],
    "password": lambda: fake.password(),
    "phone": lambda: fake.phone_number(),
    "url": lambda: chance.url(),
    "photoUrls": lambda: chance.url(),
    "status": lambda: "dead", # NOTE(review): flagged as malformed by the original author -- presumably not a valid status value; confirm
    "tags_name": lambda: "some tag",
}


def generate_from_schema(schema_string, name):
    """Return a zero-argument factory that builds one instance of a referenced definition.

    Only the text after the last "/" of ``schema_string`` is used as the
    definition name. ``name`` is forwarded to ``generate`` as the prefix.
    """
    def build_instance():
        definition_name = schema_string.split("/")[-1]
        produced = list(generate(definition_name, name))
        return produced[0]

    return build_instance

def generate_array_from_schema(schema_string, name):
    """Return a factory producing a list of 0 to 10 instances of a referenced definition."""
    def build_instances():
        count = random.randint(0,10)
        instances = []
        for _ in range(count):
            instances.append(generate_from_schema(schema_string, name)())
        return instances

    return build_instances

def generate_from_enum(enum):
    """Return a factory that picks one random member of ``enum`` on each call."""
    def pick_member():
        return random.choice(enum)

    return pick_member

def generate_array_from_enum(enum):
    """Return a factory producing a list of 0 to 10 random members of ``enum``."""
    def pick_members():
        count = random.randint(0,10)
        members = []
        for _ in range(count):
            members.append(generate_from_enum(enum)())
        return members

    return pick_members

def generate_prop_from_name(name):
    """Return a factory for a value chosen by property name.

    When ``name`` has an entry in ``property_generators`` that entry is called;
    otherwise the factory returns ``name`` followed by a random integer in 0..10.
    """
    def produce():
        if name in property_generators:
            return property_generators[name]()
        return name + str(random.randint(0,10))

    return produce

def generate_array_from_name(name):
    """Return a factory producing a list of 0 to 10 values chosen by property name."""
    def produce_many():
        count = random.randint(0,10)
        values = []
        for _ in range(count):
            values.append(generate_prop_from_name(name)())
        return values

    return produce_many

def generate_prop_from_type(typ):
    """Return a factory that calls the ``property_generators`` entry for ``typ``.

    The lookup happens when the factory is called; an unknown ``typ`` raises
    ``KeyError`` at that point.
    """
    def produce():
        make_value = property_generators[typ]
        return make_value()

    return produce

def generate_prop_from_format(fmt):
    """Return a factory that calls the ``property_generators`` entry for ``fmt``.

    The lookup happens when the factory is called; an unknown ``fmt`` raises
    ``KeyError`` at that point.
    """
    def produce():
        make_value = property_generators[fmt]
        return make_value()

    return produce

def generate_array_from_type(typ):
    """Return a factory producing a one-element list from the ``property_generators`` entry for ``typ``."""
    def produce_single_item_list():
        make_value = property_generators[typ]
        return [make_value()]

    return produce_single_item_list

def transform_instance_from_schema_to_nested(generator, prop_name):
    """Generate an instance and prefix each of its keys with ``prop_name + "_"``.

    A new dict is built rather than renaming keys in place. In-place renaming
    overwrote data when a prefixed key already existed in the instance (for
    example keys ``"a"`` and ``"x_a"`` with prefix ``"x"``), and it mutated the
    object handed back by ``generator``.

    Args:
        generator: Zero-argument callable returning a mapping.
        prop_name: Prefix to put in front of every key.

    Returns:
        A new dict with the prefixed keys, in the original key order.
    """
    generated = generator()
    return {prop_name + "_" + key: value for key, value in generated.items()}
    
def get_parameter_generator_from_definition_property(prop_name, prop_val):
    """Pick the value generator for one property of a Swagger definition.

    The property schema is matched in this order: ``$ref``, ``enum``, arrays
    (of enum, string, integer or ``$ref`` items), strings (by ``format`` when
    present, otherwise by name), then integer/boolean types.

    Args:
        prop_name: Property name, possibly prefixed; used for name-based lookups.
        prop_val: The property's schema dict.

    Returns:
        A zero-argument callable producing a value for the property.

    Raises:
        Exception: With ``prop_name`` as its only argument when the schema
            shape is not supported. This includes schemas without a ``type``
            and arrays without usable ``items``, which used to surface as
            ``KeyError``.
    """
    if "$ref" in prop_val:
        generated = generate_from_schema(prop_val["$ref"], prop_name if prop_name != "body" else None)
        return lambda: transform_instance_from_schema_to_nested(generated, prop_name)
    if "enum" in prop_val:
        return generate_from_enum(prop_val["enum"])

    prop_type = prop_val.get("type")
    if prop_type == "array":
        items = prop_val.get("items") or {}
        if "enum" in items:
            return generate_array_from_enum(items["enum"])
        if items.get("type") == "string":
            return generate_array_from_name(prop_name)
        if items.get("type") == "integer":
            return generate_array_from_type(items["type"])
        if "$ref" in items:
            return generate_array_from_schema(items["$ref"], prop_name)
    elif prop_type == "string":
        if "format" in prop_val:
            return generate_prop_from_format(prop_val["format"])
        return generate_prop_from_name(prop_name)
    elif prop_type in ["int", "integer", "bool", "boolean"]:
        return generate_prop_from_type(prop_type)

    # Anything that reaches this point is a schema shape that is not handled.
    raise Exception(prop_name)

def get_command_parameter_generator_from_method_param(param):
    """Pick the value generator for one operation parameter.

    Body-style parameters (those with a ``schema``) are resolved through the
    referenced definition. Other parameters are resolved from their ``$ref``,
    ``type``, ``items`` and ``enum`` fields.

    Args:
        param: The parameter dict from the Swagger operation.

    Returns:
        A zero-argument callable producing a value, or ``None`` for parameters
        with no generated value (``file`` parameters and schemas whose
        ``format`` is ``"Stream"``).

    Raises:
        Exception: With ``param`` as its only argument when the parameter shape
            is not supported. Missing ``type``/``format``/``items`` fields now
            end up here instead of raising ``KeyError``.
    """
    if "schema" in param:
        schema = param["schema"]
        if "$ref" in schema:
            return generate_from_schema(schema["$ref"], param["name"] if param["name"] != "body" else None)
        if schema.get("type") == "array" and "$ref" in (schema.get("items") or {}):
            return generate_array_from_schema(schema["items"]["$ref"], param["name"])
        if schema.get("format") == "Stream":
            return None
        raise Exception(param)

    if "$ref" in param:
        return generate_from_schema(param["$ref"], param["name"])

    param_type = param.get("type", "")
    if param_type == "array":
        items = param.get("items") or {}
        if "enum" in items:
            return generate_array_from_enum(items["enum"])
        if items.get("type") == "string":
            return generate_array_from_name(param["name"])
    elif param_type == "string":
        if "enum" in param:
            return generate_from_enum(param["enum"])
        return generate_prop_from_name(param["name"])
    elif "int" in param_type or "bool" in param_type:
        # "integer" contains "int", so no separate equality check is needed.
        return generate_prop_from_type(param_type)
    elif param_type == "file":
        return None

    raise Exception(param)
        
# Registry: definition name -> generator function that yields one fake instance.
object_generators = {}

def get_generator_for_definition(definition_name):
    """Return a generator function that yields one fake instance of a definition.

    The definition is read from ``swagger['definitions']`` immediately; its
    ``properties`` are only read when the returned generator is iterated.
    """
    definition = swagger['definitions'][definition_name]

    def object_generator(prefix=None):
        """Yield a single dict with one generated value per property."""
        built = {}
        for prop_name, prop_schema in definition['properties'].items():
            # Nested objects look up generators by "<prefix>_<property>".
            lookup_name = prop_name if prefix is None else prefix + "_" + prop_name
            make_value = get_parameter_generator_from_definition_property(lookup_name, prop_schema)
            built[prop_name] = None if make_value is None else make_value()
        yield built

    return object_generator
    
# Register one generator function per definition found in the Swagger document.
for definition_name in swagger["definitions"]:
    object_generators[definition_name] = get_generator_for_definition(definition_name)

def generate(obj_name, prefix):
    """Start the registered generator for ``obj_name``, passing ``prefix`` through.

    Raises:
        Exception: If no generator is registered under ``obj_name``.
    """
    if obj_name in object_generators:
        make_instances = object_generators[obj_name]
        return make_instances(prefix)
    raise Exception("Object " + obj_name + " is unknown")

# Build one Command per operation (path + HTTP method) in the Swagger document.
commands = []
for path_name in swagger["paths"]:
    path = swagger["paths"][path_name]
    for method in path:
        operation = path[method]
        if not isinstance(operation, dict):
            # A path item can also hold non-operation entries, such as a shared
            # "parameters" list; those are not commands.
            continue
        # Prefer the summary; fall back to the description when it is missing or empty.
        invocation_utterance = operation.get("summary") or operation["description"]
        command = Command(invocation_utterance)
        # "parameters" is optional on an operation.
        for param in operation.get("parameters", []):
            if "in" not in param or param["in"] == "header":
                continue
            command.set(param["name"], get_command_parameter_generator_from_method_param(param))
        commands.append(command)
commands

[<__main__.Command at 0x7fd17d03ff98>,
 <__main__.Command at 0x7fd1481e2e10>,
 <__main__.Command at 0x7fd148195e10>,
 <__main__.Command at 0x7fd1481bdd30>,
 <__main__.Command at 0x7fd1481bdd68>,
 <__main__.Command at 0x7fd1481bde80>,
 <__main__.Command at 0x7fd1481bdeb8>,
 <__main__.Command at 0x7fd1481bdef0>,
 <__main__.Command at 0x7fd1481bdf28>,
 <__main__.Command at 0x7fd1481bdf60>,
 <__main__.Command at 0x7fd1481bdf98>,
 <__main__.Command at 0x7fd1481bdfd0>,
 <__main__.Command at 0x7fd148196048>,
 <__main__.Command at 0x7fd148196080>,
 <__main__.Command at 0x7fd1481960b8>,
 <__main__.Command at 0x7fd1481960f0>,
 <__main__.Command at 0x7fd148196128>,
 <__main__.Command at 0x7fd148196160>,
 <__main__.Command at 0x7fd148196198>,
 <__main__.Command at 0x7fd1481961d0>]

In [247]:
import uuid

def write(outfile, utterance, key, obj):
    """Write one CSV row per scalar value found in ``obj``.

    Dicts are walked with their own keys, lists reuse ``key`` for every item,
    and ``None`` values are skipped. Each row holds: a fresh UUID, the
    utterance, the key, the stringified value, and the start and end offsets of
    the value inside the utterance. When the value does not occur in the
    utterance both offsets are ``-1``.

    Rows are produced with ``csv.writer`` so that quotes inside the utterance
    or value are escaped. For values without quotes the output is the same as
    the previous hand-built format (all fields quoted, ``\\n`` line ending).

    Args:
        outfile: Writable text file object.
        utterance: The full generated utterance.
        key: Label for the value.
        obj: A scalar, ``None``, or a nested dict/list structure.
    """
    if obj is None:
        return
    if isinstance(obj, dict):
        for subkey, subobj in obj.items():
            write(outfile, utterance, str(subkey), subobj)
    elif isinstance(obj, list):
        for subobj in obj:
            write(outfile, utterance, str(key), subobj)
    else:
        value = str(obj)
        start = utterance.find(value)
        # find() returns -1 on a miss; adding len(value) to that gave a bogus end offset.
        end = start + len(value) if start >= 0 else -1
        row_writer = csv.writer(outfile, quoting=csv.QUOTE_ALL, lineterminator="\n")
        row_writer.writerow([uuid.uuid4(), utterance, key, value, start, end])
# Number of passes over all commands, and the share of rows sent to the test file.
num_instances = 100
test_fraction = 0.3

# open() does not create missing directories.
os.makedirs("out", exist_ok=True)

# Explicit UTF-8 so non-ASCII text in summaries or generated values does not
# depend on the platform's default encoding.
with open("out/commands_train.csv", "w", encoding="utf-8") as trainfile, \
        open("out/commands_test.csv", "w", encoding="utf-8") as testfile:
    for x in range(num_instances):
        for command in commands:
            for utterance, param_name, param_value in command.generate_utterance():
                outfile = testfile if random.random() < test_fraction else trainfile
                write(outfile, utterance, normalize_parameter_name(param_name), param_value)
