# Code generator

Neural network that generates Golang code

In [1]:
import json
import numpy as np
import os
import tensorflow as tf

In [2]:
class Dataset(object):
    dataset_file = 'dataset.dt'
    dataset_folder = 'dataset'
    
    def __init__(self):
        dataset = self.load_dataset()
        self.encoded, self.vocab_to_int, self.int_to_vocab = dataset
        
    def load_dataset(self):
        if self.exists_clean_dataset():
            return self.load_clean_dataset()
        dataset = self.create_dataset()
        self.save_clean_dataset(dataset)
        return dataset

    def exists_clean_dataset(self):
        return os.path.isfile(self.dataset_file)

    def load_clean_dataset(self):
        with open(self.dataset_file, "r") as d:
            dataset = json.loads(d.read())
            return (
                np.array(dataset["encoded"]),
                dataset["vocab_to_int"],
                dataset["int_to_vocab"]
            )
        
    def create_dataset(self):
        file_names = self.get_file_names()
        content = self.get_content_from_files(file_names)
        return self.create_vocab(content)
            
    def get_file_names(self):
        files = os.listdir(self.dataset_folder)
        return [f for f in files if f.endswith('.go')]
    
    def get_content_from_files(self, file_names):
        content = ''
        for name in file_names:
            with open(os.path.join(self.dataset_folder, name), 'r') as f:
                content += f.read() + '\n'
        return content
    
    def create_vocab(self, content):
        vocab = set(content)
        vocab_to_int = {c: i for i, c in enumerate(vocab)}
        int_to_vocab = dict(enumerate(vocab))
        encoded = np.array([vocab_to_int[c] for c in content], dtype=np.int32)
        return encoded, vocab_to_int, int_to_vocab
    
    def save_clean_dataset(self, dataset):
        encoded, v_to_int, int_to_v = dataset
        with open(self.dataset_file, "w+") as d:
            d.write(json.dumps({
                'encoded' : [int(i) for i in encoded], 
                'vocab_to_int' : v_to_int,
                'int_to_vocab' : int_to_v
            }))

In [None]:
class Model(object):
    # TODO: Change to None
    batch_size = 200
    sequence_size = 100
    
    hidden_layer_size = 512
    num_layers = 2
    
    learning_rate = 0.0005
    
    checkpoint = 10
    chekpoints_folder = 'checkpoints'
    
    def __init__(self):
        self.init()
        self.build_graph()
        self.init_graph()
    
    def init(self):
        tf.reset_default_graph()
        self.dataset = Dataset()
        self.io_size = len(self.dataset.vocab_to_int)
        self.create_checkpoints_folder()
        
    def create_checkpoints_folder(self):
        if not os.path.exists(self.chekpoints_folder):
            os.makedirs(self.chekpoints_folder)

    def build_graph(self):
        self.build_inputs()
        lstm_outputs = self.build_lstm()
        logits = self.build_output(lstm_outputs)
        self.build_loss(logits)
        self.build_optimizer()
        
    def build_inputs(self):
        self.inputs = tf.placeholder(tf.int32, [None, self.sequence_size], name='inputs')
        self.targets = tf.placeholder(tf.int32, [None, self.sequence_size], name='targets')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        
    def build_lstm(self):
        def create_lstm():
            lstm = tf.contrib.rnn.BasicLSTMCell(self.hidden_layer_size)
            lstm = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=self.keep_prob)
            return lstm
        
        cell = tf.contrib.rnn.MultiRNNCell([create_lstm() for _ in range(self.num_layers)])
        self.initial_state = cell.zero_state(self.batch_size, tf.float32)
        x_one_hot = tf.one_hot(self.inputs, self.io_size)
        outputs, self.final_state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        return outputs
    
    def build_output(self, lstm_outputs):
        x = tf.reshape(lstm_outputs, [-1, self.hidden_layer_size])
        
        weights = tf.Variable(tf.truncated_normal((self.hidden_layer_size, self.io_size), stddev=0.1))
        bias = tf.Variable(tf.zeros(self.io_size))
        
        logits = tf.add(tf.matmul(x, weights), bias)
        
        self.prediction = tf.nn.softmax(logits)
        
        return logits
    
    def build_loss(self, logits):
        y_one_hot = tf.one_hot(self.targets, self.io_size)
        y_reshaped = tf.reshape(y_one_hot, logits.get_shape())
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
        self.loss = tf.reduce_mean(loss)
        
    def build_optimizer(self):
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.optimizer = optimizer.minimize(self.loss)
    
    def init_graph(self):
        self.saver = tf.train.Saver()
        self.session = tf.Session()
        init = tf.global_variables_initializer()
        self.session.run(init)
        self.restore_graph()
        
    def restore_graph(self):
        if self.checkpoint:
            self.saver.restore(self.session, self.checkpoint)
        
        
Model()

TypeError: object() takes no parameters

In [None]:
class Trainer(Model):
    pass

In [None]:
class Predcitor(Model):
    pass