In [1]:
import binascii
import json
import random
import torch
import torch.nn as nn

from json import JSONDecodeError

In [2]:
def bitstring_generator(words, word_size=8):
    return ''.join([random.choice(['0', '1']) for j in range(words) for i in range(word_size)])

In [3]:
def bin_to_string(binary_string):
    return binascii.unhexlify(format(int(binary_string, 2), 'x')).decode('utf8')

In [4]:
bin_to_string('0b110100001100101011011000110110001101111001000000111011101101111011100100110110001100100')

'hello world'

In [5]:
def string_to_bin(raw_string):
    return bin(int(binascii.hexlify(raw_string), 16))

In [6]:
string_to_bin(b' ')

'0b100000'

In [26]:
def string_padder(raw_string, pad_with=' ', pad_len=10000):
    string_length = len(raw_string)
    if string_length > pad_len:
        return raw_string
    pad_space_count = pad_len - string_length
    return raw_string + (pad_with * pad_space_count)

In [27]:
string_padder('apples', pad_len=20)

'apples              '

In [28]:
def validate_str_is_json(raw_string):
    try:
        json.loads(raw_string)
        return 1
    except JSONDecodeError as e:
        return -1

In [29]:
validate_str_is_json("{'a': 1}")

-1

In [30]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size

        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, _input, hidden):
        input_combined = torch.cat((_input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

In [31]:
len(string_to_bin(b'{"key_1": 3, "key_2": [1, 2, 3]}'))

257

In [59]:
def make_tensor(line, clip_factor=100000, input_type='bitstr'):
    pad_with = '100000' if input_type is 'bitstr' else ' '
    clip_line = string_padder(line[:clip_factor], pad_with=pad_with)
    if input_type == 'bitstr':
        safe_clip_line = clip_line.replace('0b', '')
    else:
        safe_clip_line = clip_line
    tensor = torch.zeros(clip_factor)
    print(len(safe_clip_line))
    for i, ch in enumerate(safe_clip_line):
        if input_type == 'bitstr':
            tensor[i] = int(ch)
        else:
            tensor[i] = ord(ch)

In [60]:
def random_dict_generator():
    empty_dict = {}
    primitives = ['str', 'int']
    key_chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890-_'
    random_key_count = random.randint(0, 20)
    for i in range(random_key_count):
        random_key_length = random.randint(0, 10)
        random_key = ''.join(random.sample(key_chars, random_key_length))
        random_value_type = random.choice(primitives)
        random_value_len = random.randint(0, 20)
        if random_value_type == 'str':
            empty_dict[random_key] = ''.join(random.sample(key_chars, random_value_len))
        else:
            empty_dict[random_key] = random_value_len
    return json.dumps(empty_dict)

In [61]:
def training_data(qty=100000, dist=0.6):
    training_data = []
    for i in range(qty * dist):
        random_dict = random_dict_generator().encode('utf8')
        training_data[i] = [make_tensor(string_to_bin(random_dict)), make_tensor(random_dict, input_type='raw_string')]
    return training_data

In [62]:
criterion = nn.NLLLoss()

learning_rate = 0.0005

def train(input_line_tensor, target_line_tensor):
    hidden = rnn.initHidden()
#     rnn.zero_grad()
    loss = 0
    print(input_line_tensor)
#     for i in range(input_line_tensor):
#         output, hidden = rnn(category_tensor, input_line_tensor[i], hidden)
#         l = criterion(output, target_line_tensor[i])
#         loss += l

#     loss.backward()

#     for p in rnn.parameters():
#         p.data.add_(-learning_rate, p.grad.data)

#     return output, loss.item() / input_line_tensor.size(0)

In [63]:
random_dict = random_dict_generator().encode('utf8')
train(make_tensor(string_to_bin(random_dict)), make_tensor(random_dict))

53433


TypeError: can't concat str to bytes

In [None]:
a = '0b1111011001000100011000101101101011100010100011000110101001000100011101000100000001100010011011100101100001000000010001001000001001100000010001000111010001000000011001000101100001000000010001001101111010100000101001001110000001000100011101000100000001100010010110000100000001000100110101101000001011011010101111101000010001000100011101000100000001000100101101001001010001110000110011101000011011110010110100100110100011000110110110101111000010101010101011000110011011011100010001000101100001000000010001000110100010111110111001101001011001101010101010000100010001110100010000000110000001011000010000000100010011010100111100101100011011011100110000101110011001101100101001000100010001110100010000000100010011100100110010101001110011000010010001000101100001000000010001001010110011011110101101000111000010001000100110100111001001000100011101000100000001100010011100101111101      '

In [152]:
a.replace('0b', '')

'1111011001000100011000101101101011100010100011000110101001000100011101000100000001100010011011100101100001000000010001001000001001100000010001000111010001000000011001000101100001000000010001001101111010100000101001001110000001000100011101000100000001100010010110000100000001000100110101101000001011011010101111101000010001000100011101000100000001000100101101001001010001110000110011101000011011110010110100100110100011000110110110101111000010101010101011000110011011011100010001000101100001000000010001000110100010111110111001101001011001101010101010000100010001110100010000000110000001011000010000000100010011010100111100101100011011011100110000101110011001101100101001000100010001110100010000000100010011100100110010101001110011000010010001000101100001000000010001001010110011011110101101000111000010001000100110100111001001000100011101000100000001100010011100101111101      '

In [191]:
ord('A')

65