# Simulating Colours

In [None]:
# library to read / write json
#
import json

# library to plot data
#
import plotly.graph_objects as go

# use standard random library to generate random numbers
#
import random

# so we can always repeat the experiment
#
random.seed(221166)

# library to print structures 
#
import pprint as pp

## We will simulate data for a JSON that consists of FIVE dimensions - three RGB values, a colour label and a unique record ID 

We need to demonstrate the learning machine's capability of:

* Ability to explain the knowledge learned by the machine - so we will use RGB to facilitate visualisation
* Ability to cluster data that is similar - so we will add a small amount of noise to each of the RGB dimensions
* Ability to classify - so we will add a colour label that represents the RGB mix
* Ability to predict - so we will provide patterns in the sequence of colours
* Ability to detect anomalies - so we will change the pattern in the timeseries at two points

In [None]:
# reference RGB triples (red, green, blue) for every named colour
#
rgb = {
    'RED': (255, 0, 0),
    'ORANGE': (255, 129, 0),
    'YELLOW': (255, 233, 0),
    'GREEN': (0, 202, 14),
    'BLUE': (22, 93, 239),
    'PURPLE': (166, 1, 214),
    'BROWN': (151, 76, 2),
    'GREY': (128, 128, 128),
    'BLACK': (0, 0, 0),
    'TURQUOISE': (150, 255, 255),
}

# how many times each colour pattern is repeated back to back
#
sequence_count = 10

# the colour patterns; they are emitted one after another, so the time series
# changes pattern at the two boundaries between them
#
sequences = [
    ['ORANGE', 'BLUE', 'YELLOW'],
    ['PURPLE', 'GREEN', 'RED', 'GREY', 'BROWN', 'ORANGE', 'BLUE', 'YELLOW'],
    ['TURQUOISE', 'BLACK', 'PURPLE', 'GREEN', 'RED', 'GREY', 'BROWN', 'ORANGE', 'BLUE', 'YELLOW'],
]

# upper bound on the noise: 10% of the full 0-255 channel range
#
noise = 255 * 0.1


def _noisy_channel(base):
    """Return the channel value `base` shifted by random noise, kept within 0-255.

    A single random draw decides both the direction and the size of the shift:
    draws of 0.5 or more push the value up by `noise * draw`, smaller draws pull
    it down by `noise * draw`. The result is truncated to an int.
    """
    draw = random.random()
    shift = noise * draw
    if draw < 0.5:
        return int(max(base - shift, 0))
    return int(min(base + shift, 255))


# the data for training: one dictionary per record
#
simulated_data = []

record_id = 0
for pattern in sequences:
    for _ in range(sequence_count):
        for colour in pattern:
            base_r, base_g, base_b = rgb[colour]

            # label and unique id first, so they lead the key order of the row
            #
            record = {'COLOUR': colour, 'record_id': record_id}
            record_id += 1

            # noise is drawn in red, green, blue order so that a seeded
            # generator reproduces the same data
            #
            record['r'] = _noisy_channel(base_r)
            record['g'] = _noisy_channel(base_g)
            record['b'] = _noisy_channel(base_b)

            simulated_data.append(record)
            

# Preview the first two repetitions of each colour pattern.
#
# Every pattern occupies sequence_count * len(pattern) consecutive records, so
# the next pattern starts exactly that many records after the current one.
#
seq_start = 0
print('first sequence')
pp.pprint(simulated_data[seq_start : seq_start + len(sequences[0]) * 2])

seq_start = seq_start + sequence_count * len(sequences[0])
print('second sequence')
pp.pprint(simulated_data[seq_start : seq_start + len(sequences[1]) * 2])

seq_start = seq_start + sequence_count * len(sequences[1])
print('third sequence')
pp.pprint(simulated_data[seq_start : seq_start + len(sequences[2]) * 2])

print('total number of records', len(simulated_data))

## We will make use of Plotly to visualise what is going on

Below is a plot of the timeseries of colours we have created

In [None]:

# x y coordinates of the coloured blobs: one blob per record, placed in
# record order along the horizontal line y = 1.0
#
x = list(range(len(simulated_data)))
y = [1.0] * len(simulated_data)

# RGB colour string for each blob
#
colours = ['rgb({},{},{})'.format(rec['r'], rec['g'], rec['b']) for rec in simulated_data]

# text displayed when the mouse hovers over a blob
#
labels = ['r: {} g: {} b: {}'.format(rec['r'], rec['g'], rec['b']) for rec in simulated_data]

# x y coordinates of the lines connecting each blob to the previous one;
# every segment is followed by None, which separates it from the next segment
#
x_line = []
y_line = []
for idx in range(1, len(simulated_data)):
    x_line.extend([idx - 1, idx, None])
    y_line.extend([1.0, 1.0, None])

# the connecting lines
#
ts_lines = go.Scatter(
    x=x_line,
    y=y_line,
    mode='lines',
    line={'width': 1, 'color': 'grey'},
)

# the coloured points
#
ts_points = go.Scatter(
    x=x,
    y=y,
    hovertext=labels,
    mode='markers',
    marker={'size': 15, 'color': colours, 'opacity': 1.0},
)

# the figure draws the lines first and the points on top of them
#
fig = go.Figure(data=[ts_lines, ts_points])
fig.update_layout(width=2000, height=900, title={'text': 'Time Series'})
fig.show()

## And we can plot the data within the 3-dimensional RGB space

In [None]:

# specify the starting and ending point in the data to plot
# (end_idx is exclusive, as in a normal slice)
#
start_idx = 0
end_idx = 30

# the records that will be plotted
#
window = simulated_data[start_idx:end_idx]

# the X, Y, Z coordinates correspond to the RGB values of each blob
#
x = [rec['r'] for rec in window]
y = [rec['g'] for rec in window]
z = [rec['b'] for rec in window]

# marker colour and hover text are taken from the plotted record itself, so
# they stay correct when start_idx is not 0
#
colours = ['rgb({},{},{})'.format(rec['r'], rec['g'], rec['b']) for rec in window]
labels = ['{} Record_id: {}'.format(rec['COLOUR'], rec['record_id']) for rec in window]

# join consecutive blobs in 3d space; every segment is followed by None,
# which separates it from the next segment
#
x_line = []
y_line = []
z_line = []
for prev_rec, curr_rec in zip(window, window[1:]):
    x_line.extend([prev_rec['r'], curr_rec['r'], None])
    y_line.extend([prev_rec['g'], curr_rec['g'], None])
    z_line.extend([prev_rec['b'], curr_rec['b'], None])

ts_lines = go.Scatter3d(x=x_line, y=y_line, z=z_line, mode='lines', line=dict(width=1, color='grey'))
ts_points = go.Scatter3d(x=x, y=y, z=z, hovertext=labels, mode='markers', marker=dict(size=15, color=colours, opacity=0.7))
fig = go.Figure(data=[ts_lines, ts_points])
fig.update_layout(width=2000, height=900, title=dict(text='Time Series'))
fig.show()

## Save Data to JSON file

In [None]:
import json

# write every simulated record to a JSON file as a list of objects,
# pretty-printed with a 4-space indent
#
file_name = '../data/example_colours.json'
serialised = json.dumps(simulated_data, indent=4)
with open(file_name, 'w') as out_file:
    out_file.write(serialised)