# Load, normalise and save dataset

This notebook combines different trials and creates normalised training and testing datasets.

In [None]:
# Mount Google Drive so that the dataset files under /content/drive can be
# read and written by the cells below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [None]:
#import libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot
import random

**Load Images and Targets, save samples from different trials together**

In [None]:
# Build the combined image stack: a block of all-zero samples followed by the
# trial files skull_images_0.npz ... skull_images_7.npz, then scale the whole
# stack by its largest absolute value so that every value lies in [-1, 1].
DATA_DIR = '/content/drive/My Drive/ML_Ultrasound_Project/UROPdata'
N_TRIALS = 8

# Start with zeros to supplement dataset
additional_zeros = 9000
image_chunks = [np.zeros((additional_zeros, 9, 23, 23))]
n_images = additional_zeros
print(image_chunks[0].shape)
for i in range(N_TRIALS):
  # The context manager closes the .npz archive once the array has been read.
  with np.load('{}/skull_images_{}.npz'.format(DATA_DIR, i)) as trial_file:
    new_images = trial_file['a']
  image_chunks.append(new_images)
  n_images += new_images.shape[0]

  # Running shape of the stack so far.
  print((n_images,) + new_images.shape[1:])

# Concatenate once at the end instead of re-copying the growing array on
# every loop iteration.
images = np.concatenate(image_chunks, axis=0)
del image_chunks  # only the stacked copy is needed from here on

final_max, final_min = np.max(images), np.min(images)
norm = np.max(np.abs(images))   # also used later to scale the labels
if norm == 0:
  # Dividing by zero would silently fill the dataset with NaN.
  raise ValueError('All image values are zero; cannot normalise by the maximum absolute value.')
images /= norm    # Normalise data (in place, avoids a second full-size copy)

print(final_max, final_min, norm)
print(np.max(images), np.min(images))

(9000, 9, 23, 23)
(43020, 9, 23, 23)
(78750, 9, 23, 23)
(115710, 9, 23, 23)
(141930, 9, 23, 23)
(168450, 9, 23, 23)
(205710, 9, 23, 23)
(238440, 9, 23, 23)
(271680, 9, 23, 23)
3.005867554861652e-08 -2.37463630980983e-08 3.005867554861652e-08
1.0 -0.7900003132104485


In [None]:
# Persist the normalised image stack. The array is passed positionally, so it
# is stored under the key 'arr_0', and NumPy appends '.npz' to the file name.
artificial_inputs_path = '/content/drive/My Drive/ML_Ultrasound_Project/UROPdata/artificial_inputs'
np.savez_compressed(artificial_inputs_path, images)

In [None]:
# Build the label stack in the same order as the images: zero labels for the
# zero-padding samples, then skull_labels_0.npz ... skull_labels_7.npz.
# Relies on `additional_zeros` and `norm` defined by the image-loading cell.
DATA_DIR = '/content/drive/My Drive/ML_Ultrasound_Project/UROPdata'
N_TRIALS = 8

label_chunks = [np.zeros((additional_zeros, 11, 1))]
n_labels = additional_zeros
print(label_chunks[0].shape)
for i in range(N_TRIALS):
  # The context manager closes the .npz archive once the array has been read.
  with np.load('{}/skull_labels_{}.npz'.format(DATA_DIR, i)) as trial_file:
    new_labels = trial_file['a']
  label_chunks.append(new_labels)
  n_labels += new_labels.shape[0]

  # Running shape of the stack so far.
  print((n_labels,) + new_labels.shape[1:])

# Concatenate once at the end instead of re-copying the growing array on
# every loop iteration.
labels = np.concatenate(label_chunks, axis=0)
del label_chunks

# The labels are divided by the same `norm` that was computed from the images,
# so inputs and targets share a single scale factor.
labels /= norm

print(np.max(labels), np.min(labels))

(9000, 11, 1)
(43020, 11, 1)
(78750, 11, 1)
(115710, 11, 1)
(141930, 11, 1)
(168450, 11, 1)
(205710, 11, 1)
(238440, 11, 1)
(271680, 11, 1)
0.3309766019975735 -0.2603016393907643


In [None]:
# Persist the scaled label stack. The array is passed positionally, so it is
# stored under the key 'arr_0', and NumPy appends '.npz' to the file name.
artificial_labels_path = '/content/drive/My Drive/ML_Ultrasound_Project/UROPdata/artificial_labels'
np.savez_compressed(artificial_labels_path, labels)

In [None]:
# Sanity check: the images and labels are paired one-to-one further down, so
# both stacks must contain the same number of samples. Fail loudly here rather
# than letting a mismatch be silently truncated when the pairs are built.
print(images.shape, labels.shape)
assert images.shape[0] == labels.shape[0], (
    'images and labels have different sample counts: {} vs {}'.format(
        images.shape[0], labels.shape[0]))

(271680, 9, 23, 23) (271680, 11, 1)


**Load wave dataset**



In [None]:
# Reload the two saved archives and pair them up sample by sample:
# wavedataset[k, 0] is the k-th input array and wavedataset[k, 1] its label.
DATA_DIR = '/content/drive/My Drive/ML_Ultrasound_Project/UROPdata'

# Context managers close the .npz archives once the arrays have been read.
with np.load(DATA_DIR + '/artificial_inputs.npz') as inputs_file:
  saved_inputs = inputs_file['arr_0']
with np.load(DATA_DIR + '/artificial_labels.npz') as labels_file:
  saved_labels = labels_file['arr_0']

# zip() stops at the shorter sequence, so check the counts explicitly instead
# of silently dropping unpaired samples.
if len(saved_inputs) != len(saved_labels):
  raise ValueError('Input/label count mismatch: {} inputs vs {} labels'.format(
      len(saved_inputs), len(saved_labels)))

# Inputs and labels have different shapes, so a pair can only be held in an
# object array. Allocate it explicitly and fill it element by element:
# letting NumPy infer a ragged array from nested sequences is deprecated and
# raises ValueError in NumPy >= 1.24.
wavedataset = np.empty((len(saved_inputs), 2), dtype=object)
for k, (input_sample, label_sample) in enumerate(zip(saved_inputs, saved_labels)):
  wavedataset[k, 0] = input_sample
  wavedataset[k, 1] = label_sample
print(wavedataset.shape)

(271680, 2)


  return array(a, dtype, copy=False, order=order)


**Split into train and test data**

In [None]:
# Shuffle data
# Samples are shuffled in blocks of SAMPLES_PER_GROUP consecutive rows: the
# order of the blocks changes, the rows inside a block stay together.
# NOTE(review): presumably each block of 30 rows belongs to one simulation/trial
# and must not be split up — confirm.
SAMPLES_PER_GROUP = 30
SHUFFLE_SEED = 0  # fixed seed so that every run produces the same ordering

grouped_dataset = wavedataset.reshape(-1, SAMPLES_PER_GROUP, 2)
# Index with a seeded permutation instead of calling np.random.shuffle: the
# result is a new array, so `wavedataset` is left untouched and re-running
# this cell always yields the same ordering.
group_order = np.random.RandomState(SHUFFLE_SEED).permutation(grouped_dataset.shape[0])
shuffle_array = grouped_dataset[group_order]
print(shuffle_array.shape)

(9056, 30, 2)


In [None]:
# Optional - reduce size of dataset
# Draw a random subset of whole groups so that the flattened result contains
# TARGET_SAMPLES rows.
TARGET_SAMPLES = 180000
SUBSAMPLE_SEED = 1  # fixed seed so that the same subset is drawn on every run

no_samples = shuffle_array.shape[0]   # number of groups, not individual rows
print(no_samples)
samples_per_group = shuffle_array.shape[1]
# replace=False is essential: choice() samples WITH replacement by default,
# which would put the same group into the subset several times and let
# identical samples end up on both sides of the later train/test split.
# With replace=False, NumPy raises ValueError if more groups are requested
# than exist.
random_indices = np.random.RandomState(SUBSAMPLE_SEED).choice(
    no_samples, size=TARGET_SAMPLES // samples_per_group, replace=False)
print(random_indices.shape)
reduced_dataset = shuffle_array[random_indices, :]
# Flatten the groups back into one row per (input, label) pair.
reduced_dataset = reduced_dataset.reshape(-1, 2)
print(reduced_dataset.shape)

9056
(6000,)
(180000, 2)


In [None]:
# The first N_TEST_SAMPLES rows become the test set and all remaining rows the
# training set. With the 180000-row reduced dataset this is a 42000 / 138000
# split, i.e. about 77% training data rather than an exact 80/20 split.
# 42000 is a multiple of the 30-row group size used when shuffling, so the cut
# falls on a group boundary.
N_TEST_SAMPLES = 42000
test_data, training_data = reduced_dataset[:N_TEST_SAMPLES], reduced_dataset[N_TEST_SAMPLES:]

print(training_data.shape)
print(test_data.shape)

(138000, 2)
(42000, 2)


In [None]:
# Optional visual spot-check (disabled). For every 30000th training row it
# shows the first 2-D slice of the input array and then the label array.
# Uncomment to run; requires matplotlib.pyplot imported as plt.
# for i in range (0, len(training_data), 30000):
#   print('Train ' + str(i))
#   plt.axis('off')
#   plt.imshow(training_data[i][0][0])
#   plt.show()
#   plt.axis('off')
#   plt.imshow(training_data[i][1])
#   plt.show()

In [None]:
# Write the training split to Drive. NumPy appends '.npz' to the file name and
# stores the positionally passed array under the key 'arr_0'.
# training_data holds (input, label) pairs as an object array, so NumPy
# pickles it; reading it back needs np.load(..., allow_pickle=True).
# Alternative output name (disabled):
# np.savez_compressed('/content/drive/My Drive/ML_Ultrasound_Project/UROPdata/training_data_all_zeroed', training_data)
training_data_path = '/content/drive/My Drive/ML_Ultrasound_Project/UROPdata/artificial_training_data'
np.savez_compressed(training_data_path, training_data)

In [None]:
# Optional visual spot-check (disabled). For every 1000th test row it shows
# the first 2-D slice of the input array.
# Uncomment to run; requires matplotlib.pyplot imported as plt.
# for i in range (0,len(test_data),1000):
#   print('Test ' + str(i))
#   plt.axis('off')
#   plt.imshow(test_data[i][0][0])
#   plt.show()

In [None]:
# Write the test split to Drive. NumPy appends '.npz' to the file name and
# stores the positionally passed array under the key 'arr_0'.
# test_data holds (input, label) pairs as an object array, so NumPy pickles
# it; reading it back needs np.load(..., allow_pickle=True).
# Alternative output name (disabled):
# np.savez_compressed('/content/drive/My Drive/ML_Ultrasound_Project/UROPdata/test_data_all_zeroed', test_data)
test_data_path = '/content/drive/My Drive/ML_Ultrasound_Project/UROPdata/artificial_test_data'
np.savez_compressed(test_data_path, test_data)