## Convert MATLAB data files from S3 storage to a NumPy array
- Requires a local copy of `path_config.py` (which supplies the AWS keys via `mat_path`) in the source directory
- Saves the data as a NumPy file (`train.npy`) in the source directory

In [1]:
# Import libraries
from utils import convert_mat
from path_config import mat_path
import boto3
import numpy as np

Access S3 file storage and check buckets and objects needed

In [2]:
# Access AWS S3, where the MATLAB file is stored.
# The key pair comes from the local path_config module; the same credentials
# are used for both the low-level client and the high-level resource.
pubkey, seckey = mat_path['ACCESS_KEY'], mat_path['SECRET_KEY']
aws_credentials = {
    'aws_access_key_id': pubkey,
    'aws_secret_access_key': seckey,
}
client = boto3.client('s3', **aws_credentials)
s3 = boto3.resource('s3', **aws_credentials)
# Bucket listing through the client; the response is kept but not read by the
# cells shown in this notebook.
response = client.list_buckets()

In [3]:
# Show every bucket reachable with these credentials.
[available_bucket for available_bucket in s3.buckets.all()]

[s3.Bucket(name='aws-cloudtrail-logs-598991111123-85ddbc5c'),
 s3.Bucket(name='clockdrawingbattery'),
 s3.Bucket(name='clockdrawingimages'),
 s3.Bucket(name='clockdrawingimages1'),
 s3.Bucket(name='clockdrawingimages2'),
 s3.Bucket(name='clockdrawingimages3'),
 s3.Bucket(name='clockimages'),
 s3.Bucket(name='healthyhomes591'),
 s3.Bucket(name='teambrainiac'),
 s3.Bucket(name='test-bucket-clockids-aicrowd')]

In [4]:
# Select the project bucket and keep its name for the download call.
bucket = s3.Bucket('teambrainiac')
bucket_ = bucket.name
print("bucket name: ", bucket)

# Enumerate the objects in the bucket; the key of the first one is the file
# that gets downloaded.
obj_name = [summary for summary in bucket.objects.all()]
print("object", obj_name)
first_object = obj_name[0]
obj = first_object.key

bucket name:  s3.Bucket(name='teambrainiac')
object [s3.ObjectSummary(bucket_name='teambrainiac', key='all_data.mat')]


Download the .mat file and convert it to NumPy arrays using the `scipy.io` package

In [5]:
# Download the MATLAB file from the bucket to a local file, then hand that
# file to convert_mat. The local copy is not removed afterwards.
filename = "downloaded_mat.mat"
client.download_file(Bucket=bucket_, Key=obj, Filename=filename)
mat_data = convert_mat(filename)
mat_data

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN64, Created on: Wed Aug 15 16:04:59 2012',
 '__version__': '1.0',
 '__globals__': [],
 'train_data': array([[[[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
 
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
 
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 1],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
 
         ...,
 
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0,

### Take a look at the Training data and the Labels
- print shapes
- find number of voxels in the data
- reshape the 4D data into 2D array

In [6]:
# Shape of the training array stored under 'train_data'.
train_data_shape = mat_data['train_data'].shape
print("Train data shape:", train_data_shape)

Train data shape: (64, 64, 30, 180)


In [7]:
# (disabled) mat_data['test_data'].shape  -- TODO: confirm the file has a 'test_data' key, or delete this cell

In [8]:
# Shape of the label array stored under 'motor_labels'.
label_shape = mat_data['motor_labels'].shape
print("Label data shape: ", label_shape)

Label data shape:  (180, 1)


In [9]:
# The last axis of the training array is taken as the number of time points.
train_shape = mat_data['train_data'].shape
n_time_points = train_shape[-1]
n_time_points

180

In [10]:
# Every axis except the last one: the x, y, z extent of a single image.
image_dims = mat_data['train_data'].shape[:-1]
print("x, y, z, of the image: ", image_dims)

x, y, z, of the image:  (64, 64, 30)


In [11]:
# Voxel count = product of all axis lengths except the last (time) axis.
spatial_shape = mat_data['train_data'].shape[:-1]
num_voxels = np.prod(spatial_shape)
print("number of voxels: ", num_voxels)

number of voxels:  122880


In [12]:
# Collapse the leading axes into one so the result is a 2D array with
# num_voxels rows and n_time_points columns.
target_shape = (num_voxels, n_time_points)
voxels_by_time = np.reshape(mat_data['train_data'], target_shape)
voxels_by_time

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [3, 2, 1, ..., 2, 1, 1],
       [2, 1, 2, ..., 1, 1, 1],
       [1, 2, 1, ..., 2, 2, 1]], dtype=uint16)

In [13]:
# Confirm the 2D layout of the reshaped array.
np.shape(voxels_by_time)

(122880, 180)

### Save data locally

In [16]:
# Write the reshaped (voxels x time) array to 'train.npy' in the current
# working directory. The `with` block closes the file on exit, so no explicit
# f.close() is needed afterwards.
with open('train.npy', 'wb') as f:
    np.save(f, voxels_by_time)