# Dependance

In [8]:
class Scanner:
  import numpy
  
  def __init__(self, type):
    self.type = type
    self.tokens = None;

  def scan(self, content):
    return {
        'csv': lambda x: self.scanCSV(x)
    }[self.type](content)
    
  def scanCSV(self, content):
    lines = content.split()
    items = []

    for line in lines:  
      items.append( line.split(',') )

    tokens = numpy.array(items)
    return tokens


In [9]:
class Parser:
  import numpy
  
  def __init__(self, methods):
    self.methods = methods
  
  def parse(self, slices):
    methods = self.methods
    items = []

    for i in range( len(methods) ):
      method = methods[i]
      matrix = numpy.array(slices[i]).transpose()

      for row in matrix:
        newrow = list( map(lambda x: method(x), row) )
        items.append( newrow )

    datas = numpy.array(items).transpose()
    return datas

In [83]:
def source(path, source):
  import os

  if not os.path.isfile(path):
    !mkdir data
    !wget $source --force-directories -O $path 

In [84]:
def write(path, content):
  fp = open(path, 'wb')
  fp.write(content)
  fp.close()

In [87]:
def unpickle(path):
  import pickle
  with open(path, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
  return dict

# Iris
- feature: 4
- target: 1
- types: Setosa, Versicolour, Virginica

In [154]:
class Iris:
  def __init__(self, content = None):
    if content is None: content = self.__source('data/iris.data')

    tokens = Scanner('csv').scan(content);

    features = Parser([
      lambda x: float(x)
    ]).parse([
      tokens[:, 0:-1]
    ])

    targets = Parser([
      lambda x: ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'].index(x)
    ]).parse([
      tokens[:, -1:]
    ])

    classes = ['setosa', 'versicolor', 'virginica']

    self.features = features
    self.targets = targets
    self.classes = classes

  def __source(self, path):
    SOURCE = 'https://raw.githubusercontent.com/s10459020/Database/main/iris/iris.data'

    if not os.path.isfile(path):
      !mkdir data
      !wget -O $path $SOURCE
    return open(path).read()

In [155]:
iris = Iris()
print(iris.classes)
print(iris.features[:5])
print(iris.targets[:5])

['setosa', 'versicolor', 'virginica']
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[[0]
 [0]
 [0]
 [0]
 [0]]


# Waveform

In [51]:
class Waveform:
  def __init__(self, content = None):
    if content is None: content = self.__source('data/waveform.data')

    tokens = Scanner('csv').scan(content);

    features = Parser([
      lambda x: float(x)
    ]).parse([
      tokens[:, 0:-1]
    ])

    targets = Parser([
      lambda x: ['0', '1', '2'].index(x)
    ]).parse([
      tokens[:, -1:]
    ])

    classes = ['waveform 1', 'waveform 2', 'waveform 3']

    self.features = features
    self.targets = targets
    self.classes = classes

  def __source(self, path):
    SOURCE = 'https://raw.githubusercontent.com/s10459020/Database/main/waveform/waveform.data'

    if not os.path.isfile(path):
      !mkdir data
      !wget $SOURCE --force-directories -O $path 
    return open(path).read()

In [62]:
waveform = Waveform()
print(waveform.classes)
print(waveform.features[:5])
print(waveform.targets[:5])

wget https://raw.githubusercontent.com/s10459020/Database/main/waveform/waveform.data --force-directories -O data/waveform.data
['waveform 1', 'waveform 2', 'waveform 3']
[[-1.23 -1.56 -1.75 -0.28  0.6   2.22  0.85  0.21 -0.2   0.89  1.08  4.2
   2.89  7.75  4.59  3.15  5.12  3.32  1.2   0.24 -0.56]
 [-0.69  2.43  0.61  2.08  2.3   3.25  5.52  4.55  2.97  2.22  2.81  1.61
   1.24  1.89  1.88 -1.34  0.83  1.41  1.78  0.6   2.42]
 [-0.12 -0.94  1.29  2.59  2.42  3.55  4.94  3.25  1.9   2.07  0.51  1.45
   2.5   0.12  1.41  2.78  0.64  0.62 -0.01 -0.79 -0.12]
 [ 0.86  0.29  2.19 -0.02  1.13  2.51  2.37  5.45  5.45  4.84  4.65  4.05
   2.58  1.4   1.24  1.41  1.07 -1.43  2.84 -1.18  1.12]
 [ 1.16  0.37  0.4  -0.59  2.66  1.    2.69  4.06  5.34  3.53  4.82  4.79
   4.3   1.84  1.73  0.21 -0.18  0.13 -0.21 -0.8  -0.68]]
[[2]
 [1]
 [0]
 [1]
 [1]]


# WDBC

In [56]:
class Wdbc:
  def __init__(self, content = None):
    if content is None: content = self.__source('data/wdbc.data')

    tokens = Scanner('csv').scan(content);

    features = Parser([
      lambda x: float(x)
    ]).parse([
      tokens[:, 2:12]
    ])

    targets = Parser([
      lambda x: ['M', 'B'].index(x)
    ]).parse([
      tokens[:, 1:2]
    ])

    classes = ['malignant', 'benign']

    self.features = features
    self.targets = targets
    self.classes = classes

  def __source(self, path):
    SOURCE = 'https://raw.githubusercontent.com/s10459020/Database/main/wdbc/wdbc.data'

    if not os.path.isfile(path):
      !mkdir data
      !wget $SOURCE --force-directories -O $path 
    return open(path).read()

In [60]:
wdbc = Wdbc()
print(wdbc.classes)
print(wdbc.features[:5])
print(wdbc.targets[:5])

['malignant', 'benign']
[[1.799e+01 1.038e+01 1.228e+02 1.001e+03 1.184e-01 2.776e-01 3.001e-01
  1.471e-01 2.419e-01 7.871e-02]
 [2.057e+01 1.777e+01 1.329e+02 1.326e+03 8.474e-02 7.864e-02 8.690e-02
  7.017e-02 1.812e-01 5.667e-02]
 [1.969e+01 2.125e+01 1.300e+02 1.203e+03 1.096e-01 1.599e-01 1.974e-01
  1.279e-01 2.069e-01 5.999e-02]
 [1.142e+01 2.038e+01 7.758e+01 3.861e+02 1.425e-01 2.839e-01 2.414e-01
  1.052e-01 2.597e-01 9.744e-02]
 [2.029e+01 1.434e+01 1.351e+02 1.297e+03 1.003e-01 1.328e-01 1.980e-01
  1.043e-01 1.809e-01 5.883e-02]]
[[0]
 [0]
 [0]
 [0]
 [0]]


# MNest

# CIFAR-10

In [142]:
class Cifar10:
  def __init__(self, content = None):
    import numpy
    FILEPATH = 'data/cifar10.pickle'
    TEMP = 'data/cifar10_batch'

    if content: 
      write(FILEPATH, content)
      cifar = unpickle(FILEPATH)
      self.data = cifar[b'data']
      self.label = cifar[b'labels']

    else: 
      dataArray = []
      labelArray = []
      for i in range(1, 6):   
        source( TEMP + str(i), 'https://raw.githubusercontent.com/s10459020/Database/main/cifar-10/data_batch_' + str(i) )
        cifar_batch = unpickle( TEMP + str(i) )

        dataArray = numpy.concatenate( (dataArray, cifar_batch[b'data']) ) if len(dataArray) > 0 else cifar_batch[b'data']
        labelArray = labelArray + cifar_batch[b'labels'] if len(labelArray) > 0 else cifar_batch[b'labels']

      self.data = dataArray
      self.label = labelArray

In [157]:
cifar10 = Cifar10()
print(f'data({ len(cifar10.data) }): { cifar10.data }')
print(f'label({ len(cifar10.label) }): { cifar10.label }')

data(50000): [[ 59  43  50 ... 140  84  72]
 [154 126 105 ... 139 142 144]
 [255 253 253 ...  83  83  84]
 ...
 [ 35  40  42 ...  77  66  50]
 [189 186 185 ... 169 171 171]
 [229 236 234 ... 173 162 161]]
label(50000): [6, 9, 9, 4, 1, 1, 2, 7, 8, 3, 4, 7, 7, 2, 9, 9, 9, 3, 2, 6, 4, 3, 6, 6, 2, 6, 3, 5, 4, 0, 0, 9, 1, 3, 4, 0, 3, 7, 3, 3, 5, 2, 2, 7, 1, 1, 1, 2, 2, 0, 9, 5, 7, 9, 2, 2, 5, 2, 4, 3, 1, 1, 8, 2, 1, 1, 4, 9, 7, 8, 5, 9, 6, 7, 3, 1, 9, 0, 3, 1, 3, 5, 4, 5, 7, 7, 4, 7, 9, 4, 2, 3, 8, 0, 1, 6, 1, 1, 4, 1, 8, 3, 9, 6, 6, 1, 8, 5, 2, 9, 9, 8, 1, 7, 7, 0, 0, 6, 9, 1, 2, 2, 9, 2, 6, 6, 1, 9, 5, 0, 4, 7, 6, 7, 1, 8, 1, 1, 2, 8, 1, 3, 3, 6, 2, 4, 9, 9, 5, 4, 3, 6, 7, 4, 6, 8, 5, 5, 4, 3, 1, 8, 4, 7, 6, 0, 9, 5, 1, 3, 8, 2, 7, 5, 3, 4, 1, 5, 7, 0, 4, 7, 5, 5, 1, 0, 9, 6, 9, 0, 8, 7, 8, 8, 2, 5, 2, 3, 5, 0, 6, 1, 9, 3, 6, 9, 1, 3, 9, 6, 6, 7, 1, 0, 9, 5, 8, 5, 2, 9, 0, 8, 8, 0, 6, 9, 1, 1, 6, 3, 7, 6, 6, 0, 6, 6, 1, 7, 1, 5, 8, 3, 6, 6, 8, 6, 8, 4, 6, 6, 1, 3, 8, 3, 4, 1, 7, 1, 3, 8, 