# Dependencies

In [3]:
class Scanner:
  """Tokenize raw text into a 2-D array of string fields.

  The scanner is configured with a format name; ``scan`` dispatches to the
  routine registered for that format. Only ``'csv'`` is registered.
  """

  def __init__(self, type):
    """Create a scanner for one input format.

    Args:
      type: Format name used to select the scan routine (``'csv'``).
    """
    self.type = type
    self.tokens = None

  def scan(self, content):
    """Tokenize ``content`` with the routine registered for ``self.type``.

    Args:
      content: Raw text to tokenize.

    Returns:
      Whatever the selected routine returns (see ``scanCSV``).

    Raises:
      KeyError: If ``self.type`` has no registered routine.
    """
    handlers = {
        'csv': self.scanCSV,
    }
    return handlers[self.type](content)

  def scanCSV(self, content):
    """Split comma-separated text into a numpy array of string tokens.

    Records are separated by any run of whitespace (``str.split()``), so
    blank lines are ignored and fields must not contain whitespace.

    Args:
      content: Text holding one comma-separated record per line.

    Returns:
      ``numpy.array`` built from one list of fields per record.
    """
    # Imported here on purpose: an import in the class body only creates a
    # class attribute and is not visible as a bare name inside methods.
    import numpy

    rows = [record.split(',') for record in content.split()]
    return numpy.array(rows)


In [4]:
class Parser:
  """Convert slices of string tokens into typed values.

  Each converter in ``methods`` is paired by position with one 2-D slice
  passed to ``parse``; the converted columns of all slices are joined side
  by side into a single array.
  """

  def __init__(self, methods):
    """Store the converters.

    Args:
      methods: Sequence of one-argument callables; ``methods[i]`` converts
        every element of ``slices[i]`` in ``parse``.
    """
    self.methods = methods

  def parse(self, slices):
    """Apply each converter to its slice and merge the results.

    Args:
      slices: Sequence of 2-D (rows x columns) token blocks, indexed in
        step with ``self.methods``. Extra slices beyond the number of
        converters are ignored.

    Returns:
      ``numpy.array`` with one row per input row and the converted columns
      of every slice placed in order.

    Raises:
      IndexError: If there are fewer slices than converters.
    """
    # Imported here on purpose: an import in the class body only creates a
    # class attribute and is not visible as a bare name inside methods.
    import numpy

    columns = []
    for i, method in enumerate(self.methods):
      # Transpose so each iteration yields one column of the slice.
      for column in numpy.array(slices[i]).transpose():
        columns.append([method(value) for value in column])

    # Columns were collected as rows; flip back to row-major records.
    return numpy.array(columns).transpose()

In [5]:
def source(path, source):
  """Download ``source`` to ``path`` unless ``path`` already exists.

  Uses only the standard library, so it works outside IPython and without
  the ``wget`` binary.

  Args:
    path: Destination file path. Its parent directory is created if needed.
    source: URL to fetch.

  Raises:
    urllib.error.URLError: If the URL cannot be retrieved.
    OSError: If the destination cannot be written.
  """
  import os
  import urllib.request

  path = os.fspath(path)
  if os.path.isfile(path):
    return

  os.makedirs(os.path.dirname(path) or '.', exist_ok=True)

  # Download to a temporary name first so an interrupted transfer never
  # leaves a truncated file that later calls would treat as cached.
  partial = path + '.part'
  urllib.request.urlretrieve(source, partial)
  os.replace(partial, path)

In [6]:
def write(path, content):
  """Write ``content`` to ``path`` in binary mode, replacing any old file.

  Args:
    path: Destination file path.
    content: Bytes-like data to store.
  """
  # The context manager closes the file even if the write fails.
  with open(path, 'wb') as fp:
    fp.write(content)

In [7]:
def unpickle(path):
  """Load and return the pickled object stored at ``path``.

  Strings pickled by Python 2 are decoded as ``bytes``
  (``encoding='bytes'``).

  Args:
    path: Path of the pickle file to read.

  Returns:
    The unpickled object.
  """
  import pickle

  # SECURITY: unpickling can execute arbitrary code. Only use this on files
  # from a trusted origin.
  with open(path, 'rb') as fo:
    return pickle.load(fo, encoding='bytes')

# Iris
- feature: 4
- target: 1
- types: Setosa, Versicolour, Virginica

In [154]:
class Iris:
  """Iris dataset parsed into numeric features and class-index targets.

  Attributes set by the constructor:
    features: Array of floats parsed from every column except the last.
    targets: Single-column array of class indices parsed from the last
      column.
    classes: Class names, indexed by the values stored in ``targets``.
  """

  def __init__(self, content = None):
    """Parse the dataset from CSV text, fetching it when none is given.

    Args:
      content: CSV text of the dataset. When ``None``, the text is read
        from ``data/iris.data`` (downloaded first if that file is absent).

    Raises:
      ValueError: If a field is not numeric or a label is not one of the
        three known species names.
    """
    if content is None:
      content = self.__source('data/iris.data')

    tokens = Scanner('csv').scan(content)

    # Labels as they appear in the file; list position is the class index.
    labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

    self.features = Parser([float]).parse([tokens[:, 0:-1]])
    self.targets = Parser([labels.index]).parse([tokens[:, -1:]])
    self.classes = ['setosa', 'versicolor', 'virginica']

  def __source(self, path):
    """Return the dataset text at ``path``, downloading it if missing."""
    SOURCE = 'https://raw.githubusercontent.com/s10459020/Database/main/iris/iris.data'

    # Reuse the shared download helper rather than repeating shell commands.
    source(path, SOURCE)
    with open(path) as fp:
      return fp.read()

In [None]:
# Build the dataset and preview the class names plus the first five
# feature rows and targets, one item per line.
iris = Iris()
print(iris.classes, iris.features[:5], iris.targets[:5], sep='\n')

# Waveform

In [51]:
class Waveform:
  """Waveform dataset parsed into numeric features and class-index targets.

  Attributes set by the constructor:
    features: Array of floats parsed from every column except the last.
    targets: Single-column array of class indices parsed from the last
      column.
    classes: Class names, indexed by the values stored in ``targets``.
  """

  def __init__(self, content = None):
    """Parse the dataset from CSV text, fetching it when none is given.

    Args:
      content: CSV text of the dataset. When ``None``, the text is read
        from ``data/waveform.data`` (downloaded first if that file is
        absent).

    Raises:
      ValueError: If a field is not numeric or a label is not ``'0'``,
        ``'1'`` or ``'2'``.
    """
    if content is None:
      content = self.__source('data/waveform.data')

    tokens = Scanner('csv').scan(content)

    # Labels as they appear in the file; list position is the class index.
    labels = ['0', '1', '2']

    self.features = Parser([float]).parse([tokens[:, 0:-1]])
    self.targets = Parser([labels.index]).parse([tokens[:, -1:]])
    self.classes = ['waveform 1', 'waveform 2', 'waveform 3']

  def __source(self, path):
    """Return the dataset text at ``path``, downloading it if missing."""
    SOURCE = 'https://raw.githubusercontent.com/s10459020/Database/main/waveform/waveform.data'

    # Reuse the shared download helper rather than repeating shell commands.
    source(path, SOURCE)
    with open(path) as fp:
      return fp.read()

In [None]:
# Build the dataset and preview the class names plus the first five
# feature rows and targets, one item per line.
waveform = Waveform()
print(waveform.classes, waveform.features[:5], waveform.targets[:5], sep='\n')

# WDBC

In [56]:
class Wdbc:
  """WDBC dataset parsed into numeric features and class-index targets.

  Attributes set by the constructor:
    features: Array of floats parsed from columns 2 through 11 of each
      record.
    targets: Single-column array of class indices parsed from column 1
      (``'M'`` -> 0, ``'B'`` -> 1).
    classes: Class names, indexed by the values stored in ``targets``.
  """

  def __init__(self, content = None):
    """Parse the dataset from CSV text, fetching it when none is given.

    Args:
      content: CSV text of the dataset. When ``None``, the text is read
        from ``data/wdbc.data`` (downloaded first if that file is absent).

    Raises:
      ValueError: If a feature field is not numeric or a label is neither
        ``'M'`` nor ``'B'``.
    """
    if content is None:
      content = self.__source('data/wdbc.data')

    tokens = Scanner('csv').scan(content)

    # Labels as they appear in the file; list position is the class index.
    labels = ['M', 'B']

    self.features = Parser([float]).parse([tokens[:, 2:12]])
    self.targets = Parser([labels.index]).parse([tokens[:, 1:2]])
    self.classes = ['malignant', 'benign']

  def __source(self, path):
    """Return the dataset text at ``path``, downloading it if missing."""
    SOURCE = 'https://raw.githubusercontent.com/s10459020/Database/main/wdbc/wdbc.data'

    # Reuse the shared download helper rather than repeating shell commands.
    source(path, SOURCE)
    with open(path) as fp:
      return fp.read()

In [None]:
# Build the dataset and preview the class names plus the first five
# feature rows and targets, one item per line.
wdbc = Wdbc()
print(wdbc.classes, wdbc.features[:5], wdbc.targets[:5], sep='\n')

# CIFAR10

In [1]:
class Cifar10:
  """CIFAR-10 data and labels loaded from pickled batch files.

  Attributes set by the constructor:
    data: Value stored under ``b'data'``; when downloaded, the five
      batches joined with ``numpy.concatenate``.
    label: Labels stored under ``b'labels'``; when downloaded, the five
      batches' labels in one list.
  """

  def __init__(self, content = None):
    """Load one pickled batch from ``content`` or download batches 1-5.

    Args:
      content: Raw bytes of a single pickled batch. When empty or ``None``,
        the five ``data_batch_N`` files are fetched (if not already cached
        under ``data/``) and merged.
    """
    import os
    import numpy

    FILEPATH = 'data/cifar10.pickle'
    TEMP = 'data/cifar10_batch'
    BATCH_URL = 'https://raw.githubusercontent.com/s10459020/Database/main/cifar-10/data_batch_'

    # SECURITY: both branches unpickle their input, and unpickling can
    # execute arbitrary code. Only pass content from a trusted origin.
    if content:
      # write() needs the target directory to exist.
      os.makedirs(os.path.dirname(FILEPATH), exist_ok=True)
      write(FILEPATH, content)
      cifar = unpickle(FILEPATH)
      self.data = cifar[b'data']
      self.label = cifar[b'labels']
      return

    data_batches = []
    labels = []
    for i in range(1, 6):
      batch_path = TEMP + str(i)
      source(batch_path, BATCH_URL + str(i))
      cifar_batch = unpickle(batch_path)

      data_batches.append(cifar_batch[b'data'])
      labels.extend(cifar_batch[b'labels'])

    # Join once at the end instead of re-copying the growing array on
    # every iteration.
    self.data = numpy.concatenate(data_batches)
    self.label = labels

In [None]:
# With no content argument, Cifar10 fetches training batches 1-5 and merges
# them; print the number of entries and the contents of data and label.
cifar10 = Cifar10()
print(f'data({ len(cifar10.data) }): { cifar10.data }')
print(f'label({ len(cifar10.label) }): { cifar10.label }')