<a href="https://colab.research.google.com/github/p82maavd/MIML/blob/main/src/miml/tutorial/data_miml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mimllearning

In [2]:
from miml.datasets import load_dataset

# Load the "toy.arff" example dataset
# (from_library=True presumably selects the copy bundled with the package -- confirm).
dataset = load_dataset("toy.arff", from_library=True)

# Render the same dataset twice: first as a table, then in compact form.
print("Show dataset in table mode", "--------------------------", sep="\n")
dataset.show_dataset(mode="table")

print("", "Show dataset in compact mode", "----------------------------", sep="\n")
dataset.show_dataset(mode="compact")

Show dataset in table mode
--------------------------
+--------+------+------+------+----------+----------+----------+----------+
|  bag1  |  f1  |  f2  |  f3  |  label1  |  label2  |  label3  |  label4  |
|   0    |  42  | -198 | -109 |    1     |    0     |    0     |    1     |
+--------+------+------+------+----------+----------+----------+----------+
|   1    | 41.9 | -191 | -142 |    1     |    0     |    0     |    1     |
+--------+------+------+------+----------+----------+----------+----------+
|   2    |  35  | 14.2 | 6.33 |    1     |    0     |    0     |    1     |
+--------+------+------+------+----------+----------+----------+----------+
+--------+-------+------+------+----------+----------+----------+----------+
|  bag2  |  f1   |  f2  |  f3  |  label1  |  label2  |  label3  |  label4  |
|   0    | 11.25 | -98  |  10  |    0     |    1     |    1     |    0     |
+--------+-------+------+------+----------+----------+----------+----------+
|   1    |  31   | 40.5 | 7.85

In [3]:
# Shows dataset metrics.
# The printed report has a multi-label section (cardinality, density, distinct)
# and a multi-instance section (number of bags, instance counts per bag,
# features/labels/attributes per bag and the distribution of bag sizes).
dataset.describe()

-----MULTILABEL-----
Cardinality:  2.0
Density:  0.5
Distinct:  0.125

-----MULTIINSTANCE-----
Nº of bags:  2
Total instances:  5
Average Instances per bag:  2.5
Min Instances per bag:  2
Max Instances per bag:  3
Features per bag:  3
Labels per bag:  4
Attributes per bag:  7

Distribution of bags:
	Bags with  2  instances:  1
	Bags with  3  instances:  1


In [4]:
import numpy

# Fixed seed so the random "new_feature" column, and therefore the table printed
# below, is identical on every run of the notebook.
SEED = 0

print("Add a new attribute to the dataset and modify one of its attributes")
# One uniform random value in [0, 1) per instance of the dataset.
new_feature_values = numpy.random.default_rng(SEED).random(dataset.get_number_instances())
# Insert the new column at position 2, i.e. between f2 and f3.
dataset.add_attribute(name="new_feature", position=2, values=new_feature_values)
# Overwrite the freshly added attribute (index 2) of the first instance of the first bag.
dataset.set_attribute(bag=0, index_instance=0, attribute=2, value=3.13)
dataset.show_dataset(start=0, end=1)

print("Delete the attribute added before")
# Removing position 2 again restores the original f1/f2/f3 layout.
dataset.delete_attribute(position=2)
dataset.show_dataset(start=0, end=1)

Add a new attribute to the dataset and modify one of its attributes
+--------+------+------+---------------+------+----------+----------+----------+----------+
|  bag1  |  f1  |  f2  |  new_feature  |  f3  |  label1  |  label2  |  label3  |  label4  |
|   0    |  42  | -198 |     3.13      | -109 |    1     |    0     |    0     |    1     |
+--------+------+------+---------------+------+----------+----------+----------+----------+
|   1    | 41.9 | -191 |   0.165707    | -142 |    1     |    0     |    0     |    1     |
+--------+------+------+---------------+------+----------+----------+----------+----------+
|   2    |  35  | 14.2 |   0.188282    | 6.33 |    1     |    0     |    0     |    1     |
+--------+------+------+---------------+------+----------+----------+----------+----------+
Delete the attribute added before
+--------+------+------+------+----------+----------+----------+----------+
|  bag1  |  f1  |  f2  |  f3  |  label1  |  label2  |  label3  |  label4  |
|   0    |

In [5]:
from miml.data import Instance, Bag

# Build a standalone instance, tweak it, then attach it to the dataset twice:
# once to an existing bag and once inside a brand-new bag.
# NOTE(review): this cell mutates `dataset`, so re-running it adds the instance
# (and attempts to add "bag3") again -- run it once on a fresh kernel.
print("Creation and modification of an instance")
# Seven values: three features followed by four labels, matching the dataset layout.
values = [38, 62, 5.09, 1, 0, 0, 1]
instance = Instance(values)
# Attribute indices are zero-based: this replaces the third value (5.09) with 74.
instance.set_attribute(attribute=2, value=74)
instance.show_instance()

print("Add an instance to the dataset")
# bag=0 addresses the first bag (bag1); the instance shows up as its last row.
dataset.add_instance(bag=0, instance=instance)
dataset.show_dataset()

print("Create a bag and add it to the dataset")
# NOTE(review): the same Instance object is reused for the new bag; whether the
# library copies it or shares it between bags is not visible here -- confirm.
bag=Bag("bag3")
bag.add_instance(instance)
dataset.add_bag(bag)
dataset.show_dataset()


Creation and modification of an instance
+----+----+----+---+---+---+---+
| 38 | 62 | 74 | 1 | 0 | 0 | 1 |
+----+----+----+---+---+---+---+
Add an instance to the dataset
+--------+------+------+------+----------+----------+----------+----------+
|  bag1  |  f1  |  f2  |  f3  |  label1  |  label2  |  label3  |  label4  |
|   0    |  42  | -198 | -109 |    1     |    0     |    0     |    1     |
+--------+------+------+------+----------+----------+----------+----------+
|   1    | 41.9 | -191 | -142 |    1     |    0     |    0     |    1     |
+--------+------+------+------+----------+----------+----------+----------+
|   2    |  35  | 14.2 | 6.33 |    1     |    0     |    0     |    1     |
+--------+------+------+------+----------+----------+----------+----------+
|   3    |  38  |  62  |  74  |    1     |    0     |    0     |    1     |
+--------+------+------+------+----------+----------+----------+----------+
+--------+-------+------+------+----------+----------+----------+----

In [6]:
# Walk through every bag of the dataset and dump its sizes and contents
for bag_index in range(dataset.get_number_bags()):

    # Fetch the bag stored at this position
    bag = dataset.get_bag(bag_index)
    print("Bag:", bag.key)
    num_instances = bag.get_number_instances()
    print("\tNumInstances:", num_instances)
    print("\tNumFeatures:", bag.get_number_features())
    print("\tNumLabels:", bag.get_number_labels())
    print("\tNumAttributes:", bag.get_number_attributes())

    # Dump each instance of the bag, attribute by attribute
    for instance_index in range(num_instances):
        # Look the instance up through the dataset, addressing the bag by its key
        instance = dataset.get_instance(bag.key, instance_index)
        num_attributes = instance.get_number_attributes()
        print("\t\tInstance:", instance_index, "NumAttributes:", num_attributes)
        for attribute_index in range(num_attributes):
            attribute_value = instance.get_attribute(attribute=attribute_index)
            print("\t\t\tAttribute", attribute_index, ":", attribute_value)

Bag: bag1
	NumInstances: 4
	NumFeatures: 3
	NumLabels: 4
	NumAttributes: 7
		Instance: 0 NumAttributes: 7
			Attribute 0 : 42.0
			Attribute 1 : -198.0
			Attribute 2 : -109.0
			Attribute 3 : 1.0
			Attribute 4 : 0.0
			Attribute 5 : 0.0
			Attribute 6 : 1.0
		Instance: 1 NumAttributes: 7
			Attribute 0 : 41.9
			Attribute 1 : -191.0
			Attribute 2 : -142.0
			Attribute 3 : 1.0
			Attribute 4 : 0.0
			Attribute 5 : 0.0
			Attribute 6 : 1.0
		Instance: 2 NumAttributes: 7
			Attribute 0 : 35.0
			Attribute 1 : 14.2
			Attribute 2 : 6.33
			Attribute 3 : 1.0
			Attribute 4 : 0.0
			Attribute 5 : 0.0
			Attribute 6 : 1.0
		Instance: 3 NumAttributes: 7
			Attribute 0 : 38.0
			Attribute 1 : 62.0
			Attribute 2 : 74.0
			Attribute 3 : 1.0
			Attribute 4 : 0.0
			Attribute 5 : 0.0
			Attribute 6 : 1.0
Bag: bag2
	NumInstances: 2
	NumFeatures: 3
	NumLabels: 4
	NumAttributes: 7
		Instance: 0 NumAttributes: 7
			Attribute 0 : 11.25
			Attribute 1 : -98.0
			Attribute 2 : 10.0
			Attribute 3 : 0.