<a href="https://colab.research.google.com/github/p82maavd/MIML/blob/main/demo_MIML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [7]:
import numpy as np
import requests
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import hamming_loss


# Obtención de datasets

In [6]:
def _downloadDriveText(file_id, timeout=30):
    """Download a file shared through Google Drive and return its body as text.

    Args:
        file_id: Google Drive identifier appended to the download URL.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    response = requests.get(
        'https://drive.google.com/uc?export=download&confirm=9_s_&id=' + file_id,
        timeout=timeout,
    )
    # Fail here instead of handing an HTTP error page to the ARFF parser later on
    response.raise_for_status()
    return response.text


toy = _downloadDriveText('1WDnCfmX20-Oyc7wowTqh6Xvl_tTgok-Z')
miml_birds = _downloadDriveText('1FVf-cJQxNBQnKss1GMuLcDmQtD0qoRJ5')
miml_birds_train = _downloadDriveText('1BCt6Ohz3x3tFYfrjFfvlNV0sovkEmLn8')
miml_birds_test = _downloadDriveText('108SLl5pog-V1nkhAgJxp2aFmVJVl4azP')

# Clase para gestionar datasets MIML en .arff

In [8]:
class MIMLArff:
    """In-memory holder for a MIML dataset read from an ``.arff`` file.

    Attributes are plain public fields:
      * ``name``         -- relation name, ``"undefined"`` until set.
      * ``attributes``   -- list of attribute names.
      * ``data``         -- dict mapping a bag key to a ``(values, labels)`` tuple.
      * ``numberlabels`` -- number of labels per bag, ``0`` until set.
    """

    def __init__(self) -> None:
        self.name = "undefined"
        self.attributes = []
        self.data = {}
        self.numberlabels = 0

    # --- relation name -------------------------------------------------
    def setName(self, name):
        """Store the relation name."""
        self.name = name

    def getName(self):
        """Return the relation name."""
        return self.name

    # --- attribute names -----------------------------------------------
    def setAttributes(self, attributes):
        """Replace the stored list of attribute names."""
        self.attributes = attributes

    def getAttributes(self):
        """Return the stored list of attribute names."""
        return self.attributes

    # --- label count ---------------------------------------------------
    def setNumberLabels(self, labels):
        """Store how many labels each bag carries."""
        self.numberlabels = labels

    def getNumberLabels(self):
        """Return how many labels each bag carries."""
        return self.numberlabels

    # --- bags ----------------------------------------------------------
    def addBag(self,key,values,labels):
        """Insert (or overwrite) the bag ``key`` as a ``(values, labels)`` pair."""
        self.data[key] = (values, labels)

    def showArff(self):
        """Print every bag key followed by its ``(values, labels)`` tuple."""
        #TODO: Improve this to table style
        for bag_id in self.data:
            print(bag_id)
            print(self.data[bag_id])


# Lectura datasets

In [35]:
def _parseBagLine(line, delimiter):
    """Split one data row into its bag key, instance matrix and label vector.

    The row is expected to be ``<key>,<delimiter><instances><delimiter>,<labels>``
    where instances are separated by a literal backslash followed by ``n`` and
    every instance and the label list are comma-separated numbers.

    Raises:
        ValueError: If the row does not contain an opening and a closing
            ``delimiter``, or a value cannot be converted to a number.
    """
    # The first field of every row is assumed to be the bag identifier
    key = line[0:line.find(",")]

    # Bag data starts after the first delimiter and ends at the next one
    start = line.find(delimiter)
    end = line.find(delimiter, start + 1) if start != -1 else -1
    if end == -1:
        raise ValueError(
            "Bag data enclosed in %r not found in line: %r" % (delimiter, line)
        )

    # One row per instance of the bag
    values = np.array(
        [[float(i) for i in v.split(',')] for v in line[start + 1:end].split("\\n")]
    )
    # Everything after the closing delimiter and its comma is the label list
    labels = np.array([int(i) for i in line[end + 2:].split(',')])
    return key, values, labels


def arffMIMLReader(file,delimiter="'"):
    """Parse the text of a MIML ``.arff`` file into a ``MIMLArff`` object.

    Args:
        file: Whole content of the ``.arff`` file as a single string.
        delimiter: Character that encloses the bag data in each data row.

    Returns:
        A ``MIMLArff`` holding the relation name, the names of the attributes
        declared between ``@attribute bag relational`` and ``@end bag``, the
        number of labels, and one ``(values, labels)`` entry per bag.

    Raises:
        ValueError: If a data row is malformed (see ``_parseBagLine``).
    """
    arff = MIMLArff()
    attrib = []
    # True while reading the declarations nested inside the relational bag
    inside_bag = False

    for line in file.split("\n"):
        # Ignore surrounding whitespace (including a stray "\r"), blank lines
        # and "%" comment lines, which would otherwise be parsed as data rows
        line = line.strip()
        if line == "" or line.startswith("%"):
            continue

        if line.startswith("@"):
            # ARFF declarations are case-insensitive, so match on a lowered copy
            keyword = line.lower()
            if keyword.startswith("@relation"):
                arff.setName(line[line.find(" ")+1:])
            elif keyword.startswith("@attribute bag relational"):
                inside_bag = True
            elif keyword.startswith("@end bag"):
                inside_bag = False
            elif inside_bag and keyword.startswith("@attribute"):
                # Second whitespace-separated token is the attribute name
                parts = line.split()
                if len(parts) > 1:
                    attrib.append(parts[1])
            continue

        #TODO: test with toy.arff
        key, values, labels = _parseBagLine(line, delimiter)
        arff.setNumberLabels(len(labels))
        arff.addBag(key, values, labels)
        #TODO: add attribute handling
        #TODO: maybe split into separate functions for data and for attributes
        #TODO: possibly a separate dictionary for stats

    arff.setAttributes(attrib)
    return arff

# Transformación MIML a ML

In [31]:
def convertToMultiLabel(arff):
    """Flatten a MIML dataset into a multi-label one by averaging each bag.

    Every bag is replaced by the arithmetic mean of its instances, so each bag
    becomes a single feature vector paired with its label vector.

    Args:
        arff: Object exposing ``data`` (dict of ``key -> (values, labels)``
            where ``values`` is a 2-D array with one row per instance),
            ``getAttributes()`` and ``getNumberLabels()``.

    Returns:
        Tuple ``(x, y)`` of float arrays: ``x`` has one row per bag and one
        column per attribute, ``y`` has one row per bag and one column per label.
    """
    #TODO: arithmetic mean, geometric mean, min-max
    n_bags = len(arff.data)
    # Both arrays are fully overwritten row by row below
    x = np.empty(shape=(n_bags, len(arff.getAttributes())))
    y = np.empty(shape=(n_bags, arff.getNumberLabels()))

    for row, (instances, labels) in enumerate(arff.data.values()):
        # Column-wise mean over the instances of the bag. The accumulator must
        # not start from np.empty(), whose contents are uninitialized memory.
        x[row] = np.mean(instances, axis=0)
        y[row] = labels

    return x,y

# Demo de ejecución

In [36]:
# Training split: parse the ARFF text, then collapse every bag to one vector
birdstrain = arffMIMLReader(miml_birds_train)
X_train, y_train = convertToMultiLabel(birdstrain)

# Test split: same two steps
birdstest = arffMIMLReader(miml_birds_test)
X_test, y_test = convertToMultiLabel(birdstest)

# Wrap a seeded random forest so it is fitted on all label columns
base_estimator = RandomForestClassifier(random_state=27)
classifier = MultiOutputClassifier(base_estimator)
classifier.fit(X_train, y_train)

# Predictions on the held-out bags
y_pred = classifier.predict(X_test)

# Model evaluation
report = classification_report(y_test, y_pred, zero_division=0)
print("Reporte de clasificación:\n", report)

hamming = round(hamming_loss(y_test, y_pred),2)
print('Hamming Loss: ', hamming)

Reporte de clasificación:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.92      0.75      0.83        16
           2       1.00      0.25      0.40         4
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         0
           6       1.00      0.60      0.75         5
           7       0.00      0.00      0.00         7
           8       0.80      0.40      0.53        10
           9       0.00      0.00      0.00        10
          10       0.62      0.29      0.40        17
          11       1.00      0.33      0.50         3
          12       0.50      0.33      0.40         3
          13       0.00      0.00      0.00         3
          14       1.00      0.14      0.25         7
          15       0.00      0.00      0.00         3
          16       0.00      0.00      0.00         1
