In [98]:
import os

# The star import is kept first among the third-party imports so that the
# explicit imports below always win if it happens to export the same names.
from mlquantify.methods import *
import mlquantify as mq
from mlquantify.evaluation.protocol import APP
from joblib import Parallel, delayed
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [99]:
# os.listdir returns entries in arbitrary, OS-dependent order and lists every
# entry in the folder. Sort the names so runs are reproducible, and keep only
# the CSV datasets so stray files (e.g. checkpoints) never reach pd.read_csv.
FILES = sorted(
    entry for entry in os.listdir('datasets com score')
    if entry.lower().endswith('.csv')
)
FILES

['bourbon.csv',
 'continental.csv',
 'foz_plaza.csv',
 'nadai.csv',
 'taroba.csv',
 'viale_cataratas.csv',
 'viale_tower.csv']

In [100]:
class HotelProtocol(APP):
    """APP protocol variant that works from precomputed classifier scores.

    The training posteriors are passed explicitly to ``fit`` and the test
    posteriors are rebuilt from the ``score`` column of every sampled test
    batch. Both are handed to the quantifiers through ``mq.set_arguments``.
    """

    def __init__(self,
                 models,
                 batch_size,
                 learner = None,
                 n_prevs = 50,
                 n_iterations = 1,
                 n_jobs = -1,
                 random_state = 32,
                 verbose = False,
                 return_type = "predictions",
                 measures = None):
        """Configure the protocol.

        ``models``, ``batch_size``, ``learner``, ``n_jobs``, ``random_state``,
        ``verbose``, ``return_type`` and ``measures`` are forwarded unchanged
        to ``APP.__init__``. ``n_prevs`` and ``n_iterations`` are not
        forwarded; they are stored on the instance afterwards.

        ``batch_size`` may be a single value or a list; it is always stored
        as a list.
        """
        super().__init__(models=models,
                         batch_size=batch_size,
                         learner=learner,
                         n_jobs=n_jobs,
                         random_state=random_state,
                         verbose=verbose,
                         return_type=return_type,
                         measures=measures)
        # Normalise to a list so a scalar and a list are handled the same way.
        self.batch_size = batch_size if isinstance(batch_size, list) else [batch_size]
        self.n_prevs = n_prevs
        self.n_iterations = n_iterations


    def fit(self, X_train, y_train, train_scores):
        """Fit every model in ``self.models`` in parallel.

        Parameters
        ----------
        X_train, y_train
            Training features and labels, passed to each model's ``fit``.
        train_scores
            DataFrame of training posteriors; its second column is thresholded
            at 0.5 in ``_delayed_fit`` to derive hard training predictions.

        Returns
        -------
        self
            ``self.models`` is replaced by the list of fitted models.
        """
        args = ((model, X_train, y_train, train_scores) for model in self.models)

        # NOTE(review): `tqdm` is not imported explicitly in this notebook; it is
        # presumably supplied by `from mlquantify.methods import *` -- confirm,
        # otherwise verbose=True raises NameError here.
        wrapper = tqdm if self.verbose else lambda x, **kwargs: x

        self.models = Parallel(n_jobs=self.n_jobs)(  # Parallel processing of models
            delayed(self._delayed_fit)(*arg) for arg in wrapper(args, desc="Fitting models", total=len(self.models))
        )
        return self


    def _predict(self, iteration, X, y, model, prev, batch_size, verbose):
        """Run one quantification trial on a freshly drawn test sample.

        A sample is drawn with ``self._new_sample(X, y, prev, batch_size)``.
        Its ``score`` column is turned into a two-column posterior frame and
        into hard predictions (score > 0.5), both registered through
        ``mq.set_arguments``. The ``score`` column is dropped before the sample
        is passed to ``model.predict``. ``verbose`` is accepted but not used
        here.

        Returns
        -------
        tuple
            ``(iteration + 1, model_name, prev, prev_pred, batch_size)`` where
            ``prev_pred`` is an array built from the values of the mapping
            returned by ``model.predict``.
        """
        model_name = model.__class__.__name__
        if model_name == "Ensemble" and isinstance(model.base_quantifier, Quantifier):
            model_name = f"{model.__class__.__name__}_{model.base_quantifier.__class__.__name__}_{model.size}"

        X_sample, _ = self._new_sample(X, y, prev, batch_size)

        # Column '1' is the stored score; column '0' is its complement.
        posteriors_test = pd.DataFrame({
            '0': abs(1 - X_sample["score"]),
            '1': X_sample["score"]
        })

        # The score is not a feature: remove it before calling the quantifier.
        X_sample = X_sample.drop(columns=["score"])

        y_pred = (posteriors_test.iloc[:, 1] > 0.5).astype(int)

        mq.set_arguments(posteriors_test=posteriors_test,
                      y_pred=y_pred)

        prev_pred = np.asarray(list(model.predict(X_sample).values()))

        return (iteration+1, model_name, prev, prev_pred, batch_size)


    def _delayed_fit(self, model, X_train, y_train, train_scores):
        """Fit a single model using the precomputed training scores.

        Hard training predictions are derived by thresholding the second
        column of ``train_scores`` at 0.5. The scores, labels and predictions
        are registered through ``mq.set_arguments`` before ``model.fit`` is
        called. Returns whatever ``model.fit`` returns.
        """
        y_train_pred = (train_scores.iloc[:, 1] > 0.5).astype(int)

        mq.set_arguments(posteriors_train=train_scores,
                        y_labels=y_train,
                        y_pred_train=y_train_pred
                        )
        return model.fit(X=X_train, y=y_train)

In [101]:
# Per-(hotel, aspect) result tables are collected here and concatenated once
# after the loop; growing a DataFrame with pd.concat on every iteration copies
# all previous rows each time.
result_tables = []

# Remove the ensemble entry from the method registry before models="all" is
# requested below (presumably so it is not instantiated -- confirm).
mq.methods.METHODS.pop("ENSEMBLE", None)
# mq.methods.METHODS.pop("HDx", None)
# mq.methods.METHODS.pop("GAC", None)

for file in FILES:
    name = file.split('.')[0]
    dataset = pd.read_csv(f"datasets com score/{file}")
    dataset['date'] = pd.to_datetime(dataset['date'])
    dataset = dataset.sort_values('date')
    aspects = dataset["aspect"].unique()
    for i, aspect in enumerate(aspects, start=1):
        print(f"Processing aspect {i}/{len(aspects)} -> {aspect}")
        df = dataset[dataset["aspect"] == aspect]

        # Stratified splitting needs at least two rows of every class; fall
        # back to a plain random split when the rarest class has fewer.
        if df['class'].value_counts().min() < 2:
            train_df, test_df = train_test_split(df, test_size=0.3, random_state=32)
        else:
            train_df, test_df = train_test_split(df, test_size=0.3, stratify=df['class'], random_state=32)

        X_train = train_df.drop(['class', 'score', 'date'], axis=1)
        y_train = train_df['class']
        # 'score' is kept in X_test on purpose: HotelProtocol._predict reads it
        # from each sampled batch and drops it before calling the quantifier.
        X_test = test_df.drop(['class', 'date'], axis=1)
        y_test = test_df['class']

        print(f"Train samples: {len(y_train)}, Test samples: {len(y_test)}")

        # Two-column posteriors: '1' is the stored score, '0' its complement.
        train_scores = pd.DataFrame({
            '0': abs(1 - train_df["score"]),
            '1': train_df["score"]
        })

        # Five batch sizes evenly spaced from 25 up to the full test set.
        batch_sizes = list(np.linspace(25, len(y_test), 5).astype(int))

        mq.ARGUMENTS_SETTED = True
        hotel_protocol = HotelProtocol(models="all",
                                    n_prevs=50,
                                    batch_size=batch_sizes,
                                    return_type="table",
                                    measures=["mae", "rae"])

        hotel_protocol.fit(X_train, y_train, train_scores=train_scores)

        table = hotel_protocol.predict(X_test, y_test)

        table["hotel"] = name
        table["aspect"] = aspect

        result_tables.append(table)

# Single concatenation; an empty run still yields an (empty) DataFrame.
final_table = pd.concat(result_tables, ignore_index=True) if result_tables else pd.DataFrame()

final_table.to_csv("quantification_results.csv", index=False)
final_table.head()


Processing aspect 1/5 -> Atendimento da equipe
Train samples: 367, Test samples: 158


Running APP: 100%|██████████| 4500/4500 [00:55<00:00, 80.70it/s]


Processing aspect 2/5 -> Quarto
Train samples: 510, Test samples: 219


Running APP: 100%|██████████| 4500/4500 [00:58<00:00, 77.35it/s]


Processing aspect 3/5 -> Café da manhã
Train samples: 266, Test samples: 115


Running APP: 100%|██████████| 4500/4500 [00:55<00:00, 80.71it/s]


Processing aspect 4/5 -> Localização
Train samples: 142, Test samples: 61


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 86.66it/s]


Processing aspect 5/5 -> Áreas comuns
Train samples: 158, Test samples: 68


Running APP: 100%|██████████| 4500/4500 [00:52<00:00, 86.33it/s]


Processing aspect 1/2 -> Quarto
Train samples: 203, Test samples: 88


Running APP: 100%|██████████| 4500/4500 [00:52<00:00, 85.39it/s]


Processing aspect 2/2 -> Atendimento da equipe
Train samples: 153, Test samples: 66


Running APP: 100%|██████████| 4500/4500 [00:52<00:00, 86.10it/s]


Processing aspect 1/3 -> Áreas comuns
Train samples: 140, Test samples: 61


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 86.59it/s]


Processing aspect 2/3 -> Quarto
Train samples: 233, Test samples: 100


Running APP: 100%|██████████| 4500/4500 [00:53<00:00, 84.40it/s]


Processing aspect 3/3 -> Atendimento da equipe
Train samples: 145, Test samples: 63


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 86.55it/s]


Processing aspect 1/4 -> Atendimento da equipe
Train samples: 226, Test samples: 97


Running APP: 100%|██████████| 4500/4500 [00:53<00:00, 84.37it/s]


Processing aspect 2/4 -> Quarto
Train samples: 293, Test samples: 126


Running APP: 100%|██████████| 4500/4500 [00:54<00:00, 82.72it/s]


Processing aspect 3/4 -> Áreas comuns
Train samples: 182, Test samples: 79


Running APP: 100%|██████████| 4500/4500 [00:52<00:00, 85.87it/s]


Processing aspect 4/4 -> Café da manhã
Train samples: 147, Test samples: 64


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 86.56it/s]


Processing aspect 1/8 -> Restaurante/Bar
Train samples: 340, Test samples: 146


Running APP: 100%|██████████| 4500/4500 [00:55<00:00, 81.66it/s]


Processing aspect 2/8 -> Atendimento da equipe
Train samples: 1388, Test samples: 596


Running APP: 100%|██████████| 4500/4500 [01:12<00:00, 62.21it/s]


Processing aspect 3/8 -> Experiência
Train samples: 307, Test samples: 132


Running APP: 100%|██████████| 4500/4500 [00:54<00:00, 82.63it/s]


Processing aspect 4/8 -> Café da manhã
Train samples: 553, Test samples: 238


Running APP: 100%|██████████| 4500/4500 [00:58<00:00, 76.47it/s]


Processing aspect 5/8 -> Quarto
Train samples: 1109, Test samples: 476


Running APP: 100%|██████████| 4500/4500 [01:08<00:00, 65.71it/s]


Processing aspect 6/8 -> Banheiro
Train samples: 284, Test samples: 123


Running APP: 100%|██████████| 4500/4500 [00:53<00:00, 83.63it/s]


Processing aspect 7/8 -> Áreas comuns
Train samples: 737, Test samples: 316


Running APP: 100%|██████████| 4500/4500 [01:01<00:00, 72.86it/s]


Processing aspect 8/8 -> Localização
Train samples: 209, Test samples: 90


Running APP: 100%|██████████| 4500/4500 [00:52<00:00, 85.30it/s]


Processing aspect 1/6 -> Café da manhã
Train samples: 122, Test samples: 53


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 87.87it/s]


Processing aspect 2/6 -> Experiência
Train samples: 104, Test samples: 45


Running APP: 100%|██████████| 4500/4500 [00:50<00:00, 88.32it/s]


Processing aspect 3/6 -> Atendimento da equipe
Train samples: 155, Test samples: 67


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 86.72it/s]


Processing aspect 4/6 -> Quarto
Train samples: 303, Test samples: 130


Running APP: 100%|██████████| 4500/4500 [00:54<00:00, 83.10it/s]


Processing aspect 5/6 -> Restaurante/Bar
Train samples: 103, Test samples: 45


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 88.18it/s]


Processing aspect 6/6 -> Áreas comuns
Train samples: 129, Test samples: 56


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 87.58it/s]


Processing aspect 1/6 -> Experiência
Train samples: 113, Test samples: 49


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 88.04it/s]


Processing aspect 2/6 -> Atendimento da equipe
Train samples: 280, Test samples: 120


Running APP: 100%|██████████| 4500/4500 [00:53<00:00, 83.73it/s]


Processing aspect 3/6 -> Áreas comuns
Train samples: 327, Test samples: 141


Running APP: 100%|██████████| 4500/4500 [00:54<00:00, 82.52it/s]


Processing aspect 4/6 -> Quarto
Train samples: 398, Test samples: 171


Running APP: 100%|██████████| 4500/4500 [00:55<00:00, 80.66it/s]


Processing aspect 5/6 -> Café da manhã
Train samples: 144, Test samples: 63


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 87.35it/s]


Processing aspect 6/6 -> Localização
Train samples: 131, Test samples: 57


Running APP: 100%|██████████| 4500/4500 [00:51<00:00, 87.57it/s]


Unnamed: 0,ITERATION,QUANTIFIER,REAL_PREVS,PRED_PREVS,BATCH_SIZE,mae,rae,hotel,aspect
0,1,CC,"[0.0, 1.0]","[0.0, 1.0]",25,0.0,0.0,bourbon,Atendimento da equipe
1,1,PCC,"[0.0, 1.0]","[0.10520459559999999, 0.8947954044]",25,0.1045429,16.67476,bourbon,Atendimento da equipe
2,1,EMQ,"[0.0, 1.0]","[1.6213530263944888e-14, 0.9999999999999838]",25,1.610496e-14,2.568766e-12,bourbon,Atendimento da equipe
3,1,FM,"[0.0, 1.0]","[0.0, 1.0]",25,0.0,0.0,bourbon,Atendimento da equipe
4,1,GPAC,"[0.0, 1.0]","[0.0, 1.0]",25,0.0,0.0,bourbon,Atendimento da equipe
