In [1]:
from Clasificador import ClasificadorNaiveBayes
from Datos import Datos
from EstrategiaParticionado import ValidacionCruzada, ValidacionSimple
from sklearn.naive_bayes import GaussianNB, MultinomialNB, CategoricalNB
from sklearn.model_selection import cross_val_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

german = Datos("ConjuntosDatos/german.data")
ttt = Datos("ConjuntosDatos/tic-tac-toe.data")

# 1. Naive Bayes

## 1.1 Tablas para los distintos porcentajes de Test y K

In [2]:
def test(times:int, testPercentage:int, cruzada:bool, data):
    particionado = ValidacionCruzada(int(100/testPercentage)) if cruzada else ValidacionSimple(testPercentage, int(100/testPercentage))
    cnv1 = ClasificadorNaiveBayes()
    error = []
    errorCLP = []

    for _ in range(times):
        e, eCLP = cnv1.validacion(particionado, data, cnv1)
        error.append(e)
        errorCLP.append(eCLP)

    error = np.array(error)
    errorCLP = np.array(errorCLP)
    return np.mean(error), np.mean(errorCLP), np.std(error), np.std(errorCLP)

def testPrint(times:int, testPercentage:int, german, ttt):
    mediaTTTVC, mediaTTTCLPVC, stdTTTVC, stdTTTCLPVC = test(times, testPercentage, True, ttt)
    mediaTTTVS, mediaTTTCLPVS, stdTTTVS, stdTTTCLPVS = test(times, testPercentage, False, ttt)
    mediaGermanVC, mediaGermanCLPVC, stdGermanVC, stdGermanCLPVC = test(times, testPercentage, True, ttt)
    mediaGermanVS, mediaGermanCLPVS, stdGermanVS, stdGermanCLPVS = test(times, testPercentage, False, ttt)
    print(f"Test {testPercentage}% / K = {int(100/testPercentage)}\t\tGerman - Media\tGerman - std\tTic-Tac-Toe - Media\tTic-Tac-Toe - std")
    print(f"Validacion cruzada\t\t{mediaGermanVC:2f}\t{stdGermanVC:2f}\t{mediaTTTVC:2f}\t\t{stdTTTVC:2f}")
    print(f"Validacion cruzada (Laplace)\t{mediaGermanCLPVC:2f}\t{stdGermanCLPVC:2f}\t{mediaTTTCLPVC:2f}\t\t{stdTTTCLPVC:2f}")
    print(f"Validacion simple\t\t{mediaGermanVS:2f}\t{stdGermanVS:2f}\t{mediaTTTVS:2f}\t\t{stdTTTVS:2f}")
    print(f"Validacion simple (Laplace)\t{mediaGermanCLPVS:2f}\t{stdGermanCLPVS:2f}\t{mediaTTTCLPVS:2f}\t\t{stdTTTCLPVS:2f}\n\n")

for i in range(5, 55, 5):
    testPrint(10, i, german, ttt)

Test 5% / K = 20		German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.130376	0.001403	0.130602		0.001284
Validacion cruzada (Laplace)	0.130376	0.001343	0.130330		0.001534
Validacion simple		0.129968	0.005587	0.127524		0.005229
Validacion simple (Laplace)	0.130014	0.005624	0.127569		0.005697


Test 10% / K = 10		German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.031989	0.000310	0.031956		0.000457
Validacion cruzada (Laplace)	0.031911	0.000321	0.031934		0.000434
Validacion simple		0.032632	0.001786	0.032166		0.001796
Validacion simple (Laplace)	0.032598	0.001754	0.032211		0.001854


Test 15% / K = 6		German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.011305	0.000162	0.011269		0.000180
Validacion cruzada (Laplace)	0.011297	0.000158	0.011238		0.000186
Validacion simple		0.012578	0.000847	0.012539		0.000619
Validacion simple (Laplace)	0.012534	0.000815	0.012534		0.000618


Test 20% 

## 1.2 Análisis

# 2. Scikit-Learn
## MultinomialNB


In [4]:
mnb = MultinomialNB(fit_prior=True)
mnbCLP = MultinomialNB(fit_prior=True, alpha=1)

tttX = ttt.datos[:,[i for i in range(ttt.datos.shape[1]-1)]]
ttty = ttt.datos[:,-1]
germanX = german.datos[:,[i for i in range(german.datos.shape[1]-1)]]
germany = german.datos[:,-1]

print("Validación cruzada")
for i in range(2, 11):
    tttScore = cross_val_score(mnb, tttX, ttty, cv=i)
    tttScoreCLP = cross_val_score(mnbCLP, tttX, ttty, cv=i)
    germanScore = cross_val_score(mnb, germanX, germany, cv=i)
    germanScoreCLP = cross_val_score(mnbCLP, germanX, germany, cv=i)
    print(f"Test K = {i}\t\t\tGerman - Media\tGerman - std\tTic-Tac-Toe - Media\tTic-Tac-Toe - std")
    print(f"Validacion cruzada\t\t{1-germanScore.mean():2f}\t{germanScore.std():2f}\t{1-tttScore.mean():2f}\t\t{tttScore.std():2f}")
    print(f"Validacion cruzada (Laplace)\t{1-germanScoreCLP.mean():2f}\t{germanScoreCLP.std():2f}\t{1-tttScoreCLP.mean():2f}\t\t{tttScoreCLP.std():2f}\n")
    #print(f"Validacion simple\t\t{1-mediaGermanVS:2f}\t{1-stdGermanVS:2f}\t{1-mediaTTTVS:2f}\t\t{1-stdTTTVS:2f}")
    #print(f"Validacion simple (Laplace)\t{1-mediaGermanCLPVS:2f}\t{1-stdGermanCLPVS:2f}\t{1-mediaTTTCLPVS:2f}\t\t{1-stdTTTCLPVS:2f}\n\n")


Validación cruzada
Test K = 2			German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.360000	0.020000	0.366388		0.032359
Validacion cruzada (Laplace)	0.360000	0.020000	0.366388		0.032359

Test K = 3			German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.363028	0.022480	0.351757		0.012084
Validacion cruzada (Laplace)	0.363028	0.022480	0.351757		0.012084

Test K = 4			German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.360000	0.030067	0.350702		0.014643
Validacion cruzada (Laplace)	0.360000	0.030067	0.350702		0.014643

Test K = 5			German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.360000	0.033912	0.345490		0.011413
Validacion cruzada (Laplace)	0.360000	0.033912	0.345490		0.011413

Test K = 6			German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.361993	0.027692	0.346541		0.010727
Validacion cruzada (Laplace)	0.36