In [6]:
from Clasificador import ClasificadorNaiveBayes
from Datos import Datos
from EstrategiaParticionado import ValidacionCruzada, ValidacionSimple
from sklearn.naive_bayes import GaussianNB, MultinomialNB, CategoricalNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

german = Datos("ConjuntosDatos/german.data")
ttt = Datos("ConjuntosDatos/tic-tac-toe.data")

# 1. Naive Bayes

## 1.1 Tablas para los distintos porcentajes de Test y K

In [42]:
def test(times:int, testPercentage:int, cruzada:bool, data):
    particionado = ValidacionCruzada(int(100/testPercentage)) if cruzada else ValidacionSimple(testPercentage, int(100/testPercentage))
    cnv1 = ClasificadorNaiveBayes()
    error = []
    errorCLP = []

    for _ in range(times):
        e, eCLP = cnv1.validacion(particionado, data, cnv1)
        error.append(e)
        errorCLP.append(eCLP)

    error = np.array(error)
    errorCLP = np.array(errorCLP)
    return np.mean(error), np.mean(errorCLP), np.std(error), np.std(errorCLP)

def testPrint(times:int, testPercentage:int, german, ttt):
    mediaTTTVS, mediaTTTCLPVS, stdTTTVS, stdTTTCLPVS = test(times, testPercentage, False, ttt)
    mediaGermanVS, mediaGermanCLPVS, stdGermanVS, stdGermanCLPVS = test(times, testPercentage, False, ttt)
    if testPercentage < 40:
        mediaGermanVC, mediaGermanCLPVC, stdGermanVC, stdGermanCLPVC = test(times, testPercentage, True, ttt)
        mediaTTTVC, mediaTTTCLPVC, stdTTTVC, stdTTTCLPVC = test(times, testPercentage, True, ttt)
        print(f"Test {testPercentage}% / K = {int(100/testPercentage)}\t\tGerman - Media\tGerman - std\tTic-Tac-Toe - Media\tTic-Tac-Toe - std")
        print(f"Validacion cruzada\t\t{mediaGermanVC:2f}\t{stdGermanVC:2f}\t{mediaTTTVC:2f}\t\t{stdTTTVC:2f}")
        print(f"Validacion cruzada (Laplace)\t{mediaGermanCLPVC:2f}\t{stdGermanCLPVC:2f}\t{mediaTTTCLPVC:2f}\t\t{stdTTTCLPVC:2f}")
    else:
        print(f"Test {testPercentage}%\t\t\tGerman - Media\tGerman - std\tTic-Tac-Toe - Media\tTic-Tac-Toe - std")
    print(f"Validacion simple\t\t{mediaGermanVS:2f}\t{stdGermanVS:2f}\t{mediaTTTVS:2f}\t\t{stdTTTVS:2f}")
    print(f"Validacion simple (Laplace)\t{mediaGermanCLPVS:2f}\t{stdGermanCLPVS:2f}\t{mediaTTTCLPVS:2f}\t\t{stdTTTCLPVS:2f}\n\n")

    
def error(datos, pred):
    errores = 0
    for i in range(datos.shape[0]):
        if datos[i][-1] != pred[i]:
            errores += 1
    return (errores/datos.shape[0])


def test_VS(X, y, times, testSize, model):
    mediaError = 0
    errores = np.zeros(times)
    for i in range(times):
        XTrain, XTest, yTrain, yTest = train_test_split(X, y, test_size=testSize)
        model.fit(XTrain, yTrain)
        yPred = model.predict(XTest)
        errores[i] = error(XTest, yPred)
    return 1-np.mean(errores), np.std(errores)

for i in range(5, 55, 5):
    testPrint(10, i, german, ttt)

Test 5% / K = 20		German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.306596	0.003586	0.306915		0.002141
Validacion cruzada (Laplace)	0.306383	0.003528	0.306383		0.002282
Validacion simple		0.299787	0.014562	0.303085		0.015811
Validacion simple (Laplace)	0.299787	0.014271	0.302872		0.016043


Test 10% / K = 10		German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.303474	0.003297	0.300842		0.004258
Validacion cruzada (Laplace)	0.302632	0.003304	0.300211		0.004386
Validacion simple		0.310105	0.010860	0.305053		0.011644
Validacion simple (Laplace)	0.309895	0.011398	0.305053		0.012165


Test 15% / K = 6		German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.296017	0.006116	0.298323		0.005245
Validacion cruzada (Laplace)	0.295702	0.005956	0.297484		0.005174
Validacion simple		0.307459	0.012551	0.305478		0.016503
Validacion simple (Laplace)	0.305361	0.012767	0.304895		0.016875


Test 20% 

## 1.2 Análisis

# 2. Scikit-Learn
## MultinomialNB


In [43]:
tttX = ttt.datos[:,[i for i in range(ttt.datos.shape[1]-1)]]
ttty = ttt.datos[:,-1]
germanX = german.datos[:,[i for i in range(german.datos.shape[1]-1)]]
germany = german.datos[:,-1]

for i in range(5, 55, 5):
    mnb = MultinomialNB(fit_prior=True)
    mnbCLP = MultinomialNB(fit_prior=True, alpha=1)
    
    tttScoreVS = test_VS(tttX, ttty, 10, i/100, mnb)
    tttScoreVSCLP = test_VS(tttX, ttty, 10, i/100, mnbCLP)
    germanScoreVS = test_VS(germanX, germany, 10, i/100, mnb)
    germanScoreVSCLP = test_VS(germanX, germany, 10, i/100, mnbCLP)
    if i < 40:
        tttScore = cross_val_score(mnb, tttX, ttty, cv=int(100/i))
        tttScoreCLP = cross_val_score(mnbCLP, tttX, ttty, cv=int(100/i))
        germanScore = cross_val_score(mnb, germanX, germany, cv=int(100/i))
        germanScoreCLP = cross_val_score(mnbCLP, germanX, germany, cv=int(100/i))
        print(f"Test {i}% / K = {int(100/i)}\t\tGerman - Media\tGerman - std\tTic-Tac-Toe - Media\tTic-Tac-Toe - std")
        print(f"Validacion cruzada\t\t{1-germanScore.mean():2f}\t{germanScore.std():2f}\t{1-tttScore.mean():2f}\t\t{tttScore.std():2f}")
        print(f"Validacion cruzada (Laplace)\t{1-germanScoreCLP.mean():2f}\t{germanScoreCLP.std():2f}\t{1-tttScoreCLP.mean():2f}\t\t{tttScoreCLP.std():2f}")
    else:
        print(f"Test {i}%\t\t\tGerman - Media\tGerman - std\tTic-Tac-Toe - Media\tTic-Tac-Toe - std")
    print(f"Validacion simple\t\t{1-germanScoreVS[0]:2f}\t{germanScoreVS[1]:2f}\t{tttScoreVS[0]:2f}\t\t{tttScoreVS[1]:2f}")
    print(f"Validacion simple (Laplace)\t{1-germanScoreVSCLP[0]:2f}\t{germanScoreVSCLP[1]:2f}\t{tttScoreVSCLP[0]:2f}\t\t{tttScoreVSCLP[1]:2f}\n\n")


Test 5% / K = 20		German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.361000	0.068257	0.342376		0.009842
Validacion cruzada (Laplace)	0.361000	0.068257	0.342376		0.009842
Validacion simple		0.346000	0.074860	0.350000		0.047324
Validacion simple (Laplace)	0.328000	0.056000	0.322917		0.047735


Test 10% / K = 10		German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.361000	0.059908	0.343421		0.005261
Validacion cruzada (Laplace)	0.361000	0.059908	0.343421		0.005261
Validacion simple		0.354000	0.040546	0.356250		0.035722
Validacion simple (Laplace)	0.337000	0.045177	0.361458		0.045655


Test 15% / K = 6		German - Media	German - std	Tic-Tac-Toe - Media	Tic-Tac-Toe - std
Validacion cruzada		0.361993	0.027692	0.346541		0.010727
Validacion cruzada (Laplace)	0.361993	0.027692	0.346541		0.010727
Validacion simple		0.336000	0.032823	0.359028		0.024660
Validacion simple (Laplace)	0.350000	0.026708	0.345139		0.025993


Test 20% 