# Files in a Flash API

In [10]:
import math
import os


def files_in_a_flash(path):
    """ Sorts files in the 'unsorted' directory (see notes) by theme.

    Parameters
    ----------
    path : The path to the main directory. (str)

    Notes
    -----
    The main directory should countain :
        - labels.txt
        - a sub-directory named 'sorted' that countains the sorted files to learn from.
        - a sub-directory named 'unsorted' that countains the files to sort.
    """

    #Learning
    print('Learning...')
    frequencies = get_frequencies('%s/sorted' % path)

    #Sorting
    print('Learning done. Sorting...')

    #For each file in unsorted
    file_counter = 1
    nb_files = len(os.listdir('%s/unsorted' % path))
    for current_file in os.listdir('%s/unsorted' % path):

        #Compute the probability that it belongs to each theme
        word_list = get_words('%s/unsorted/%s' % (path, current_file))
        theme_probs = {}
        for theme in frequencies:
            theme_probs[theme] = get_theme_prob(frequencies[theme], word_list)

        #Get the max
        max = list(theme_probs.keys())[0] #Get a theme to begin with
        max_value = theme_probs[max]
        for theme in theme_probs:
            if theme_probs[theme] > max_value:
                max = theme
                max_value = theme_probs[theme]


        #Print result
        print('File %s: %s (%d/%d)' % (current_file, max, file_counter, nb_files))
        file_counter += 1

        #Move the file in the corresponding directory
        if os.path.exists('%s/sorted/%s/%s' % (path, max, current_file)):
            #Conflict
            print('Warning : %s already exists in %s. The file wasn\'t moved.' % (current_file, max))
        else:
            os.rename('%s/unsorted/%s' % (path, current_file), '%s/sorted/%s/%s' % (path, max, current_file))


def get_words(path):
    """ Creates a list of all useful words in the given text file.

    Parameters
    ----------
    path : the path to the .txt file to read (str).

    Returns
    -------
    words : a list of all useful words in the text file.
    """

    # Get text from the given file.
    current_file = open(path, 'r', encoding='ISO-8859-1')
    text = current_file.read().lower()
    current_file.close()

        #Define the useless words/elements.
    useless_elements = ['\n', '/', '\t', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',',
                        '"', '?', '!', '.', ';', '-', '\'', '(', ')', '@', '>', '<', ':', '*', '}',
                        '{', '[', ']', '$', '^', '~', '#', '&', '%', '+', '_', '=', '€', '£',
                        ' you ', ' they ', ' she ', ' he ', ' it ', ' your ', ' their ', ' we ',
                        ' is ', ' are ', ' have ', ' has ', ' or ', ' and ', ' an ', ' the ',
                        ' of ', ' in ', ' to ', ' if ', ' to ', ' for ', ' them ', ' my ', ' me ',
                        ' its ', ' yours ', ' as ', 'from ', ' by ', ' on ', ' will ', ' not ',
                        ' no ', ' yes ', ' any ', ' be ', ' etc ', ' but ', ' would ', ' been ',
                        ' had ', ' this ', ' off ', ' up ', ' down ', ' right ', ' left ', ' per ',
                        ' year ', ' am ', ' all ', ' his ', ' her ', ' our ', ' their ', ' at ',
                        ' who ', ' can ', ' very ', ' much ', ' know ', ' how ', ' get ', ' just ',
                        ' thanks ', ' where ', ' out ', ' that ', ' with ', ' last ', ' few ',
                        ' so ', ' two ', ' one ', ' three ', ' four ', ' five ', ' six ', ' com ',
                        ' seven ', ' eight ', ' nine ', ' ten ', ' eleven ', ' most ', ' must ',
                        ' some ', ' need ', ' most ', ' january ', ' february ', ' march ',
                        ' april ', ' may ', ' june ', ' july ', ' august ', ' september ', ' why ',
                        ' october ', ' november ', ' december ', ' old ', ' re ', ' subject ',
                        ' tough ', ' best ', ' good ', ' was ', ' what ', ' lot ', ' every ',
                        ' other ', ' while ', ' day ', ' ever ', ' about ', ' said ', ' let ',
                        ' since ', ' also ', ' say ', ' did ', ' here ', ' there ', ' nor ',
                        ' going ', ' fact ', ' us ', ' do ', ' feel ', ' only ', ' thing ',
                        ' which ', ' bit ', ' luck ', ' ll ', ' great ', ' enough ', ' first ',
                        ' between ', ' reply ', ' when ', ' does ', ' such ', 'therefore', ' more ',
                        ' less ', ' than ', ' whatever ', ' however ', ' furthermore ', ' these ',
                        ' often ', ' already ', ' never ', ' always ', ' least ', ' go ', ' well ',
                        ' something ', ' somewhere ', ' somewhat ', ' mean ', ' too ', ' into ',
                        ' between ', ' both ', ' over ', ' more ', ' somebody ', ' someone ',
                        ' none ', ' non ', ' same ', ' those ', ' might ', ' monday ', ' don ',
                        ' dunno ', ' wrong ', ' true ', ' false ', ' after ', ' today ', ' itself ',
                        ' myself ', ' yourself ', ' himself ', ' herself ', ' ourselves ',
                        ' themselves ', ' self ', ' yesterday ', ' tomorrow ', ' monday ', 'tuesday ',
                        ' wednesday ', ' because ', ' sorry ', ' thursday ', ' friday ', ' saturday ',
                        ' sunday ', ' time ', ' like ', ' likes ', ' soon ', ' enough ', ' around ',
                        ' next ', ' neither ', ' either ', ' else ', ' anybody ', ' anyone ',
                        ' month ', ' week ', ' weekend ', ' end ', ' start ', ' yet ', ' yep ',
                        ' until ', ' till ', ' then ', ' away ', ' bad ', ' good ', ' high ',
                        ' below ', ' good ', ' bad ', ' years ', ' ago ', ' later ', ' better ',
                        ' same ', ' under ', ' actually ', ' fine ', ' bother ', ' cannot ',
                        ' case ', ' fact ', ' should ', ' wouldn ', ' far ', ' think ', ' thought ',
                        ' without ', ' instead ', ' now ', ' times ', ' great ', ' afternoon ',
                        ' noon ', ' night ', ' midnight ', ' front ', ' several ', ' another ',
                        ' inc ', ' were ', ' earlier ', ' early ', ' ask ', ' asked ', ' asks ',
                        ' make ', ' makes ', ' made ']

    #Replace each useless element by a space.
    for element in useless_elements:
        text = str.replace(text, element, ' ')

    #Put all words in a list.
    words_list = text.split(' ')

    #Create and return a list of every useful words.
    useful_list = []
    for word in words_list:
        if not word in useful_list and len(word) > 1:
            useful_list.append(word)
    return useful_list


def get_frequencies(path):
    """ Creates a dictionary that countains the frequency of each useful word in each theme.

    Parameters
    ----------
    path: the path to the sorted directory (str).

    Returns
    -------
    frequencies : a dictionary of format { theme (str) : theme_frequencies (dict) }.

    Notes
    -----
    Each theme_frequencies dictionary is of format { word (str) : frequency (float) }.
    """

    #Initialize the frequencies dictionary
    frequencies = {}

    #For each theme
    for theme in os.listdir(path):

        #Create the theme_frequencies dictionary
        frequencies[theme] = {}

        #For each word of each file of this theme
        for current_file in os.listdir('%s/%s' % (path, theme)):
            for word in get_words('%s/%s/%s' % (path, theme, current_file)):

                # Add 1 to the number of occurences of word in this theme
                if word in frequencies[theme]:
                    frequencies[theme][word] += 1
                else:
                    frequencies[theme][word] = 1

    #Equalize every theme_frequencies dictionary
    check_differences(frequencies)

    #Divide the frequency of each file in each theme by the number of files in that theme
    for theme in frequencies:
        nb_files = len(os.listdir('%s/%s' % (path, theme)))
        for word in frequencies[theme]:
            frequencies[theme][word] /= nb_files

    return frequencies


def check_differences(frequencies):
    """ Checks the given frequencies in order to have the same word list in each theme.

    Parameters
    ----------
    frequencies : a dictionary of format { theme (str) : theme_frequencies (dict) }.

    Notes
    -----
    The frequencies dictonary is edited: the theme_frequencies dictionaries may be lenghtened.
    All theme_frequencies dictionaries should be of the same size afterwards.
    Each theme_frequencies dictionary is of format { word (str) : frequency (float) }.

    See also
    --------
    get_frequencies to create the frequencies dictionary.
    """

    checked_words = []

    # For each word
    for theme in frequencies:
        for word in frequencies[theme]:

            #Avoid to check the same word several times
            if not word in checked_words:

                #Check every other theme to see if it countains the word
                for other_theme in frequencies:

                    if other_theme != theme and not word in frequencies[other_theme]:
                        #Add the word
                        frequencies[other_theme][word] = 1

                #add the word to the list so it's not checked anymore
                checked_words.append(word)


def get_theme_prob(theme_frequencies, list_words):
    """Computes the probability that the word list matches the given theme.

    Parameters
    ----------
    theme_frequencies: a dictionary of format { word (str) : frequency (float) }
    list_words: a list of useful words from a text file (str).

    Returns
    -------
    probability: the probability that the file belongs to the given theme. (float)

    See also
    --------
    get_words to get the list of the useful words in a text file.
    get_frequencies and check_frequencies to create the theme_frequencies dictionary.

    :Example:
        >>> get_theme_prob({'apple' : 0.5, 'gun' : 0.01, 'banana': 0.2 }, ['apple', 'banana', 'eat'])
        -1.00436480540245
    """

    probability = 0

    #For each word
    for word in theme_frequencies:

        #Add the log of the probability that this word is/isn't in this theme
        if word in list_words:
            probability += math.log10(theme_frequencies[word])
        elif theme_frequencies[word] < 1: #Check if the frequency is in the domain of log
            probability += math.log10(1 - theme_frequencies[word])
        else:
            probability -= 999999 #log(0) -> minus infinity

    return probability


def check_accuracy(path):
    """ Checks if the files are correctly sorted in  and prints the accuracy.

    Parameters
    ----------
    path : The path (str) to the main directory.

    Notes
    -----
    The main directory should countain :
        - labels.txt
        - a sub-directory named 'sorted' that countains the sorted files.
        - a sub-directory named 'unsorted' that countains the files to sort. (should be empty)
    """

    #Read the solution from labels.txt
    label_file = open('%s/labels.txt' % path, 'r', encoding='ISO-8859-1')
    lines = label_file.readlines()
    label_file.close()

    #Initialize a counter
    correct_answers = 0

    #Count every correct answer
    for line in lines:
        solution = line.strip('\n').split(' ')
        if os.path.exists('%s/sorted/%s/%s' % (path, solution[1], solution[0])):
            correct_answers += 1

    #Display result
    print('Accuracy : %.3f' % (correct_answers/len(lines)))

## Testing with doctest 

In [11]:
 #Testing with doctest
import doctest
doctest.testmod()

TestResults(failed=0, attempted=1)

# Example n°1 of Use Case

In [31]:
%%time
#Sort the files in archive 1 (two different themes)
files_in_a_flash('./archive_1')

Learning...
Learning done. Sorting...
File 38758: comp.graphics (1/753)
File 38761: comp.graphics (2/753)
File 38762: comp.graphics (3/753)
File 38763: comp.graphics (4/753)
File 38764: comp.graphics (5/753)
File 38765: comp.graphics (6/753)
File 38766: comp.graphics (7/753)
File 38767: comp.graphics (8/753)
File 38768: comp.graphics (9/753)
File 38769: comp.graphics (10/753)
File 38770: comp.graphics (11/753)
File 38771: comp.graphics (12/753)
File 38772: comp.graphics (13/753)
File 38773: comp.graphics (14/753)
File 38774: comp.graphics (15/753)
File 38775: comp.graphics (16/753)
File 38776: comp.graphics (17/753)
File 38777: comp.graphics (18/753)
File 38778: comp.graphics (19/753)
File 38779: comp.graphics (20/753)
File 38780: comp.graphics (21/753)
File 38781: comp.graphics (22/753)
File 38782: comp.graphics (23/753)
File 38783: comp.graphics (24/753)
File 38784: comp.graphics (25/753)
File 38785: comp.graphics (26/753)
File 38786: comp.graphics (27/753)
File 38787: comp.graphics 

File 39002: comp.graphics (233/753)
File 39003: comp.graphics (234/753)
File 39004: comp.graphics (235/753)
File 39005: comp.graphics (236/753)
File 39006: comp.graphics (237/753)
File 39007: comp.graphics (238/753)
File 39008: comp.graphics (239/753)
File 39009: comp.graphics (240/753)
File 39010: comp.graphics (241/753)
File 39011: comp.graphics (242/753)
File 39012: comp.graphics (243/753)
File 39013: comp.graphics (244/753)
File 39014: comp.graphics (245/753)
File 39015: comp.graphics (246/753)
File 39016: comp.graphics (247/753)
File 39017: comp.graphics (248/753)
File 39018: comp.graphics (249/753)
File 39019: comp.graphics (250/753)
File 39020: comp.graphics (251/753)
File 39021: comp.graphics (252/753)
File 39022: comp.graphics (253/753)
File 39023: comp.graphics (254/753)
File 39024: comp.graphics (255/753)
File 39025: comp.graphics (256/753)
File 39026: comp.graphics (257/753)
File 39027: comp.graphics (258/753)
File 39028: comp.graphics (259/753)
File 39029: comp.graphics (2

File 54622: talk.politics.guns (455/753)
File 54623: talk.politics.guns (456/753)
File 54624: talk.politics.guns (457/753)
File 54625: comp.graphics (458/753)
File 54626: talk.politics.guns (459/753)
File 54627: comp.graphics (460/753)
File 54628: talk.politics.guns (461/753)
File 54634: talk.politics.guns (462/753)
File 54636: talk.politics.guns (463/753)
File 54637: talk.politics.guns (464/753)
File 54638: comp.graphics (465/753)
File 54639: talk.politics.guns (466/753)
File 54640: talk.politics.guns (467/753)
File 54641: talk.politics.guns (468/753)
File 54642: talk.politics.guns (469/753)
File 54643: talk.politics.guns (470/753)
File 54644: talk.politics.guns (471/753)
File 54669: talk.politics.guns (472/753)
File 54670: talk.politics.guns (473/753)
File 54671: talk.politics.guns (474/753)
File 54678: talk.politics.guns (475/753)
File 54682: talk.politics.guns (476/753)
File 54683: talk.politics.guns (477/753)
File 54685: talk.politics.guns (478/753)
File 54687: talk.politics.guns 

File 55083: comp.graphics (658/753)
File 55084: talk.politics.guns (659/753)
File 55085: comp.graphics (660/753)
File 55086: talk.politics.guns (661/753)
File 55087: talk.politics.guns (662/753)
File 55088: comp.graphics (663/753)
File 55089: talk.politics.guns (664/753)
File 55091: talk.politics.guns (665/753)
File 55092: talk.politics.guns (666/753)
File 55093: talk.politics.guns (667/753)
File 55094: talk.politics.guns (668/753)
File 55095: comp.graphics (669/753)
File 55096: talk.politics.guns (670/753)
File 55097: talk.politics.guns (671/753)
File 55098: talk.politics.guns (672/753)
File 55099: comp.graphics (673/753)
File 55101: talk.politics.guns (674/753)
File 55102: talk.politics.guns (675/753)
File 55103: talk.politics.guns (676/753)
File 55104: talk.politics.guns (677/753)
File 55105: talk.politics.guns (678/753)
File 55106: talk.politics.guns (679/753)
File 55107: talk.politics.guns (680/753)
File 55108: talk.politics.guns (681/753)
File 55109: talk.politics.guns (682/753)


In [32]:
%%time
#Print accuracy
check_accuracy('./archive_1')

Accuracy : 0.934
Wall time: 33 ms


# Example n°2 of Use Case

In [14]:
%%time
#Sort the files in archive 4
files_in_a_flash('./archive_4')

Learning...
Learning done. Sorting...
File 103007: rec.autos (1/1571)
File 103008: rec.autos (2/1571)
File 103028: rec.autos (3/1571)
File 103037: comp.sys.mac.hardware (4/1571)
File 103048: rec.autos (5/1571)
File 103057: rec.autos (6/1571)
File 103063: rec.autos (7/1571)
File 103065: rec.autos (8/1571)
File 103066: rec.autos (9/1571)
File 103067: rec.autos (10/1571)
File 103068: rec.autos (11/1571)
File 103069: rec.autos (12/1571)
File 103070: rec.autos (13/1571)
File 103071: rec.autos (14/1571)
File 103072: rec.autos (15/1571)
File 103073: rec.autos (16/1571)
File 103074: comp.sys.ibm.pc.hardware (17/1571)
File 103075: rec.autos (18/1571)
File 103076: rec.autos (19/1571)
File 103077: rec.autos (20/1571)
File 103078: rec.autos (21/1571)
File 103079: rec.autos (22/1571)
File 103080: rec.autos (23/1571)
File 103081: rec.autos (24/1571)
File 103082: rec.autos (25/1571)
File 103083: rec.motorcycles (26/1571)
File 103121: rec.autos (27/1571)
File 103122: rec.autos (28/1571)
File 103123: r

File 103484: rec.autos (231/1571)
File 103491: rec.autos (232/1571)
File 103493: comp.sys.mac.hardware (233/1571)
File 103494: rec.autos (234/1571)
File 103495: rec.autos (235/1571)
File 103496: rec.autos (236/1571)
File 103497: rec.autos (237/1571)
File 103498: rec.autos (238/1571)
File 103499: rec.autos (239/1571)
File 103500: rec.autos (240/1571)
File 103501: rec.autos (241/1571)
File 103502: rec.autos (242/1571)
File 103503: rec.autos (243/1571)
File 103504: comp.sys.ibm.pc.hardware (244/1571)
File 103505: rec.autos (245/1571)
File 103506: rec.autos (246/1571)
File 103507: rec.autos (247/1571)
File 103508: rec.autos (248/1571)
File 103509: comp.sys.mac.hardware (249/1571)
File 103510: rec.autos (250/1571)
File 103511: rec.autos (251/1571)
File 103512: rec.autos (252/1571)
File 103513: rec.autos (253/1571)
File 103514: rec.autos (254/1571)
File 103515: rec.autos (255/1571)
File 103516: rec.autos (256/1571)
File 103517: comp.sys.mac.hardware (257/1571)
File 103518: rec.autos (258/157

File 104751: rec.motorcycles (456/1571)
File 104752: rec.motorcycles (457/1571)
File 104753: rec.motorcycles (458/1571)
File 104754: rec.motorcycles (459/1571)
File 104755: rec.motorcycles (460/1571)
File 104756: rec.motorcycles (461/1571)
File 104757: rec.motorcycles (462/1571)
File 104758: rec.motorcycles (463/1571)
File 104759: comp.sys.ibm.pc.hardware (464/1571)
File 104760: rec.motorcycles (465/1571)
File 104762: rec.motorcycles (466/1571)
File 104763: rec.motorcycles (467/1571)
File 104764: rec.motorcycles (468/1571)
File 104765: rec.motorcycles (469/1571)
File 104766: rec.motorcycles (470/1571)
File 104767: rec.motorcycles (471/1571)
File 104768: rec.motorcycles (472/1571)
File 104769: rec.motorcycles (473/1571)
File 104770: rec.motorcycles (474/1571)
File 104771: rec.motorcycles (475/1571)
File 104772: rec.motorcycles (476/1571)
File 104773: rec.motorcycles (477/1571)
File 104777: rec.motorcycles (478/1571)
File 104778: rec.motorcycles (479/1571)
File 104779: rec.motorcycles (4

File 105079: rec.motorcycles (660/1571)
File 105098: rec.motorcycles (661/1571)
File 105099: rec.motorcycles (662/1571)
File 105100: rec.motorcycles (663/1571)
File 105101: rec.motorcycles (664/1571)
File 105102: rec.motorcycles (665/1571)
File 105103: rec.motorcycles (666/1571)
File 105104: rec.motorcycles (667/1571)
File 105105: rec.motorcycles (668/1571)
File 105106: rec.motorcycles (669/1571)
File 105107: rec.autos (670/1571)
File 105108: rec.motorcycles (671/1571)
File 105109: rec.motorcycles (672/1571)
File 105110: rec.motorcycles (673/1571)
File 105111: comp.sys.ibm.pc.hardware (674/1571)
File 105112: rec.motorcycles (675/1571)
File 105113: rec.motorcycles (676/1571)
File 105114: rec.autos (677/1571)
File 105115: rec.motorcycles (678/1571)
File 105116: rec.motorcycles (679/1571)
File 105117: rec.motorcycles (680/1571)
File 105118: rec.motorcycles (681/1571)
File 105119: rec.motorcycles (682/1571)
File 105120: rec.motorcycles (683/1571)
File 105121: rec.motorcycles (684/1571)
Fil

File 52012: comp.sys.mac.hardware (858/1571)
File 52013: comp.sys.mac.hardware (859/1571)
File 52014: comp.sys.mac.hardware (860/1571)
File 52015: comp.sys.mac.hardware (861/1571)
File 52016: comp.sys.mac.hardware (862/1571)
File 52017: comp.sys.mac.hardware (863/1571)
File 52018: comp.sys.ibm.pc.hardware (864/1571)
File 52019: comp.sys.mac.hardware (865/1571)
File 52020: comp.sys.ibm.pc.hardware (866/1571)
File 52021: comp.sys.mac.hardware (867/1571)
File 52022: comp.sys.mac.hardware (868/1571)
File 52023: comp.sys.mac.hardware (869/1571)
File 52024: comp.sys.mac.hardware (870/1571)
File 52025: comp.sys.mac.hardware (871/1571)
File 52026: comp.sys.mac.hardware (872/1571)
File 52027: comp.sys.mac.hardware (873/1571)
File 52028: comp.sys.mac.hardware (874/1571)
File 52029: comp.sys.mac.hardware (875/1571)
File 52030: comp.sys.mac.hardware (876/1571)
File 52032: comp.sys.mac.hardware (877/1571)
File 52033: comp.sys.mac.hardware (878/1571)
File 52034: comp.sys.mac.hardware (879/1571)
File

File 52209: comp.sys.mac.hardware (1039/1571)
File 52210: comp.sys.mac.hardware (1040/1571)
File 52211: comp.sys.mac.hardware (1041/1571)
File 52212: comp.sys.mac.hardware (1042/1571)
File 52213: comp.sys.mac.hardware (1043/1571)
File 52214: comp.sys.mac.hardware (1044/1571)
File 52215: comp.sys.mac.hardware (1045/1571)
File 52216: comp.sys.mac.hardware (1046/1571)
File 52217: comp.sys.mac.hardware (1047/1571)
File 52218: comp.sys.mac.hardware (1048/1571)
File 52219: comp.sys.mac.hardware (1049/1571)
File 52220: comp.sys.mac.hardware (1050/1571)
File 52221: comp.sys.mac.hardware (1051/1571)
File 52222: comp.sys.mac.hardware (1052/1571)
File 52224: comp.sys.mac.hardware (1053/1571)
File 52225: comp.sys.mac.hardware (1054/1571)
File 52226: comp.sys.mac.hardware (1055/1571)
File 52227: comp.sys.ibm.pc.hardware (1056/1571)
File 52228: comp.sys.ibm.pc.hardware (1057/1571)
File 52229: comp.sys.mac.hardware (1058/1571)
File 52230: comp.sys.mac.hardware (1059/1571)
File 52231: comp.sys.mac.har

File 60803: comp.sys.ibm.pc.hardware (1217/1571)
File 60804: comp.sys.ibm.pc.hardware (1218/1571)
File 60805: comp.sys.ibm.pc.hardware (1219/1571)
File 60806: comp.sys.ibm.pc.hardware (1220/1571)
File 60807: comp.sys.ibm.pc.hardware (1221/1571)
File 60808: comp.sys.ibm.pc.hardware (1222/1571)
File 60809: comp.sys.ibm.pc.hardware (1223/1571)
File 60810: comp.sys.mac.hardware (1224/1571)
File 60811: comp.sys.ibm.pc.hardware (1225/1571)
File 60812: comp.sys.ibm.pc.hardware (1226/1571)
File 60813: comp.sys.ibm.pc.hardware (1227/1571)
File 60814: comp.sys.ibm.pc.hardware (1228/1571)
File 60815: comp.sys.ibm.pc.hardware (1229/1571)
File 60816: comp.sys.mac.hardware (1230/1571)
File 60817: comp.sys.ibm.pc.hardware (1231/1571)
File 60818: comp.sys.ibm.pc.hardware (1232/1571)
File 60819: comp.sys.ibm.pc.hardware (1233/1571)
File 60821: comp.sys.ibm.pc.hardware (1234/1571)
File 60822: comp.sys.mac.hardware (1235/1571)
File 60823: comp.sys.ibm.pc.hardware (1236/1571)
File 60824: comp.sys.ibm.pc.h

File 61162: comp.sys.ibm.pc.hardware (1555/1571)
File 61163: comp.sys.ibm.pc.hardware (1556/1571)
File 61164: comp.sys.ibm.pc.hardware (1557/1571)
File 61165: comp.sys.ibm.pc.hardware (1558/1571)
File 61166: comp.sys.ibm.pc.hardware (1559/1571)
File 61167: comp.sys.ibm.pc.hardware (1560/1571)
File 61168: comp.sys.ibm.pc.hardware (1561/1571)
File 61169: comp.sys.ibm.pc.hardware (1562/1571)
File 61170: comp.sys.ibm.pc.hardware (1563/1571)
File 61171: comp.sys.ibm.pc.hardware (1564/1571)
File 61172: comp.sys.mac.hardware (1565/1571)
File 61173: comp.sys.ibm.pc.hardware (1566/1571)
File 61174: comp.sys.mac.hardware (1567/1571)
File 61175: comp.sys.mac.hardware (1568/1571)
File 61176: comp.sys.ibm.pc.hardware (1569/1571)
File 61177: comp.sys.ibm.pc.hardware (1570/1571)
File 61178: comp.sys.ibm.pc.hardware (1571/1571)
Wall time: 3min 44s


In [15]:
%%time
#Print accuracy
check_accuracy('./archive_4')

Accuracy : 0.901
Wall time: 79 ms


# Example n°3 of Use Case

In [33]:
%%time
#Sort the files of all archives together (extreme test with 11 different themes)
files_in_a_flash('./archive_tout')

Learning...
Learning done. Sorting...
File 103007: rec.autos (1/3794)
File 103008: rec.autos (2/3794)
File 103028: sci.electronics (3/3794)
File 103037: comp.sys.mac.hardware (4/3794)
File 103048: rec.autos (5/3794)
File 103057: rec.autos (6/3794)
File 103063: rec.autos (7/3794)
File 103065: rec.autos (8/3794)
File 103066: rec.autos (9/3794)
File 103067: rec.autos (10/3794)
File 103068: rec.autos (11/3794)
File 103069: rec.autos (12/3794)
File 103070: rec.autos (13/3794)
File 103071: rec.autos (14/3794)
File 103072: rec.autos (15/3794)
File 103073: rec.autos (16/3794)
File 103074: comp.sys.ibm.pc.hardware (17/3794)
File 103075: rec.autos (18/3794)
File 103076: rec.autos (19/3794)
File 103077: rec.autos (20/3794)
File 103078: rec.autos (21/3794)
File 103079: sci.space (22/3794)
File 103080: rec.autos (23/3794)
File 103081: rec.autos (24/3794)
File 103082: rec.autos (25/3794)
File 103083: rec.motorcycles (26/3794)
File 103121: rec.autos (27/3794)
File 103122: rec.autos (28/3794)
File 103

File 103491: rec.autos (232/3794)
File 103493: comp.graphics (233/3794)
File 103494: rec.autos (234/3794)
File 103495: rec.autos (235/3794)
File 103496: rec.autos (236/3794)
File 103497: rec.autos (237/3794)
File 103498: rec.autos (238/3794)
File 103499: rec.autos (239/3794)
File 103500: rec.autos (240/3794)
File 103501: rec.autos (241/3794)
File 103502: rec.autos (242/3794)
File 103503: rec.autos (243/3794)
File 103504: comp.sys.ibm.pc.hardware (244/3794)
File 103505: rec.autos (245/3794)
File 103506: rec.autos (246/3794)
File 103507: rec.autos (247/3794)
File 103508: rec.autos (248/3794)
File 103509: comp.sys.mac.hardware (249/3794)
File 103510: rec.autos (250/3794)
File 103511: rec.autos (251/3794)
File 103512: rec.autos (252/3794)
File 103513: rec.autos (253/3794)
File 103514: rec.autos (254/3794)
File 103515: rec.autos (255/3794)
File 103516: rec.autos (256/3794)
File 103517: comp.sys.mac.hardware (257/3794)
File 103518: rec.autos (258/3794)
File 103519: rec.autos (259/3794)
File 

File 104685: rec.motorcycles (452/3794)
File 104686: rec.motorcycles (453/3794)
File 104687: rec.motorcycles (454/3794)
File 104688: rec.motorcycles (455/3794)
File 104689: rec.motorcycles (456/3794)
File 104690: rec.motorcycles (457/3794)
File 104691: rec.motorcycles (458/3794)
File 104692: rec.motorcycles (459/3794)
File 104693: rec.motorcycles (460/3794)
File 104694: rec.motorcycles (461/3794)
File 104695: rec.motorcycles (462/3794)
File 104697: rec.motorcycles (463/3794)
File 104698: rec.motorcycles (464/3794)
File 104699: rec.motorcycles (465/3794)
File 104700: rec.motorcycles (466/3794)
File 104701: rec.motorcycles (467/3794)
File 104702: rec.motorcycles (468/3794)
File 104703: rec.motorcycles (469/3794)
File 104704: rec.motorcycles (470/3794)
File 104705: rec.sport.baseball (471/3794)
File 104706: rec.sport.baseball (472/3794)
File 104707: rec.sport.baseball (473/3794)
File 104708: rec.sport.baseball (474/3794)
File 104709: rec.sport.baseball (475/3794)
File 104710: rec.sport.ba

File 104926: rec.motorcycles (655/3794)
File 104927: rec.motorcycles (656/3794)
File 104932: rec.motorcycles (657/3794)
File 104934: rec.sport.baseball (658/3794)
File 104935: rec.sport.baseball (659/3794)
File 104936: rec.sport.baseball (660/3794)
File 104937: rec.sport.baseball (661/3794)
File 104938: rec.sport.baseball (662/3794)
File 104939: rec.sport.baseball (663/3794)
File 104940: rec.sport.baseball (664/3794)
File 104941: rec.sport.baseball (665/3794)
File 104942: comp.sys.ibm.pc.hardware (666/3794)
File 104943: rec.sport.baseball (667/3794)
File 104944: rec.autos (668/3794)
File 104945: rec.sport.baseball (669/3794)
File 104946: rec.sport.baseball (670/3794)
File 104947: rec.sport.baseball (671/3794)
File 104948: rec.motorcycles (672/3794)
File 104949: rec.motorcycles (673/3794)
File 104950: rec.motorcycles (674/3794)
File 104951: rec.motorcycles (675/3794)
File 104952: rec.motorcycles (676/3794)
File 104953: rec.motorcycles (677/3794)
File 104954: rec.motorcycles (678/3794)
F

File 105148: rec.motorcycles (854/3794)
File 105149: rec.motorcycles (855/3794)
File 105150: rec.motorcycles (856/3794)
File 105151: rec.motorcycles (857/3794)
File 105152: rec.motorcycles (858/3794)
File 105153: rec.motorcycles (859/3794)
File 105154: comp.sys.ibm.pc.hardware (860/3794)
File 105155: rec.motorcycles (861/3794)
File 105156: comp.sys.ibm.pc.hardware (862/3794)
File 105157: rec.motorcycles (863/3794)
File 105158: rec.motorcycles (864/3794)
File 105159: rec.motorcycles (865/3794)
File 105160: rec.motorcycles (866/3794)
File 105161: comp.sys.mac.hardware (867/3794)
File 105162: rec.motorcycles (868/3794)
File 105163: rec.sport.baseball (869/3794)
File 105164: rec.sport.baseball (870/3794)
File 105167: rec.sport.baseball (871/3794)
File 105202: rec.motorcycles (872/3794)
File 105203: rec.motorcycles (873/3794)
File 105204: rec.motorcycles (874/3794)
File 105205: rec.motorcycles (875/3794)
File 105206: rec.motorcycles (876/3794)
File 105207: comp.graphics (877/3794)
File 1052

File 38884: comp.graphics (1063/3794)
File 38885: comp.graphics (1064/3794)
File 38886: comp.graphics (1065/3794)
File 38887: comp.graphics (1066/3794)
File 38888: comp.graphics (1067/3794)
File 38889: comp.graphics (1068/3794)
File 38890: comp.graphics (1069/3794)
File 38891: comp.graphics (1070/3794)
File 38892: comp.graphics (1071/3794)
File 38893: comp.graphics (1072/3794)
File 38894: comp.graphics (1073/3794)
File 38895: comp.graphics (1074/3794)
File 38896: comp.graphics (1075/3794)
File 38897: comp.graphics (1076/3794)
File 38898: comp.graphics (1077/3794)
File 38899: comp.graphics (1078/3794)
File 38900: comp.graphics (1079/3794)
File 38901: comp.graphics (1080/3794)
File 38902: comp.graphics (1081/3794)
File 38903: comp.sys.mac.hardware (1082/3794)
File 38904: comp.graphics (1083/3794)
File 38905: comp.sys.mac.hardware (1084/3794)
File 38906: comp.graphics (1085/3794)
File 38907: sci.space (1086/3794)
File 38908: comp.graphics (1087/3794)
File 38909: comp.graphics (1088/3794)


File 39500: comp.graphics (1271/3794)
File 39615: comp.graphics (1272/3794)
File 39617: comp.graphics (1273/3794)
File 39618: comp.graphics (1274/3794)
File 39619: comp.graphics (1275/3794)
File 39620: sci.electronics (1276/3794)
File 39621: comp.graphics (1277/3794)
File 39622: comp.sys.mac.hardware (1278/3794)
File 39623: comp.sys.mac.hardware (1279/3794)
File 39624: comp.graphics (1280/3794)
File 39625: comp.graphics (1281/3794)
File 39626: comp.sys.ibm.pc.hardware (1282/3794)
File 39627: sci.electronics (1283/3794)
File 39628: comp.graphics (1284/3794)
File 39629: sci.med (1285/3794)
File 39630: comp.graphics (1286/3794)
File 39631: comp.graphics (1287/3794)
File 39632: comp.graphics (1288/3794)
File 39633: comp.graphics (1289/3794)
File 39634: comp.graphics (1290/3794)
File 39635: comp.sys.mac.hardware (1291/3794)
File 39636: comp.graphics (1292/3794)
File 39637: comp.graphics (1293/3794)
File 39638: comp.graphics (1294/3794)
File 39639: comp.graphics (1295/3794)
File 39640: comp.

File 52075: comp.sys.mac.hardware (1459/3794)
File 52076: comp.sys.mac.hardware (1460/3794)
File 52077: comp.sys.mac.hardware (1461/3794)
File 52078: comp.sys.mac.hardware (1462/3794)
File 52079: comp.sys.mac.hardware (1463/3794)
File 52080: comp.sys.mac.hardware (1464/3794)
File 52081: comp.sys.mac.hardware (1465/3794)
File 52082: comp.sys.mac.hardware (1466/3794)
File 52083: comp.sys.ibm.pc.hardware (1467/3794)
File 52084: comp.sys.mac.hardware (1468/3794)
File 52085: comp.sys.mac.hardware (1469/3794)
File 52087: comp.sys.mac.hardware (1470/3794)
File 52088: comp.sys.mac.hardware (1471/3794)
File 52089: comp.sys.mac.hardware (1472/3794)
File 52090: comp.sys.mac.hardware (1473/3794)
File 52091: rec.motorcycles (1474/3794)
File 52092: comp.sys.mac.hardware (1475/3794)
File 52096: comp.sys.mac.hardware (1476/3794)
File 52097: comp.sys.mac.hardware (1477/3794)
File 52098: comp.sys.mac.hardware (1478/3794)
File 52099: comp.sys.ibm.pc.hardware (1479/3794)
File 52100: comp.sys.mac.hardware 

File 52267: comp.sys.mac.hardware (1638/3794)
File 52268: comp.sys.mac.hardware (1639/3794)
File 52269: comp.sys.mac.hardware (1640/3794)
File 52270: comp.sys.mac.hardware (1641/3794)
File 52271: comp.sys.mac.hardware (1642/3794)
File 52272: comp.sys.mac.hardware (1643/3794)
File 52273: comp.sys.mac.hardware (1644/3794)
File 52274: comp.sys.mac.hardware (1645/3794)
File 52275: comp.sys.mac.hardware (1646/3794)
File 52276: comp.sys.ibm.pc.hardware (1647/3794)
File 52283: comp.sys.mac.hardware (1648/3794)
File 52284: comp.sys.mac.hardware (1649/3794)
File 52285: comp.sys.mac.hardware (1650/3794)
File 52286: comp.sys.mac.hardware (1651/3794)
File 52287: comp.sys.ibm.pc.hardware (1652/3794)
File 52288: comp.sys.mac.hardware (1653/3794)
File 52289: comp.sys.mac.hardware (1654/3794)
File 52290: comp.sys.mac.hardware (1655/3794)
File 52291: comp.sys.mac.hardware (1656/3794)
File 52292: comp.sys.mac.hardware (1657/3794)
File 52293: comp.sys.mac.hardware (1658/3794)
File 52294: sci.electronics 

File 54049: rec.sport.hockey (1827/3794)
File 54050: rec.sport.hockey (1828/3794)
File 54051: rec.sport.hockey (1829/3794)
File 54052: rec.sport.hockey (1830/3794)
File 54053: rec.sport.hockey (1831/3794)
File 54054: rec.sport.hockey (1832/3794)
File 54055: sci.electronics (1833/3794)
File 54056: sci.electronics (1834/3794)
File 54057: sci.electronics (1835/3794)
File 54058: sci.electronics (1836/3794)
File 54059: rec.sport.hockey (1837/3794)
File 54060: rec.sport.hockey (1838/3794)
File 54061: rec.sport.hockey (1839/3794)
File 54062: rec.sport.baseball (1840/3794)
File 54063: rec.sport.hockey (1841/3794)
File 54064: rec.sport.hockey (1842/3794)
File 54065: rec.sport.baseball (1843/3794)
File 54066: rec.sport.hockey (1844/3794)
File 54067: rec.sport.hockey (1845/3794)
File 54068: rec.sport.hockey (1846/3794)
File 54069: rec.sport.hockey (1847/3794)
File 54070: sci.electronics (1848/3794)
File 54071: sci.electronics (1849/3794)
File 54072: rec.sport.hockey (1850/3794)
File 54073: rec.sp

File 54250: rec.sport.hockey (2026/3794)
File 54251: rec.sport.hockey (2027/3794)
File 54252: sci.electronics (2028/3794)
File 54253: comp.sys.mac.hardware (2029/3794)
File 54254: sci.electronics (2030/3794)
File 54255: rec.sport.hockey (2031/3794)
File 54256: rec.sport.hockey (2032/3794)
File 54257: rec.sport.hockey (2033/3794)
File 54258: rec.sport.hockey (2034/3794)
File 54259: rec.sport.hockey (2035/3794)
File 54260: rec.sport.hockey (2036/3794)
File 54261: rec.sport.hockey (2037/3794)
File 54262: rec.sport.hockey (2038/3794)
File 54263: rec.sport.hockey (2039/3794)
File 54264: rec.sport.hockey (2040/3794)
File 54265: comp.sys.ibm.pc.hardware (2041/3794)
File 54266: sci.electronics (2042/3794)
File 54267: sci.electronics (2043/3794)
File 54268: sci.electronics (2044/3794)
File 54269: sci.electronics (2045/3794)
File 54270: sci.electronics (2046/3794)
File 54271: sci.electronics (2047/3794)
File 54272: rec.sport.hockey (2048/3794)
File 54273: comp.sys.mac.hardware (2049/3794)
File 5

File 54564: rec.sport.hockey (2223/3794)
File 54575: talk.politics.guns (2224/3794)
File 54576: talk.politics.guns (2225/3794)
File 54577: talk.politics.guns (2226/3794)
File 54578: talk.politics.guns (2227/3794)
File 54579: comp.sys.mac.hardware (2228/3794)
File 54580: talk.politics.guns (2229/3794)
File 54581: sci.electronics (2230/3794)
File 54582: talk.politics.guns (2231/3794)
File 54583: talk.politics.guns (2232/3794)
File 54584: talk.politics.guns (2233/3794)
File 54585: talk.politics.guns (2234/3794)
File 54586: talk.politics.guns (2235/3794)
File 54587: talk.politics.guns (2236/3794)
File 54588: talk.politics.guns (2237/3794)
File 54589: talk.politics.guns (2238/3794)
File 54590: talk.politics.guns (2239/3794)
File 54591: talk.politics.guns (2240/3794)
File 54592: talk.politics.guns (2241/3794)
File 54593: talk.politics.guns (2242/3794)
File 54596: talk.politics.guns (2243/3794)
File 54597: talk.politics.guns (2244/3794)
File 54598: talk.politics.guns (2245/3794)
File 54599: t

File 54866: talk.politics.guns (2415/3794)
File 54868: talk.politics.guns (2416/3794)
File 54869: talk.politics.guns (2417/3794)
File 54870: talk.politics.guns (2418/3794)
File 54871: talk.politics.guns (2419/3794)
File 54872: talk.politics.guns (2420/3794)
File 54873: talk.politics.guns (2421/3794)
File 54874: talk.politics.guns (2422/3794)
File 54875: comp.sys.ibm.pc.hardware (2423/3794)
File 54876: sci.space (2424/3794)
File 54877: talk.politics.guns (2425/3794)
File 54878: talk.politics.guns (2426/3794)
File 54879: comp.sys.mac.hardware (2427/3794)
File 54880: talk.politics.guns (2428/3794)
File 54882: talk.politics.guns (2429/3794)
File 54883: talk.politics.guns (2430/3794)
File 54884: comp.sys.mac.hardware (2431/3794)
File 54885: talk.politics.guns (2432/3794)
File 54886: talk.politics.guns (2433/3794)
File 54887: talk.politics.guns (2434/3794)
File 54888: talk.politics.guns (2435/3794)
File 54889: talk.politics.guns (2436/3794)
File 54890: talk.politics.guns (2437/3794)
File 548

File 55485: talk.politics.guns (2607/3794)
File 55486: rec.motorcycles (2608/3794)
File 55487: rec.sport.baseball (2609/3794)
File 55488: talk.politics.guns (2610/3794)
File 55489: rec.autos (2611/3794)
File 55490: rec.autos (2612/3794)
File 59225: sci.med (2613/3794)
File 59229: sci.med (2614/3794)
File 59231: sci.med (2615/3794)
File 59234: sci.med (2616/3794)
File 59236: sci.med (2617/3794)
File 59237: sci.med (2618/3794)
File 59238: sci.med (2619/3794)
File 59241: sci.med (2620/3794)
File 59242: sci.med (2621/3794)
File 59243: sci.med (2622/3794)
File 59245: sci.med (2623/3794)
File 59246: sci.med (2624/3794)
File 59247: sci.med (2625/3794)
File 59248: sci.med (2626/3794)
File 59249: comp.graphics (2627/3794)
File 59251: sci.med (2628/3794)
File 59252: sci.med (2629/3794)
File 59253: sci.med (2630/3794)
File 59254: comp.graphics (2631/3794)
File 59255: sci.med (2632/3794)
File 59256: sci.med (2633/3794)
File 59257: sci.med (2634/3794)
File 59258: comp.sys.ibm.pc.hardware (2635/3794

File 59466: sci.electronics (2837/3794)
File 59467: comp.graphics (2838/3794)
File 59468: sci.med (2839/3794)
File 59469: sci.med (2840/3794)
File 59470: sci.med (2841/3794)
File 59471: sci.med (2842/3794)
File 59472: sci.med (2843/3794)
File 59473: sci.electronics (2844/3794)
File 59474: sci.med (2845/3794)
File 59475: sci.med (2846/3794)
File 59476: sci.med (2847/3794)
File 59477: comp.graphics (2848/3794)
File 59478: sci.med (2849/3794)
File 59479: rec.motorcycles (2850/3794)
File 59480: sci.med (2851/3794)
File 59481: comp.graphics (2852/3794)
File 59482: sci.med (2853/3794)
File 59483: rec.motorcycles (2854/3794)
File 59484: sci.med (2855/3794)
File 59485: sci.electronics (2856/3794)
File 59486: sci.med (2857/3794)
File 59487: rec.motorcycles (2858/3794)
File 59488: sci.med (2859/3794)
File 59489: sci.med (2860/3794)
File 59490: sci.med (2861/3794)
File 59491: rec.autos (2862/3794)
File 59492: comp.graphics (2863/3794)
File 59493: comp.graphics (2864/3794)
File 59494: sci.electron

File 60808: comp.sys.ibm.pc.hardware (3051/3794)
File 60809: comp.sys.ibm.pc.hardware (3052/3794)
File 60810: comp.sys.ibm.pc.hardware (3053/3794)
File 60811: comp.sys.ibm.pc.hardware (3054/3794)
File 60812: comp.sys.ibm.pc.hardware (3055/3794)
File 60813: comp.sys.ibm.pc.hardware (3056/3794)
File 60814: comp.graphics (3057/3794)
File 60815: comp.sys.ibm.pc.hardware (3058/3794)
File 60816: comp.sys.mac.hardware (3059/3794)
File 60817: comp.sys.ibm.pc.hardware (3060/3794)
File 60818: comp.sys.ibm.pc.hardware (3061/3794)
File 60819: comp.sys.ibm.pc.hardware (3062/3794)
File 60821: comp.sys.ibm.pc.hardware (3063/3794)
File 60822: comp.sys.ibm.pc.hardware (3064/3794)
File 60823: comp.sys.ibm.pc.hardware (3065/3794)
File 60824: comp.sys.ibm.pc.hardware (3066/3794)
File 60825: comp.sys.ibm.pc.hardware (3067/3794)
File 60826: comp.sys.ibm.pc.hardware (3068/3794)
File 60827: comp.sys.ibm.pc.hardware (3069/3794)
File 60829: comp.sys.mac.hardware (3070/3794)
File 60830: comp.sys.ibm.pc.hardware 

File 60991: comp.sys.mac.hardware (3221/3794)
File 60992: comp.sys.ibm.pc.hardware (3222/3794)
File 60993: comp.sys.ibm.pc.hardware (3223/3794)
File 60994: comp.sys.ibm.pc.hardware (3224/3794)
File 60995: comp.sys.ibm.pc.hardware (3225/3794)
File 60996: comp.sys.ibm.pc.hardware (3226/3794)
File 60997: comp.sys.ibm.pc.hardware (3227/3794)
File 60998: comp.sys.ibm.pc.hardware (3228/3794)
File 60999: comp.sys.ibm.pc.hardware (3229/3794)
File 61000: comp.sys.ibm.pc.hardware (3230/3794)
File 61002: comp.sys.ibm.pc.hardware (3231/3794)
File 61003: comp.sys.ibm.pc.hardware (3232/3794)
File 61004: comp.sys.ibm.pc.hardware (3233/3794)
File 61005: comp.sys.ibm.pc.hardware (3234/3794)
File 61006: comp.graphics (3235/3794)
File 61007: comp.sys.ibm.pc.hardware (3236/3794)
File 61008: comp.sys.ibm.pc.hardware (3237/3794)
File 61010: sci.electronics (3238/3794)
File 61012: sci.electronics (3239/3794)
File 61013: comp.sys.ibm.pc.hardware (3240/3794)
File 61014: comp.sys.ibm.pc.hardware (3241/3794)
Fil

File 61169: comp.sys.ibm.pc.hardware (3391/3794)
File 61170: comp.sys.ibm.pc.hardware (3392/3794)
File 61171: comp.sys.ibm.pc.hardware (3393/3794)
File 61172: comp.sys.mac.hardware (3394/3794)
File 61173: comp.sys.ibm.pc.hardware (3395/3794)
File 61174: comp.sys.mac.hardware (3396/3794)
File 61175: sci.electronics (3397/3794)
File 61176: comp.sys.ibm.pc.hardware (3398/3794)
File 61177: comp.sys.ibm.pc.hardware (3399/3794)
File 61178: comp.sys.ibm.pc.hardware (3400/3794)
File 61242: comp.graphics (3401/3794)
File 61243: sci.space (3402/3794)
File 61245: comp.graphics (3403/3794)
File 61246: sci.space (3404/3794)
File 61248: sci.space (3405/3794)
File 61251: rec.autos (3406/3794)
File 61253: sci.space (3407/3794)
File 61254: sci.electronics (3408/3794)
File 61255: sci.space (3409/3794)
File 61256: sci.space (3410/3794)
File 61257: sci.space (3411/3794)
File 61258: comp.sys.ibm.pc.hardware (3412/3794)
File 61259: sci.space (3413/3794)
File 61260: sci.space (3414/3794)
File 61261: sci.spac

File 61479: sci.electronics (3623/3794)
File 61480: sci.space (3624/3794)
File 61481: sci.space (3625/3794)
File 61482: sci.space (3626/3794)
File 61483: sci.electronics (3627/3794)
File 61484: sci.space (3628/3794)
File 61485: sci.electronics (3629/3794)
File 61486: sci.space (3630/3794)
File 61487: sci.space (3631/3794)
File 61488: sci.space (3632/3794)
File 61489: sci.space (3633/3794)
File 61490: sci.space (3634/3794)
File 61491: sci.space (3635/3794)
File 61492: sci.space (3636/3794)
File 61493: sci.space (3637/3794)
File 61494: sci.space (3638/3794)
File 61495: sci.space (3639/3794)
File 61496: sci.space (3640/3794)
File 61497: talk.politics.guns (3641/3794)
File 61498: sci.space (3642/3794)
File 61499: sci.space (3643/3794)
File 61500: sci.space (3644/3794)
File 61501: sci.space (3645/3794)
File 61502: sci.space (3646/3794)
File 61503: comp.graphics (3647/3794)
File 61504: sci.space (3648/3794)
File 61505: sci.space (3649/3794)
File 61506: sci.space (3650/3794)
File 61507: sci.s

In [35]:
%%time
#Print accuracy
check_accuracy('./archive_tout')

Accuracy : 0.755
Wall time: 204 ms


In [None]:
#This is an extreme test which proves that the program can be optimized