In [None]:
# from flask import Flask, jsonify, render_template,request
# from flask_cors import CORS, cross_origin
# import logging
# from tableIndex import getTableIndex
# from getBooksApi import getBooksData, getBooksThread
# from cosine import cosineSearchWord, getMatrixCloseness
# from jaccard import jaccardSimilarity
import time, concurrent.futures, json, requests, re
from threading import Lock
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
import networkx as nx



In [None]:
# Module-level state holders. Each one owns its own fresh "data" container
# and starts with "status" set to True.
suggestionObject = {"data": [], "status": True}
lastSearchObject = {"data": [], "status": True}
rankingObject = {"data": [], "status": True}
mostReadObject = {"data": [], "status": True}
booksInfoObject = {"data": [], "status": True}
allBooksoObject = {"data": [], "status": True}
closenessDataObject = {"data": [], "status": True}
# Unlike the holders above, the table index payload is a dict, not a list.
tableIndexDataObject = {"data": {}, "status": True}
# Global loading flag; starts at True.
loadingBack = {"status": True}
# Placeholder for the last book that was read; nothing is selected initially.
lastReadingBook = {"bookId": None, "data": "", "link": ""}

In [None]:
#############################################
# ----- CODE FROM FILE back/thread.py ----- #
#############################################

# To get the callbacks' results back, pass returnStatus=True.
# Otherwise the function returns an empty list.
# type = 1 => each result is appended as one item, type = 2 => each result is concatenated onto the list
def baseThreadPool(loopList, callback, returnStatus=False, type = 1):
    """Run ``callback`` once per item of ``loopList`` on a thread pool.

    Parameters:
        loopList: iterable of items; each one is passed to ``callback``.
        callback: callable taking a single item.
        returnStatus: when true, the callbacks' results are collected and
            returned; when false, results are never read and an empty list
            is returned.
        type: 1 appends each result as one element; any other value
            concatenates each result onto the output with ``+=``.

    Returns:
        A list of collected results in completion order (not submission
        order), or an empty list when ``returnStatus`` is false.

    An exception raised inside ``callback`` only surfaces through
    ``result()``, i.e. only when ``returnStatus`` is true.
    """
    gathered = []
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = [pool.submit(callback, entry) for entry in loopList]
        # Nothing to collect without returnStatus; leaving the ``with`` block
        # still waits for every submitted task to finish.
        finished = concurrent.futures.as_completed(pending) if returnStatus else ()
        for task in finished:
            outcome = task.result()
            if type == 1:
                gathered.append(outcome)
            else:
                gathered += outcome
    return gathered

In [None]:
##################################################
# ----- CODE FROM FILE back/getBooksApi.py ----- #
##################################################

# request to get books
def getBooksThread(bookId, timeout=10):
    """Fetch one book record from the Gutendex API.

    Sends a GET request to ``https://gutendex.com/books/<bookId>`` with the
    given ``timeout`` and decodes the response body as JSON.

    Returns:
        The decoded JSON payload, or the string ``'NOT_FOUND'`` when the
        payload carries a non-null ``detail`` field.
    """
    url = 'https://gutendex.com/books/{}'.format(bookId)
    payload = json.loads(requests.get(url, timeout=timeout).text)
    # NOTE(review): presumably Gutendex uses "detail" to report an unknown
    # book id; confirm against the API documentation.
    if payload.get('detail') is None:
        return payload
    return 'NOT_FOUND'

def getBooksData(listBooks):
    """Fetch every book of ``listBooks`` concurrently via ``getBooksThread``.

    Returns:
        The list of values returned by ``getBooksThread``, in completion
        order. The elapsed time is printed when the fetch ends.
    """
    print("RUNNING function getBooksData")
    startedAt = time.time()
    fetched = baseThreadPool(listBooks, getBooksThread, returnStatus=True)
    elapsed = time.time() - startedAt
    print("END function getBooksData - {}".format(elapsed))
    return fetched

def getListBooks(listBooks):
    """Fetch book metadata and list the plain-text download URLs.

    Parameters:
        listBooks: iterable of book ids, forwarded to ``getBooksData``.

    Returns:
        A tuple ``(result, allBooks)``:
        - ``result`` is a flat list of ``{'id': ..., 'text_url': ...}``
          dicts, one per format URL whose last dot-separated segment is
          ``txt`` (a book may contribute zero, one or several entries);
        - ``allBooks`` is the unmodified list returned by ``getBooksData``,
          including any ``'NOT_FOUND'`` markers.
    """
    print("RUNNING function getListBooks")
    threaded_start = time.time()

    def transformData(d):
        # getBooksThread returns the string 'NOT_FOUND' for a missing book.
        # Such entries have no .get(), so skip anything that is not a dict
        # instead of letting one missing book abort the whole listing.
        if not isinstance(d, dict):
            return []
        formats = d.get('formats')
        if formats is None:
            return []
        return [
            {'id': d['id'], 'text_url': url}
            for url in formats.values()
            if url.split('.').pop() == 'txt'
        ]

    allBooks = getBooksData(listBooks)
    # type=2: each per-book list is concatenated into one flat list.
    result = baseThreadPool(allBooks, transformData, True, 2)
    print("END function getListBooks", time.time() - threaded_start)
    return result, allBooks

In [None]:
#############################################
# ----- CODE FROM FILE back/cosine.py ----- #
#############################################

# This function ranks books against the search history using cosine similarity
def cosineSearchWord(historyWords, tableIndexData):
    """Rank books by cosine similarity to a word-history vector.

    Parameters:
        historyWords: dict mapping word -> weight; used as the "history" row.
        tableIndexData: dict mapping word -> {book: count}.

    Returns:
        A dict mapping book -> similarity score, ordered from most to least
        similar. Only books containing at least one history word appear, so
        the dict is empty when no history word is in the index.
    """
    print("RUNNING function cosineSearchWord")
    startedAt = time.time()

    # The first row is the query vector; every later row is one book,
    # restricted to the words that appear in the history.
    vectors = {'history': historyWords}
    for term in historyWords.keys():
        if term not in tableIndexData:
            continue
        for bookKey, count in tableIndexData[term].items():
            vectors.setdefault(bookKey, {})[term] = count

    # Words missing from a row count as 0 occurrences.
    frame = pd.DataFrame(vectors.values(), index=vectors.keys()).fillna(0)

    # Skip the leading "history" row and compare it against each book row.
    scores = {
        bookKey: cosine_similarity(
            frame.loc["history":"history"], frame.loc[bookKey:bookKey]
        )[0][0]
        for bookKey in list(vectors.keys())[1:]
    }

    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    sortedBooks = dict(ranked)

    print("END function cosineSearchWord - {}".format(time.time() - startedAt))
    return sortedBooks

def getMatrixCloseness(tableIndexData):
    """Compute the closeness centrality of books in a similarity graph.

    Each book becomes a vector of word counts. Two books are linked by an
    edge when the cosine similarity of their vectors is above 50%. The
    closeness centrality of every linked book is then computed with
    networkx.

    Parameters:
        tableIndexData: dict mapping word -> {book: count}.

    Returns:
        A list of ``{"bookId": ..., "closeness": ...}`` dicts sorted by
        decreasing closeness. Books with no edge are not part of the graph
        and therefore do not appear. The list is empty when fewer than two
        books are indexed.
    """
    print("RUNNING function getMatrixCloseness")
    threaded_start = time.time()

    # Invert word -> {book: count} into book -> {word: count}.
    # This runs sequentially on purpose: the check-then-insert on a shared
    # dict is not safe when several threads handle words of the same book.
    booksData = dict()
    for word in tableIndexData:
        for b, count in tableIndexData[word].items():
            booksData.setdefault(b, dict())[word] = count

    closenessData = []
    # A pair needs at least two books; with fewer the graph has no edge.
    if len(booksData) > 1:
        bookIds = list(booksData.keys())
        bookDF = pd.DataFrame(booksData.values(),
            index=booksData.keys()).fillna(0)

        # One call yields the full book-by-book similarity matrix, replacing
        # one thread-pool task (and one DataFrame slice) per pair.
        similarity = cosine_similarity(bookDF.to_numpy())

        # > 50% -> add edge. The matrix is symmetric and the graph is
        # undirected, so the strict upper triangle covers every distinct
        # pair once and excludes each book paired with itself.
        closeEnough = np.triu(similarity * 100 > 50, k=1)
        matrixCloseness = [(bookIds[i], bookIds[j])
                           for i, j in np.argwhere(closeEnough)]

        # Build the graph of similar books
        G = nx.Graph()
        G.add_edges_from(matrixCloseness)

        # Compute the closeness centrality of each node in the graph
        for node, closeness in nx.closeness_centrality(G).items():
            closenessData.append({"bookId": node, "closeness": closeness})

    sortedClosenessData = sorted(closenessData, key=lambda d: d['closeness'], reverse=True)
    print("END function getMatrixCloseness - {}".format(time.time() - threaded_start))
    return sortedClosenessData

In [None]:
#################################################
# ----- CODE FROM FILE back/tableIndex.py ----- #
#################################################


# Build the table index for all books and the word counts for each book
def getTableIndex(listBooks):
    """Download the text of each book and build word-occurrence indexes.

    Parameters:
        listBooks: iterable of book ids, forwarded to ``getListBooks``.

    Returns:
        A tuple ``(tableIndex, booksInfo, allBooks)``:
        - ``tableIndex`` maps lowercased word -> {book id: occurrences};
        - ``booksInfo`` is a list with one dict per downloaded text URL,
          holding ``bookId``, ``words`` (word -> occurrences), ``totalWords``
          and ``totalWordsWithOccur`` (number of distinct words);
        - ``allBooks`` is the raw metadata list returned by ``getListBooks``.
    """
    print('RUNNING function getTableIndex')
    tableIndex = dict()
    listBooksData, allBooks = getListBooks(listBooks)

    # One lock shared by every readBook worker. tableIndex is written by all
    # of them, so a lock created inside readBook would protect nothing.
    lock = Lock()

    def readBook(book):
        response_API = requests.get(book['text_url'])
        data = response_API.text

        #### Option 1: keep tokens made of 4 to 10 ASCII letters followed by
        #### one or more word characters (so at least 5 characters each).
        # NOTE(review): the previous comment described "words of 4 to 10
        # characters"; the pattern is left unchanged so the index content
        # stays the same. Confirm which of the two is intended.
        words = re.findall(r"[A-Za-z]{4,10}\w+", data)

        # Count for table index for each book. This is done locally, without
        # any locking, instead of submitting one thread-pool task per word.
        occurentCounts = dict()
        for word in words:
            w = word.lower()
            occurentCounts[w] = occurentCounts.get(w, 0) + 1

        # Count for table index all books: merge this book's totals in one
        # short critical section. Counts are added, not overwritten, because
        # the same book id can be listed with several text URLs.
        with lock:
            for w, count in occurentCounts.items():
                perBook = tableIndex.setdefault(w, dict())
                perBook[book['id']] = perBook.get(book['id'], 0) + count

        return {
            "bookId": book['id'],
            "words": occurentCounts,
            "totalWords": len(words),
            "totalWordsWithOccur": len(occurentCounts)
        }

    print("START Thread readBook")
    threaded_start = time.time()

    booksInfo = baseThreadPool(listBooksData, readBook, True)
    print("END Thread readBook:", time.time() - threaded_start)

    print('END function getTableIndex')
    return tableIndex, booksInfo, allBooks



In [None]:
#####################################################
# ----- CODE FOR ROUTE http://127.0.0.1:5000/ ----- #
#####################################################

def index():
    """Load all book data into the module-level state objects.

    Builds the table index from the module-level ``listBooks``, stores the
    three results, computes the closeness ranking from the stored table
    index, and sets each ``status`` flag to False once its data is stored.
    The total loading time is printed at the end.

    NOTE(review): ``listBooks`` is not defined in the visible source; it is
    presumably a module-level list of book ids. Confirm where it is set.
    """
    print('START LOADING DATA')
    startedAt = time.time()

    tableIndex, booksInfo, allBooks = getTableIndex(listBooks)
    tableIndexDataObject['data'] = tableIndex
    booksInfoObject['data'] = booksInfo
    allBooksoObject['data'] = allBooks
    for holder in (tableIndexDataObject, booksInfoObject, allBooksoObject):
        holder['status'] = False

    closenessDataObject['data'] = getMatrixCloseness(tableIndexDataObject['data'])
    closenessDataObject['status'] = False

    loadingBack['status'] = False
    print('END LOADING DATA - {}'.format(time.time() - startedAt))