In [None]:
# Libraries
from pyvis.network import Network
from pyvis import network as net
import networkx as nx
import urllib.request, urllib.error
import urllib.parse
import xml.etree.ElementTree as ET
import datetime

In [None]:
# INSERT_PARAMETER
# Replace every placeholder below before running the remaining cells.
MATOMO_ANALYTICS_URL = "https://_"  # base URL of the Matomo endpoint; the API query string is appended to it
MATOMO_TOKEN = "_"  # sent as the token_auth query parameter. NOTE(review): avoid saving a real token in a shared notebook
ID_SITE = "_"  # sent as the idSite query parameter
START_DATE = "YYYY-MM-DD"  # first day to fetch; parsed with the "%Y-%m-%d" format
END_DATE = "YYYY-MM-DD"  # last day to fetch (included); parsed with the "%Y-%m-%d" format

In [None]:
def generateDates(d1, d2):
    """Return every calendar day from d1 to d2 (both included) as strings.

    Parameters
    ----------
    d1, d2 : str
        First and last day in "%Y-%m-%d" format.

    Returns
    -------
    list of str
        One zero-padded "YYYY-MM-DD" string per day, in ascending order.
        When d1 and d2 are the same day the list has exactly one entry.

    Raises
    ------
    ValueError
        If an argument does not match "%Y-%m-%d", or if d2 lies before d1.
    """
    start = datetime.datetime.strptime(d1, "%Y-%m-%d")
    end = datetime.datetime.strptime(d2, "%Y-%m-%d")

    if end < start:
        # A reversed range used to collapse silently to [d2]; fail loudly
        # instead so a swapped START_DATE / END_DATE is noticed immediately.
        raise ValueError(
            "End date " + str(d2) + " lies before start date " + str(d1) + "."
        )

    span_days = (end - start).days
    # "+ 1" makes the end day part of the result.
    return [
        (start + datetime.timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in range(span_days + 1)
    ]

In [None]:
# Every day from START_DATE to END_DATE as "YYYY-MM-DD" strings; getMatomoData is later called once per entry.
DATES = generateDates(START_DATE, END_DATE)

In [None]:
# Paths
# One entry per visit: the ordered list of page URLs recorded for that visit.
# getMatomoData() adds to this list in place, so it keeps growing across calls
# until it is reset.
Matomo_Paths = []

In [None]:
def getMatomoData(DATE, MATOMO_TOKEN):
    """Fetch one day of visit logs from Matomo and collect each visit's click path.

    Calls the Live.getLastVisitsDetails API method (XML format, period "day")
    for DATE. For every visit, the URLs of its rows of type "action" are
    gathered in order, "http://" is rewritten to "https://", and the resulting
    list is added to the module-level Matomo_Paths.

    Reads the module-level MATOMO_ANALYTICS_URL and ID_SITE.

    Parameters
    ----------
    DATE : str
        Day to query, passed as the "date" query parameter.
    MATOMO_TOKEN : str
        Passed as the "token_auth" query parameter.

    Returns
    -------
    None
        Results are delivered through Matomo_Paths. A day is added either
        completely or, if anything goes wrong, not at all; in the failure case
        a message with the reason is printed and the day is skipped.
    """
    # urlencode escapes reserved characters in the token / date / site id,
    # which plain string concatenation would pass through unescaped.
    query = urllib.parse.urlencode({
        "module": "API",
        "method": "Live.getLastVisitsDetails",
        "idSite": ID_SITE,
        "filter_limit": -1,
        "period": "day",
        "date": DATE,
        "format": "xml",
        "token_auth": MATOMO_TOKEN,
    })
    request_url = MATOMO_ANALYTICS_URL + "?" + query

    try:
        opener = urllib.request.build_opener()
        # The context manager closes the HTTP response once it has been parsed.
        with opener.open(request_url) as response:
            root = ET.parse(response).getroot()

        # Log
        number_of_visitors = len(root)
        print("On " + str(DATE) + " a total of "
              + str(number_of_visitors)
              + " visits were found.")

        # Collected locally first so a failure half-way through the day does
        # not leave a partial day in Matomo_Paths.
        day_paths = []

        for candidate_number, visiter_log in enumerate(root, start=1):

            actionDetails = visiter_log.find("actionDetails")
            if actionDetails is None:
                # Not a regular visit entry (presumably an API error payload -
                # TODO confirm against the Matomo response format); treat the
                # whole response as unusable.
                raise ValueError("entry without actionDetails element")

            path = []
            for row in actionDetails:
                if row.findtext("type") != "action":
                    continue
                url = row.findtext("url")
                if url:
                    # Rows without URL text are ignored instead of crashing.
                    path.append(url.replace('http://', 'https://'))

            day_paths.append(path)
            print(str(candidate_number) + " / " + str(number_of_visitors) + " done")

        Matomo_Paths.extend(day_paths)

    except Exception as exc:  # There might be some corrupt API pages
        # "Exception" (not a bare except) keeps Ctrl-C / kernel interrupts
        # working while still skipping days with network or parsing problems.
        print(str(DATE) + " Error: API Output. Skip. ("
              + type(exc).__name__ + ": " + str(exc) + ")")

In [None]:
def removeDuplicates(lst):
    """Return the distinct items of lst, each converted to a tuple.

    Items are compared after conversion to tuples, so they must be iterables
    of hashable values. The order of the returned list is that of a set and
    therefore not guaranteed.
    """
    distinct_rows = {tuple(item) for item in lst}
    return list(distinct_rows)

In [None]:
def deDuplicate(list_of_tuples):
    """Collapse repeated edges into one row each, annotated with their frequency.

    Parameters
    ----------
    list_of_tuples : list
        Edges as sequences whose first two items are source and target.
        The items must be hashable.

    Returns
    -------
    list of list
        One ``[source, target, count]`` row per distinct edge, in order of
        first appearance; ``count`` is the number of times the edge occurs
        in the input.
    """
    # Single-pass tally; avoids calling list.count() once per element, which
    # made the function quadratic in the number of edges.
    occurrences = {}
    for edge in list_of_tuples:
        key = tuple(edge)
        occurrences[key] = occurrences.get(key, 0) + 1

    seen = set()
    no_dups = []

    for edge in list_of_tuples:
        row = (edge[0], edge[1], occurrences[tuple(edge)])
        if row not in seen:
            seen.add(row)
            no_dups.append(list(row))

    return no_dups

In [None]:
def graphCreator(ids, knoten, lookupTable, kanten):
    """Build a directed pyvis network and write it to "pyVisGraph.html".

    Parameters
    ----------
    ids : list
        Node identifiers handed to ``add_nodes``.
    knoten : list
        Node labels, passed as the ``label`` argument alongside ``ids``.
    lookupTable : iterable of (label, id) pairs
        Converted to a dict and used to translate the edge endpoints in
        ``kanten`` into node identifiers.
    kanten : iterable
        Edges as ``(source label, target label, count)``; the count is used
        as edge weight, value and (as text) label.

    Returns
    -------
    object
        Whatever ``g.show`` returns. NOTE(review): in notebook mode pyvis is
        expected to return an IFrame that renders inline when it is the last
        expression of a cell - confirm for the installed pyvis version.

    Raises
    ------
    KeyError
        If an edge endpoint is missing from ``lookupTable``.
    """
    g = net.Network(notebook=True, directed=True)
    g.show_buttons(filter_=['physics'])

    g.add_nodes(ids, label=knoten)

    lookupTable = dict(lookupTable)

    for e in kanten:

        x = lookupTable[e[0]]
        y = lookupTable[e[1]]
        z = e[2]

        g.add_edge(x, y, weight=z, value=z, label=str(z))

    file_name = "pyVisGraph.html"
    # Hand the display object back instead of discarding it, so a caller can
    # show the graph inline.
    return g.show(file_name)
    

In [None]:
def getLinkGraph(paths):
    """Turn click paths into nodes and weighted edges and render them.

    Every distinct URL becomes a node with a numeric id (starting at 1, in
    order of first appearance). Every pair of consecutive URLs within a path
    becomes an edge; repeated edges are merged by deDuplicate into
    ``[source, target, count]`` rows. The result is handed to graphCreator.

    Parameters
    ----------
    paths : list of list
        One list of page URLs per visit, in the order they were visited.

    Returns
    -------
    object
        The return value of graphCreator.
    """
    # URL -> node id. A dict gives constant-time membership tests (the former
    # list lookup was linear per URL) and keeps first-appearance order.
    node_ids = {}

    total_tuples = []  # all edges (X, Y), one entry per observed transition

    for path in paths:

        for page in path:
            if page not in node_ids:
                node_ids[page] = len(node_ids) + 1

        # Consecutive pairs: (path[0], path[1]), (path[1], path[2]), ...
        total_tuples.extend([source, target] for source, target in zip(path, path[1:]))

    no_dups = deDuplicate(total_tuples)

    uniqueNodes = list(node_ids)
    ids = list(node_ids.values())
    lookupTable = [[page, page_id] for page, page_id in node_ids.items()]

    # Generate Graph
    return graphCreator(ids, uniqueNodes, lookupTable, no_dups)

In [None]:
# getMatomoData adds to Matomo_Paths in place; empty the list first so that
# running this cell again does not record the same visits a second time.
Matomo_Paths.clear()

for D in DATES:
    getMatomoData(D, MATOMO_TOKEN)

In [None]:
# Build the link graph from all collected click paths; graphCreator writes it to "pyVisGraph.html".
getLinkGraph(Matomo_Paths)