[View in Colaboratory](https://colab.research.google.com/github/marcocaserta/ecco/blob/master/eccoApp_v2_0.ipynb)

# ECCO Dataset Analysis

The `Overlap` parameter selects the sentences that share a specified number of words with the query, among the sentences found by the algorithm.

This app uses two steps:


1.   Solution file selection
2.   Analysis and plots



In [0]:
#@title 

from google.colab import widgets
from google.colab import output   
import matplotlib.pyplot as plt
from __future__ import print_function
import itertools
from google.colab import files
import numpy as np
import pandas as pd
import io, os
import re, string
import textwrap
import html
from IPython.display import display, HTML
import sys
sys.path.append('local_modules/ecco_modules/')
import ecco

#@title
# Step 1: ask the user for a solution file and load it as a DataFrame.
uploaded = files.upload()
if not uploaded:
  # Without this check the code below would fail with an unrelated NameError
  # (or silently reuse a stale `fn` left over from a previous run of the cell).
  raise ValueError('No file was uploaded: select a solution file and run this cell again.')

for fn in uploaded:
  print('User uploaded file "{name}" with length {length} bytes'.format(name=fn, length=len(uploaded[fn])))

# After the loop `fn` names the last uploaded file; that is the one analysed.
dfAux = pd.read_csv(io.StringIO(uploaded[fn].decode('utf-8')))

# separate query and results: the last row is the query, the rows before it
# are the results
dfQuery = dfAux.iloc[-1]
dfAux = dfAux.iloc[:-1]
df, dfQuery, lenQuery = ecco.createDataFrame(dfAux, dfQuery)

#@title Parameters :
Overlap = 0 #@param {type:"slider", min:0, max:10, step:1}

# No `global` declaration is needed here: this code already runs at module
# level, so `df` and `dfQuery` are the module-level names.

# Clamp the requested overlap to the number of tokens in the query.
lenQuery = len(dfQuery.tokenized)
if Overlap > lenQuery:
  print("o vs l", Overlap, " ", lenQuery)
  Overlap = lenQuery

# nTop is handed to ecco.createTableWords; periods (1796..1800) is handed to
# ecco.createTableYear.
nTop = len(df)
periods = np.arange(1796,1801)

def create_tab(location):
  """Render the 'Analysis' and 'Charts' tabs for the loaded solution file.

  Reads the module-level names `df`, `Overlap`, `lenQuery`, `periods` and
  `nTop`; everything is displayed as a side effect and nothing is returned.

  Args:
    location: forwarded to `widgets.TabBar` as its `location` argument.
  """
  tb = widgets.TabBar(['Analysis', 'Charts'], location=location)

  with tb.output_to('Analysis'):
    # Keep only the rows whose `nShared` value equals the selected overlap.
    ddf = df[df.nShared == Overlap]
    listBlock = ecco.createBlocks(ddf)

    # A query without tokens gives lenQuery == 0; guard the ratio so the
    # header is still rendered instead of raising ZeroDivisionError.
    ratio = Overlap / lenQuery if lenQuery else 0.0
    txt = ("<h4><p><b>[" + str(np.round(ratio, 2)) + " ] Found "
           + str(len(ddf)) + "/" + str(len(df))
           + " sentences : </b></p></h4>")
    display(HTML(txt))
    for w in listBlock:
      display(HTML(w))

  # Index 1 is the 'Charts' tab; select=False fills it without switching to it.
  with tb.output_to(1, select=False):
    table = ecco.createTable(df, lenQuery)
    tableYear = ecco.createTableYear(df, periods)
    tableWord = ecco.createTableWords(df, nTop)

    # NOTE(review): `output` is the google.colab.output module imported at the
    # top of the file -- confirm it is the intended value for `style`.
    grid = widgets.Grid(2, 3, header_row=False, header_column=False, style=output)

    # First row: the three tables, each under its caption.
    with grid.output_to(0, 0):
      display(HTML("<center><u>Frequence of Overlaps</u></center>" ))
      display(table)
    with grid.output_to(0, 1):
      display(HTML("<u>Frequence Per Year</u>" ))
      display(tableYear)
    with grid.output_to(0, 2):
      display(HTML("<center><u>Words Distribution</u></center>" ))
      display(tableWord)

    # Second row: one chart per column.
    with grid.output_to(1, 0):
      ecco.createChart1(df, lenQuery)
    with grid.output_to(1, 1):
      ecco.createChart2(df, lenQuery)
    with grid.output_to(1, 2):
      ecco.createChart3(tableWord, lenQuery)

         

# Step 2: show the title built from the query, then the tabbed analysis
# with the tab bar placed at the top.
title = ecco.setTitle(dfQuery)
display(HTML(title))
create_tab(location="top")


  