In [None]:
import digitalhub as dh

# Project handle used by every cell below; the SDK call name indicates the project
# is fetched when present and created otherwise.
project_name = "test-taxonomy"
project = dh.get_or_create_project(project_name)

## Prepare base artifacts

In [None]:
import pandas as pd 

# Auxiliary word lists: each text file is read with one explicit column name
# (no header row is expected) and then logged to the project as a "table" dataitem.
comuni_names_df = pd.read_csv("data-P1/nomi_di_comuni.txt", names=["name"])
di1 = project.log_dataitem("taxonomy_nomi_di_comuni", kind="table", data=comuni_names_df)

stopwords_df = pd.read_csv("data-P1/stopwords.txt", names=["word"])
di2 = project.log_dataitem("taxonomy_stopwords", kind="table", data=stopwords_df)

specific_terms_df = pd.read_csv("data-P1/termini_specifici.txt", names=["termine"])
di3 = project.log_dataitem("taxonomy_termini_specifici", kind="table", data=specific_terms_df)

In [None]:
import pandas as pd

# Comuni taxonomy: semicolon-delimited CSV, logged as the "tassonomia_comuni" table dataitem.
tassonomia_df = pd.read_csv("data-P1/tassonomia_comuni.csv", sep=";")
di = project.log_dataitem(
    name="tassonomia_comuni",
    kind="table",
    data=tassonomia_df,
)

In [None]:
# Aziende taxonomy: semicolon-delimited CSV, logged as the "tassonomia_aziende" table dataitem.
# Note that `tassonomia_df` and `di` are rebound here and no longer refer to the comuni data.
tassonomia_df = pd.read_csv("data-P1/tassonomia_aziende.csv", sep=";")
di = project.log_dataitem(
    name="tassonomia_aziende",
    kind="table",
    data=tassonomia_df,
)

## Compute azioni comuni

In [None]:
# Pinned package requirements passed to the function definition.
comuni_requirements = [
    "tqdm==4.66.6",
    "fuzzywuzzy==0.18.0",
    "python-Levenshtein==0.26.1",
    "simplemma==1.1.2",
    "unidecode==1.4.0",
]

# Python function whose source is SuggerimentiComuni.py and whose entry point is
# the `taxonomy_suggestions` handler.
func = project.new_function(
    name="compute-azioni-comuni",
    kind="python",
    python_version="PYTHON3_10",
    code_src="SuggerimentiComuni.py",
    handler="taxonomy_suggestions",
    requirements=comuni_requirements,
)

In [None]:
# Job input name -> name of the project dataitem whose key is passed for that input.
# NOTE(review): "piani_comunali" is not logged anywhere in the visible notebook; it is
# assumed to already exist in the project - confirm.
comuni_input_items = {
    "piani_comunali": "piani_comunali",
    "tassonomia": "tassonomia_comuni",
    "stopwords": "taxonomy_stopwords",
    "nomi": "taxonomy_nomi_di_comuni",
    "termini": "taxonomy_termini_specifici",
}

# Launch the function as a job, handing over the dataitem keys as inputs.
# wait=True presumably blocks until the job finishes - verify against the SDK docs.
run = func.run(
    action="job",
    inputs={
        input_name: project.get_dataitem(item_name).key
        for input_name, item_name in comuni_input_items.items()
    },
    wait=True,
)

## Comuni taxonomy service

In [None]:
# Function definition for the comuni taxonomy service, sourced from serve_comuni.py.
# `serve` and `init` are presumably callables defined in that file - verify there.
comune_service = project.new_function(
    name="comune-taxonomy-service",
    kind="python",
    python_version="PYTHON3_10",
    code_src="serve_comuni.py",
    handler="serve",
    init_function="init",
)

In [None]:
# Start the comuni service; the returned run is used below to look up the service URL.
serve_run = comune_service.run(action="serve")

In [None]:
import requests

# Refresh the run and read the service address from its status.
service_url = serve_run.refresh().status.service["url"]

# GET /to_delete?threshold=3 - the query string is supplied through `params`.
res = requests.get(f"http://{service_url}/to_delete", params={"threshold": 3})
res.text

In [None]:
# GET /to_split?threshold=500 - the query string is supplied through `params`.
res = requests.get(f"http://{service_url}/to_split", params={"threshold": 500})
res.text

In [None]:
# GET /to_merge on the service (no query parameters) and show the raw response body.
merge_url = f"http://{service_url}/to_merge"
res = requests.get(merge_url)
res.text

## Compute azioni aziende

In [None]:
# Pinned package requirements passed to the function definition.
aziende_requirements = [
    "tqdm==4.66.6",
    "fuzzywuzzy==0.18.0",
    "python-Levenshtein==0.26.1",
    "simplemma==1.1.2",
    "unidecode==1.4.0",
]

# Python function whose source is SuggerimentiAziende.py and whose entry point is
# the `taxonomy_suggestions` handler. Rebinds `func`.
func = project.new_function(
    name="compute-azioni-aziende",
    kind="python",
    python_version="PYTHON3_10",
    code_src="SuggerimentiAziende.py",
    handler="taxonomy_suggestions",
    requirements=aziende_requirements,
)

In [None]:
# Job input name -> name of the project dataitem whose key is passed for that input.
# NOTE(review): "piani_aziendali" is not logged anywhere in the visible notebook; it is
# assumed to already exist in the project - confirm.
aziende_input_items = {
    "piani_aziendali": "piani_aziendali",
    "tassonomia": "tassonomia_aziende",
    "stopwords": "taxonomy_stopwords",
    "nomi": "taxonomy_nomi_di_comuni",
    "termini": "taxonomy_termini_specifici",
}

# Launch the function as a job, handing over the dataitem keys as inputs.
# wait=True presumably blocks until the job finishes - verify against the SDK docs.
run = func.run(
    action="job",
    inputs={
        input_name: project.get_dataitem(item_name).key
        for input_name, item_name in aziende_input_items.items()
    },
    wait=True,
)

## Aziende taxonomy service

In [None]:
# Function definition for the aziende taxonomy service, sourced from serve_aziende.py.
# `serve` and `init` are presumably callables defined in that file - verify there.
aziende_service = project.new_function(
    name="aziende-taxonomy-service",
    kind="python",
    python_version="PYTHON3_10",
    code_src="serve_aziende.py",
    handler="serve",
    init_function="init",
)

In [None]:
# Start the aziende service. This rebinds `serve_run`, so the handle to the
# earlier comuni serve run is no longer reachable through this name.
serve_run = aziende_service.run(action="serve")

In [None]:
import requests

# Refresh the run and read the service address from its status.
service_url = serve_run.refresh().status.service["url"]

# GET /to_delete?frequency_threshold=3 - the query string is supplied through `params`.
res = requests.get(f"http://{service_url}/to_delete", params={"frequency_threshold": 3})
res.text

In [None]:
# GET /to_split?frequency_threshold=500 - the query string is supplied through `params`.
res = requests.get(f"http://{service_url}/to_split", params={"frequency_threshold": 500})
res.text

In [None]:
# GET /to_merge on the service (no query parameters) and show the raw response body.
merge_url = f"http://{service_url}/to_merge"
res = requests.get(merge_url)
res.text