In [None]:
import os
import pandas as pd
import digitalhub as dh

In [None]:
# Obtain the 'daticomuni' project handle (created on first use, per the SDK call name);
# every later cell registers functions and fetches artifacts through `project`.
project = dh.get_or_create_project('daticomuni')

In [None]:
# Folder that receives the handler modules generated by the %%writefile cells below.
new_folder = 'src'
# exist_ok=True makes the cell idempotent and avoids the check-then-create race
# of testing os.path.exists() before calling os.makedirs().
os.makedirs(new_folder, exist_ok=True)

In [None]:
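# Kept for reference only: presumably the one-off call that registered the zip archive
# as the 'daticomuni' artifact. The next cell fetches the artifact by name instead.
# di = project.new_artifact(name="daticomuni",kind="artifact", path='/daticomuni.zip')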

In [None]:
# Look up the 'daticomuni' artifact in the project; the job cells below pass
# `di.key` as the "source_artifact" input of each run.
di = project.get_artifact('daticomuni')
# Last expression of the cell: displays the artifact key.
di.key

In [None]:
%%writefile "src/convert-all.py"

import pandas as pd
from os import path, makedirs
import zipfile
        
file_basepath = "daticomuni"

def _to_datetime_or_keep(series, fmt):
    """Parse ``series`` as datetimes using ``fmt``; return it unchanged on failure.

    Explicit replacement for ``pd.to_datetime(..., errors="ignore")``, which
    pandas deprecated in 2.2 in favour of catching the exception directly.
    """
    try:
        return pd.to_datetime(series, format=fmt)
    except (ValueError, TypeError):
        return series


def _read_semicolon_table(data_dir, ds_name):
    """Read ``<data_dir>/<ds_name>.txt`` as a ';'-delimited table."""
    source_url = data_dir + '/' + ds_name + ".txt"
    # NOTE(review): windows-1251 is the Cyrillic code page; Italian exports are
    # usually windows-1252. Left unchanged here -- confirm against the real files.
    return pd.read_csv(source_url, encoding="windows-1251", delimiter=";")


def _log_table(project, ds_name, df):
    """Reset the frame's index and log it to the project as a 'table' dataitem."""
    df.reset_index(drop=True, inplace=True)
    project.log_dataitem(ds_name, data=df, kind='table', index=False)


def convert_all(project, source_artifact):
    """Download the source archive, extract it and log every table as a dataitem.

    Steps:
      1. Recreate ``<file_basepath>/data`` from scratch.
      2. Download ``source_artifact`` into it and unzip the archive in place.
      3. Log ``azioni``, ``campi``, ``macroambiti``, ``piani`` and ``tassonomia``
         as read.
      4. Log ``comuni`` with a derived ``comune`` column and parsed date columns.
      5. Log ``valutazioni`` with ``data_pub`` parsed.

    Args:
        project: object exposing ``log_dataitem(name, data=..., kind=..., index=...)``.
        source_artifact: object whose ``download(destination)`` returns the path
            of the downloaded zip archive.

    Raises:
        Exception: whatever the download or extraction raised; it is printed and
            re-raised, because no table can be read without the extracted files.
    """
    # Local import: the header of the generated module does not import shutil, so
    # the original rmtree call always hit NameError and the directory was never cleaned.
    import shutil

    data_dir = f"{file_basepath}/data"
    date_format = "%d/%m/%Y %H:%M:%S"

    # Start from an empty data directory so stale files cannot leak into this run.
    try:
        shutil.rmtree(data_dir)
    except FileNotFoundError:
        pass  # nothing to delete, e.g. on the first run
    except OSError as exc:
        print(f"Error deleting data dir: {exc}")

    # Create the directory for the data
    makedirs(data_dir, exist_ok=True)

    try:
        # TODO(original author): "this must change in the function".
        archive_file = source_artifact.download(data_dir)
        with zipfile.ZipFile(archive_file, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
    except Exception as exc:
        # Report and stop here: continuing would only fail later with a
        # misleading FileNotFoundError from read_csv.
        print(f"Error downloading data: {exc}")
        raise

    # Tables logged as-is.
    for ds_name in ["azioni", "campi", "macroambiti", "piani", "tassonomia"]:
        _log_table(project, ds_name, _read_semicolon_table(data_dir, ds_name))

    # comuni: process name and dates
    df = _read_semicolon_table(data_dir, "comuni")
    # Literal (non-regex) removal of the prefix, then upper-case the remainder.
    df["comune"] = df["NomeOrganizzazione"].str.replace("COMUNE DI ", "", regex=False).str.upper()
    df["Data_det_assegnazione"] = _to_datetime_or_keep(df["Data_det_assegnazione"], date_format)
    df["Data_det_revoca"] = df["Data_det_revoca"].fillna("")
    df["Data_det_revoca"] = _to_datetime_or_keep(df["Data_det_revoca"], date_format)
    _log_table(project, "comuni", df)

    # valutazioni: process dates
    df = _read_semicolon_table(data_dir, "valutazioni")
    df["data_pub"] = _to_datetime_or_keep(df["data_pub"], date_format)
    _log_table(project, "valutazioni", df)

In [None]:
# Register the handler written to src/convert-all.py as a Python 3.10 function
# named "convert_all" in the project.
func_convert_all = project.new_function(
    name="convert_all",
    kind="python",
    python_version="PYTHON3_10",
    source={"source": "src/convert-all.py", "handler": "convert_all"},
)

In [None]:
# Launch convert_all as a non-local job, feeding it the artifact key as "source_artifact".
run_convert_all = func_convert_all.run(
    action="job",
    inputs={"source_artifact": di.key},
    outputs={},
    local_execution=False,
)

In [None]:
%%writefile "src/convert-aziendali.py"

import pandas as pd
from os import path, makedirs
import zipfile
import numpy as np
        
file_basepath = "daticomuni"

def convert_aziendali(project, source_artifact):
    """Download the source archive, extract it and log the company-plan tables.

    Recreates ``<file_basepath>/data``, downloads ``source_artifact`` into it,
    unzips the archive in place, then reads ``PIANI_AZIENDALI.csv``,
    ``NuovaTassonomia.csv`` and ``T_NuovaTassonomia_DettaglioRev.csv`` and logs
    each one as a 'table' dataitem on ``project``.

    Args:
        project: object exposing ``log_dataitem(name, data=..., kind=..., index=...)``.
        source_artifact: object whose ``download(destination)`` returns the path
            of the downloaded zip archive.

    Raises:
        Exception: whatever the download or extraction raised; it is printed and
            re-raised, because no table can be read without the extracted files.
    """
    # Local import: the header of the generated module does not import shutil, so
    # the original rmtree call always hit NameError and the directory was never cleaned.
    import shutil

    # NOTE(review): currently unused. It was only referenced by the former
    # read_excel call (`converters=converters[ds_name]`), and its first key does
    # not match any dataset name in the loop below. Kept as a record of the
    # columns that were meant to be read as int64.
    converters={
        '2024_06_25 PIANI AZIENDALI': {'IDorganizzazione': np.int64, 'ANNUALITA': np.int64, 'Versione': np.int64, 'AnnoCompilazione': np.int64, 'CodiceCampoAzione': np.int64, 'CodiceTassonomiaAzione': np.int64, 'BeneF': np.int64, 'BeneM': np.int64, 'IDdettaglioAccorpamento': np.int64},
        'NuovaTassonomia': {},
        'T_NuovaTassonomia_DettaglioRev': {},
    }

    data_dir = f"{file_basepath}/data"

    # Start from an empty data directory so stale files cannot leak into this run.
    try:
        shutil.rmtree(data_dir)
    except FileNotFoundError:
        pass  # nothing to delete, e.g. on the first run
    except OSError as exc:
        print(f"Error deleting data dir: {exc}")

    # Create the directory for the data
    makedirs(data_dir, exist_ok=True)

    try:
        # TODO(original author): "this must change in the function".
        archive_file = source_artifact.download(data_dir)
        with zipfile.ZipFile(archive_file, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
    except Exception as exc:
        # Report and stop here: continuing would only fail later with a
        # misleading FileNotFoundError from read_csv.
        print(f"Error downloading data: {exc}")
        raise

    for ds_name in ["PIANI_AZIENDALI", "NuovaTassonomia", "T_NuovaTassonomia_DettaglioRev"]:
        source_url = data_dir + '/' + ds_name + ".csv"
        # NOTE(review): windows-1251 is the Cyrillic code page; Italian exports are
        # usually windows-1252. Left unchanged here -- confirm against the real files.
        df = pd.read_csv(source_url, encoding="windows-1251", delimiter=",")
        df.reset_index(drop=True, inplace=True)
        project.log_dataitem(ds_name, data=df, kind='table', index=False)



In [None]:
# Register the handler written to src/convert-aziendali.py as a Python 3.10 function
# named "convert_aziendali" in the project.
func_az = project.new_function(
    name="convert_aziendali",
    kind="python",
    python_version="PYTHON3_10",
    source={"source": "src/convert-aziendali.py", "handler": "convert_aziendali"},
)

In [None]:
# Launch convert_aziendali as a non-local job, feeding it the artifact key as "source_artifact".
run_convert_az = func_az.run(
    action="job",
    inputs={"source_artifact": di.key},
    outputs={},
    local_execution=False,
)

In [None]:
# Refresh the run object and display its current state; re-run this cell to poll the job.
run_convert_az.refresh().status.state