# 1. Create a project

In [None]:
import getpass
import os

import digitalhub as dh

# The project name is suffixed with the current user name. Prefer $USER (the
# original behaviour); when it is unset or empty, fall back to
# getpass.getuser() instead of failing with a KeyError.
username = os.environ.get("USER") or getpass.getuser()

project = dh.get_or_create_project(f"my-test-project-{username}")
project

# 2. Create a function and run it locally

In [None]:
%%writefile "hellojob.py"

def hello(project):
    """Print a fixed greeting; ``project`` is accepted but not used."""
    greeting = "Hello Job!"
    print(greeting)

In [None]:
# Register "hellojob.py" with the project as a Python function whose
# handler is ``hello``.
func = project.new_function(
    name="hello-job",
    kind="python",
    python_version="PYTHON3_10",
    code_src="hellojob.py",
    handler="hello",
)

In [None]:
# Launch the function as a "job" with local execution enabled and keep the
# resulting run object for inspection.
run = func.run(
    "job",
    wait=True,
    local_execution=True,
)

# 3. More complex example: geodata

Do some geodata exploration using public data.

In [None]:
%pip install geopandas contextily

In [None]:
import geopandas

# Public humidex GeoJSON endpoint served by dati.meteotrentino.it.
url = "https://dati.meteotrentino.it/service.asmx/getHumidexGeoJson"

# Read the remote GeoJSON and show the first rows as the cell output.
df = geopandas.read_file(url)
df.head()

In [None]:
import contextily as cx

# Plot the geometries loaded into ``df``, then add basemap tiles using the
# same CRS as the data.
ax = df.plot()
cx.add_basemap(ax, crs=df.crs)

# Write the resulting figure to "foo.pdf" in the working directory.
ax.get_figure().savefig("foo.pdf")

## 3.1. Create and run the function locally

In [None]:
%%writefile "hellogeo.py"

import geopandas 
import contextily as cx

def geoprocessing(project):
    """Fetch the Meteotrentino humidex GeoJSON, plot it on a basemap, save it.

    ``project`` is accepted but not used. The figure is written to ``foo.pdf``
    in the current working directory; nothing is returned.
    """
    url = "https://dati.meteotrentino.it/service.asmx/getHumidexGeoJson"
    df = geopandas.read_file(url)

    # A bare ``df.head()`` only displays as the last expression of a notebook
    # cell; inside a function its result is discarded, so print the preview.
    print(df.head())

    # Plot the geometries and add basemap tiles in the data's own CRS.
    ax = df.plot()
    cx.add_basemap(ax, crs=df.crs)
    ax.figure.savefig('foo.pdf')

In [None]:
# Register "hellogeo.py" with the project as a Python function whose
# handler is ``geoprocessing``.
func = project.new_function(
    name="geo-job",
    kind="python",
    python_version="PYTHON3_10",
    code_src="hellogeo.py",
    handler="geoprocessing",
)

In [None]:
# Run in the notebook's own environment, as this section's heading says:
# geopandas and contextily were installed here with %pip, and the function
# registration above declares no requirements for a remote runtime.
run = func.run("job", wait=True, local_execution=True)

# 4. Resource management

In [None]:
%%writefile "hellores.py"

import pandas as pd

def resources(project):
    """Build a large in-memory DataFrame by repeating a tiny one.

    ``project`` is accepted but not used. Nothing is returned; the replicated
    frame is only bound to a local name.
    """
    # Target size of the replicated dataset: 40 million rows.
    num_rows = 40_000_000

    # Four-row seed frame with one integer column and one float column,
    # left at pandas' default dtypes.
    seed = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5.0, 6.0, 7.0, 8.0]})

    # Repeat the seed as many whole times as fit into ``num_rows`` and
    # concatenate the copies under a fresh integer index.
    copies = num_rows // len(seed)
    df_large = pd.concat([seed] * copies, ignore_index=True)

In [None]:
# Register "hellores.py" with the project as a Python function whose
# handler is ``resources``.
func = project.new_function(
    name="resource-job",
    kind="python",
    python_version="PYTHON3_10",
    code_src="hellores.py",
    handler="resources",
)

In [None]:
# Launch the function as a "job" with local execution disabled and keep the
# resulting run object for inspection.
run = func.run(
    "job",
    wait=True,
    local_execution=False,
)

In [None]:
## Try changing the resources: resources={"mem": "8Gi"}

In [None]:
## Append a file storage operation

# url ="https://huggingface.co/datasets/kitofrank/RFUAV/resolve/main/DEVENTION%20DEVO.rar"

# import urllib.request
# urllib.request.urlretrieve(url, "download.rar")