In [15]:
import pandas as pd
import mlflow.pyfunc
from zipfile import ZipFile


In [16]:
pip freeze | grep pandas


pandas==1.4.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load the radar training set.
# NOTE(review): absolute, user-specific path; the notebook will not run on another machine as-is.
inputDF = pd.read_csv(
    filepath_or_buffer="/home/dorian/punchplatform/demos/starter-platform/resources/radar/data/train-radar.csv"
)

# Prediction function implementation and test

In [5]:
def determine_prediction(row):
    """Classify one radar record with a fixed set of rules.

    Args:
        row: Mapping-like record exposing ``row['is_ocean']`` and
            ``row['alt']`` (for example a DataFrame row).

    Returns:
        ``'bateau'`` when ``is_ocean`` equals 1; otherwise ``'avion'`` when
        ``alt`` is strictly positive; otherwise ``'véhicule terrestre'``.
    """
    # The ocean flag is tested first, so it wins over any altitude value
    # (and 'alt' is not even read in that case).
    if row['is_ocean'] == 1:
        return 'bateau'
    return 'avion' if row['alt'] > 0 else 'véhicule terrestre'

# Label every row: axis="columns" makes apply() pass one row at a time to the function.
inputDF['prediction'] = inputDF.apply(determine_prediction, axis="columns")

In [6]:
# Preview the first rows to check the newly added 'prediction' column.
inputDF.head()

Unnamed: 0,id,lon,lat,alt,frequence,is_ocean,timestamp,prediction
0,8,56,55,0,21638,1,Nov 28 11:18:45,bateau
1,9,120,21,3705,6192,1,Nov 28 11:18:45,bateau
2,8,98,163,0,2847,1,Nov 28 11:18:45,bateau
3,8,37,123,3792,12060,0,Nov 28 11:18:45,avion
4,7,109,90,0,7741,1,Nov 28 11:18:45,bateau


# MLflow model builder

In [5]:
import mlflow.pyfunc
from zipfile import ZipFile
import os
import shutil
import datetime



# Artifact coordinates: group, artifact name and version.
group, name, version = "io.model", "vehicle-predictor", "1.0"

# Staging directory for the build; dots in the group become path separators.
PUNCH_MODEL_CREATION_DIR = f'/tmp/punch_artifact_env/python/{group.replace(".", "/")}/{name}/{version}/'

# Start from a clean slate: drop any previous build (a missing directory is
# not an error), then recreate the empty staging directory.
shutil.rmtree(PUNCH_MODEL_CREATION_DIR, ignore_errors=True)
os.makedirs(PUNCH_MODEL_CREATION_DIR, exist_ok=True)

class vehiclePredictorWrapper(mlflow.pyfunc.PythonModel):
    """Rule-based vehicle classifier exposed through the MLflow pyfunc interface."""

    def predict(self, context, model_input):
        """Label every row of ``model_input`` as boat, plane or land vehicle.

        Args:
            context: Part of the pyfunc ``predict`` signature; not used here.
            model_input: Object whose ``apply(func, axis=1)`` passes rows that
                expose ``['is_ocean']`` and ``['alt']`` (e.g. a DataFrame).

        Returns:
            The result of ``model_input.apply`` over the rows, i.e. one of
            ``'bateau'``, ``'avion'`` or ``'véhicule terrestre'`` per row.
        """
        def classify_row(record):
            # Same rule as the notebook-level determine_prediction:
            # the ocean flag takes precedence over the altitude.
            if record['is_ocean'] == 1:
                return 'bateau'
            if record['alt'] > 0:
                return 'avion'
            return 'véhicule terrestre'

        return model_input.apply(classify_row, axis=1)

# Instantiate the rule-based wrapper (there is no trained estimator to wrap here).
model_wrapper = vehiclePredictorWrapper()

# Save the wrapper as an MLflow pyfunc model in the staging directory.
mlflow.pyfunc.save_model(python_model=model_wrapper, path=PUNCH_MODEL_CREATION_DIR)

# Bundle the saved model files into <name>_<version>.zip.
with ZipFile(f'{PUNCH_MODEL_CREATION_DIR}/{name}_{version}.zip', 'w') as zipObj:
    for folderName, subfolders, filenames in os.walk(PUNCH_MODEL_CREATION_DIR):
        for filename in filenames:
            # The archive is created inside the directory being walked,
            # so it must not be added to itself.
            if filename != f'{name}_{version}.zip':
                filePath = os.path.join(folderName, filename)
                # Store each file under its path relative to the model root.
                # Using only the basename would flatten sub-directories and
                # let same-named files overwrite each other in the archive.
                zipObj.write(filePath, os.path.relpath(filePath, PUNCH_MODEL_CREATION_DIR))

# Create the metadata file describing the artifact.
# NOTE(review): the library versions quoted in the description are hard-coded;
# this notebook's own `pip freeze` output reports pandas==1.4.2 and the model
# does not use scikit-learn. Confirm what the description should state.
metadata = (f'type: model\n'
            f'group: {group}\n'
            f'logo: https://punchplatform.com/wp-content/uploads/2020/01/logos_png_logo_punch_color2019_marge.png\n'
            f'version: {version}\n'
            f'artifact: {name}\n'
            f'displayName: {name}\n'
            # Creation time as milliseconds since the epoch.
            f'createdAt: {int(datetime.datetime.now().timestamp() * 1000)}\n'
            f'description: Vehicle predictor model pandas(2.0.0), sklearn(1.1.1)')

# Explicit encoding so the file content does not depend on the locale.
with open(f'{PUNCH_MODEL_CREATION_DIR}/metadata.yml', 'w', encoding='utf-8') as f:
    f.write(metadata)

# Make the final artifact zip: the model archive plus its metadata, both at the root.
with ZipFile(f'{PUNCH_MODEL_CREATION_DIR}/artifact_{name}_{version}.zip', 'w') as zipObj:
    zipObj.write(f'{PUNCH_MODEL_CREATION_DIR}/{name}_{version}.zip', f'{name}_{version}.zip')
    zipObj.write(f'{PUNCH_MODEL_CREATION_DIR}/metadata.yml', 'metadata.yml')


# LOADING TEST

In [12]:
# Rebuild the "group:artifact:version" coordinate string, then split it back
# into its three parts to derive the directory the model was saved in.
ppf_get_model = ":".join((group, name, version))
group, artifact, version = ppf_get_model.split(":")
output = model_path = f'/tmp/punch_artifact_env/python/{group.replace(".", "/")}/{artifact}/{version}/'
# NOTE(review): the saved output of this cell shows ".../io/models/..." while
# group is "io.model"; the stored output looks stale and should be re-run.
print(model_path)

/tmp/punch_artifact_env/python/io/models/vehicle-predictor/1.0/


In [13]:
# Reload the pyfunc model from the directory it was saved to.
loaded_model = mlflow.pyfunc.load_model(model_uri=model_path)

In [16]:
# Read the radar CSV again and label it with the reloaded model.
# NOTE(review): absolute, user-specific path; same file as the one loaded into inputDF.
testDF = pd.read_csv("/home/dorian/punchplatform/demos/starter-platform/resources/radar/data/train-radar.csv")
predicted_labels = loaded_model.predict(testDF)
testDF["prediction"] = predicted_labels

In [17]:
# Preview the first rows of the frame labelled by the reloaded model.
testDF.head()

Unnamed: 0,id,lon,lat,alt,frequence,is_ocean,timestamp,prediction
0,8,56,55,0,21638,1,Nov 28 11:18:45,bateau
1,9,120,21,3705,6192,1,Nov 28 11:18:45,bateau
2,8,98,163,0,2847,1,Nov 28 11:18:45,bateau
3,8,37,123,3792,12060,0,Nov 28 11:18:45,avion
4,7,109,90,0,7741,1,Nov 28 11:18:45,bateau


In [17]:
# Read the gzip-compressed copy of the radar CSV.
# NOTE(review): absolute, user-specific path under the Downloads folder.
compressedDF = pd.read_csv(
    "/home/dorian/Downloads/train-radar.csv.gz",
    sep=",",
    encoding="UTF-8",
    compression="gzip",
)

In [18]:
# Preview the first rows read from the gzip-compressed CSV.
compressedDF.head()

Unnamed: 0,id,lon,lat,alt,frequence,is_ocean,timestamp
0,8,56,55,0,21638,1,Nov 28 11:18:45
1,9,120,21,3705,6192,1,Nov 28 11:18:45
2,8,98,163,0,2847,1,Nov 28 11:18:45
3,8,37,123,3792,12060,0,Nov 28 11:18:45
4,7,109,90,0,7741,1,Nov 28 11:18:45
