In [None]:
import pickle
import pandas as pd
from constants import MODEL_PATH, CATEGORICAL, DATA_PATH, OUTPUT_PATH

In [2]:
def load_model(model_path):
    """Load the pickled ``(dv, model)`` pair stored at ``model_path``.

    Parameters
    ----------
    model_path : str or path-like
        Location of the pickle file. The file is expected to unpickle into a
        two-item sequence, which is unpacked as ``(dv, model)``.

    Returns
    -------
    tuple
        ``(dv, model)`` exactly as stored in the file.

    Raises
    ------
    FileNotFoundError
        If ``model_path`` does not exist.
    """
    # Open the path the caller asked for instead of a hard-coded file name,
    # so the argument actually controls which artifact is loaded.
    # NOTE: pickle.load can execute arbitrary code while deserialising; only
    # point this at model files from a trusted source.
    with open(model_path, 'rb') as f_in:
        dv, model = pickle.load(f_in)
    return dv, model

def read_data(filename):
    """Read a parquet file of trips and prepare it for prediction.

    Steps performed:

    1. Compute ``duration`` in minutes from ``tpep_dropoff_datetime`` minus
       ``tpep_pickup_datetime``.
    2. Keep only rows whose duration lies in the closed interval [1, 60].
    3. For the columns listed in ``CATEGORICAL``: replace missing values with
       -1, cast to ``int`` and then to ``str``.

    Parameters
    ----------
    filename : str or path-like
        Anything accepted by ``pd.read_parquet``.

    Returns
    -------
    pandas.DataFrame
        The filtered copy of the data with the extra ``duration`` column.
    """
    trips = pd.read_parquet(filename)

    # Trip length expressed in minutes.
    elapsed = trips.tpep_dropoff_datetime - trips.tpep_pickup_datetime
    trips['duration'] = elapsed.dt.total_seconds() / 60

    # Inclusive on both ends; .copy() detaches the result from the original
    # frame so the column assignment below does not hit a view.
    within_limits = trips.duration.between(1, 60)
    trips = trips[within_limits].copy()

    # Missing category -> -1, then int -> str so values look like '12', not '12.0'.
    normalised = trips[CATEGORICAL].fillna(-1).astype('int')
    trips[CATEGORICAL] = normalised.astype('str')

    return trips

def upload_predictions(df, y_pred, output_file):
    """Write ride ids and their predictions to a parquet file.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``ride_id`` column; its index is carried over to the
        result frame (but not written, see below).
    y_pred : array-like
        Predictions, one per row of ``df``.
    output_file : str or path-like
        Destination passed to ``DataFrame.to_parquet``.

    Returns
    -------
    None
    """
    # Two-column frame: ride_id taken from df, predictions attached alongside.
    result = df[['ride_id']].assign(predictions=y_pred)

    # Uncompressed parquet via pyarrow, without persisting the index.
    parquet_options = {
        'engine': 'pyarrow',
        'compression': None,
        'index': False,
    }
    result.to_parquet(output_file, **parquet_options)

In [None]:
def run(year, month):
    """Score the data at ``DATA_PATH`` and store predictions at ``OUTPUT_PATH``.

    The model artifacts are loaded via ``load_model(MODEL_PATH)``, the input
    is read with ``read_data``, the ``CATEGORICAL`` columns are vectorised and
    fed to the model, and the result is handed to ``upload_predictions``.

    Parameters
    ----------
    year, month
        Used only to build the ``ride_id`` prefix ``'{year}/{month}_'``; the
        input and output locations come from module-level constants.

    Returns
    -------
    None
    """
    vectorizer, predictor = load_model(MODEL_PATH)

    trips = read_data(DATA_PATH)

    # ride_id = "<year>/<month>_" followed by the row's index label.
    id_prefix = f'{year}/{month}_'
    trips['ride_id'] = id_prefix + trips.index.astype('str')

    # One dict per row, restricted to the categorical feature columns.
    records = trips[CATEGORICAL].to_dict(orient='records')
    features = vectorizer.transform(records)
    predictions = predictor.predict(features)

    upload_predictions(trips, predictions, OUTPUT_PATH)