In [19]:
import argparse
import os
import os.path
import sys
import pandas as pd
import pickle
import json

import unittest

# Find the first file whose name ends with the given suffix.


def find_file(suffix, directory=None):
    """Return the path of the first file name in a directory that ends with ``suffix``.

    Parameters
    ----------
    suffix : str
        Trailing part of the file name to look for, e.g. ``'inputVar.json'``.
    directory : str, optional
        Directory to search. When omitted, the directory containing
        ``../../development/models/Sklearn_GBT/Sklearn_GBT.pickle`` (resolved
        against the current working directory) is used.

    Returns
    -------
    str or None
        Absolute directory joined with the matching entry name, or ``None``
        when no entry matches or the directory does not exist.
    """
    if directory is None:
        directory = os.path.dirname(os.path.abspath(
            "../../development/models/Sklearn_GBT/Sklearn_GBT.pickle"))
    else:
        directory = os.path.abspath(directory)

    try:
        # os.listdir() returns entries in arbitrary order; sorting makes
        # "first match" deterministic when several names share the suffix.
        entries = sorted(os.listdir(directory))
    except (FileNotFoundError, NotADirectoryError):
        # A missing directory is reported the same way as "no match".
        return None

    for entry in entries:
        if entry.endswith(suffix):
            return os.path.join(directory, entry)

    return None


def load_var_names(filename):
    """Read the ``"name"`` field of every entry in a JSON variable file.

    The file is located with ``find_file(filename)``. Returns the list of
    names in file order, or ``None`` when no path is found or the path found
    is not a regular file (in which case a message is printed).
    """
    path = find_file(filename)
    if path is None:
        return None

    if not os.path.isfile(path):
        print('Didnot find file: ', filename)
        return None

    with open(path) as handle:
        entries = json.load(handle)

    return [entry["name"] for entry in entries]


def intersection(lst1, lst2):
    """Return the items of ``lst1`` that also occur in ``lst2``.

    Order and duplicates follow ``lst1``; membership is tested with ``in``
    against ``lst2``.
    """
    common = []
    for item in lst1:
        if item in lst2:
            common.append(item)
    return common


def load_data_by_input_vars(data):
    """Restrict ``data`` to the columns named in ``inputVar.json``.

    When ``load_var_names('inputVar.json')`` yields ``None`` the frame is
    returned untouched; otherwise only the columns that appear in the name
    list are kept, in the frame's own column order.
    """
    input_names = load_var_names('inputVar.json')
    if input_names is None:
        return data

    kept_columns = intersection(list(data.columns), input_names)
    return data[kept_columns]
    
def run(model_file, input_file, output_file):
    """Score a CSV file with a pickled classifier, print a preview and save the result.

    Parameters
    ----------
    model_file : str
        Path to the pickled model. The loaded object must provide
        ``predict_proba`` and ``classes_``.
    input_file : str
        Path to the CSV file to score. Missing values are replaced with 0
        before scoring.
    output_file : str
        Path the merged input + probability columns are written to as CSV.
        Nothing is written when this is ``None`` or empty.

    Notes
    -----
    Calls ``sys.exit()`` after printing a message when ``model_file`` is
    ``None`` or ``input_file`` is not an existing file.
    Probability columns are rounded to one decimal place. Column names come
    from ``outputVar.json`` when it is found, otherwise ``'P_' + class label``.
    """
    if model_file is None:
        print('Not found Python pickle file!')
        sys.exit()

    if not os.path.isfile(input_file):
        print('Not found input file', input_file)
        sys.exit()

    inputDf = pd.read_csv(input_file).fillna(0)

    output_vars = load_var_names('outputVar.json')

    in_dataf = load_data_by_input_vars(inputDf)

    # SECURITY: unpickling can execute arbitrary code; only load model files
    # from a trusted source.
    # The context manager closes the handle even if pickle.load() raises.
    with open(model_file, 'rb') as model:
        pkl_model = pickle.load(model)

    outputDf = pd.DataFrame(pkl_model.predict_proba(in_dataf)).round(1)
    print(outputDf.head())

    class_labels = list(pkl_model.classes_)
    if output_vars is None:
        outputcols = ['P_' + str(x) for x in class_labels]
    else:
        # NOTE(review): indexes the name list by class label, so this assumes
        # the labels are valid indices into outputVar.json - confirm.
        outputcols = [output_vars[x] for x in class_labels]

    outputDf.columns = outputcols

    # merge with input data
    outputDf = pd.merge(inputDf, outputDf, how='inner',
                        left_index=True, right_index=True)
    print('printing first few lines...')
    print(outputDf.head())

    # Persist the scored data; previously output_file was accepted but ignored.
    if output_file:
        outputDf.to_csv(output_file, index=False)

In [20]:
# Display the installed scikit-learn version.
# NOTE(review): presumably recorded because the pickled model should be loaded
# with the same scikit-learn version it was saved with - confirm.
import sklearn
sklearn.__version__

'1.0.2'

In [21]:
# Score ../../data/test.csv with the pickled Sklearn_GBT model; "result.csv" is passed as the output path.
run("../../development/models/Sklearn_GBT/Sklearn_GBT.pickle","../../data/test.csv","result.csv")

     0    1
0  0.2  0.8
printing first few lines...
   BAD  LOAN  MORTDUE  VALUE   REASON    JOB   YOJ  DEROG  DELINQ      CLAGE  \
0    1  1100    25860  39025  HomeImp  Other  10.5      0       0  94.366667   

   NINQ  CLNO  DEBTINC  P_BAD0  P_BAD1  
0     1     9      0.0     0.2     0.8  
