In [21]:
import pandas as pd
import numpy as np
import pickle
import os
import sys
import logging
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import json


In [11]:
# Route log records at INFO level and above to stdout so they show up
# alongside the printed results of each cell.
logging.basicConfig(
    level=logging.INFO,
    stream=sys.stdout,
)

In [15]:
# Parse the JSON project configuration; the path is relative to the
# current working directory.
with open('src/config.json', 'r') as f:
    config = json.loads(f.read())

# Directory holding the trained model (the score file is written here too).
model_path = os.path.join(
    config['output_model_path']
)
# Directory holding the test data CSV.
test_data_path = os.path.join(
    config['test_data_path']
)

In [24]:
def score_model():
    """Score the trained model on the test set and record its F1 score.

    Reads ``testdata.csv`` from ``test_data_path``, unpickles
    ``trainedmodel.pkl`` from ``model_path``, predicts on every column except
    ``exited`` (used as the ground-truth label) and ``corporation``, prints
    the resulting F1 score and writes it to ``latestscore.txt`` inside
    ``model_path``.

    Returns:
        The F1 score computed by ``f1_score`` for the test-set predictions.
    """
    # 1. read test data
    logging.info("Loading testdata.csv")
    test_df = pd.read_csv(os.path.join(test_data_path, 'testdata.csv'))
    # 2. load model
    logging.info("Loading trained model")
    # NOTE(security): pickle.load can execute arbitrary code, so the model
    # file must come from a trusted source.
    # The context manager guarantees the file handle is closed after loading.
    with open(os.path.join(model_path, 'trainedmodel.pkl'), 'rb') as model_file:
        model = pickle.load(model_file)
    # 3. prepare test data
    logging.info("Preparing test data")
    y_true = test_df['exited']
    # 'corporation' is excluded from the features -- presumably an identifier
    # column rather than a predictor; confirm against the training code.
    X_df = test_df.drop(columns=['exited', 'corporation'])
    # 4. predict test data
    logging.info("Predicting test data")
    y_pred = model.predict(X_df)
    f1 = f1_score(y_true, y_pred)
    print(f"f1 score = {f1}")
    # 5. record information
    logging.info("Saving scores to text file")
    with open(os.path.join(model_path, 'latestscore.txt'), 'w') as file:
        file.write(f"f1 score = {f1}")
    # Hand the score back so callers can use it without re-reading the file.
    return f1

In [25]:
# Entry point: announce the run in the log, then score the model.
if __name__ == "__main__":
    logging.info("Running scoring.py")
    score_model()

INFO:root:Running scoring.py
INFO:root:Loading testdata.csv
INFO:root:Loading trained model
INFO:root:Preparing test data
INFO:root:Predicting test data
f1 score = 0.5714285714285715
INFO:root:Saving scores to text file
