In [1]:
#Download the fraud dataset, preprocess it and save three files train_a.csv, train_b.csv and test.csv
#Train a model with train_a.csv, test it on test.csv and save the model and metrics
#Train a model with train_b.csv, test it on test.csv and save the model and metrics
#Switch back to the model created with train_a.csv, and verify that its metrics on test match the computed metrics

In [34]:
import pandas as pd
import numpy as np

In [35]:
#Read split_1.csv, build a simple linear regression model and test on split_3.csv
data = pd.read_csv('split_1.csv')

In [36]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', MinMaxScaler(), ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']),
        ('cat', OneHotEncoder(), ['type'])
    ])
preprocessor.fit(data.drop(columns=['step', 'nameOrig', 'nameDest', 'isFlaggedFraud']))

In [37]:
model_input = preprocessor.transform(data.drop(columns=['step', 'nameOrig', 'nameDest', 'isFlaggedFraud']))

In [38]:
target = data['isFraud']

In [39]:
from sklearn.linear_model import LogisticRegression
reg = LogisticRegression(penalty='l1', C=1.0, solver='liblinear')
reg.fit(model_input, target)

In [40]:
test = pd.read_csv('split_3.csv')

In [41]:
test_input = preprocessor.transform(test.drop(columns=['step', 'nameOrig', 'nameDest', 'isFlaggedFraud']))

In [42]:
test_target = test['isFraud']

In [43]:
test_predict = reg.predict(test_input)

In [44]:
from sklearn.metrics import accuracy_score
print('Accuracy with split_1 : ', accuracy_score(test_target, test_predict))

Accuracy with split_1 :  0.9177638587072308


In [17]:
import pickle 
with open('model.p', 'wb') as f:
    pickle.dump(reg, f)

In [45]:
NEPTUNE_KEY = ''

In [46]:
import neptune
run = neptune.init_run(project='pnuthanakalva/DVTest', api_token=NEPTUNE_KEY)

run["datasets/train"].track_files('split_1.csv')
run["datasets/test"].track_files('split_3.csv')
run["metrics/test_score"] = accuracy_score(test_target, test_predict)
run['models/model'].track_files('model.p')

run.stop()

  run = neptune.init_run(project='pnuthanakalva/DVTest', api_token=NEPTUNE_KEY)


https://app.neptune.ai/pnuthanakalva/DVTest/e/DVTES-1
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 4 operations to synchronize with Neptune. Do not kill this process.
All 4 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/pnuthanakalva/DVTest/e/DVTES-1/metadata


In [23]:
#Repeat the same with the second model

In [50]:
data = pd.read_csv('split_1.csv')
preprocessor = ColumnTransformer(
    transformers=[
        ('num', MinMaxScaler(), ['amount', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']),
        ('cat', OneHotEncoder(), ['type'])
    ])
preprocessor.fit(data.drop(columns=['step', 'nameOrig', 'nameDest', 'isFlaggedFraud']))
model_input = preprocessor.transform(data.drop(columns=['step', 'nameOrig', 'nameDest', 'isFlaggedFraud']))
target = data['isFraud']
reg = LogisticRegression(penalty='l1', C=1.0, solver='liblinear')
reg.fit(model_input, target)
test = pd.read_csv('split_3.csv')
test_input = preprocessor.transform(test.drop(columns=['step', 'nameOrig', 'nameDest', 'isFlaggedFraud']))
test_target = test['isFraud']
test_predict = reg.predict(test_input)
print('Accuracy with split_2 : ', accuracy_score(test_target, test_predict))

Accuracy with split_2 :  0.8496740380928033


In [51]:
with open('model.p', 'wb') as f:
    pickle.dump(reg, f)

In [52]:
import neptune
run = neptune.init_run(project='pnuthanakalva/DVTest', api_token=NEPTUNE_KEY)

run["datasets/train"].track_files('split_1.csv')
run["datasets/test"].track_files('split_3.csv')
run["metrics/test_score"] = accuracy_score(test_target, test_predict)
run['models/model'].track_files('model.p')

run.stop()

https://app.neptune.ai/pnuthanakalva/DVTest/e/DVTES-2
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 4 operations to synchronize with Neptune. Do not kill this process.
All 4 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/pnuthanakalva/DVTest/e/DVTES-2/metadata


In [54]:
model = neptune.init_model(with_id="DVTES-1",project='pnuthanakalva/DVTest', api_token=NEPTUNE_KEY)

# The path exists in the local representation
if model.exists("models/model"):
    # However, the tracking call may have not reached Neptune servers yet
    model["models/model"].download()  # Error: the field does not exist

model.wait()

ModelNotFound: Model pnuthanakalva/DVTest/DVTES-1 not found.

In [31]:
with open('model.p', 'rb') as f:
    reg = pickle.load(f)

In [33]:
test = pd.read_csv('split_3.csv')
test_input = preprocessor.transform(test.drop(columns=['step', 'nameOrig', 'nameDest', 'isFlaggedFraud']))
test_target = test['isFraud']
test_predict = reg.predict(test_input)
print('Accuracy: ', accuracy_score(test_target, test_predict))

Accuracy:  0.9308023349950999
