In [102]:
from mysklearn import myclassifiers, myevaluation, mypytable, myutils
import importlib

In [103]:
# Reload mypytable so that edits made to the module after the kernel started
# are picked up without restarting.
importlib.reload(mypytable)

# Read the fraud dataset into a MyPyTable object.
mytable = mypytable.MyPyTable()
mytable.load_from_file("Fraud_chop.csv")

# Show the full header and the first row before any column is removed.
print(mytable.column_names)

print(mytable.data[0])

# The meaning of each column is covered in the data-chopping notebook.
# For this experiment everything except 'amount' (the only feature) and
# 'isFraud' (the class label) is dropped, including the account identifiers
# nameOrig / nameDest and the isFlaggedFraud column.
cols_to_drop = [
    'step',
    'type',
    'nameOrig',
    'oldbalanceOrg',
    'newbalanceOrig',
    'nameDest',
    'oldbalanceDest',
    'newbalanceDest',
    'isFlaggedFraud',
]
mytable.drop_cols(cols_to_drop)

# Confirm what is left: the first row again, then the remaining header.
print(mytable.data[0])
print(mytable.column_names)


['step', 'type', 'amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig', 'nameDest', 'oldbalanceDest', 'newbalanceDest', 'isFraud', 'isFlaggedFraud']
[241.0, 'CASH_OUT', 325470.07, 'C570536992', 325470.07, 0.0, 'C437423112', 19771.15, 345241.22, 1.0, 0.0]
[325470.07, 1.0]
['amount', 'isFraud']


In [104]:
# Keep handles on the raw rows and the header of the trimmed table.
headers = mytable.column_names
data = mytable.data

# Split every row into its features (all columns but the last) and its
# class label (the last column).
X = [row[:-1] for row in data]
y = [row[-1] for row in data]

# Both lists should contain one entry per table row.
print(len(y))
print(len(X))

1642
1642


In [105]:
# Number of folds requested from the k-fold helper.
N_FOLDS = 13

# Per-fold train/test index lists for the instances in X.
X_train_folds_indexes, X_test_folds_indexes = myevaluation.kfold_cross_validation(
    X, N_FOLDS
)

# Convert the index folds into folds of instances and labels.
(
    X_test_folds,
    X_train_folds,
    y_test_folds,
    y_train_folds,
) = myutils.indexes_to_fold(X_test_folds_indexes, X_train_folds_indexes, X, y)

# Collapse the folds into a single train split and a single test split.
# NOTE(review): if folds_to_train_test simply concatenates every fold, each
# instance ends up in both X_train and X_test, which would leak test data into
# training -- confirm against the myutils implementation.
X_test, X_train, y_test, y_train = myutils.folds_to_train_test(
    X_test_folds, X_train_folds, y_test_folds, y_train_folds
)

In [106]:
# Fit the project's dummy classifier on the training split and predict a
# label for every test instance; its scores serve as a reference point for
# the other classifiers in this notebook.
dummy_clf = myclassifiers.MyDummyClassifier()
dummy_clf.fit(X_train, y_train)
dummy_Y_predicted = dummy_clf.predict(X_test)


In [107]:
# Score the dummy classifier's predictions against the true test labels.
dummy_accuracy = myevaluation.accuracy_score(y_test, dummy_Y_predicted)
dummy_BinaryF1 = myevaluation.binary_f1_score(y_test, dummy_Y_predicted)
dummy_Binary_precision = myevaluation.binary_precision_score(y_test, dummy_Y_predicted)
dummy_Binary_recall = myevaluation.binary_recall_score(y_test, dummy_Y_predicted)

# Report each metric on its own line as "<label> <value>".
for metric_label, metric_value in (
    ("Dummy accuracy:", dummy_accuracy),
    ("Dummy Binary F1:", dummy_BinaryF1),
    ("Dummy Binary precision:", dummy_Binary_precision),
    ("Dummy Binary recall:", dummy_Binary_recall),
):
    print(metric_label, metric_value)

Dummy accuracy: 0.5006090133982948
Dummy Binary F1: 0
Dummy Binary precision: 0
Dummy Binary recall: 0


In [108]:
# Fit the project's Naive Bayes classifier on the training split and predict
# a label for every test instance.
# NOTE(review): the only feature is the continuous 'amount' column; if this
# classifier treats attribute values as categorical, unseen amounts may not
# match any training value -- confirm how MyNaiveBayesClassifier handles this.
NaiveBayes_clf = myclassifiers.MyNaiveBayesClassifier()
NaiveBayes_clf.fit(X_train, y_train)
NaiveBayes_Y_predicted = NaiveBayes_clf.predict(X_test)

In [109]:
# Score the Naive Bayes predictions against the true test labels.
NB_accuracy = myevaluation.accuracy_score(y_test, NaiveBayes_Y_predicted, normalize=True)
NB_BinaryF1 = myevaluation.binary_f1_score(y_test, NaiveBayes_Y_predicted)
NB_Binary_precision = myevaluation.binary_precision_score(y_test, NaiveBayes_Y_predicted)
NB_Binary_recall = myevaluation.binary_recall_score(y_test, NaiveBayes_Y_predicted)

# Report each metric on its own line as "<label> <value>".
for metric_label, metric_value in (
    ("Naive Bayes accuracy:", NB_accuracy),
    ("Naive Bayes Binary F1:", NB_BinaryF1),
    ("Naive Bayes Binary precision:", NB_Binary_precision),
    ("Naive Bayes Binary recall:", NB_Binary_recall),
):
    print(metric_label, metric_value)

Naive Bayes accuracy: 0.5006090133982948
Naive Bayes Binary F1: 0
Naive Bayes Binary precision: 0
Naive Bayes Binary recall: 0


In [110]:
# Fit the project's decision tree classifier on the training split and
# predict a label for every test instance.
# NOTE(review): a perfect score from this model would be worth checking for
# overlap between X_train and X_test -- confirm how the splits were built.
tree_clf = myclassifiers.MyDecisionTreeClassifier()
tree_clf.fit(X_train, y_train)
tree_Y_predicted = tree_clf.predict(X_test)

In [111]:
# Score the decision tree predictions against the true test labels.
tree_accuracy = myevaluation.accuracy_score(y_test, tree_Y_predicted, normalize=True)
tree_BinaryF1 = myevaluation.binary_f1_score(y_test, tree_Y_predicted)
tree_Binary_precision = myevaluation.binary_precision_score(y_test, tree_Y_predicted)
tree_Binary_recall = myevaluation.binary_recall_score(y_test, tree_Y_predicted)

# Report each metric on its own line as "<label> <value>".
for metric_label, metric_value in (
    ("Decision Tree accuracy:", tree_accuracy),
    ("Decision Tree Binary F1:", tree_BinaryF1),
    ("Decision Tree Binary precision:", tree_Binary_precision),
    ("Decision Tree Binary recall:", tree_Binary_recall),
):
    print(metric_label, metric_value)

Decision Tree accuracy: 1.0
Decision Tree Binary F1: 1.0
Decision Tree Binary precision: 1.0
Decision Tree Binary recall: 1.0


In [112]:
# Peek at the first ten feature rows; each row is a one-element list holding
# the transaction amount.
print(X[:10])

[[325470.07], [223730.4], [1191183.65], [6512846.44], [251832.96], [1773151.32], [346183.99], [376367.26], [40486.25], [1638865.05]]


In [113]:
# Fit the project's simple linear regressor on the training split, using the
# numeric class labels as the regression target, and predict a value for
# every test instance. The raw predictions are continuous and still have to
# be mapped onto class labels before they can be scored.
reg_clf = myclassifiers.MySimpleLinearRegressor()
reg_clf.fit(X_train, y_train)
reg_y_predicted = reg_clf.predict(X_test)

In [115]:
# The regressor emits continuous values, so they must be mapped onto the two
# class labels before the classification metrics can be computed.
# An explicit 0.5 cut-off is used instead of round(): round() returns the
# nearest integer, so predictions at or above 1.5 or below -0.5 would become
# 2, -1, ... (not valid class labels), and an exact 0.5 is rounded down to 0
# because Python rounds halves to the nearest even integer.
reg_y_predicted_rounded = [1 if val >= 0.5 else 0 for val in reg_y_predicted]

# Score the thresholded predictions against the true test labels.
reg_accuracy = myevaluation.accuracy_score(y_test, reg_y_predicted_rounded)
reg_BinaryF1 = myevaluation.binary_f1_score(y_test, reg_y_predicted_rounded)
reg_Binary_precision = myevaluation.binary_precision_score(y_test, reg_y_predicted_rounded)
reg_Binary_recall = myevaluation.binary_recall_score(y_test, reg_y_predicted_rounded)

print("Linear Regressor accuracy:", reg_accuracy)
print("Linear Regressor Binary F1:", reg_BinaryF1)
print("Linear Regressor Binary precision:", reg_Binary_precision)
print("Linear Regressor Binary recall:", reg_Binary_recall)

Linear Regressor accuracy: 0.669305724725944
Linear Regressor Binary F1: 0.7456674473067915
Linear Regressor Binary precision: 0.6053231939163498
Linear Regressor Binary recall: 0.9707317073170731
