In [42]:
# Silence all warnings issued through warnings.warn for the rest of the session.
import warnings


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts anything, does nothing."""
    return None


# Replace the real function so every later warnings.warn(...) call is discarded.
warnings.warn = warn

In [43]:
import numpy as np

# Load training samples and class labels from file
def loadFeatureData(data_file):
    """Read labelled integer feature vectors from a tab-separated text file.

    Each non-blank line must have the form ``label<TAB>v1 v2 ... vn``: an
    integer class label, a tab, then whitespace-separated integer features.
    Any additional tab-separated fields on a line are ignored.

    Args:
        data_file: Path of the text file to read.

    Returns:
        A tuple ``(X, y)`` of plain Python lists. ``X`` holds one list of ints
        per sample and ``y`` holds the matching integer labels, in file order.

    Raises:
        IndexError: If a non-blank line has no tab-separated feature field.
        ValueError: If a label or a feature value is not a valid integer.
    """
    X = []
    y = []
    with open(data_file) as f:
        # Iterate over the file lazily instead of materialising every line first.
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Blank lines carry no sample; skipping them avoids int('') errors.
                continue
            parts = line.split("\t")
            y.append(int(parts[0]))
            # split() without a separator collapses runs of whitespace, so doubled
            # spaces between values do not produce empty tokens.
            X.append([int(value) for value in parts[1].split()])
    return X, y

# Load the training samples and their labels from the training data file.
X_train, y_train = loadFeatureData("data/feature_vector_training_data.txt")
# loadFeatureData returns plain Python lists, which have no .shape attribute,
# so preview the first two samples and labels instead of printing shapes.
print(X_train[:2])
print(y_train[:2])

[[9, 1, 47083, 73162, 0, 47083, 47083, 60157, 0, 73161], [9, 1, 73162, 73162, 0, 47083, 0, 47083, 40046, 45559]]
[1, 1]


In [44]:
# Generating Model

# Initialize an SVM classifier with a fixed random_state and train it on the
# training samples and labels.
# NOTE(review): the integer features are passed in unscaled; kernel SVMs are
# often sensitive to feature scale -- confirm whether scaling is wanted here.
from sklearn import svm

model = svm.SVC(random_state=0)
# fit() is left as the final bare expression so the notebook displays its result.
model.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=0, shrinking=True,
  tol=0.001, verbose=False)

In [46]:
# Save the model
# scikit-learn deprecated its vendored joblib copy (sklearn.externals.joblib) in
# 0.21 and removed it in 0.23, so prefer the standalone package and fall back to
# the vendored module only on old installations that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# dump() is left as the final bare expression so the notebook shows the written path(s).
joblib.dump(model, "model/svm_model.sav")

['model/svm_model.sav']

In [48]:
# Evaluating the Model
# Load the samples and labels from the test data file, then predict a label for
# every test sample with the currently bound `model`.
X_test, y_test = loadFeatureData("data/feature_vector_test_data.txt")
y_pred = model.predict(X_test)

In [51]:
from sklearn import metrics

# Compare the predicted labels with the true test labels.
# Each line prints one pre-formatted string: passing two arguments to print()
# makes Python 2 print a tuple such as ('Accuracy:', 0.67...), whereas a single
# formatted string renders identically on Python 2 and Python 3.
print("Accuracy: %s" % metrics.accuracy_score(y_test, y_pred))
print("Precision: %s" % metrics.precision_score(y_test, y_pred))
print("Recall: %s" % metrics.recall_score(y_test, y_pred))
print("F1-score: %s" % metrics.f1_score(y_test, y_pred))

('Accuracy:', 0.6754002911208151)
('Precision:', 1.0)
('Recall:', 0.07851239669421488)
('F1-score:', 0.14559386973180077)


In [52]:
from sklearn import tree

# Train a decision tree on the training data.
# random_state is fixed so repeated runs build the same tree (scikit-learn
# permutes features randomly at each split), matching the seeded SVM model.
model = tree.DecisionTreeClassifier(random_state=0)
model = model.fit(X_train, y_train)

# Persist the fitted tree next to the other saved models.
joblib.dump(model, "model/decision_tree_model.sav")

# Score the tree on the test set.
# Each line prints one pre-formatted string so the output is the same on
# Python 2 and Python 3 (a two-argument print() shows a tuple on Python 2).
y_pred = model.predict(X_test)
print("Accuracy: %s" % metrics.accuracy_score(y_test, y_pred))
print("Precision: %s" % metrics.precision_score(y_test, y_pred))
print("Recall: %s" % metrics.recall_score(y_test, y_pred))
print("F1-score: %s" % metrics.f1_score(y_test, y_pred))

('Accuracy:', 0.6593886462882096)
('Precision:', 0.518348623853211)
('Recall:', 0.4669421487603306)
('F1-score:', 0.49130434782608695)


In [53]:
from sklearn.naive_bayes import GaussianNB

# Train a Gaussian naive Bayes classifier on the training data.
model = GaussianNB()
model.fit(X_train, y_train)

# Persist the fitted classifier next to the other saved models.
joblib.dump(model, "model/naive_bayes_model.sav")

# Score the classifier on the test set.
# Each line prints one pre-formatted string so the output is the same on
# Python 2 and Python 3 (a two-argument print() shows a tuple on Python 2).
y_pred = model.predict(X_test)
print("Accuracy: %s" % metrics.accuracy_score(y_test, y_pred))
print("Precision: %s" % metrics.precision_score(y_test, y_pred))
print("Recall: %s" % metrics.recall_score(y_test, y_pred))
print("F1-score: %s" % metrics.f1_score(y_test, y_pred))

('Accuracy:', 0.43377001455604075)
('Precision:', 0.3675675675675676)
('Recall:', 0.8429752066115702)
('F1-score:', 0.5119196988707654)
