In [None]:
import re
from urllib.parse import urlparse
from joblib import dump, load
import ast
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score

In [None]:
def preprocess_text(text):
  """Lowercase ``text`` and normalize its whitespace.

  Args:
    text: The string to normalize.

  Returns:
    The lowercased text with leading/trailing whitespace removed and every
    run of whitespace (spaces, tabs, newlines) collapsed to a single space.
  """
  # split() with no argument splits on any whitespace run and drops empty
  # pieces; split(" ") followed by " ".join() would rebuild the same string.
  return " ".join(text.lower().split())

def event_to_text(event):
  """Build a single text document from an event's app, title and url.

  Args:
    event: Any object exposing ``app``, ``title`` and ``url`` attributes
      (for example a DataFrame row passed by ``df.apply(..., axis=1)``).

  Returns:
    The three attributes converted with ``str``, joined by single spaces and
    passed through ``preprocess_text``.
  """
  fields = (event.app, event.title, event.url)
  combined = " ".join(str(field) for field in fields)
  return preprocess_text(combined)

In [None]:
# Read the labeled events and derive the model inputs:
#   x - the text document built from each row by event_to_text
#   y - True when the row's parsed `topics` value has a non-zero length
df = (
    pd.read_csv(
        "data/labeled_events.csv",
        parse_dates=["start", "end"],
        # `topics` is stored as a Python literal in the CSV; parse it back.
        converters={"topics": ast.literal_eval},
    )
    .assign(x=lambda events: events.apply(event_to_text, axis=1))
    .assign(y=lambda events: events["topics"].str.len() > 0)
)
df

In [None]:
# Step 1: Select the features (event text) and the labels
x = df["x"]
y = df["y"]

# Step 2: Split the data into train and test datasets.
# A fixed random_state keeps the split, and therefore the reported accuracy,
# reproducible across kernel restarts.
Train_X, Test_X, Train_Y, Test_Y = model_selection.train_test_split(
    x, y, test_size=0.3, random_state=42
)

# Step 3: Encode the labels.
# The encoder is fitted on the training labels only and that same mapping is
# reused for the test labels; re-fitting on the test split could yield a
# different label -> integer mapping than the one the model was trained with.
Encoder = LabelEncoder()
Train_Y = Encoder.fit_transform(Train_Y)
Test_Y = Encoder.transform(Test_Y)

# Step 4: Vectorize the words by using the TF-IDF vectorizer.
# Fit on the training documents only so the vocabulary and IDF weights do not
# leak information from the held-out test documents into the evaluation.
vectorizer = TfidfVectorizer(max_features=5000)
Train_X_vector = vectorizer.fit_transform(Train_X)
Test_X_vector = vectorizer.transform(Test_X)

# Step 5: Train the model.
# `degree` and `gamma` only apply to non-linear kernels, so they are omitted.
svc = svm.SVC(C=1.0, kernel='linear')
svc.fit(Train_X_vector, Train_Y)

# Step 6: Predict the labels on the validation dataset
predictions = svc.predict(Test_X_vector)

# Step 7: Evaluate the accuracy of the model (signature is y_true, y_pred)
print("Support Vector Classifier accuracy: ", accuracy_score(Test_Y, predictions))

# Step 8: Save the vectorizer and model.
# Written under data/ because that is where the prediction cell loads them from.
dump(vectorizer, 'data/vectorizer.joblib')
dump(svc, 'data/svc.joblib')

In [None]:
# Restore the persisted TF-IDF vectorizer and SVM classifier.
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
vectorizer = load("data/vectorizer.joblib")
svc = load("data/svc.joblib")

def predict_svm(docs):
  """Predict labels for raw text documents with the loaded vectorizer and SVM.

  Args:
    docs: Iterable of strings to classify.

  Returns:
    The result of ``svc.predict`` on the TF-IDF vectors of the preprocessed
    documents, one prediction per input document.
  """
  # Apply the same preprocessing used to build the training text, and actually
  # feed the preprocessed documents (not the raw ones) to the vectorizer.
  processed_docs = [preprocess_text(doc) for doc in docs]
  vector = vectorizer.transform(processed_docs)
  return svc.predict(vector)

# Smoke-test the classifier on a handful of sample app names, titles and hosts.
predict_svm(
    [
        "Calculus",
        "Amazon Shopping",
        "WGU",
        "Facebook",
        "JavaFX",
        "youtube.com",
        "my.wgu.edu",
        "Twitter",
        "Examity",
        "Instagram",
        "zyBooks",
        "news.ycombinator.com",
        "uCertify",
        "Amazon Shopping",
        "devenv.exe",
        "Reddit",
        "WINWORD.exe",
        "Gmail",
        "C191 Operating Systems",
    ]
)