In [11]:
from joblib import dump, load
import pickle
# SKLEARN SVM PORTION

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.gaussian_process.kernels import RBF, DotProduct, Matern
import numpy as np
from IPython.display import clear_output, display, HTML, Javascript

# Prepare Data

In [4]:
# Load the pickled validation DataFrame and pull out the target column.
path = 'validation_df_072523.pkl'

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load pickle files that come from a trusted source.
# The context manager closes the file handle as soon as the frame is loaded.
with open(path, 'rb') as pkl_file:
    X = pickle.load(pkl_file)

# For a quick smoke test, subsample here before taking the labels, e.g. X = X[0:10].
y = X['label']

In [5]:
# Add a per-video median timestamp column.
# The column starts as float: the medians written below are floats, and assigning a
# non-integer float into an int64 column is a deprecated implicit upcast in recent pandas.
X['median_time'] = 0.0

ids = np.unique(X['vid_id'])  # all video ids of the dataset, sorted and without repetitions

for vid in ids:  # `vid`, not `id`, so the builtin id() is not shadowed for the rest of the notebook
    vid_match_idx = X['vid_id'] == vid  # boolean mask selecting the rows of this video
    times = X.loc[vid_match_idx, 'time']  # one .loc lookup (rows + column) instead of chained indexing
    X.loc[vid_match_idx, 'median_time'] = np.median(times)  # write the video's median time onto all of its rows


# Build the train / test split from X, separately inside every video:
# rows at or before their video's median time go to train, rows after it go to test.
train_idx = (X['time'] <= X['median_time'])
test_idx = (X['time'] > X['median_time'])

X_train = X.loc[train_idx]  # training rows
y_train = y.loc[train_idx]  # labels matching the training rows
X_test = X.loc[test_idx]    # held-out rows
y_test = y.loc[test_idx]    # labels matching the held-out rows

In [6]:
# TRAIN DATA
# Keep the bookkeeping columns as separate Series, then remove them (together with
# the label and the helper median_time column) so X_train holds only model inputs.
train_imgpath, train_originallabel, train_metadata, train_bboxs = (
    X_train[name] for name in ('img_path', 'original_label', 'meta_data', 'bboxs')
)
X_train = X_train.drop(
    columns=['label', 'original_label', 'img_path', 'meta_data', 'bboxs', 'median_time']
)


# VALIDATION DATA
# Same treatment for the held-out rows.
val_imgpath, val_originallabel, val_metadata, val_bboxs = (
    X_test[name] for name in ('img_path', 'original_label', 'meta_data', 'bboxs')
)
X_test = X_test.drop(
    columns=['label', 'original_label', 'img_path', 'meta_data', 'bboxs', 'median_time']
)


In [7]:
# Rebuild a single validation frame: the model-input columns of X_test plus the
# labels and bookkeeping columns that were split off earlier.
# Note that the 'meta_data' values are stored under the column name 'metadata' here.
data = X_test.assign(
    label=y_test,
    original_label=val_originallabel,
    img_path=val_imgpath,
    metadata=val_metadata,
    bboxs=val_bboxs,
)

# Fit and Save Models

In [8]:
#### ---------- BASE FUNCTIONS -------------

# Dot-product kernel applied to the feature columns by model m1.
kDP_m1_feat = DotProduct(sigma_0=0.4)


def m1(X, Y):
    """Gram matrix of the m1 kernel between the rows of X and the rows of Y.

    k = 0.8 * kDP_feat(sigma_0 = 0.4)

    Only columns 0..255 of each input are used; any further columns are ignored.
    """
    feats_x, feats_y = X[:, :256], Y[:, :256]
    return 0.8 * kDP_m1_feat(feats_x, feats_y)


# RBF kernel applied to the feature columns by model m2.
kRBF_m2_feat = RBF(length_scale=10)


def m2(X, Y):
    """Gram matrix of the m2 kernel between the rows of X and the rows of Y.

    k = 2 * kRBF_feat(lengthscale = 10)

    Only columns 0..255 of each input are used; any further columns are ignored.
    """
    feats_x, feats_y = X[:, :256], Y[:, :256]
    return 2 * kRBF_m2_feat(feats_x, feats_y)


In [9]:
# Model m1: standardize the input columns, then fit an SVC that uses the custom
# m1 kernel with C = 0.08. Pipeline.fit returns the fitted pipeline itself.
svm1 = make_pipeline(StandardScaler(), SVC(C=0.08, kernel=m1)).fit(X_train, y_train)

# Ask the browser to speak a short message once fitting has finished.
display(Javascript("""
  var msg = new SpeechSynthesisUtterance();
  msg.text = "Model done fitting.";
  window.speechSynthesis.speak(msg);
"""))

<IPython.core.display.Javascript object>

In [14]:
# Persist the fitted m1 pipeline with joblib.
# NOTE(review): the pipeline holds a reference to the kernel function m1, so m1
# presumably has to be defined under the same name before the file is loaded — confirm.
dump(svm1, filename='./saved_models/svm_m1.joblib')

['./saved_models/svm_m1.joblib']

In [15]:
# Model m2: standardize the input columns, then fit an SVC that uses the custom
# m2 kernel with C = 0.2. Pipeline.fit returns the fitted pipeline itself.
svm2 = make_pipeline(StandardScaler(), SVC(C=0.2, kernel=m2)).fit(X_train, y_train)

# Ask the browser to speak a short message once fitting has finished.
display(Javascript("""
  var msg = new SpeechSynthesisUtterance();
  msg.text = "Model done fitting.";
  window.speechSynthesis.speak(msg);
"""))

<IPython.core.display.Javascript object>

In [16]:
# Persist the fitted m2 pipeline with joblib.
# NOTE(review): the pipeline holds a reference to the kernel function m2, so m2
# presumably has to be defined under the same name before the file is loaded — confirm.
dump(svm2, filename='./saved_models/svm_m2.joblib')

['./saved_models/svm_m2.joblib']

In [17]:
#### ---------- BASE FUNCTIONS -------------


# m3 (fitted with C = 0.16):
#   k = 0.5 * kDP_feat(sigma_0 = 0.9) + 3 * kRBF_time(lengthscale = 15) * kRBF_vidid(lengthscale = 1e-7)
kDP_m3_feat = DotProduct(sigma_0=0.9)
kRBF_m3_time = RBF(length_scale=15)
kRBF_m3_vidid = RBF(length_scale=1e-7)


def m3(X, Y):
    """Gram matrix of the m3 kernel between the rows of X and the rows of Y.

    Columns 0..255 feed the dot-product kernel, column 256 feeds the "time" RBF
    and column 257 feeds the "vidid" RBF.
    NOTE(review): the meaning of columns 256 / 257 is taken from the kernel names —
    confirm against the column order of the training frame.
    """
    feats_x, feats_y = X[:, :256], Y[:, :256]
    # The kernels expect 2-D input, so the single columns are reshaped to (n, 1).
    time_x, time_y = X[:, 256].reshape(-1, 1), Y[:, 256].reshape(-1, 1)
    vid_x, vid_y = X[:, 257].reshape(-1, 1), Y[:, 257].reshape(-1, 1)
    return 0.5 * kDP_m3_feat(feats_x, feats_y) + 3 * kRBF_m3_time(time_x, time_y) * kRBF_m3_vidid(vid_x, vid_y)


# m4 (fitted with C = 0.2):
#   k = 3 * kRBF_feat(lengthscale = 10) + 1 * kRBF_time(lengthscale = 10) * kRBF_vidid(lengthscale = 1e-7)
kRBF_m4_feat = RBF(length_scale=10)
kRBF_m4_time = RBF(length_scale=10)
kRBF_m4_vidid = RBF(length_scale=1e-7)


def m4(X, Y):
    """Gram matrix of the m4 kernel between the rows of X and the rows of Y.

    Columns 0..255 feed the feature RBF, column 256 feeds the "time" RBF and
    column 257 feeds the "vidid" RBF.
    NOTE(review): the meaning of columns 256 / 257 is taken from the kernel names —
    confirm against the column order of the training frame.
    """
    feats_x, feats_y = X[:, :256], Y[:, :256]
    # The kernels expect 2-D input, so the single columns are reshaped to (n, 1).
    time_x, time_y = X[:, 256].reshape(-1, 1), Y[:, 256].reshape(-1, 1)
    vid_x, vid_y = X[:, 257].reshape(-1, 1), Y[:, 257].reshape(-1, 1)
    return 3 * kRBF_m4_feat(feats_x, feats_y) + 1 * kRBF_m4_time(time_x, time_y) * kRBF_m4_vidid(vid_x, vid_y)


# m5 (fitted with C = 0.03):
#   k = 0.8 * kDP_feat(sigma_0 = 0.9) + 1 * kDP_feat(sigma_0 = 0.9) * kRBF_time(lengthscale = 30) * kRBF_vidid(lengthscale = 1e-7)
kDP_m5_feat = DotProduct(sigma_0 = 0.9)
kRBF_m5_time = RBF(length_scale = 30)
kRBF_m5_vidid = RBF(length_scale = 1e-7)

def m5(X, Y):
    """Gram matrix of the m5 kernel between the rows of X and the rows of Y.

    Columns 0..255 feed the dot-product kernel, column 256 feeds the "time" RBF
    and column 257 feeds the "vidid" RBF.
    NOTE(review): the meaning of columns 256 / 257 is taken from the kernel names —
    confirm against the column order of the training frame.
    """
    X1 = X[:,0:256]
    Y1 = Y[:,0:256]
    # The kernels expect 2-D input, so the single columns are reshaped to (n, 1).
    Xt = np.reshape(X[:,256],(-1,1))
    Yt = np.reshape(Y[:,256],(-1,1))
    Xv = np.reshape(X[:,257],(-1,1))
    Yv = np.reshape(Y[:,257],(-1,1))

    # The dot-product Gram matrix appears in both terms of the kernel; evaluate it
    # once and reuse it instead of computing the same matrix twice.
    feat_gram = kDP_m5_feat(X1,Y1)
    gram_matrix = 0.8 * feat_gram + 1 * feat_gram * kRBF_m5_time(Xt,Yt) * kRBF_m5_vidid(Xv,Yv)

    return gram_matrix

# m6 (fitted with C = 0.1):
#   k = 1.5 * kRBF_feat(lengthscale = 2.0) + 1.0 * kDP_feat(sigma_0 = 1.0) * kRBF_time(lengthscale = 25) * kRBF_vidid(lengthscale = 1e-7)
kRBF_m6_feat = RBF(length_scale=2.0)
kDP_m6_feat = DotProduct(sigma_0=1.0)
kRBF_m6_time = RBF(length_scale=25)
kRBF_m6_vidid = RBF(length_scale=1e-7)


def m6(X, Y):
    """Gram matrix of the m6 kernel between the rows of X and the rows of Y.

    Columns 0..255 feed both the feature RBF and the dot-product kernel,
    column 256 feeds the "time" RBF and column 257 feeds the "vidid" RBF.
    NOTE(review): the meaning of columns 256 / 257 is taken from the kernel names —
    confirm against the column order of the training frame.
    """
    feats_x, feats_y = X[:, :256], Y[:, :256]
    # The kernels expect 2-D input, so the single columns are reshaped to (n, 1).
    time_x, time_y = X[:, 256].reshape(-1, 1), Y[:, 256].reshape(-1, 1)
    vid_x, vid_y = X[:, 257].reshape(-1, 1), Y[:, 257].reshape(-1, 1)
    return 1.5 * kRBF_m6_feat(feats_x, feats_y) + 1.0 * kDP_m6_feat(feats_x, feats_y) * kRBF_m6_time(time_x, time_y) * kRBF_m6_vidid(vid_x, vid_y)


In [18]:
# Model m3: standardize the input columns, then fit an SVC that uses the custom
# m3 kernel with C = 0.16. Pipeline.fit returns the fitted pipeline itself.
svm3 = make_pipeline(StandardScaler(), SVC(C=0.16, kernel=m3)).fit(X_train, y_train)

# Ask the browser to speak a short message once fitting has finished.
display(Javascript("""
  var msg = new SpeechSynthesisUtterance();
  msg.text = "Model done fitting.";
  window.speechSynthesis.speak(msg);
"""))

<IPython.core.display.Javascript object>

In [19]:
# Persist the fitted m3 pipeline with joblib.
# NOTE(review): the pipeline holds a reference to the kernel function m3, so m3
# presumably has to be defined under the same name before the file is loaded — confirm.
dump(svm3, filename='./saved_models/svm_m3.joblib')

['./saved_models/svm_m3.joblib']

In [20]:
# Model m4: standardize the input columns, then fit an SVC that uses the custom
# m4 kernel with C = 0.2. Pipeline.fit returns the fitted pipeline itself.
svm4 = make_pipeline(StandardScaler(), SVC(C=0.2, kernel=m4)).fit(X_train, y_train)

# Ask the browser to speak a short message once fitting has finished.
display(Javascript("""
  var msg = new SpeechSynthesisUtterance();
  msg.text = "Model done fitting.";
  window.speechSynthesis.speak(msg);
"""))

<IPython.core.display.Javascript object>

In [26]:
# Persist the fitted m4 pipeline with joblib.
# NOTE(review): the pipeline holds a reference to the kernel function m4, so m4
# presumably has to be defined under the same name before the file is loaded — confirm.
dump(svm4, filename='./saved_models/svm_m4.joblib')

['./saved_models/svm_m4.joblib']

In [23]:
# Model m5: standardize the input columns, then fit an SVC that uses the custom
# m5 kernel with C = 0.03. Pipeline.fit returns the fitted pipeline itself.
svm5 = make_pipeline(StandardScaler(), SVC(C=0.03, kernel=m5)).fit(X_train, y_train)

# Ask the browser to speak a short message once fitting has finished.
display(Javascript("""
  var msg = new SpeechSynthesisUtterance();
  msg.text = "Model done fitting.";
  window.speechSynthesis.speak(msg);
"""))

<IPython.core.display.Javascript object>

In [27]:
# Persist the fitted m5 pipeline with joblib.
# NOTE(review): the pipeline holds a reference to the kernel function m5, so m5
# presumably has to be defined under the same name before the file is loaded — confirm.
dump(svm5, filename='./saved_models/svm_m5.joblib')

['./saved_models/svm_m5.joblib']

In [25]:
# Model m6: standardize the input columns, then fit an SVC that uses the custom
# m6 kernel with C = 0.1.
svm6 = make_pipeline(StandardScaler(), SVC(kernel=m6, C = 0.1))
svm6.fit(X_train, y_train)

# Ask the browser to speak a short message once fitting has finished.
# The script text had a Python comment and a dump(...) call pasted into the middle of
# `new SpeechSynthesisUtterance()`, which made the JavaScript invalid; it is restored
# here to the same snippet the other fitting cells use.
display(Javascript("""
  var msg = new SpeechSynthesisUtterance();
  msg.text = "Model done fitting.";
  window.speechSynthesis.speak(msg);
"""))

<IPython.core.display.Javascript object>

In [28]:
# Persist the fitted m6 pipeline with joblib.
# NOTE(review): the pipeline holds a reference to the kernel function m6, so m6
# presumably has to be defined under the same name before the file is loaded — confirm.
dump(svm6, filename='./saved_models/svm_m6.joblib')

['./saved_models/svm_m6.joblib']