In [None]:
import numpy as np
import os
import _pickle as pkl
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics.pairwise import chi2_kernel
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Root of the per-modality features and destination of the fused features.
FEATURE_DIR = "features"
EARLY_FUSION_DIR = "features/early_fusion"


def _read_video_list(path):
    """Return the video ids stored one per line in `path`.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing empty line does not turn into an empty video id.
    """
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]


def _read_label_map(path):
    """Return a {video id: label} dict parsed from `path`.

    Each non-blank line must hold exactly two whitespace-separated fields
    (video id, then label); anything else raises ValueError on unpacking.
    """
    labels = {}
    with open(path) as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines instead of failing the unpack
            video, label = line.strip().split()
            labels[video] = label
    return labels


all_videos = _read_video_list("list/all.video")
train_videos = _read_video_list('list/train.video')
val_videos = _read_video_list('list/val.video')
test_videos = _read_video_list('../all_test.video')

# train labels
train_labels = _read_label_map("../all_trn.lst")

# val labels
val_labels = _read_label_map("../all_val.lst")

## Generate and store early fusion features

Early fusion features are generated by concatenating the SoundNet, ResNet50 and Places features of each video into a single vector.

In [None]:
# Build one early-fusion vector per video by concatenating its SoundNet,
# ResNet50 and Places features, and store it as <EARLY_FUSION_DIR>/<video>.npy.
os.makedirs(EARLY_FUSION_DIR, exist_ok=True)  # np.save does not create missing directories

for video in tqdm(all_videos):
    soundnet_file = os.path.join(FEATURE_DIR, "soundnet", video + ".feats")
    resnet_file = os.path.join(FEATURE_DIR, "resnet50", video + ".npy")
    places_file = os.path.join(FEATURE_DIR, "places", video + ".npy")

    # SoundNet features are ';'-separated text; the other two are .npy arrays.
    soundnet_feature = np.loadtxt(soundnet_file, delimiter=";")
    resnet_feature = np.load(resnet_file)
    places_feature = np.load(places_file)

    early_fusion = np.concatenate((soundnet_feature, resnet_feature, places_feature))
    np.save(os.path.join(EARLY_FUSION_DIR, video + ".npy"), early_fusion)

In [None]:
# Sanity check: shape of the SoundNet feature of the last video processed by the fusion loop.
soundnet_feature.shape

### Create train and validation data for training classifiers

In [None]:
def load_data(video_list,labels_map=None):
    """Load the stored early-fusion features (and optionally labels) of a list of videos.

    Args:
        video_list: iterable of video ids; each id must have a matching
            ``<EARLY_FUSION_DIR>/<id>.npy`` file.
        labels_map: optional ``{video id: label}`` dict. When it is not None,
            a label is looked up for every video in ``video_list``.

    Returns:
        The features stacked with ``np.vstack`` in ``video_list`` order, or the
        tuple ``(features, labels)`` with ``labels`` as a NumPy array when
        ``labels_map`` is given.

    Raises:
        KeyError: if ``labels_map`` is given but has no entry for a video.
        ValueError: if ``video_list`` is empty (nothing to stack).
    """
    # Test identity, not truthiness: an empty dict must not silently switch
    # the return type from a tuple to a bare array.
    with_labels = labels_map is not None

    features = []
    labels = []
    for file in tqdm(video_list):
        features.append(np.load(os.path.join(EARLY_FUSION_DIR, file + ".npy")))
        if with_labels:
            labels.append(labels_map[file])

    features = np.vstack(features)
    if with_labels:
        return features, np.array(labels)
    return features
    
# Feature matrices and label arrays for the train and validation video lists.
train_x, train_y = load_data(train_videos,train_labels)
val_x, val_y = load_data(val_videos,val_labels)    

## Downsample the NULL class to match the combined number of P00x event examples

In [None]:
# Combine train and validation set into a single train dataset
combine = True

# Fixed seed so that the sampled NULL subset (and every shuffle below) is the
# same on each Restart & Run All.
DOWNSAMPLE_SEED = 0
rng = np.random.RandomState(DOWNSAMPLE_SEED)

if combine:
    all_train_x = np.vstack([train_x, val_x])
    all_train_y = np.concatenate((train_y, val_y))

    # Shuffle dataset (features and labels with the same permutation)
    order = rng.permutation(all_train_x.shape[0])
    all_train_x = all_train_x[order]
    all_train_y = all_train_y[order]
else:
    all_train_x = train_x
    all_train_y = train_y

is_null = all_train_y == 'NULL'

positive_examples = all_train_x[~is_null]
positive_labels = all_train_y[~is_null]

# Keep a random subset of NULL rows, at most as many as there are non-NULL
# rows. Selecting by index keeps each feature row paired with its own label.
null_rows = rng.permutation(np.flatnonzero(is_null))[:len(positive_labels)]
negative_examples = all_train_x[null_rows]
negative_labels = all_train_y[null_rows]

combined_data = np.vstack((positive_examples, negative_examples))
combined_labels = np.concatenate((positive_labels, negative_labels))

# Shuffle the dataset
order = rng.permutation(len(combined_data))
combined_data = combined_data[order]
combined_labels = combined_labels[order]

In [None]:
# Train one binary (event vs. everything else) classifier per entry in EVENTS
# and pickle each of them to MODEL_DIR.
MODEL_USED = 'rf'  # 'rf' (random forest) or 'mlp'
MODEL_DIR = "models"
MODEL_NAME = 'ef_exp1'
EVENTS = ["NULL","P001","P002","P003"]
RANDOM_STATE = 0  # fixed so that re-running the cell reproduces the same models

# MLP parameters
hidden_layers = (2048,)  # one hidden layer; the trailing comma makes this a tuple, not an int
lr = 1e-3
batch_size = 150
alpha = 1e-5
loss = []  # final training loss per event, filled only when MODEL_USED == 'mlp'

# Random Forest params
num_estimators = 500
max_depth = 30


def _build_model():
    """Return a fresh, unfitted classifier of the kind selected by MODEL_USED."""
    if MODEL_USED == 'rf':
        # GradientBoostingClassifier accepts the same n_estimators/max_depth
        # arguments and can be swapped in here.
        return RandomForestClassifier(n_estimators=num_estimators,
                                      max_depth=max_depth,
                                      random_state=RANDOM_STATE)
    if MODEL_USED == 'mlp':
        return MLPClassifier(hidden_layer_sizes=hidden_layers,
                             alpha=alpha,
                             batch_size=batch_size,
                             learning_rate_init=lr,
                             max_iter=2000,
                             verbose=False,
                             random_state=RANDOM_STATE)
    # Fail loudly instead of fitting a stale or undefined `model`.
    raise ValueError("Unknown MODEL_USED {!r}; expected 'rf' or 'mlp'".format(MODEL_USED))


os.makedirs(MODEL_DIR, exist_ok=True)  # open(..., "wb") does not create directories

for event in tqdm(EVENTS):
    # Binary target: 1 where the sample belongs to this event, 0 otherwise.
    event_y = (combined_labels == event).astype('int')

    model = _build_model()
    model.fit(combined_data, event_y)

    if MODEL_USED == 'mlp':
        loss.append(model.loss_)
        print("Event {} loss : {} ".format(event, model.loss_))

    # Save model
    model_path = os.path.join(MODEL_DIR, MODEL_NAME + "_" + event + ".model")
    with open(model_path, "wb") as o:
        pkl.dump(model, o)

In [None]:
PRED_DIR = 'pred/EF'
MODE = 'EF'


def _load_feature_matrix(videos):
    """Stack the stored early-fusion vectors of `videos` into one (n_videos, dim) matrix."""
    rows = []
    for video in tqdm(videos):
        feat = np.load(os.path.join(EARLY_FUSION_DIR, video + ".npy"), encoding='bytes')
        rows.append(np.reshape(feat, (1, -1)))
    if not rows:
        return np.empty((0, 0))
    return np.vstack(rows)


def _positive_scores(model, feats):
    """Return the predicted probability of class 1 for every row of `feats`."""
    if len(feats) == 0:
        return np.zeros(0)
    proba = model.predict_proba(feats)
    # Look the column up instead of assuming it is index 1: a model fitted on
    # a single class has only one probability column.
    classes = list(model.classes_)
    if 1 not in classes:
        return np.zeros(len(feats))
    return proba[:, classes.index(1)]


def _write_scores(path, scores):
    """Write one score per line to `path`."""
    with open(path, "w") as o:
        for score in scores:
            o.write("{}\n".format(score))


os.makedirs(PRED_DIR, exist_ok=True)

# The features do not depend on the event, so load them once rather than once
# per event, and score each split with a single predict_proba call.
test_feats = _load_feature_matrix(test_videos)
val_feats = _load_feature_matrix(val_videos)

val_scores = {}
for event in EVENTS:
    # Load model for event and predict
    model_path = os.path.join(MODEL_DIR, MODEL_NAME + "_" + event + ".model")
    # NOTE: unpickling executes arbitrary code; only load model files that
    # this notebook produced itself.
    with open(model_path, "rb") as l:
        model = pkl.load(l, encoding='bytes')

    test_pred = os.path.join(PRED_DIR, MODE + "_" + event + ".csv")
    val_pred = os.path.join(PRED_DIR, MODE + "_val_" + event + ".csv")

    _write_scores(test_pred, _positive_scores(model, test_feats))
    val_scores[event] = _positive_scores(model, val_feats)
    _write_scores(val_pred, val_scores[event])

# Per-class validation accuracy: a video is assigned to the event whose model
# gives it the highest score. Ground truth comes from val_labels keyed by
# video id, so it is aligned with the val_videos order the scores were
# computed in.
score_matrix = np.column_stack([val_scores[event] for event in EVENTS])
predicted = np.argmax(score_matrix, axis=1)

hits = np.zeros(len(EVENTS))
total = np.zeros(len(EVENTS))
for video, pred in zip(val_videos, predicted):
    truth_idx = EVENTS.index(val_labels[video])
    total[truth_idx] += 1
    if pred == truth_idx:
        hits[truth_idx] += 1

# NaN (instead of a divide-by-zero warning) for classes without validation samples.
acc = np.divide(hits, total, out=np.full(len(EVENTS), np.nan), where=total > 0)
print("\n".join("Event {} val accuracy : {:1.2f}".format(event, event_acc)
                for event, event_acc in zip(EVENTS, acc)) + "\n")

# Write params in log
# NOTE(review): only indices 0-2 are logged, which with the four entries in
# EVENTS means NULL/P001/P002 and leaves out P003. Left unchanged to keep the
# existing log column layout; confirm which columns are intended.
with open("logs.txt","a") as log:
    if MODEL_USED == 'mlp':
        log.write("{0}#{1}#{2}#{3}#{4}#{5:0.2f}#{6:0.2f}#{7:0.2f}#{8:0.2f}#{9:0.2f}#{10:0.2f}\n".format(
            MODEL_USED, hidden_layers, lr, batch_size, alpha,
            loss[0], loss[1], loss[2], acc[0], acc[1], acc[2]))
    elif MODEL_USED == 'rf':
        log.write("{0}#{1}#{2}#{3:0.2f}#{4:0.2f}#{5:0.2f}\n".format(
            MODEL_USED, num_estimators, max_depth, acc[0], acc[1], acc[2]))


In [None]:
# Run the external eval.py script once per event on the validation predictions
# (arguments: the event's validation label list, then its prediction CSV under pred/EF).
!python eval.py list/P001_val_label pred/EF/EF_val_P001.csv
!python eval.py list/P002_val_label pred/EF/EF_val_P002.csv
!python eval.py list/P003_val_label pred/EF/EF_val_P003.csv

In [None]:
def _read_scores(path):
    """Return the scores stored one per line in `path` as a list of floats."""
    with open(path) as f:
        return [float(line.strip()) for line in f if line.strip()]


# One column of test scores per event, in EVENTS order.
test_scores = [_read_scores(os.path.join(PRED_DIR, MODE + "_" + event + ".csv"))
               for event in EVENTS]
for event, scores in zip(EVENTS, test_scores):
    if len(scores) != len(test_videos):
        raise ValueError("{} scores for event {} but {} test videos".format(
            len(scores), event, len(test_videos)))

with open((MODE+"_kaggle_prediction.csv"),"w") as o:
    o.write("VideoID,Label\n")
    for row, video in enumerate(tqdm(test_videos)):
        # The submitted label is the position in EVENTS of the highest-scoring event.
        pred = np.argmax([scores[row] for scores in test_scores])
        o.write("{},{}\n".format(video,pred))