In [1]:
import numpy as np
import os
import _pickle as pkl
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics.pairwise import chi2_kernel
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
FEATURE_DIR = "features"
FUSION_DIR = "features/late_fusion"
with open("list/all.video") as f:
    all_videos = [l.strip() for l in f.readlines()]

with open('list/train.video') as f:
    train_videos = [l.strip() for l in f.readlines()]
    
with open('list/val.video') as f:
    val_videos = [l.strip() for l in f.readlines()]

with open('../all_test.video') as f:
    test_videos = [l.strip() for l in f.readlines()]
    
# train labels
train_labels = {}
with open("../all_trn.lst") as f:
    for line in f.readlines():
        file, label = line.strip().split()
        train_labels[file] = label
        
# val labels
val_labels = {}
with open("../all_val.lst") as f:
    for line in f.readlines():
        file, label = line.strip().split()
        val_labels[file] = label

## Generate individual features and train classifiers

In [3]:
def load_features(video_list,feature_type):
    features = [] 
    for file in tqdm(video_list):
        if feature_type == 'soundnet':
            feature_file = os.path.join(FEATURE_DIR,"soundnet",file+".feats")
            with open(feature_file,"rb") as f:
                features.append(np.loadtxt(f,delimiter=';'))
        elif feature_type == 'resnet50':
            feature_file = os.path.join(FEATURE_DIR,"resnet50",file+".npy")   
            with open(feature_file,"rb") as f:
                features.append(np.load(f))
        else:
            feature_file = os.path.join(FEATURE_DIR,"places",file+".npy")   
            with open(feature_file,"rb") as f:
                features.append(np.load(f))
                
    features = np.vstack(features)
    return features

def load_labels(video_list,label_map):
    labels = []
    for video in tqdm(video_list):
        labels.append(label_map[video])
        
    return np.array(labels)

train_soundnet_features = load_features(train_videos,'soundnet')
train_resnet_features = load_features(train_videos,'resnet50')
train_places_features = load_features(train_videos,'places')

train_y = load_labels(train_videos,train_labels)

val_soundnet_features = load_features(val_videos,'soundnet')
val_resnet_features = load_features(val_videos,'resnet50')
val_places_features = load_features(val_videos,'places')

val_y = load_labels(val_videos,val_labels)

test_soundnet_features = load_features(test_videos,'soundnet')
test_resnet_features = load_features(test_videos,'resnet50')
test_places_features = load_features(test_videos,'places')

100%|██████████| 836/836 [00:00<00:00, 1049.74it/s]
100%|██████████| 836/836 [00:00<00:00, 3271.30it/s]
100%|██████████| 836/836 [00:00<00:00, 3240.58it/s]
100%|██████████| 836/836 [00:00<00:00, 766267.08it/s]
100%|██████████| 400/400 [00:00<00:00, 1030.24it/s]
100%|██████████| 400/400 [00:00<00:00, 3240.33it/s]
100%|██████████| 400/400 [00:00<00:00, 3272.84it/s]
100%|██████████| 400/400 [00:00<00:00, 282730.30it/s]
100%|██████████| 1699/1699 [00:01<00:00, 1049.17it/s]
100%|██████████| 1699/1699 [00:00<00:00, 3169.17it/s]
100%|██████████| 1699/1699 [00:00<00:00, 3240.69it/s]


In [22]:
def downsample_data(train_x,train_y,val_x,val_y,combine):
#     print("Downsampling")
# Combine train and validation set into a single train dataset
    if combine:
        all_train_x = np.vstack([train_x,val_x])
        all_train_y = np.concatenate((train_y,val_y))

        # Shuffle dataset
        idxs = [i for i in range(all_train_x.shape[0])]
        np.random.shuffle(idxs)

        all_train_x = all_train_x[idxs]
        all_train_y = all_train_y[idxs]
    else:
        all_train_x = train_x
        all_train_y = train_y

    positive_examples = all_train_x[all_train_y!='NULL']
    negative_examples = all_train_x[all_train_y=='NULL']

    positive_labels = all_train_y[all_train_y!='NULL']
    negative_labels = all_train_y[all_train_y == 'NULL']

    np.random.shuffle(negative_examples)
    negative_examples = negative_examples[:len(positive_labels)]
    negative_labels = negative_labels[:len(positive_labels)]

    combined_data = np.vstack((positive_examples,negative_examples))
    combined_labels = np.concatenate((positive_labels,negative_labels))
    
    return combined_data,combined_labels

COMBINE = True
soundnet_x,soundnet_y = downsample_data(train_soundnet_features,train_y,val_soundnet_features,val_y,combine=COMBINE)
resnet_x, resnet_y = downsample_data(train_resnet_features,train_y,val_resnet_features,val_y,combine=COMBINE)
places_x, places_y = downsample_data(train_places_features,train_y,val_places_features,val_y,combine=COMBINE)

# Shuffle the dataset
idxs = [i for i in range(len(soundnet_x))]
np.random.shuffle(idxs)

soundnet_x = soundnet_x[idxs]
soundnet_y = soundnet_y[idxs]

resnet_x = resnet_x[idxs]
resnet_y = resnet_y[idxs]

places_x = places_x[idxs]
places_y = places_y[idxs]

print("Downsampling complete")

Downsampling complete


In [None]:
MODEL_USED = 'mlp'
MODEL_DIR = "models"
MODEL_NAME = 'lf_exp1'
EVENTS = ["NULL","P001","P002","P003"]
EVENTS_MAP = {"NULL":0,"P001":1,"P002":2,"P003":3}

def train_classifier(combined_data,combined_labels):
# Train 3 separate models for each event
# MLP parameters


    hidden_layers = (2048,4096,2048)
    lr = 1e-3
    batch_size = 100
    alpha = 1e-5
    loss = []


    # Random Forest params
    num_estimators = 500
    max_depth = 30
    event_y = np.array([EVENTS_MAP[k] for k in combined_labels])
    
    if MODEL_USED == 'rf':
        model = RandomForestClassifier(n_estimators=num_estimators,max_depth=max_depth)
#         model = GradientBoostingClassifier(n_estimators=num_estimators,max_depth=max_depth)
    elif MODEL_USED == 'mlp':
        model = MLPClassifier(hidden_layer_sizes=hidden_layers,
                         alpha=alpha,
                         batch_size=batch_size,
                         learning_rate_init=lr,
                         max_iter=2000,
                            verbose=False,
                         )
    model.fit(combined_data,event_y)
    if MODEL_USED== 'mlp':
        loss.append(model.loss_)
        print("Loss : {} ".format(model.loss_))
        
    return model

soundnet_classifier = train_classifier(soundnet_x,soundnet_y)
resnet_classifier = train_classifier(resnet_x,resnet_y)
places_classifier = train_classifier(places_x,places_y)


In [None]:
soundnet_post_features = soundnet_classifier.predict_log_proba(soundnet_x)
resnet_post_features = resnet_classifier.predict_log_proba(resnet_x)
places_post_features = places_classifier.predict_log_proba(places_x)

In [None]:
post_features = np.hstack((soundnet_post_features,resnet_post_features,places_post_features))
post_labels = soundnet_y

## Train final - late fusion classifier

In [None]:
# Train 3 separate models for each event
# MLP parameters
MODEL_USED = 'mlp'
MODEL_DIR = "models"
MODEL_NAME = 'lf_exp1'
EVENTS = ["NULL","P001","P002","P003"]

hidden_layers = (1024)
lr = 1e-3
batch_size = 100
alpha = 1e-5
loss = []


# Random Forest params
num_estimators = 500
max_depth = 20

for event in tqdm(EVENTS[:]):
    # Train for event
    event_y = (post_labels==event).astype('int')
    if MODEL_USED == 'rf':
        model = RandomForestClassifier(n_estimators=num_estimators,max_depth=max_depth)
#         model = GradientBoostingClassifier(n_estimators=num_estimators,max_depth=max_depth)
    elif MODEL_USED == 'mlp':
        model = MLPClassifier(hidden_layer_sizes=hidden_layers,
                         alpha=alpha,
                         batch_size=batch_size,
                         learning_rate_init=lr,
                         max_iter=2000,
                            verbose=False,
                         )
    model.fit(post_features,event_y)
    if MODEL_USED== 'mlp':
        loss.append(model.loss_)
        print("Event {} loss : {} ".format(event,model.loss_))
    # Save model
    model_path = os.path.join(MODEL_DIR,MODEL_NAME+"_" +event+".model")
    with open(model_path,"wb") as o:
        pkl.dump(model,o)

## Predict on test and validation set

In [64]:
PRED_DIR = 'pred/LF'
MODE = 'LF'

for event in EVENTS[:]:
    # Load model for event and predict
    model_path = os.path.join(MODEL_DIR,MODEL_NAME+"_" +event+".model")
    with open(model_path,"rb") as l:
        model = pkl.load(l,encoding='bytes')
        val_pred = os.path.join(PRED_DIR,MODE+"_val_"+event+".csv")
        test_pred = os.path.join(PRED_DIR,MODE+"_"+event+".csv")
        
        with open(test_pred,"w") as o: 
            test_soundnet_post_features = soundnet_classifier.predict_log_proba(test_soundnet_features)
            test_resnet_post_features = resnet_classifier.predict_log_proba(test_resnet_features)
            test_places_post_features = places_classifier.predict_log_proba(test_places_features)
            
            test_post_features = np.hstack((test_soundnet_post_features,test_resnet_post_features,test_places_post_features))
            
            prediction = model.predict_proba(test_post_features)
            for p in tqdm(prediction):
#                 print(p)
                o.write("{}\n".format(p[1]))
#           prediction = model.decision_function(feat)
#           o.write("{}\n".format(prediction[0]))


                    
        with open(val_pred,"w") as o: 
            val_soundnet_post_features = soundnet_classifier.predict_log_proba(val_soundnet_features)
            val_resnet_post_features = resnet_classifier.predict_log_proba(val_resnet_features)
            val_places_post_features = places_classifier.predict_log_proba(val_places_features)
            
            val_post_features = np.hstack((val_soundnet_post_features,val_resnet_post_features,val_places_post_features))
            
            prediction = model.predict_proba(val_post_features)
            for p in tqdm(prediction):
                o.write("{}\n".format(p[1]))
#               prediction = model.decision_function(feat)
#               o.write("{}\n".format(prediction[0]))
                     
p0 = open(PRED_DIR + "/" + MODE + "_val_NULL.csv").readlines()                    
p1 = open(PRED_DIR + "/" + MODE + "_val_P001.csv").readlines()
p2 = open(PRED_DIR + "/" + MODE + "_val_P002.csv").readlines()
p3 = open(PRED_DIR + "/" + MODE + "_val_P003.csv").readlines()

p0 = [float(p.strip()) for p in p0]
p1 = [float(p.strip()) for p in p1]
p2 = [float(p.strip()) for p in p2]
p3 = [float(p.strip()) for p in p3]

acc = [0,0,0,0]
total = [0,0,0,0]

THRESHOLD = 0
correct = 0
with open("../all_val.lst") as f:
    lines = f.readlines()
    for idx,line in enumerate(tqdm(lines)):
        filename, truth = line.strip().split()
        label = [p0[idx],p1[idx],p2[idx],p3[idx]]
        pred = np.argmax(label)
        
        if EVENTS[pred] == truth:
            acc[pred] += 1
        total[EVENTS.index(truth)] += 1
            
acc = np.array(acc)/np.array(total)
print("Event NULL val accuracy : {0:1.2f}\nEvent P001 val accuracy : {0:1.2f}\nEvent P002 val accuracy : {1:1.2f}\nEvent P003 val accuracy : {2:1.2f}\n"
      .format(acc[0],acc[1],acc[2],acc[3]))

# Write params in log
with open("logs.txt","a") as log:
    if MODEL_USED == 'mlp':
        log.write("{0}#{1}#{2}#{3}#{4}#{5:0.2f}#{6:0.2f}#{7:0.2f}#{8:0.2f}#{9:0.2f}#{10:0.2f}\n".format(MODEL_USED,hidden_layers,lr,batch_size,
                                                       alpha,loss[0],loss[1],loss[2],acc[0],acc[1],acc[2]))
    elif MODEL_USED == 'rf':
        log.write("{0}#{1}#{2}#{3:0.2f}#{4:0.2f}#{5:0.2f}\n".format(MODEL_USED,num_estimators,max_depth,acc[0],acc[1],acc[2]))


100%|██████████| 1699/1699 [00:00<00:00, 312042.85it/s]
100%|██████████| 400/400 [00:00<00:00, 264166.52it/s]
100%|██████████| 1699/1699 [00:00<00:00, 292653.90it/s]
100%|██████████| 400/400 [00:00<00:00, 278136.87it/s]
100%|██████████| 1699/1699 [00:00<00:00, 301839.23it/s]
100%|██████████| 400/400 [00:00<00:00, 276395.65it/s]
100%|██████████| 1699/1699 [00:00<00:00, 305370.35it/s]
100%|██████████| 400/400 [00:00<00:00, 286496.17it/s]
100%|██████████| 400/400 [00:00<00:00, 112147.17it/s]

Event NULL val accuracy : 0.84
Event P001 val accuracy : 0.84
Event P002 val accuracy : 1.00
Event P003 val accuracy : 1.00






In [None]:
!python eval.py list/P001_val_label pred/LF/LF_val_P001.csv
!python eval.py list/P002_val_label pred/LF/LF_val_P002.csv
!python eval.py list/P003_val_label pred/LF/LF_val_P003.csv

In [66]:
p0 = open(PRED_DIR + "/" + MODE + "_NULL.csv").readlines()
p1 = open(PRED_DIR + "/" + MODE + "_P001.csv").readlines()
p2 = open(PRED_DIR + "/" + MODE + "_P002.csv").readlines()
p3 = open(PRED_DIR + "/" + MODE + "_P003.csv").readlines()

p0 = [float(p.strip()) for p in p0]
p1 = [float(p.strip()) for p in p1]
p2 = [float(p.strip()) for p in p2]
p3 = [float(p.strip()) for p in p3]

THRESHOLD = 0
with open((MODE+"_kaggle_prediction.csv"),"w") as o:
    o.write("VideoID,Label\n")
    for idx,video in enumerate(tqdm(test_videos)):
        label = [p0[idx],p1[idx],p2[idx],p3[idx]]
        idx = np.argmax(label)
        pred = idx
        o.write("{},{}\n".format(video,pred))

100%|██████████| 1699/1699 [00:00<00:00, 89532.03it/s]
