In [1]:
%run imports.py
%matplotlib inline
from torch.utils.tensorboard import SummaryWriter
%load_ext tensorboard

In [2]:
EXP="exp2"
VIDEO="925"

In [3]:
with open("exps/default_config.yaml", "r") as f:
    default_config = yaml.load(f, Loader=yaml.SafeLoader)
exp_config = dict()
if os.path.isfile("exps/" + EXP + "/config.yaml"):
    with open("exps/" + EXP + "/config.yaml", "r") as f:
        exp_config = yaml.load(f, Loader=yaml.SafeLoader)
config = dict()
config.update(default_config)
config.update(exp_config)
print(config)

{'dataset': {'frames_per_clip': 10, 'step_between_clips': 10, 'batch_size': 32, 'transform': {'Normalize': {'mean': 0.449, 'std': 0.226}, 'HorizontalFlip': True, 'Cutout': False}, 'transformOF': {'Normalize': None, 'HorizontalFlip': True, 'Cutout': False}}, 'model': {'phase1': {'epochs': 10, 'lr': 0.005, 'step_size': 5, 'gamma': 0.5}, 'phase2': {'epochs': 45, 'lr': 2e-05, 'step_size': 15, 'gamma': 0.5}}}


In [4]:
tansform_param = config["dataset"]["transform"]

tensor_aug = [transforms.ToTensor()]
if tansform_param["Normalize"]:
    norm = tansform_param["Normalize"]
    tensor_aug.append(transforms.Normalize(norm["mean"],norm["std"]))
    
%run preprocessing/CustomTransform.py
tansformOF_param = config["dataset"]["transformOF"]

tensor_augOF = [transforms.ToTensor()]
if tansformOF_param["Normalize"]:
    norm = tansformOF_param["Normalize"]
    tensor_augOF.append(transforms.Normalize(norm["mean"],norm["std"]))
    
alb_rescale = alb.Resize(244, 244, always_apply=True)


test_transforms = transforms.Compose(
  [AlbuWrapperNumpy(alb_rescale), *tensor_aug])


test_transformsOF = transforms.Compose(
  [AlbuWrapperNumpy(alb_rescale), *tensor_augOF])

In [5]:
import torchvision.models as models
mobilenet = models.mobilenet_v2(pretrained=True)
mobilenet
# average input channels of conv layer and return new model
def replace_conv(model, inplanes):
    conv_weights = list(model.parameters())[0].clone().detach().numpy()
    new_conv_weights = conv_weights.mean(axis=1)
    new_conv_weights = np.repeat(new_conv_weights[:, np.newaxis, :, :], inplanes, axis=1)
    new_conv = torch.nn.Conv2d(inplanes, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    new_conv.weight = torch.nn.Parameter(torch.from_numpy(new_conv_weights))
    model._modules['features'][0][0] = new_conv
    return model

def replace_classifier(model, outplanes):
    classifier = torch.nn.Linear(in_features=1280, out_features=outplanes, bias=True)
    model._modules['classifier'][1] = classifier
    return model

def get_model(freeze=True, inplanes=10, outplanes=2, pretrained=True,
              start_model=None, model_name="model"):
    model = start_model
    if start_model is None:
        model = models.mobilenet_v2(pretrained=pretrained)

    if inplanes != model._modules["features"][0][0].in_channels:
        model = replace_conv(model, inplanes)
        
    if outplanes != model._modules["classifier"][1].out_features:
        model = replace_classifier(model, outplanes)
        
    for p in model.features.parameters():
        p.requires_grad = (not freeze)
    input_batch = torch.ones((batch_size, inplanes, 32, 32))
    writer = SummaryWriter(log_dir=f"exps/{EXP}/logs/{model_name}")
    writer.add_graph(model, input_batch)
    return model

def toggle_freeze(model, freeze):
    for p in model.features.parameters():
        p.requires_grad = (not freeze)
    return model

In [63]:
frames_per_clip = config["dataset"]["frames_per_clip"]
step_between_clips = config["dataset"]["step_between_clips"]
batch_size = config["dataset"]["batch_size"]

OUTPUT="project2.mp4"

PATH = f"exps/{EXP}/"
MODEL_NAME="of_model"
MODEL_FILE=f"{MODEL_NAME}_best.pth"
# MODEL_FILE=f"{MODEL_NAME}_final.pth"

classes = ["Empty", "Standing", "Sitting", "Lying", "Bending", "Crawling"]
model = get_model(inplanes=frames_per_clip*2, outplanes=6, model_name=MODEL_NAME)
if os.path.isfile(os.path.join(PATH, MODEL_FILE)):
    model.load_state_dict(torch.load(os.path.join(PATH, MODEL_FILE)))

    
    
def put_text(img, label, prob):
    font                   = cv2.FONT_HERSHEY_SIMPLEX
    bottomLeftCornerOfText = (10,200)
    fontScale              = 1
    fontColor              = (255,255,0)
    lineType               = 2

    cv2.putText(img,f'{label} {round(prob * 100,2)}%', 
        bottomLeftCornerOfText, 
        font, 
        fontScale,
        fontColor,
        lineType)
    return img

img_array = []
BASE_RGB_PATH = f"data/FD/{VIDEO}/rgb"
BASE_FLOW_PATH = f"data/FD/{VIDEO}/flow"
BASE_CLIP_PATH = f"data/FD/{VIDEO}/clip_{frames_per_clip}_{step_between_clips}"
out = cv2.VideoWriter(OUTPUT,cv2.VideoWriter_fourcc(*'MP4V'), 20, (320,240))
label, conf = 0, 0
for filename in os.listdir(BASE_RGB_PATH):
    filepath = os.path.join(BASE_RGB_PATH, filename)
    if not os.path.isfile(filepath):
        continue
    idx = int(filename[-8:-4])
    img = cv2.imread(filepath)
    height, width, layers = img.shape
#     size = (width,height)
    if idx % 10 == 1:
        sample_file = os.path.join(BASE_CLIP_PATH, str(idx//10 * 10) + ".npz")
        if os.path.isfile(sample_file):
            data = np.load(sample_file)["arr_0"].astype(np.float32)
            data = test_transformsOF.__call__(data)
            data = data.unsqueeze(0)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            prob = torch.nn.functional.softmax(output)
            label = classes[pred.item()]
            conf = prob[0][pred.item()].item()
            print(label, conf)
    img_array.append(put_text(img, label, conf))


for i in range(len(img_array)):
    out.write(img_array[i])
out.release()

Empty 0.32879552245140076
Empty 0.5249453186988831
Empty 0.40442293882369995
Empty 0.3521553575992584
Empty 0.3650844395160675
Empty 0.4276222288608551
Empty 0.4924636483192444
Empty 0.4962003827095032
Empty 0.5867840647697449
Standing 0.48491495847702026
Standing 0.43587663769721985
Standing 0.4332454204559326
Standing 0.4376181662082672
Standing 0.4845809042453766
Sitting 0.5160655975341797
Sitting 0.3991531729698181
Sitting 0.4148784875869751
Sitting 0.37118563055992126
Sitting 0.41199666261672974
Sitting 0.5983976721763611
Sitting 0.36824700236320496
Sitting 0.4902385473251343
Standing 0.5197858810424805
Standing 0.5240158438682556
Standing 0.4002998173236847
Standing 0.45862990617752075
Standing 0.35471057891845703
Empty 0.4100712537765503
Empty 0.543353796005249
Standing 0.4462784230709076
Standing 0.6744210720062256
Standing 0.39024409651756287
Sitting 0.45892533659935
Sitting 0.4997294843196869
Lying 0.5590689182281494
Lying 0.3891603648662567
Lying 0.6464033126831055
Lying 0.3