In [3]:
#import all necessary library

import torch
from torch.autograd import Variable as V
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from torchvision.datasets.utils import download_file_from_google_drive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import json

#run the model predictions 
device = 'cuda'

# 1. Authenticate and create the PyDrive client
# request access to google drive 
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

#specify the architecture to download and use for predictions 
arch = 'resnet18'
#run the model predictions on the GPU
device = 'cuda'

# load the pre-trained weights
model_file = '%s_places365.pth.tar' % arch
if not os.access(model_file, os.W_OK):
    weight_url = 'http://places2.csail.mit.edu/models_places365/' + model_file
    os.system('wget ' + weight_url)

#model_file = '/content/NormalizedResnet18.pth.tar'
#load the model
model = models.__dict__[arch](num_classes=365)
checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
state_dict = {str.replace(k,'module.',''): v for k,v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict)
model.to(device)
model.eval()


# image transformer to crop, normalize and, and convert to tensor
centre_crop = trn.Compose([
        trn.Resize((256,256)),
        trn.CenterCrop(224),
        trn.ToTensor(),
        trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])



# load the class label with mulse media encoding
file_name = 'categories_places365.txt'

if not os.access(file_name, os.W_OK):
    synset_url = 'https://raw.githubusercontent.com/sextonj5/MulseRepos/master/categories_places365.txt'
    os.system('wget ' + synset_url)
classes = list()


with open(file_name) as class_file:
    for line in class_file:
        classes.append(line.strip().split(' '))
classes = tuple(classes)



In [4]:
!pip install pydub
!mkdir Olfaction/
!mkdir Haptic/

Collecting pydub
  Downloading https://files.pythonhosted.org/packages/7b/d1/fbfa79371a8cd9bb15c2e3c480d7e6e340ed5cc55005174e16f48418333a/pydub-0.24.1-py2.py3-none-any.whl
Installing collected packages: pydub
Successfully installed pydub-0.24.1


In [5]:

def writeHapJSON(start,duration,HapticNum):    
#structure of the JSON output 
    values = {
  "start": HapticNum*10,
  "type": "real",
  "number": HapticNum,
  "effect": "haptic",
  "haptic_effects": [
	{
      "start": start,
      "description":{
          "pattern":[
                     {
                      "type": "custom",
                      "length-ms": duration   
                     }
          ],
          "rate": {
              "frequency":0
          }
      }
    }
  ]
}

#write "values" JSON string to the JSON file 
    with open('Haptic/Hnewt_' + str(HapticNum) + '.json', 'w') as f:
        json.dump(values, f, sort_keys=False, indent=2)


In [12]:
#number of json files produced
#one produced every 10 seconds
jsonNum = 0
#whats in each olfaction slot
fans = ['o','x','d','a']
#list of 10 detections when full sent to the json file function
HapticNum = 0
Olfaction = list()
    
#this function is called when a ten second block of Olfaction data has been 
#created, the 10 seconds of media is passed to this function and stores
#the mulsemedia content to a Json file
def writeOlfJSON(smell,jsonNum):
#arrays detailing how long the smell should be played for and when the smell 
#starts playing inside the 10 second window
    fanCount = [0,0,0,0]    
    fanFirst = [0,0,0,0]
    n = 0
    for i in range(0,10):
        if (smell[i] == fans[0]):
            fanCount[0]+=1
            if (fanFirst[0] == 0):
                fanFirst[0]=i
        elif(smell[i] == fans[1]):
            fanCount[1]+=1
            if (fanFirst[1] == 0):
                fanFirst[1]=i
        elif(smell[i] == fans[2]):
            fanCount[2]+=1
            if (fanFirst[2] == 0):
                fanFirst[2]=i
        elif(smell[i] == fans[3]):
            fanCount[3]+=1
            if (fanFirst[3] == 0):
                fanFirst[3]=i
        else:
            n+=1
    
    #fan number, start, duration all noted for JSON file string
    index = list.index(fanCount,max(fanCount))
    start = fanFirst[index]*1000
    duration = fanCount[index]*1000
    fannumber = index

#structure of the JSON output 
    values = {
  "start": jsonNum*10,
  "type": "real",
  "number": jsonNum,
  "effect": "olfaction",
  "olfaction_effects": [
	{
      "start": start,
      "duration": str(duration),
      "fan_number": str(fannumber)
    }
  ]
}


#write "values" file to the JSON file 
    with open('Olfaction/Onewt_' + str(jsonNum) + '.json', 'w') as f:
        json.dump(values, f, sort_keys=False, indent=2)


#splits up fram into six and makes combined prediction
def Split_Prediction(img):
#the width and height of the cropped images
    h = int(img.size[0]/3)
    w = int(img.size[1]/2)


#crop the image 4 times adding 
    for j in range(0,3):
      for k in range(0,2):
      
        im1 = img.crop((h*(j) , w*k, h*(j+1) , w*(k+1)))
      
        input_img = V(centre_crop(im1).unsqueeze(0))
        
        #flip the back face to the right orientation
        if(j==1 and k==1):
          angle = 90
          im1 = im1.rotate(angle)
          
        # forward pass
        logit = model.forward(input_img.cuda())
        if(j == 0 and k == 0):
          h_xtemp = logit
        
        #Discard the ground and sky faces of the cube map
        elif((j==0 or j == 2) and k==1):
          pass
        #The other 4 faces get passed through the network
        else:
          h_xtemp = logit.add(h_xtemp)

    #the softmax is applied on the final vector
    h_x = F.softmax(h_xtemp, 1).data.squeeze()
    return h_x
    #return h_xtemp


#gets a frame at the time in millisec (sec) passes frame to prediction function

def getFrame(sec):
    #the array that contains 10 seconds worth of olfaction sensory information
    global Olfaction
    #reads a frame from the video at the time "sec" 
    vidcap.set(cv2.CAP_PROP_POS_MSEC,sec)
    hasFrames,image = vidcap.read()
    #image converted from an array to a PIL image 
    im1 = Image.fromarray(image)

    if hasFrames:
        #passes the frame to the prediction function which crops and makes predictions on that frame
        probs, idx = Split_Prediction(im1).sort(0, True)
        #prints the name of the video and the time the frame was taken
        #print('{} prediction on {}'.format(arch,fname) + ' at ' + str(sec/1000) + ' seconds')
        #the smell encoding from this prediction is added to the olfaction array
        Olfaction.extend(classes[idx[0]][2])

        #when 10 predictions have been made dump it to the json file and start
        #filling the olfaction array again. 
        #10 predictions is ten seconds worth of information

        if(len(Olfaction) == 10):
            print(Olfaction)
            #10 seconds of olfaction information is written to a JSON file to store data
            writeOlfJSON(Olfaction,int(sec/10000))
            #once the information is stored in JSON the olfaction array is emptied
            Olfaction = []

        # output the prediction
        #for i in range(0, 3):
         #   print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))

    return hasFrames


 

1.   File is loaded in from drive directory (projectFourthYear)
2.   A frame is parsed every second using the getFrame function

1.   This passes the frame to the prediction function
2.   the prediction is associated with one of the following


Ocean = o
Oak = a
Candy = c
Chococlate = x
Diesel = d
None = n







In [13]:
from moviepy.editor import *
import librosa 
import pydub
import time


#the array that contains 10 seconds worth of olfaction sensory information
Olfaction = list()
HapticNum = 0

local_download_path = os.path.expanduser('~/data')
try:
  os.makedirs(local_download_path)
except: pass

#video in drive folder
file_list = drive.ListFile(
    {'q': "'1V6VLEBW-cyJLBFJe937oDrayvfasg9rO' in parents"}).GetList()
for f in file_list:
  # 3. Create & download by id.
  print('title: %s, id: %s' % (f['title'], f['id']))
  fname = os.path.join(local_download_path, f['title'])
  print('downloading to {}'.format(fname))
  f_ = drive.CreateFile({'id': f['id']})
  f_.GetContentFile(fname)

################################################################################
#read in the video and extract audio
  video = VideoFileClip(fname)
  audio = video.audio 
  audio.write_audiofile("audio.wav") 

#read in audio file
  aud = pydub.AudioSegment.from_wav("audio.wav")
  lengthAud = aud.duration_seconds - aud.duration_seconds%10
  i = 0

  while(i<lengthAud):
#save file into 10 second chunks
    temp = aud[i*1000:(i+10)*1000]
    z = "Haptic/Sound_At_"+str(i) + "_Seconds" + ".wav"
    temp.export( z, format="wav")

    #videoaud is the audio sampled at 40000 hz
    #z is the input sound signal file
    videoaud, sr = librosa.core.load(z,sr=40000)


    # g is a 1x100 vector with the "loudness" of the signal every tenth of a second
    g = librosa.feature.rms(y=videoaud,frame_length=4000, hop_length=4000,center =False)
    start = round((np.argmax(np.squeeze(g))/len(np.squeeze(g)))*10 +i)
    
    #the threshold of the max value must be calibrated
    if(max(np.squeeze(g))>0.51):
      dur =  1000
    else: 
      dur = 0
    
    #writes the haptic data to a JSON file
    HapticNum = HapticNum+1 
    writeHapJSON(start,dur,HapticNum)
    
    i= i+10

################################################################################
  starttime = time.perf_counter()
  vidcap = cv2.VideoCapture(fname)


  #work out duration of video to know when to stop
  fps = vidcap.get(cv2.CAP_PROP_FPS)  
  frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
  duration = int(1000*(frame_count/fps))

  sec = 0
  print(vidcap.get(cv2.CAP_PROP_FPS))
  frameRate = 1000 #1 frame per second
  sec = 1000
  
  while (sec<(duration) and getFrame(sec)):
      #getFrame(sec)
      sec = sec + frameRate
      sec = round(sec, 2)
  

  print("stopped detection")
  endtime = time.perf_counter()
  print(endtime - starttime)
  break

title: RedwoodsWalkAmongGiants(360 Video).mp4, id: 1un6NRGE5cjE1x2gRifzaOgieK5o4d9kO
downloading to /root/data/RedwoodsWalkAmongGiants(360 Video).mp4
[MoviePy] Writing audio in audio.wav


100%|██████████| 2655/2655 [00:00<00:00, 2844.53it/s]

[MoviePy] Done.





30.0083176282629
['n', 'n', 'n', 'n', 'n', 'n', 'n', 'n', 'n', 'x']
['n', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'n', 'n', 'n', 'n', 'n', 'n']
stopped detection
193.00090177700008


In [None]:
print(sec)
print(duration)