In [1]:
import glob
import os.path
import torch.utils.data as data

In [6]:
class GenericDataset(data.Dataset):
    """
        A generic dataset folder where elements are organized this way :
            folder/class1/element1
            folder/class1/element2
                ...
            folder/class2/element1
                ...

        Only the immediate sub-directories of ``folderName`` are scanned; the
        base name of each sub-directory is used as the class of the files it
        contains. Files are matched non-recursively with the pattern
        ``'*' + ext`` for every entry of ``exts``.

        Provide fileOperation and targetOperation to open file and target accordingly

        Parameters:
            folderName      : root folder containing one sub-folder per class.
            exts            : iterable of file-name suffixes to keep (a single
                              string is accepted and treated as one suffix).
            fileOperation   : optional callable applied to the file path in
                              ``__getitem__``; ``None`` returns the path itself.
            targetOperation : optional callable applied to the class name in
                              ``__getitem__``; ``None`` returns the name itself.

        Attributes:
            seqList   : list of matched file paths, in index order.
            classList : dict mapping each file path to its class name.
    """

    def __init__(self, folderName, exts=('.avi',), fileOperation=None, targetOperation=None):
        self.fileOperation = fileOperation
        self.targetOperation = targetOperation

        self.seqList = []
        self.classList = {}

        # A bare string would otherwise be iterated character by character.
        if isinstance(exts, str):
            exts = (exts,)

        # glob returns entries in arbitrary order; sort so that a given index
        # always refers to the same element. The listing does not depend on
        # the extension, so it is computed once.
        classDirs = sorted(
            entry for entry in glob.glob(os.path.join(folderName, '*'))
            if os.path.isdir(entry)
        )

        for ext in exts:
            for classDir in classDirs:
                className = str(os.path.basename(classDir))
                for path in sorted(glob.glob(os.path.join(classDir, '*' + ext))):
                    # Overlapping suffixes (e.g. '.avi' and 'avi') must not
                    # register the same file twice.
                    if path in self.classList:
                        continue
                    self.seqList.append(path)
                    self.classList[path] = className

    def __getitem__(self, i):
        """Return the ``(element, target)`` pair stored at index ``i``."""
        e = self.seqList[i]
        c = self.classList[e]

        if self.fileOperation is not None:
            e = self.fileOperation(e)
        if self.targetOperation is not None:
            c = self.targetOperation(c)

        return e, c

    def __len__(self):
        """Return the number of files found."""
        return len(self.seqList)

    def __str__(self):
        """
            Short textual preview: every item when there are fewer than five,
            otherwise the first two and the last one. Items are obtained
            through ``__getitem__``, so both operations are applied.
        """
        s = "SequenceDataset("
        if len(self) < 5:
            for i in range(len(self)):
                seq, c = self[i]
                # fileOperation may return a non-string object, so convert
                # explicitly instead of concatenating it directly.
                s += str(seq) + ':' + str(c) + '\n'
        else:
            s += str(self[0]) + '\n' * 2 + str(self[1])
            s += '\n' * 2 + ' ... ' + '\n' * 2
            s += str(self[-1])
        return s + ')'

In [1]:
class GenericDatasetFromFile(data.Dataset):
    """
        A dataset whose elements are listed in a text file, one per line.

        Every line of ``fileName`` is passed to ``parsingOperation``, which
        must return a sequence whose first item is the element name and whose
        second item is its class. Lines that yield fewer than two items are
        reported on stdout and skipped.

        Parameters:
            fileName         : path of the listing file.
            baseDir          : directory joined in front of every element name
                               in ``__getitem__``.
            parsingOperation : callable ``line -> sequence`` (see above).
            fileOperation    : optional callable applied to the joined path;
                               ``None`` returns the path itself.
            targetOperation  : optional callable applied to the class;
                               ``None`` returns the class itself.

        Attributes:
            fList     : list of element names, in file order.
            classList : dict mapping each element name to its class. If a name
                        appears on several lines, the last class wins.
    """
    def __init__(self, fileName, baseDir, parsingOperation, fileOperation=None, targetOperation=None):
        self.fileOperation = fileOperation
        self.targetOperation = targetOperation
        self.baseDir = baseDir

        self.fList = []
        self.classList = {}

        # Use a context manager so the file handle is closed deterministically.
        with open(fileName) as listing:
            lines = listing.read().splitlines()

        for line in lines:
            lparse = parsingOperation(line)
            if len(lparse) < 2:
                print("Error reading line : ", line)
                continue
            fName, className = lparse[0], lparse[1]
            self.fList.append(fName)
            self.classList[fName] = className

    def __getitem__(self, i):
        """Return the ``(element, target)`` pair stored at index ``i``."""
        name = self.fList[i]
        e = os.path.join(self.baseDir, name)
        c = self.classList[name]

        # None means "no transformation", as in GenericDataset.
        if self.fileOperation is not None:
            e = self.fileOperation(e)
        if self.targetOperation is not None:
            c = self.targetOperation(c)

        return e, c

    def __len__(self):
        """Return the number of successfully parsed lines."""
        return len(self.fList)

    def __str__(self):
        """
            Short textual preview: every item when there are fewer than five,
            otherwise the first two and the last one. Items are obtained
            through ``__getitem__``, so both operations are applied.
        """
        s = "SequenceDataset("
        if len(self) < 5:
            for i in range(len(self)):
                seq, c = self[i]
                # fileOperation may return a non-string object, so convert
                # explicitly instead of concatenating it directly.
                s += str(seq) + ':' + str(c) + '\n'
        else:
            s += str(self[0]) + '\n' * 2 + str(self[1])
            s += '\n' * 2 + ' ... ' + '\n' * 2
            s += str(self[-1])
        return s + ')'

NameError: name 'data' is not defined

In [13]:
def testDatasetFromFile(fileName="/video/twentybn/jester-v1-train.csv",
                        baseDir="/video/twentybn/20bn-jester-v1/"):
    """
        Smoke test: build a GenericDatasetFromFile and print its preview.

        Lines of ``fileName`` are split on ';' (first field: element name,
        second field: class). File and target are returned untransformed, so
        the printed items are ``(joined path, class)`` pairs.

        Parameters:
            fileName : listing file to read (defaults to the Jester training
                       CSV path used on the original machine).
            baseDir  : directory prepended to every element name.
    """
    parsingOperation = lambda x: x.split(";")
    fileOperation    = lambda x: x
    targetOperation  = lambda x: x
    # Pass the fileName variable instead of repeating the literal path.
    d = GenericDatasetFromFile(fileName, baseDir, parsingOperation, fileOperation, targetOperation)
    print(d)

In [12]:
# Run the smoke test only when this code is executed directly
# (script or notebook kernel), not when it is imported.
if __name__ == "__main__":
    testDatasetFromFile()

SequenceDataset(('/video/twentybn/20bn-jester-v1/34870', 'Drumming Fingers')

('/video/twentybn/20bn-jester-v1/56557', 'Sliding Two Fingers Right')

 ... 

('/video/twentybn/20bn-jester-v1/4502', 'Sliding Two Fingers Up'))
