# File Authentication System
- GOAL: Build a file authentication system that lets browsers authenticate and play video chunks as they arrive, without waiting for the entire file to download.

In [1]:
from hashlib import sha256
import os

In [2]:
def read_block_reverse(filepath, buffersize):
    """Yield the blocks of a file in reverse order (last block first).

    The file is viewed as consecutive ``buffersize``-byte blocks starting at
    offset 0, so only the final block can be shorter than ``buffersize``.
    That (possibly short) final block is yielded first, then each preceding
    full block, ending with the block at offset 0.

    Args:
        filepath: Path of the file to read; it is opened in binary mode.
        buffersize: Block size in bytes. Must be a positive integer.

    Yields:
        bytes: One block per iteration, from the end of the file to the
        start. Nothing is yielded for an empty file.

    Raises:
        ValueError: If ``buffersize`` is not positive. Because this is a
            generator, the error surfaces on the first iteration.
    """
    if buffersize <= 0:
        raise ValueError("buffersize must be a positive integer")

    with open(filepath, 'rb') as f:
        f.seek(0, os.SEEK_END)
        filesize = f.tell()

        # Size of the trailing partial block (0 when the file size is an
        # exact multiple of buffersize).
        tail = filesize % buffersize
        if tail:
            f.seek(filesize - tail)
            yield f.read(tail)

        # Walk the full blocks backwards using absolute offsets. The range is
        # empty when the file is smaller than one block (or empty), so no
        # seek before the start of the file is ever attempted.
        for offset in range(filesize - tail - buffersize, -1, -buffersize):
            f.seek(offset)
            yield f.read(buffersize)

In [3]:
def compute_first_hash(filepath, buffersize=1024):
    """Return the SHA-256 digest that authenticates the first block of a file.

    Blocks are consumed from the end of the file towards the start. Each
    step hashes the current block concatenated with the digest of the block
    that follows it (the last block is hashed on its own), so the final
    digest covers the first block plus the chain behind it.

    Args:
        filepath: Path of the file to hash.
        buffersize: Block size in bytes.

    Returns:
        bytes: The raw digest of the first block, or ``b""`` if no block
        was read.
    """
    digest = b""
    for block in read_block_reverse(filepath, buffersize):
        chained = block + digest
        digest = sha256(chained).digest()
    return digest

In [4]:
def write_all_hashes(filepath, hashfilepath=None, buffersize=1024):
    """Write the chained SHA-256 digest of every block of a file to disk.

    Blocks are read from the end of the file to the start. Each digest is
    the SHA-256 of the block concatenated with the digest of the following
    block (the last block is hashed alone). Digests are written in that same
    order, so the digest of the last block comes first in the hash file and
    the digest of the first block comes last.

    Args:
        filepath: Path of the file to hash.
        hashfilepath: Destination of the digests. Defaults to
            ``filepath + "hash"``.
        buffersize: Block size in bytes.
    """
    if hashfilepath is None:
        hashfilepath = filepath + "hash"

    # The context manager guarantees the hash file is closed even if reading
    # or hashing the source file raises part-way through.
    with open(hashfilepath, "wb") as hashfile:
        digest = b""
        for block in read_block_reverse(filepath, buffersize):
            digest = sha256(block + digest).digest()
            hashfile.write(digest)

    
def read_block_hash(filepath, hashfilepath=None, buffersize=1024, hashsize=32):
    """Yield the authentication stream for a file: h0, then block+hash chunks.

    The hash file is expected to hold fixed-size digests stored in reverse
    block order (digest of the last block first, digest of the first block
    last). The generator yields:

    1. the digest of the first block (the last ``hashsize`` bytes of the
       hash file);
    2. for every block except the last, the block's bytes followed by the
       digest of the next block;
    3. the last block on its own.

    Args:
        filepath: Path of the data file.
        hashfilepath: Path of the hash file. Defaults to
            ``filepath + "hash"``.
        buffersize: Block size in bytes used when the digests were computed.
        hashsize: Size in bytes of each stored digest.

    Yields:
        bytes: The first digest, then each chunk as described above.

    Raises:
        OSError: If the hash file is shorter than ``hashsize`` (the seek
            relative to the end of the file fails).
        ValueError: If the hash file size is not a multiple of ``hashsize``.
    """
    if hashfilepath is None:
        hashfilepath = filepath + "hash"

    with open(hashfilepath, "rb") as hashfile, open(filepath, "rb") as file:
        # The digest of the first block is stored last in the hash file.
        hashfile.seek(-hashsize, os.SEEK_END)
        offset = hashfile.tell()
        if offset % hashsize:
            raise ValueError("hash file size is not a multiple of hashsize")
        yield hashfile.read(hashsize)

        # Walk the remaining digests backwards, pairing each data block with
        # the digest of the block after it. With a single stored digest the
        # loop body never runs, so no seek before offset 0 is attempted.
        while offset > 0:
            offset -= hashsize
            hashfile.seek(offset)
            yield file.read(buffersize) + hashfile.read(hashsize)

        # The last block has no successor, so it is sent without a digest.
        yield file.read(buffersize)

In [5]:
# Expected hex digests that compute_first_hash must return for the two files
# below (checked by the asserts at the end of this cell).
answer1 = '5b96aece304a1422224f9a41b228416028f9ba26b0d1058f400200f06a589949'
answer2 = "03c08f4ee0b576fe319338139c045c89c3e8e9409633bea29442e21425006ea8"

# Input files; these paths are relative to the current working directory.
filepath1 = './data/6.1.intro.mp4_download'
filepath2 = './data/6.2.birthday.mp4_download'

# Compute the first-block digest with the default 1024-byte block size and
# compare its hex form with the expected value.
h1 = compute_first_hash(filepath1)
assert h1.hex() == answer1

h2 = compute_first_hash(filepath2)
assert h2.hex() == answer2

In [6]:
def is_authentic(filepath, hashsize=32, hashfilepath=None, buffersize=1024):
    """Check a file block by block against its stored hash chain.

    The first value produced by ``read_block_hash`` is the expected digest
    of the first chunk. Every following chunk is hashed with SHA-256 and
    compared with the currently expected digest; the last ``hashsize`` bytes
    of the chunk then become the expected digest of the next chunk.

    Args:
        filepath: Path of the data file to verify.
        hashsize: Size in bytes of each stored digest (32 for the SHA-256
            digests produced by ``write_all_hashes``).
        hashfilepath: Path of the hash file. Defaults to
            ``filepath + "hash"``.
        buffersize: Block size in bytes; must match the value used when the
            hash file was written.

    Returns:
        bool: ``True`` if every chunk matches its expected digest, ``False``
        as soon as one does not.
    """
    chunks = read_block_hash(filepath, hashfilepath, buffersize, hashsize)
    try:
        expected = next(chunks)
        for chunk in chunks:
            if sha256(chunk).digest() != expected:
                return False
            # The digest of the next chunk travels at the end of this one.
            expected = chunk[-hashsize:]
        return True
    finally:
        # Close the generator so its open files are released immediately,
        # including when verification stops early on a mismatch.
        chunks.close()

In [7]:
# Generate the hash file for each input; with no hashfilepath given, the
# digests are written to "<filepath>hash".
write_all_hashes(filepath1)
write_all_hashes(filepath2)

In [8]:
# Verify each input file against its "<filepath>hash" hash file.
assert is_authentic(filepath1)
assert is_authentic(filepath2)