## Load Packages

In [None]:
import hashlib
import os
import json
import datetime as date
import pandas as pd

## Creating Helper Functions

In [16]:
def hash_file(filename):
    """Return the SHA-256 hex digest of the contents of ``filename``.

    The file is opened in binary mode and consumed in 65536-byte chunks, so
    the whole file is never held in memory at once.
    """
    chunk_size = 65536
    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns an
        # empty bytes object, which is how end-of-file is signalled.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

In [17]:
## TODO: decide whether 'name' and 'timestamp' need to be included in the block hash.

def hash_block(block):
    """Return the SHA-256 hex digest that identifies ``block``.

    The digest covers the ``str()`` forms of the block's 'name', 'timestamp',
    'data' and 'previous_hash' entries, concatenated in that order. The
    block's own 'hash' entry is not read, so the result does not depend on it.

    Parameters
    ----------
    block : dict
        Mapping that contains at least the four keys listed above.

    Returns
    -------
    str
        Hexadecimal SHA-256 digest of the concatenated fields.

    Raises
    ------
    KeyError
        If any of the four required keys is missing from ``block``.
    """
    hashed_fields = ('name', 'timestamp', 'data', 'previous_hash')
    payload = ''.join(str(block[field]) for field in hashed_fields)
    # hashlib only accepts bytes; passing a str raises TypeError on Python 3,
    # so the text is encoded explicitly before hashing.
    return hashlib.sha256(payload.encode('utf-8')).hexdigest()

## Hash Input File

The SHA-256 hash of a file can serve as its ID. It is computed from the file's contents, so any change to the file produces a different hash.

In [18]:
# Content hash of the raw input data set; the value is shown as the cell output.
hash_file(filename='python/example/iris.csv')

'396c921bc9cf625a4ab755540084aa3d0d941c4ffed8681299689b1f502c3ac2'

## Testing Script
This section is a small development run of the code that is turned into the Python script below.

In [8]:
# Development run of the transformation: read the input table, average every
# column within each 'Species' group, and write the summary to iris_group.csv.
iris = pd.read_csv(filepath_or_buffer='python/example/iris.csv')
iris_group = iris.groupby(by='Species').mean()
iris_group.to_csv(path_or_buf='iris_group.csv')

## Creating Script

In [9]:
%%writefile iris_groupmeans.py

import pandas as pd

iris = pd.read_csv('python/example/iris.csv')
iris_group = iris.groupby('Species').mean()
iris_group.to_csv('iris_group.csv')

Overwriting iris_groupmeans.py


In [10]:
# Content hash of the script file iris_groupmeans.py.
hash_file(filename='iris_groupmeans.py')

'0178e3f70e90c491f44c7a9179c44f35ee45f3a28ded3be5a4f55c3e8409fa05'

In [11]:
# Content hash of the output file iris_group.csv.
hash_file(filename='iris_group.csv')

'fd570681461a0990b120ae70ca2bddd196a908e3c67dd0b5478dbda8afe7d54f'

In [12]:
# Provenance record for iris_group.csv:
#   data          - content hash of the output file itself
#   timestamp     - creation time of this record (`date` is the datetime module)
#   previous_hash - content hashes of the input data and of the script
#   hash          - left empty here; filled in once the block itself is hashed
block = dict(
    name='iris_group.csv',
    data=hash_file('iris_group.csv'),
    timestamp=str(date.datetime.now()),
    previous_hash=list(map(hash_file, ('python/example/iris.csv', 'iris_groupmeans.py'))),
    hash='',
)

In [13]:
# Display the block as built; its 'hash' entry is still the empty string.
block

{'data': 'fd570681461a0990b120ae70ca2bddd196a908e3c67dd0b5478dbda8afe7d54f',
 'hash': '',
 'name': 'iris_group.csv',
 'previous_hash': ['396c921bc9cf625a4ab755540084aa3d0d941c4ffed8681299689b1f502c3ac2',
  '0178e3f70e90c491f44c7a9179c44f35ee45f3a28ded3be5a4f55c3e8409fa05'],
 'timestamp': '2018-09-17 18:10:32.108900'}

In [14]:
# Seal the block by storing its digest under 'hash'. hash_block reads only
# 'name', 'timestamp', 'data' and 'previous_hash', so re-running this cell
# yields the same value.
block.update(hash=hash_block(block))

In [15]:
# Display the block again; 'hash' now holds the digest returned by hash_block.
block

{'data': 'fd570681461a0990b120ae70ca2bddd196a908e3c67dd0b5478dbda8afe7d54f',
 'hash': '54fadd7ff3782fe1431709dd53b9c6f5f0659b0e5433ee95d272d7157cb4a87c',
 'name': 'iris_group.csv',
 'previous_hash': ['396c921bc9cf625a4ab755540084aa3d0d941c4ffed8681299689b1f502c3ac2',
  '0178e3f70e90c491f44c7a9179c44f35ee45f3a28ded3be5a4f55c3e8409fa05'],
 'timestamp': '2018-09-17 18:10:32.108900'}