## Load Packages

In [None]:
import hashlib
import os
import json
import datetime as date
import pandas as pd

## Creating Helper Functions

In [None]:
def hash_file(filename):
    """Return the SHA-256 hex digest of the contents of ``filename``.

    The file is opened in binary mode and fed to the hasher in fixed-size
    chunks, so the whole file is never held in memory at once.
    """
    chunk_size = 65536
    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b'' (EOF).
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

In [None]:
## maybe do not need the timestamp and name included?

def hash_block(block):
    """Return the SHA-256 hex digest that identifies ``block``.

    The digest covers the string forms of the block's 'name', 'timestamp',
    'data' and 'previous_hash' entries, concatenated in that order. The
    block's own 'hash' entry is not part of the input.

    Raises KeyError if any of the four entries is missing.
    """
    payload = ''.join(str(block[key]) for key in
                      ('name', 'timestamp', 'data', 'previous_hash'))
    # hashlib only accepts bytes. On Python 3 ``str`` is text and must be
    # encoded first; on Python 2 ``str`` is already bytes and is used as-is,
    # so digests produced there are unchanged.
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')
    return hashlib.sha256(payload).hexdigest()

## Hash Input File

The hash can serve as an ID for the file: it is derived from the file's contents, so any change to the file changes the hash.

In [None]:
hash_file('iris.csv')

## Testing Script
This cell is a small development run of the code that is turned into a Python script below.

In [None]:
# Development run of the group-means step: read the raw table, average the
# columns within each Species group, and write the result to disk.
source_csv, output_csv = 'iris.csv', 'iris_group.csv'
iris = pd.read_csv(source_csv)
iris_group = iris.groupby(by='Species').mean()
iris_group.to_csv(output_csv)

## Creating Script
The following cell actually creates the Python script that calculates the group means and saves them to a CSV file.

In [None]:
%%writefile iris_groupmeans.py

import pandas as pd

iris = pd.read_csv('iris.csv')
iris_group = iris.groupby('Species').mean()
iris_group.to_csv('iris_group.csv')

## Hashing Created Python Script
Hashing the script file in order to get the content hash to use in the block creation.

In [None]:
hash_file('iris_groupmeans.py')

## Hashing Output File
Hashing the created output file to get the content hash to use in the block creation.

In [None]:
hash_file('iris_group.csv')

## Creating Block

In [None]:
# Describe the output file as a block: 'data' is the content hash of the output
# itself, 'previous_hash' lists the content hashes of the input file and of the
# script, and 'hash' is left as an empty placeholder for now.
block = dict(
    name='iris_group.csv',
    data=hash_file('iris_group.csv'),
    timestamp=str(date.datetime.now()),
    previous_hash=[hash_file('iris.csv'), hash_file('iris_groupmeans.py')],
    hash='',
)

In [None]:
block

In [None]:
# Replace the empty 'hash' placeholder with the digest hash_block computes from
# the block's name, timestamp, data and previous_hash entries.
block['hash'] = hash_block(block)

In [None]:
block

## Loading merkledag package

Testing out the functionality of the package.

In [None]:
# NOTE(review): this installs from an absolute path under /Users/kgosik, so the
# cell only works on a machine where that directory exists — TODO point it at a
# repo-relative path or a published package.
!pip install /Users/kgosik/Documents/Projects/MerkleDAGWorkflow/python/pypi_package/merkledag

In [1]:
from merkledag import *

In [2]:
init()

In [3]:
iris_block = create_genesis_block('iris.csv')

In [4]:
iris_block.hash

'896a5bc0bfeb229774e5545661441825e50be1a3c510082566e2dc23d7596a4e'

In [5]:
iris_block.data

'396c921bc9cf625a4ab755540084aa3d0d941c4ffed8681299689b1f502c3ac2'

In [6]:
iris_block.previous_hashes

['GenesisFile']

In [7]:
import hashlib

# Recompute the block hash by hand from name + data + previous_hashes.
# The timestamp is not included: with it left out, the digest recorded for
# this cell equals the recorded iris_block.hash.
block_fields = (str(iris_block.name) +
                str(iris_block.data) +
                str(iris_block.previous_hashes))
# hashlib needs bytes: a Python 3 str must be encoded, while a Python 2 str is
# already bytes and is passed through unchanged.
if not isinstance(block_fields, bytes):
    block_fields = block_fields.encode('utf-8')
hashlib.sha256(block_fields).hexdigest()

'896a5bc0bfeb229774e5545661441825e50be1a3c510082566e2dc23d7596a4e'

In [14]:
iris_block.__dict__

{'data': '396c921bc9cf625a4ab755540084aa3d0d941c4ffed8681299689b1f502c3ac2',
 'hash': '896a5bc0bfeb229774e5545661441825e50be1a3c510082566e2dc23d7596a4e',
 'name': 'iris.csv',
 'previous_hashes': ['GenesisFile'],
 'timestamp': '2018-09-20 17:07:20.783649'}

In [15]:
check = Block('iris.csv', date.datetime.now(), ['GenesisFile', 'AnotherFile'])

In [16]:
check.__dict__

{'data': '396c921bc9cf625a4ab755540084aa3d0d941c4ffed8681299689b1f502c3ac2',
 'hash': '5573c711be2cad5d51dd1ce352f47465cb7398240f78d40b5c61c24df853ab29',
 'name': 'iris.csv',
 'previous_hashes': ['AnotherFile', 'GenesisFile'],
 'timestamp': '2018-09-20 17:10:26.225565'}

In [17]:
check2 = Block('iris.csv', date.datetime.now(), ['AnotherFile', 'GenesisFile'])

In [18]:
check2.__dict__

{'data': '396c921bc9cf625a4ab755540084aa3d0d941c4ffed8681299689b1f502c3ac2',
 'hash': '5573c711be2cad5d51dd1ce352f47465cb7398240f78d40b5c61c24df853ab29',
 'name': 'iris.csv',
 'previous_hashes': ['AnotherFile', 'GenesisFile'],
 'timestamp': '2018-09-20 17:10:27.050923'}

In [19]:
# NOTE(review): this is False even though check and check2 show the same 'hash'
# above. Presumably Block defines no __eq__, so == falls back to object
# identity — confirm in merkledag; compare check.hash == check2.hash to test
# whether two blocks describe the same content.
check == check2

False