# Stage 2: Encryption

In this stage we: 

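- encrypt the graded tests (PDF files, plus JPG files that are first converted to PDF) for every student listed in the data frame with valid submissions, using that student's MD5 value as the password for opening the file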

Rerun the whole notebook before starting Stage 2, so that the data frame with valid submissions is stored in memory.

In [None]:
# working directory (relative to the notebook) that holds valid-sub.csv and the korrektur directory
path = 'z1'

In [None]:
import os
import glob

import pikepdf
from pikepdf import Pdf

# Load configuration without altering the environment
from dotenv import dotenv_values
config = dotenv_values(".env")

import pandas as pd

import img2pdf

In [None]:
# get valid submissions from valid-sub.csv inside the working directory
df_valid_subm = pd.read_csv(f'./{path}/valid-sub.csv')
# instantiate md5 and matrikel dictionaries (row index -> column value);
# each one is taken directly from its column so the whole data frame is not converted twice
md5_dic = df_valid_subm['MD5'].to_dict()
matrikel_dic = df_valid_subm['Matrikel'].to_dict()

In [None]:
# the .env configuration must hold exactly one entry (unpacking raises ValueError otherwise);
# its value is used as the owner password when encrypting the PDFs
(OWNER,) = config.values()

In [None]:
# make new directory for encrypted files; exist_ok keeps a rerun of the notebook from
# failing with FileExistsError when the directory is already there
os.makedirs(f'./{path}/korrektur/korr-e', exist_ok=True)

In [None]:
def encrypt_file_pdf(korrektur_dir):
    '''
    Encrypt the graded pdf tests of all students that have a valid submission.

    Input: korrektur directory containing korr and nicht-korr directory; korr directory contains the graded tests as pdf,
           the first six characters of each file name being the matrikel
    Output: encrypted graded tests as pdf (<matrikel>-e.pdf) in the korr-e directory inside korrektur directory;
            the korr-e directory has to exist already

    The owner password is OWNER, the user password is the MD5 value stored for the matrikel.
    Tests whose matrikel is not among the valid submissions are skipped.
    Raises ValueError if the first six characters of a file name are not a number.
    '''
    # make list of graded tests that are pdf
    graded_tests = glob.glob(os.path.join(korrektur_dir, 'korr', '*.pdf'))

    # map every matrikel to its row key once instead of scanning the dictionary for each file;
    # if a matrikel is listed more than once the last row wins
    key_by_matrikel = {val: key for key, val in matrikel_dic.items()}

    # iterate over list of graded tests
    for test in graded_tests:
        # take the matrikel from the file name itself, so the result does not depend on
        # how deep korrektur_dir is nested or on which path separator glob returns
        matrikel = int(os.path.basename(test)[0:6])
        # skip tests that do not belong to a valid submission
        if matrikel not in key_by_matrikel:
            continue
        key = key_by_matrikel[matrikel]
        # build the target inside korr-e; os.path.join also works for an absolute korrektur_dir
        encrypted_file = os.path.join(korrektur_dir, 'korr-e', f'{matrikel_dic[key]}-e.pdf')
        # NOTE(review): R=4 is an older security handler than the pikepdf default;
        # presumably chosen for compatibility with pdf readers - confirm
        encryption = pikepdf.Encryption(owner=OWNER, user=f'{md5_dic[key]}', R=4)
        # the with block closes the pdf even when saving fails
        with Pdf.open(test) as pdf:
            pdf.save(encrypted_file, encryption=encryption)

In [None]:
# encrypt the graded pdf tests found in the korr directory of this working directory
encrypt_file_pdf(f'./{path}/korrektur')

In [None]:
def encrypt_file_jpg(korrektur_dir):
    '''
    Convert the graded jpg tests to pdf and encrypt those that belong to a valid submission.

    Input: korrektur directory containing korr and nicht-korr directory; korr directory contains the graded tests as jpg,
           the first six characters of each file name being the matrikel
    Output: every jpg converted to <matrikel>.pdf inside the korr directory, and the encrypted graded tests
            as pdf (<matrikel>-e.pdf) in the korr-e directory inside korrektur directory;
            the korr-e directory has to exist already

    The owner password is OWNER, the user password is the MD5 value stored for the matrikel.
    Converted tests whose matrikel is not among the valid submissions are not encrypted.
    Raises ValueError if the first six characters of a file name are not a number.
    '''
    # make list of graded tests that are jpg
    jpg_tests = glob.glob(os.path.join(korrektur_dir, 'korr', '*.jpg'))

    # map every matrikel to its row key once instead of scanning the dictionary for each file;
    # if a matrikel is listed more than once the last row wins
    key_by_matrikel = {val: key for key, val in matrikel_dic.items()}

    # iterate over list of graded tests that are jpg
    for jpg_test in jpg_tests:
        # take the matrikel from the file name itself, so the result does not depend on
        # how deep korrektur_dir is nested or on which path separator glob returns
        matrikel = int(os.path.basename(jpg_test)[0:6])
        # path of the pdf version of the test inside the korr directory
        test = os.path.join(korrektur_dir, 'korr', f'{matrikel}.pdf')
        # convert before opening the target, so a failed conversion leaves no empty pdf behind
        pdf_bytes = img2pdf.convert(jpg_test)
        with open(test, 'wb') as f:
            f.write(pdf_bytes)
        # only tests that belong to a valid submission are encrypted
        if matrikel not in key_by_matrikel:
            continue
        key = key_by_matrikel[matrikel]
        # build the target inside korr-e; os.path.join also works for an absolute korrektur_dir
        encrypted_file = os.path.join(korrektur_dir, 'korr-e', f'{matrikel_dic[key]}-e.pdf')
        # NOTE(review): R=4 is an older security handler than the pikepdf default;
        # presumably chosen for compatibility with pdf readers - confirm
        encryption = pikepdf.Encryption(owner=OWNER, user=f'{md5_dic[key]}', R=4)
        # the with block closes the pdf even when saving fails
        with Pdf.open(test) as pdf:
            pdf.save(encrypted_file, encryption=encryption)

In [None]:
# convert the graded jpg tests found in the korr directory of this working directory to pdf and encrypt them
encrypt_file_jpg(f'./{path}/korrektur')