In [None]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, together with its default bucket and the notebook's
# execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# The region is read from the default boto3 session; the low-level SageMaker
# client is created from that same session and pinned to that region.
boto_session = boto3.Session()
region = boto_session.region_name
sm = boto_session.client(service_name='sagemaker', region_name=region)

In [None]:
! pip install -r requirements.txt -q

In [None]:
from transformers import *
from summarizer import Summarizer

# Checkpoint identifier shared by the config, the tokenizer and the model.
# Defining it once guarantees all three are loaded from the same checkpoint.
MODEL_NAME = 'monologg/biobert_v1.1_pubmed'

# Load model config, tokenizer and model via Transformers.
custom_config = AutoConfig.from_pretrained(MODEL_NAME)
# Ask the model to return its hidden states.
# NOTE(review): presumably required by Summarizer to build sentence
# embeddings; confirm against the summarizer package documentation.
custom_config.output_hidden_states = True
custom_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
custom_model = AutoModel.from_pretrained(MODEL_NAME, config=custom_config)

# Create the Summarizer object around the custom model and tokenizer.
summed = Summarizer(custom_model=custom_model, custom_tokenizer=custom_tokenizer)


In [None]:
import os

# Input folder with the processed documents and output folder for summaries.
filtered_dir = "/root/Filtered"
summary_dir = "/root/Summary"

# Create the output folder up front so the first write cannot fail because
# the directory is missing.
os.makedirs(summary_dir, exist_ok=True)

# Generate list of processed .txt files to summarize.
# os.listdir returns every entry, including sub-directories and files with
# other extensions, so keep only regular .txt files. Sorting makes the
# processing order reproducible between runs.
docs = sorted(
    name
    for name in os.listdir(filtered_dir)
    if name.lower().endswith(".txt")
    and os.path.isfile(os.path.join(filtered_dir, name))
)

# Iterate through processed .txt files, generate summaries and write to file.
for doc in docs:

    with open(os.path.join(filtered_dir, doc), "r") as in_f:
        body = in_f.read()

    # NOTE(review): ''.join is kept from the original; it is a no-op when the
    # summarizer already returns a single string.
    summary = ''.join(summed(body, num_sentences=5))

    # splitext removes the extension whatever its length, unlike slicing off
    # a fixed four characters.
    stem = os.path.splitext(doc)[0]
    path = os.path.join(summary_dir, stem + "_SUMMARY.txt")

    with open(path, "w") as out_f:
        out_f.write(summary)

In [None]:
# Upload the generated summaries to the SageMaker session's default S3 bucket.

s3 = boto3.resource('s3')

bucket = sess.default_bucket()

summary_dir = "/root/Summary"

# os.listdir also returns sub-directories, which upload_file cannot send;
# keep regular files only. Sorted for a reproducible upload order.
txtfiles = sorted(
    name
    for name in os.listdir(summary_dir)
    if os.path.isfile(os.path.join(summary_dir, name))
)

for txtfile in txtfiles:
    fpath = os.path.join(summary_dir, txtfile)
    # The object key is the bare file name (no key prefix is added).
    s3.meta.client.upload_file(fpath, bucket, txtfile)
