# Comprehend With Lambda

Use the following function in AWS Lambda to run Amazon Comprehend sentiment analysis on a specific column of a CSV file uploaded to S3.

In [None]:
import csv
import os
from urllib.parse import unquote_plus

import boto3

# Upper bound (in UTF-8 bytes) on the text sent per DetectSentiment call.
# The service rejects oversized documents; this stays conservatively below
# the documented ~5 KB limit.
_MAX_TEXT_BYTES = 4900

# SentimentScore keys, in the order their columns are appended to each row.
_SCORE_KEYS = ("Positive", "Negative", "Neutral", "Mixed")


def _find_column(header, field_name):
    """Return the index of the last header cell equal to field_name, or 0 if none match."""
    found = 0
    for position, name in enumerate(header):
        if name == field_name:
            found = position
    return found


def _clip_utf8(text, max_bytes=_MAX_TEXT_BYTES):
    """Return text cut to at most max_bytes of UTF-8 without splitting a character."""
    encoded = text.encode("utf-8")
    if len(encoded) <= max_bytes:
        return text
    # errors="ignore" drops a multi-byte character severed by the slice.
    return encoded[:max_bytes].decode("utf-8", errors="ignore")


def lambda_handler(event,  context):
    """Add Amazon Comprehend sentiment columns to each CSV named in an S3 event.

    For every entry in event['Records'] the object is downloaded to /tmp, the
    column whose header is "text" is sent row by row to
    comprehend.detect_sentiment, and five columns (Sentiment_Overall,
    Sentiment_Positive, Sentiment_Negative, Sentiment_Neutral,
    Sentiment_Mixed) are appended. The result is uploaded to the same bucket
    as "<name>_processed.csv" under the "output" folder that sits beside the
    "input" folder of the source key.

    Rows with no usable text (blank line, missing column, empty cell) are not
    sent to Comprehend: blank lines are written back unchanged and the others
    receive empty sentiment cells. If no header equals "text", column 0 is
    analysed.

    Args:
        event: S3 notification payload; reads event['Records'][i]['s3'].
        context: Lambda context object (unused).

    Returns:
        None.
    """
    #bring in S3 and Comprehend resources
    s3 = boto3.resource('s3')
    comprehend = boto3.client('comprehend', region_name='us-east-1')

    #column name of the field we will send to comprehend
    input_field = "text"

    #prefix for input and output bucket (so that we can automatically save the output file to the right place)
    input_folder = 'input'
    output_folder = 'output'
    output_field_prefix = "Sentiment"

    #temporary files names
    temp_file_in = '/tmp/input.csv'
    temp_file_out = '/tmp/output.csv'

    # NOTE(review): output goes to the same bucket as the input; presumably the
    # S3 trigger is scoped to the input prefix so the upload below does not
    # re-invoke this function - confirm against the trigger configuration.
    for record in event['Records']:
        bucket = s3.Bucket(record['s3']['bucket']['name'])
        # S3 event notifications URL-encode the object key (e.g. spaces
        # arrive as '+'), so decode it before using it as a real key.
        key = unquote_plus(record['s3']['object']['key'])

        #create output filenames
        output_file_name = os.path.splitext(os.path.basename(key))[0] + "_processed.csv"
        base_folder = key.split(input_folder)[0]
        key_out = os.path.join(base_folder, output_folder, output_file_name)

        #download the file from S3 (we could read it directly but this makes it a bit easier)
        bucket.download_file(key, temp_file_in)

        # newline='' is what the csv module requires for correct handling of
        # embedded line breaks; utf-8-sig strips a BOM that would otherwise
        # become part of the first header name and hide the "text" column.
        with open(temp_file_in, 'r', newline='', encoding='utf-8-sig') as infile:
            rows = list(csv.reader(infile))

        if rows:
            header = rows[0]
            input_index = _find_column(header, input_field)

            #add headers for new columns
            header.append(f'{output_field_prefix}_Overall')
            header.extend(f'{output_field_prefix}_{score}' for score in _SCORE_KEYS)

            for row in rows[1:]:
                if not row:
                    # Blank line in the CSV: nothing to analyse, keep it as is.
                    continue

                text = row[input_index] if input_index < len(row) else ""
                if not text:
                    # DetectSentiment requires at least one character; keep the
                    # row and leave its sentiment cells empty.
                    row.extend([""] * (1 + len(_SCORE_KEYS)))
                    continue

                #call comprehend; this is where the magic happens
                res = comprehend.detect_sentiment(Text=_clip_utf8(text), LanguageCode='en')

                #add results from comprehend directly to the current row
                row.append(res['Sentiment'])
                row.extend(res["SentimentScore"][score] for score in _SCORE_KEYS)

        #write out file
        with open(temp_file_out, 'w', newline='', encoding='utf-8') as outfile:
            csv.writer(outfile).writerows(rows)

        #save to s3
        bucket.upload_file(temp_file_out, key_out)