# S3上のPOSTデータの処理

serverless-httppost-s3-mail で S3 にアップロードされたデータを Pandas に読み込んで処理します。

## S3からデータを読み込み

以下の項目を設定します。

- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- REGION_NAME
- BUCKET_NAME
- BUCKET_KEY_PREFIX


In [0]:
# Connection settings for the S3 bucket that holds the uploaded POST data.
# Replace every '<...>' placeholder with a real value before running.
# NOTE: do not commit real credentials to version control.
AWS_ACCESS_KEY_ID = '<aws_access_key_id>'
AWS_SECRET_ACCESS_KEY = '<aws_secret_access_key>'
REGION_NAME = 'us-east-1'
BUCKET_NAME = '<bucket_name>'
# Only objects whose key starts with this prefix are listed and read.
BUCKET_KEY_PREFIX = 'dev/'

In [0]:
import boto3
import json

from boto3.session import Session

# Create an S3 client using the credentials and region configured above.
client = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=REGION_NAME,
)

# A single list call returns at most 1000 keys, so walk every page of the
# listing. The result keeps the {'Contents': [...]} shape of a list response
# so the extraction step can keep reading objs['Contents']; it is an empty
# list when nothing matches the prefix.
objs = {'Contents': []}
paginator = client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=BUCKET_NAME, Prefix=BUCKET_KEY_PREFIX):
    objs['Contents'].extend(page.get('Contents', []))

## 取得したデータから必要な項目の抽出

In [0]:
# Collect the S3 key of every listed object. A listing that matched nothing
# has no 'Contents' entry, so fall back to an empty list instead of raising.
keys = [obj['Key'] for obj in objs.get('Contents', [])]

# One list per output column; entry i of every list comes from the same object.
names = []
ages = []
times = []
sourceips = []
useragents = []
users = []

for key in keys:
    data = client.get_object(Bucket=BUCKET_NAME, Key=key)
    # The stored object is a JSON document whose 'body' field is itself a
    # JSON-encoded string, hence the two decoding steps.
    event = json.loads(data['Body'].read().decode('utf-8'))
    payload = json.loads(event['body'])

    # 'Name' and 'Age' are optional; record None when they are missing or
    # when the payload is not a JSON object at all.
    fields = payload if isinstance(payload, dict) else {}
    names.append(fields.get('Name'))
    ages.append(fields.get('Age'))

    request_context = event['requestContext']
    identity = request_context['identity']
    times.append(request_context['requestTimeEpoch'])
    sourceips.append(identity['sourceIp'])
    useragents.append(identity['userAgent'])
    users.append(identity['user'])


## Pandas データフレームの作成

In [0]:
import pandas as pd

# Assemble the per-object lists into a table, one column per extracted field.
df = pd.DataFrame(
    dict(
        name=names,
        age=ages,
        time=times,
        sourceip=sourceips,
        useragent=useragents,
        user=users,
    )
)
# Trailing bare expression so the notebook cell displays the frame.
df