## Overview

This notebook shows you how to create and query a table or DataFrame loaded from data stored in AWS S3. There are two ways to establish access to S3: [IAM roles](https://docs.databricks.com/user-guide/cloud-configurations/aws/iam-roles.html) and access keys.

*We recommend using IAM roles to specify which cluster can access which buckets. Keys can show up in logs and table metadata and are therefore fundamentally insecure.* If you do use keys, you must escape every `/` in the secret key as `%2F` before embedding it in the `s3a://` mount URI.

This is a **Python** notebook so the default cell type is Python. However, you can use different languages by using the `%LANGUAGE` magic command. Python, Scala, SQL, and R are all supported.

In [0]:
# Credentials are read from a Databricks secret scope so they never appear in the notebook source.
ACCESS_KEY = dbutils.secrets.get(scope = "aws-s3-prd", key = "ACCESS_KEY_ID")
SECRET_KEY = dbutils.secrets.get(scope = "aws-s3-prd", key = "SECRET_ACCESS_KEY")
# The secret is embedded in the s3a:// URI below, so any "/" in it must be escaped as %2F.
ENCODED_SECRET_KEY = SECRET_KEY.replace("/", "%2F")
AWS_BUCKET_NAME = "td-infra-prd-us-east-1-s3-guardian"
MOUNT_NAME = "kafka_raw"
MOUNT_POINT = "/mnt/%s" % MOUNT_NAME

# dbutils.fs.mount raises if the mount point is already in use, which made this cell fail on re-run.
# Only mount when the mount point is not present yet so the cell is idempotent.
# NOTE(review): an existing mount at MOUNT_POINT is reused as-is, even if it points at another source.
already_mounted = any(mount.mountPoint == MOUNT_POINT for mount in dbutils.fs.mounts())
if not already_mounted:
    dbutils.fs.mount("s3a://%s:%s@%s" % (ACCESS_KEY, ENCODED_SECRET_KEY, AWS_BUCKET_NAME), MOUNT_POINT)

# List the top level of the mounted bucket to confirm the mount is readable.
display(dbutils.fs.ls(MOUNT_POINT))

path,name,size
dbfs:/mnt/kafka_raw/57c7413abca837e974000009/,57c7413abca837e974000009/,0
dbfs:/mnt/kafka_raw/58111bed996d5822e3000018/,58111bed996d5822e3000018/,0
dbfs:/mnt/kafka_raw/5c0fe1684b54c2000f78849b/,5c0fe1684b54c2000f78849b/,0


In [0]:
# Read the audit-log export through the mount created in the first cell.
# The original path ("dbfs:/mnt/5c0fe1684b54c2000f78849b/...") skipped the mount name, so it only
# resolved if a separate mount from outside this notebook happened to exist.
# NOTE(review): assumes the JSON file sits directly under the account folder listed above — confirm.
AUDIT_LOGS_PATH = "dbfs:/mnt/%s/5c0fe1684b54c2000f78849b/audit_logs_1590969600000-1598918400000.json" % MOUNT_NAME
testJsonData = spark.read.json(AUDIT_LOGS_PATH)
display(testJsonData)

account_id,agent_id,event,event_id,event_type,logger_event_id,timestamp
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(5dcb26eeefe0380009de4f0c, null, List(34.232.116.65), null, user, Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Callbar/1.32.17-electron Chrome/76.0.3809.146 Electron/6.1.11 Safari/537.36, null), List(contact_read, contacts/read/5f034cf921f307000efc338a, low, success, user), create_audit_operation, f934fc31463b44bca49ac3a9dcb9eab4, List(null, contacts-api-5b96dd888f-hdslj, td-p-new-contacts-api, system), 9b10f25c-33d6-4848-af40-0068c440c5f3, 2020-08-17T22:46:07.073000Z, null, 8c8f7116-6f2f-4708-9932-9f85454356fa, null, contacts_api, 2020-08-17T22:46:06Z)",f934fc31463b44bca49ac3a9dcb9eab4,create_audit_operation,9b10f25c-33d6-4848-af40-0068c440c5f3,2020-08-17T22:46:06.000
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(e21d7537e72641e4be3e318fdcb60641, null, null, null, oauth_client, null, null), List(oauth_client_issue_access_token, 9f91d7e1004c4fc3aff295b6c89d751b, low, success, internal), create_audit_operation, 6c0d25e63652469f890344abb60c5b81, List(null, talkdesk-id-web-5fd4c7878d-cgxll, talkdesk-id, system), 33f76ee3-545a-4587-b58c-3b4ea9d149a4, 2020-08-17T22:46:08.148000Z, List(null, null, null, null, null, null, null, null, null, 2020-08-17T22:56:07.828Z, null, refresh_token, null, null, null, null, null, null, null, null, d0588d33fa124a8fa31d3f18ab80d262, null, null, null, null, 241787e48d4e43e6817ec043d84b7815, null, null, null, List(main-bff), null, null, null, null, 5e59c1aaac35a5000c519646, null, null), null, MAIN, null, 2020-08-17T22:46:07.828Z)",6c0d25e63652469f890344abb60c5b81,create_audit_operation,33f76ee3-545a-4587-b58c-3b4ea9d149a4,2020-08-17T22:46:07.828
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(561cec9dabd9442eaae94e007ad6d57f, null, List(54.214.162.29), null, oauth_client, Apache-HttpClient/4.5.11 (Java/1.8.0_222), null), List(read_call_recordings, /calls/b57976fc465f46c38c8191a224f19f95/recordings, low, success, user), create_audit_operation, 1a171dc6df2f46b6ae37bdeeb384a815, List(null, recordings-api-web-75fb569744-xmkw7, recordings-api, system), 5472765c-03c3-4aeb-80f0-5be43f8a2c1b, 2020-08-17T22:46:08.150000Z, null, cf785781-a69c-4269-aba4-4699e609ca25, null, null, 2020-08-17T22:46:08Z)",1a171dc6df2f46b6ae37bdeeb384a815,create_audit_operation,5472765c-03c3-4aeb-80f0-5be43f8a2c1b,2020-08-17T22:46:08.000
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(N/A, null, List(54.88.53.127), null, user, Faraday v0.12.2, null), List(contact_read, contacts/read/5f034cf921f307000efc338a, low, success, user), create_audit_operation, 38bff718a7b84d3a9a24436a030dfe36, List(null, contacts-api-5b96dd888f-xvn4z, td-p-new-contacts-api, system), f161a167-1c27-413a-85dd-11f854c35163, 2020-08-17T22:46:09.233000Z, null, 37fd9a0e2a0877ad042ec523aad32a3f, 37fd9a0e2a0877ad042ec523aad32a3f, null, contacts_api, 2020-08-17T22:46:08Z)",38bff718a7b84d3a9a24436a030dfe36,create_audit_operation,f161a167-1c27-413a-85dd-11f854c35163,2020-08-17T22:46:08.000
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(N/A, null, List(54.88.53.127), null, user, Faraday v0.15.4, null), List(contact_read, contacts/read/5f034cf921f307000efc338a, low, success, user), create_audit_operation, b6ff6b3dba394aa595cc74790cebeba7, List(null, contacts-api-5b96dd888f-8mw65, td-p-new-contacts-api, system), d31fb68d-fbed-4c75-895e-903a66d9379a, 2020-08-17T22:46:09.234000Z, null, 0bf50c869a328bf2294510576359534f, 0bf50c869a328bf2294510576359534f, null, contacts_api, 2020-08-17T22:46:08Z)",b6ff6b3dba394aa595cc74790cebeba7,create_audit_operation,d31fb68d-fbed-4c75-895e-903a66d9379a,2020-08-17T22:46:08.000
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(561cec9dabd9442eaae94e007ad6d57f, null, List(52.32.109.163), null, oauth_client, Apache-HttpClient/4.5.11 (Java/1.8.0_222), null), List(read_recording, /recordings/d3d29cf3163f4dd188512e80d68e08f2/media, low, success, user), create_audit_operation, c41f2559437b440890205a3b7752fb4c, List(null, recordings-api-web-75fb569744-bchxs, recordings-api, system), 244b1fde-0ee9-4460-84b0-07f1b26e3c74, 2020-08-17T22:46:09.234000Z, null, 24cd150e-6246-43aa-a231-1577d37bf542, null, null, 2020-08-17T22:46:08Z)",c41f2559437b440890205a3b7752fb4c,create_audit_operation,244b1fde-0ee9-4460-84b0-07f1b26e3c74,2020-08-17T22:46:08.000
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(561cec9dabd9442eaae94e007ad6d57f, null, List(52.32.109.163), null, oauth_client, Apache-HttpClient/4.5.11 (Java/1.8.0_222), null), List(read_recording_media_file, /recordings/d3d29cf3163f4dd188512e80d68e08f2/media, medium, success, user), create_audit_operation, 306a568b06484cc481e48739a5474e5c, List(null, recordings-api-web-75fb569744-bchxs, recordings-api, system), 5e50ed9b-343a-42ba-a025-a8eae8f11a9f, 2020-08-17T22:46:09.234000Z, null, 24cd150e-6246-43aa-a231-1577d37bf542, null, null, 2020-08-17T22:46:08Z)",306a568b06484cc481e48739a5474e5c,create_audit_operation,5e50ed9b-343a-42ba-a025-a8eae8f11a9f,2020-08-17T22:46:08.000
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(561cec9dabd9442eaae94e007ad6d57f, null, List(34.216.95.140), null, oauth_client, Apache-HttpClient/4.5.11 (Java/1.8.0_222), null), List(read_call_recordings, /calls/74bf0f1166db485cae0974158bb7402e/recordings, low, success, user), create_audit_operation, 9bb1ec9e8ade48f4ac8a54f16600295b, List(null, recordings-api-web-75fb569744-7kpgv, recordings-api, system), 183ac680-9914-49f1-817d-054b98d5fd68, 2020-08-17T22:46:09.234000Z, null, c03c0b0f-77c6-4a76-9b9b-220575fc1c44, null, null, 2020-08-17T22:46:08Z)",9bb1ec9e8ade48f4ac8a54f16600295b,create_audit_operation,183ac680-9914-49f1-817d-054b98d5fd68,2020-08-17T22:46:08.000
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(N/A, null, List(3.216.76.109), null, user, Faraday v0.15.4, null), List(contact_read, contacts/read/5f034cf921f307000efc338a, low, success, user), create_audit_operation, 0081873fa603490ab027582f813effae, List(null, contacts-api-5b96dd888f-677x9, td-p-new-contacts-api, system), 7d67642c-84b2-4166-b931-0afbb5118692, 2020-08-17T22:46:09.234000Z, null, 0a2f64d9625ea8c52d510dccb5d55bc1, 0a2f64d9625ea8c52d510dccb5d55bc1, null, contacts_api, 2020-08-17T22:46:08Z)",0081873fa603490ab027582f813effae,create_audit_operation,7d67642c-84b2-4166-b931-0afbb5118692,2020-08-17T22:46:08.000
5c0fe1684b54c2000f78849b,,"List(5c0fe1684b54c2000f78849b, List(N/A, null, List(3.216.76.109), null, user, Faraday v0.15.4, null), List(contact_read, contacts/read/5f034cf921f307000efc338a, low, success, user), create_audit_operation, 48ef693d56ee41b0bcb6dcaeb7f32c04, List(null, contacts-api-5b96dd888f-xvn4z, td-p-new-contacts-api, system), 6e077b70-4d3b-486b-9d06-483bbb2e92d0, 2020-08-17T22:46:09.234000Z, null, f988b8222327d8b9168aedfe8cc46d2e, f988b8222327d8b9168aedfe8cc46d2e, null, contacts_api, 2020-08-17T22:46:08Z)",48ef693d56ee41b0bcb6dcaeb7f32c04,create_audit_operation,6e077b70-4d3b-486b-9d06-483bbb2e92d0,2020-08-17T22:46:08.000


In [0]:
# Persist the loaded DataFrame as a table; the name matches the account_id shown in the output above.
AUDIT_TABLE_NAME = "5c0fe1684b54c2000f78849b"
audit_writer = testJsonData.write
audit_writer.saveAsTable(AUDIT_TABLE_NAME)