# Reading PEAQ Data from AWS
## This relies on AWS credentials being stored in .streamlit/secrets.toml


In [1]:
import datetime
import json
import warnings
from glob import glob
from pathlib import Path

import pandas as pd
import streamlit as st
#import yagmail

## Set up a connection to AWS S3

In [2]:
# This relies on AWS credentials being stored in .streamlit/secrets.toml or Streamlit Community Cloud app secrets

from st_files_connection import FilesConnection

# Create the connection object: 's3' is the connection name and
# FilesConnection (imported above) is the connection class passed as `type`.
conn = st.connection('s3', type=FilesConnection)


2025-04-15 11:48:43.752 
  command:

    streamlit run /Users/Gavin/miniconda3/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]


## Load PEAQ_DB and pull in any new JSON files from AWS S3 (peaq-streamlit/data)
- PEAQ_DB and all analysis are kept locally

In [3]:
# Load the PEAQ database from the local pickle file.
# NOTE(review): unpickling can execute arbitrary code if the file has been
# tampered with — only load a PEAQ_DB.pkl that this notebook wrote itself.
PEAQ_DB = pd.read_pickle('PEAQ_DB.pkl')
# Report how many records were loaded.
print(f"PEAQ_DB contains {len(PEAQ_DB)} records")

PEAQ_DB contains 26 records


In [4]:
# Keep a separate copy of the database as it was loaded from disk.
# NOTE(review): presumably a safety backup taken before new records are merged in — confirm.
PEAQ_DB_copy = PEAQ_DB.copy()

In [5]:
###### Pull in all json files from s3 bucket #####
# Fetch everything matching the *.json pattern under peaq-streamlit/data
# into the local ./data directory.
conn.fs.get("peaq-streamlit/data/*.json","./data")
# conn.close() stays disabled: FilesConnection has no `close` attribute
# (see the AttributeError recorded in this cell's output).
#conn.close()

AttributeError: 'FilesConnection' object has no attribute 'close'

In [6]:
def load_json(file):
    """Load one assessment JSON file as a single-row DataFrame indexed by ``id``.

    The top-level keys of the file become columns. The nested ``scores``
    mapping is flattened into columns of its own, and every answer whose
    ``category`` is ``'info'`` is added as a column named after the question
    text, holding the answer that was given. The raw ``answers`` mapping is
    kept as a column as well.

    Parameters
    ----------
    file : str or path-like
        Path of the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        A frame with exactly one row, indexed by the record's ``id``.

    Raises
    ------
    KeyError
        If the file lacks an ``answers``, ``scores`` or ``id`` key, or an
        answer entry lacks ``category``, ``text`` or ``answer``.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default
    # encoding, which differs between operating systems.
    with open(file, 'r', encoding='utf-8') as f:
        j = json.load(f)

    # `answers` is read but intentionally left inside `j`, so it also ends up
    # as a column of the returned frame.
    answers = j['answers']
    info = {
        entry['text']: entry['answer']
        for entry in answers.values()
        if entry['category'] == 'info'
    }

    # Replace the nested `scores` mapping by its individual entries, then add
    # the info answers (later updates win on a key clash, as before).
    scores = j.pop('scores')
    j.update(scores)
    j.update(info)

    # Wrap each value in a list so every key becomes a one-element column.
    return pd.DataFrame.from_dict({k: [v] for k, v in j.items()}).set_index('id')


In [8]:
# Merge every locally downloaded assessment that is not yet in PEAQ_DB.
# Only *.json files are considered, matching what is downloaded from S3, so
# stray non-JSON entries in ./data are never passed to load_json.
files = glob('./data/*.json')
print(f"{len(files)} local files")

# Collect the new records first and concatenate once; calling pd.concat inside
# the loop re-copies the whole database for every added file.
known_ids = set(PEAQ_DB.index)
new_records = []
for file in files:
    # Path(...).name isolates the file name on every OS; the record id is the
    # part before the first dot.
    record_id = Path(file).name.split('.')[0]
    if record_id not in known_ids:
        new_records.append(load_json(file))
        known_ids.add(record_id)

if new_records:
    PEAQ_DB = pd.concat([PEAQ_DB, *new_records])

# Persist the (possibly extended) database back to disk.
PEAQ_DB.to_pickle('PEAQ_DB.pkl')

74 local files


In [34]:
# Statistics to keep from describe() for each summarised column.
items = ['count','mean','min','max']
# Grouping key plus the age and score columns to summarise.
summary_columns = ['sex','Please enter your age','overall_score','physical','physiological','psychological']
by_sex_stats = PEAQ_DB[summary_columns].groupby('sex').describe()
# Keep every column on the first level, but only the chosen statistics on the second.
by_sex_stats.loc[:,(slice(None),items)]


Unnamed: 0_level_0,Please enter your age,Please enter your age,Please enter your age,Please enter your age,overall_score,overall_score,overall_score,overall_score,physical,physical,physical,physical,physiological,physiological,physiological,physiological,psychological,psychological,psychological,psychological
Unnamed: 0_level_1,count,mean,min,max,count,mean,min,max,count,mean,min,max,count,mean,min,max,count,mean,min,max
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
Female,54.0,41.444444,16.0,66.0,54.0,2.722222,-13.0,13.0,54.0,-1.555556,-9.0,2.0,54.0,0.481481,-4.0,7.0,54.0,3.796296,-1.0,8.0
Male,16.0,40.25,18.0,73.0,16.0,2.625,-20.0,10.0,16.0,-1.1875,-8.0,2.0,16.0,-0.25,-7.0,4.0,16.0,4.0625,-5.0,8.0


In [35]:
# Count records per main exercise type within each sex.
PEAQ_DB[['sex','What is your main exercise type?']].groupby('sex').value_counts()


sex     What is your main exercise type?
Female  Running                             34
        Dance                                6
        Cycling                              4
        Swimming                             4
        Triathlon                            3
        Climbing                             1
        Football                             1
        Tennis                               1
Male    Cycling                              9
        Running                              3
        Football                             2
        Rowing                               1
        Triathlon                            1
Name: count, dtype: int64

## Old manual method of downloading files one at a time

In [47]:
# Detailed listing: one metadata dict per object (Key, LastModified, Size, ...).
# `detail` is the second parameter of fs.ls. The former "-l" argument was not a
# shell-style flag, only a truthy value for `detail`; "-t" would have behaved
# identically, so the flag is now spelled out explicitly.
files = conn.fs.ls("peaq-streamlit/data", detail=True)
files

[{'Key': 'peaq-streamlit/data/15bf7e71-4c8b-4220-812b-744690554c33.json',
  'LastModified': datetime.datetime(2025, 4, 12, 22, 23, 54, tzinfo=tzutc()),
  'ETag': '"11cbd632402b2690561c5e3c788f5b53"',
  'ChecksumAlgorithm': ['CRC32'],
  'ChecksumType': 'FULL_OBJECT',
  'Size': 6393,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 6393,
  'name': 'peaq-streamlit/data/15bf7e71-4c8b-4220-812b-744690554c33.json'},
 {'Key': 'peaq-streamlit/data/2f525303-a3a9-4ac0-9729-c5a8752ca8ef.json',
  'LastModified': datetime.datetime(2025, 4, 12, 15, 41, 46, tzinfo=tzutc()),
  'ETag': '"62962839ffe4a22c318f9d2e1962df27"',
  'ChecksumAlgorithm': ['CRC32'],
  'ChecksumType': 'FULL_OBJECT',
  'Size': 6418,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 6418,
  'name': 'peaq-streamlit/data/2f525303-a3a9-4ac0-9729-c5a8752ca8ef.json'},
 {'Key': 'peaq-streamlit/data/32b2aa12-d6c2-40f5-99fd-97dd33cb797d.json',
  'LastModified': datetime.datetime(2025, 4, 12, 19, 52, 54, tzinfo=tzutc()),
  

In [4]:
# Tabulate the bucket listing, newest objects first.
dfl = (
    pd.DataFrame.from_dict(files)
    .sort_values('LastModified', ascending=False)
    .reset_index()
)
# Assessment id = object file name up to its first dot.
dfl['Assessment'] = dfl['name'].map(lambda key: key.rsplit('/', 1)[-1].split('.')[0])

# Show the listing keyed by assessment id.
dfl[['Assessment','LastModified','Key']].set_index('Assessment')

Unnamed: 0_level_0,LastModified,Key
Assessment,Unnamed: 1_level_1,Unnamed: 2_level_1
ab0b49e7-880c-4cbf-aca5-980ea142d302,2025-04-13 14:42:50+00:00,peaq-streamlit/data/ab0b49e7-880c-4cbf-aca5-98...
f5350de8-3e58-48d2-ba74-b54d3f08092d,2025-04-13 12:38:39+00:00,peaq-streamlit/data/f5350de8-3e58-48d2-ba74-b5...
d81d2ae9-cb08-4ffa-b9c8-427390de2edd,2025-04-13 02:28:16+00:00,peaq-streamlit/data/d81d2ae9-cb08-4ffa-b9c8-42...
7765f4a3-076d-4546-95d1-405ffa3a1b83,2025-04-13 01:56:09+00:00,peaq-streamlit/data/7765f4a3-076d-4546-95d1-40...
ac4b44e9-8e8a-495a-b460-048179321d06,2025-04-12 22:53:35+00:00,peaq-streamlit/data/ac4b44e9-8e8a-495a-b460-04...
15bf7e71-4c8b-4220-812b-744690554c33,2025-04-12 22:23:54+00:00,peaq-streamlit/data/15bf7e71-4c8b-4220-812b-74...
b22079ac-e5db-4792-8843-4f3474e0fd47,2025-04-12 21:37:14+00:00,peaq-streamlit/data/b22079ac-e5db-4792-8843-4f...
5cc4302e-0a4c-4710-995b-b3c6c648b590,2025-04-12 20:41:22+00:00,peaq-streamlit/data/5cc4302e-0a4c-4710-995b-b3...
3e1a4847-f979-4df0-ab01-f796dacb9140,2025-04-12 20:13:49+00:00,peaq-streamlit/data/3e1a4847-f979-4df0-ab01-f7...
32b2aa12-d6c2-40f5-99fd-97dd33cb797d,2025-04-12 19:52:54+00:00,peaq-streamlit/data/32b2aa12-d6c2-40f5-99fd-97...


In [36]:
# Prompt for the id of the assessment to inspect.
# NOTE(review): interactive input means this cell cannot run unattended.
x = input("Paste assessment_id ")

Paste assessment_id d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9	


In [37]:
# Build the S3 key of the chosen assessment; strip() drops any stray
# leading/trailing whitespace picked up when the id was pasted.
fn = f'peaq-streamlit/data/{x.strip()}.json'

### Read file from AWS

In [38]:
# Alternative to pasting an id: hard-code one and rebuild `fn` from it.
#assessment_id = "ba3da40b-5c52-46bd-8209-8ab0e448bba1"
#assessment_id = "759df116-d5a3-4aec-b9e0-725891cef1d7"
#assessment_id = "d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9"
#fn = f"peaq-streamlit/data/{assessment_id}.json"    

# Read the assessment through the connection, parsed as JSON.
# NOTE(review): ttl=600 presumably caches the result for 600 seconds — confirm
# against the FilesConnection.read documentation.
# NOTE(review): despite its name, `df` is used like a dict in the following
# cells (pop / update / items) — confirm the returned type.
df = conn.read(fn, input_format="json", ttl=600)


2025-04-13 16:41:20.231 No runtime found, using MemoryCacheStorageManager
2025-04-13 16:41:20.233 No runtime found, using MemoryCacheStorageManager
2025-04-13 16:41:20.234 No runtime found, using MemoryCacheStorageManager
2025-04-13 16:41:20.236 No runtime found, using MemoryCacheStorageManager
2025-04-13 16:41:20.238 No runtime found, using MemoryCacheStorageManager


In [39]:
# Take the nested parts out of the record: the per-question answers and the
# score breakdown.
answers = df.pop('answers')
breakdown = df.pop('scores')
# Merge the breakdown entries back into the top level of the record.
df.update(breakdown)
# NOTE(review): pop() mutates `df`, so re-running this cell without re-reading
# the file will presumably fail on the now-missing 'answers' key.

In [40]:
# Build a one-row frame from the remaining top-level fields, then transpose it
# so each field is displayed on its own row.
df_summary = pd.DataFrame.from_dict( {k:[v] for k,v in df.items()}).T
df_summary

Unnamed: 0,0
id,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
user_id,dacc397e-1c32-4a5b-b446-827a6c7d4e9f
timestamp,2025-04-12T16:46:35.654508
sex,Male
overall_score,5
interpretation,"Reassuring result, suggesting that you have a ..."
psychological,5
physiological,-2
physical,2


In [41]:
# Transpose so that each question becomes a row.
# assumes `answers` maps question number -> dict of fields (text, answer, ...) — TODO confirm
df_answers = pd.DataFrame.from_dict(answers).T
# Tag every answer row with the assessment id.
df_answers['id'] = df['id']
df_answers

Unnamed: 0,text,answer,value,unit,category,score,id
1,Please enter your age,29,29.0,years,info,,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
2,What is your main exercise type?,Cycling,,,info,0.0,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
3,How many hours of exercise do you perform each...,11-15,,,info,0.0,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
4,How do you feel if you have to miss exercise/t...,Worried or anxious,,,info,0.0,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
5,Please enter your current height (in metres e....,1.83,1.83,m,info,,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
6,Please enter your current weight (in kg),71,71.0,kg,info,,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
7,Please enter your lowest weight for your curre...,71,71.0,kg,info,,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
8,How often do you weight yourself per week?,1 to 6 times,,,psychological,0.0,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
9,"As an indication of your hormone levels, how o...",2 or 3 times,,,physiological,-1.0,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9
10,During the last year how many days off exercis...,0 to 6 days,,,physical,0.0,d3d526d8-dab7-4e25-a178-ddeeb1cd0ba9


### Testing Write to AWS s3

In [3]:
# Candidate assessment ids for the write test.
# NOTE: the second assignment overrides the first, so only the last id is used.
assessment_id = "ba3da40b-5c52-46bd-8209-8ab0e448bba1"
assessment_id = "759df116-d5a3-4aec-b9e0-725891cef1d7"


In [12]:
# Open a local assessment
# with open(f"data/{user_id}/{assessment_id}.json",'r') as file:
#     report = json.load(file)
# report

In [10]:

class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serialises date/time objects as ISO 8601 strings.

    Pass it as ``cls`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        ``datetime.datetime``, ``datetime.date`` and ``datetime.time`` values
        are converted with their ``isoformat()`` method. Any other object is
        passed to the base implementation, which raises ``TypeError``.
        """
        # datetime.datetime is a subclass of datetime.date; it is listed
        # explicitly only for readability.
        if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
            return obj.isoformat()
        return super().default(obj)

def datetime_decoder(json_dict):
    """Object hook that turns an ISO-format ``"timestamp"`` string into a datetime.

    Only the ``"timestamp"`` key is examined. A string value that parses with
    ``datetime.datetime.fromisoformat`` is replaced in place; an unparsable
    string or a non-string value is left untouched. The same dict is returned.
    """
    raw = json_dict.get("timestamp")
    if not isinstance(raw, str):
        return json_dict
    try:
        parsed = datetime.datetime.fromisoformat(raw)
    except ValueError:
        # Best effort: keep the original string when it is not ISO formatted.
        return json_dict
    json_dict["timestamp"] = parsed
    return json_dict

In [11]:
# Assessment payload to upload.
# NOTE(review): `report` is only loaded in a commented-out cell, so it must be
# defined before this cell runs (the notebook output records a NameError for it).
assessment_data = report

AWS_DIR = Path("peaq-streamlit/data")
#AWS_user_dir = AWS_DIR / user_id
# S3 keys always use forward slashes. as_posix() keeps the key correct on
# Windows too, where the native string form of a Path uses backslashes.
file_path = (AWS_DIR / f"{assessment_id}.json").as_posix()
with conn.open(file_path, 'w') as f:
    # DateTimeEncoder serialises datetime values that plain json cannot handle.
    json.dump(assessment_data, f, cls=DateTimeEncoder, indent=4)


In [17]:
# Simple test: write the report as compact JSON to a fixed key in the bucket.
# NOTE(review): requires `report` to have been defined by an earlier cell.
with conn.open("peaq-streamlit/text.json",'w') as f:
    f.write(json.dumps(report))

In [10]:
# Display the report currently held in memory (raises NameError if no cell has defined it).
report

NameError: name 'report' is not defined

In [42]:
# Show the notebook's current working directory (shell command).
!pwd

/Users/Gavin/Gavin/Jupyter/Projects/PEAQ
