## Oura Ring Daily Activity Data Cleaning
### Daily Summary Dataset

In [4]:
import json
import pandas as pd
import os

In [5]:
# Output column order for the daily summary table. The last six names are
# read from each entry's nested "contributors" dictionary; the rest are read
# from the entry itself.
columns = [
    # identification and overall score
    "id",
    "day",
    "score",
    # calories / MET summary
    "active_calories",
    "average_met_minutes",
    # time spent per activity level
    "high_activity_time",
    "medium_activity_time",
    "low_activity_time",
    # MET minutes per activity level
    "high_activity_met_minutes",
    "medium_activity_met_minutes",
    "low_activity_met_minutes",
    # movement and targets
    "inactivity_alerts",
    "steps",
    "target_calories",
    "meters_to_target",
    "total_calories",
    "equivalent_walking_distance",
    # non-wear, rest and sedentary figures
    "non_wear_time",
    "resting_time",
    "sedentary_met_minutes",
    "sedentary_time",
    # fields taken from the "contributors" dictionary
    "meet_daily_targets",
    "move_every_hour",
    "recovery_time",
    "stay_active",
    "training_frequency",
    "training_volume",
]

#### UL

In [6]:
# Participant identifiers for the UL group. Each one is substituted into the
# "daily_activity_<id>.json" file name when the group's files are read.
user_ids_UL = [
    'UL_0201',
    'UL_0225',
    'UL_0352',
    'UL_0417',
    'UL_0422',
    'UL_0480',
    'UL_1086',
    'UL_1184',
    'UL_9900',
]

In [7]:
# Directory holding the per-user daily activity JSON files for the UL group;
# the combined CSV is written back into the same directory.
directory_UL = '/Users/katherinetian/Downloads/Daily Activity/UL/'

# Fields stored inside each entry's nested "contributors" dictionary rather
# than at the top level of the entry.
contributor_fields = [
    "meet_daily_targets", "move_every_hour", "recovery_time",
    "stay_active", "training_frequency", "training_volume"
]

# Every other name in `columns` is read directly from the entry.
top_level_fields = [
    field for field in columns
    if field != "user_id" and field not in contributor_fields
]

# `columns` does not list "user_id". Passing it unchanged to pd.DataFrame
# selects only the listed columns and would drop the participant identifier
# from the CSV, so it is placed explicitly as the first output column.
output_columns = ["user_id"] + [field for field in columns if field != "user_id"]

# One dict per (user, day) entry, accumulated across all UL users
all_records = []

# Process each UL user ID
for user_id in user_ids_UL:
    file_name = f"daily_activity_{user_id}.json"
    file_path = os.path.join(directory_UL, file_name)

    # Best effort: a file that cannot be read or parsed is reported and
    # skipped so the remaining users are still exported.
    try:
        with open(file_path) as f:
            data = json.load(f)

        # `or []` / `or {}` also cover keys that are present but null, which
        # would otherwise raise and discard the rest of this user's entries.
        for entry in data.get("data") or []:
            record = {"user_id": user_id}
            record.update({field: entry.get(field) for field in top_level_fields})

            contributors = entry.get("contributors") or {}
            record.update({field: contributors.get(field) for field in contributor_fields})

            all_records.append(record)

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Create a DataFrame from all records and save as a single CSV file
combined_df = pd.DataFrame(all_records, columns=output_columns)
combined_csv_path = os.path.join(directory_UL, "daily_activity_UL.csv")
combined_df.to_csv(combined_csv_path, index=False)
print(f"All user data combined and saved: {combined_csv_path}")


All user data combined and saved: /Users/katherinetian/Downloads/Daily Activity/UL/daily_activity_UL.csv


In [8]:
# Columns for the class_5_min table. Kept under its own name so the
# daily-summary `columns` list defined earlier in the notebook is not
# overwritten; later cells still read that list.
class_5_min_columns = ["user_id", "day", "class_5_min"]

# One record per 5-minute interval, accumulated across all UL users
class_5_min_records = []

# Process each UL user ID
for user_id in user_ids_UL:
    file_name = f"daily_activity_{user_id}.json"
    file_path = os.path.join(directory_UL, file_name)

    # Best effort: a file that cannot be read or parsed is reported and
    # skipped so the remaining users are still exported.
    try:
        with open(file_path) as f:
            data = json.load(f)

        # Extract the class_5_min data and day (date) for each entry
        for entry in data.get("data", []):
            day = entry.get("day", None)
            class_5_min = entry.get("class_5_min", None)

            # Entries without a day or without any classifications add no rows
            if day and class_5_min:
                # class_5_min is iterated one element at a time; each element
                # becomes one row. Non-digit elements are stored as None.
                for activity_class in class_5_min:
                    record = {
                        "user_id": user_id,
                        "day": day,
                        "class_5_min": int(activity_class) if activity_class.isdigit() else None
                    }
                    class_5_min_records.append(record)

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Create a DataFrame from the records and save as a CSV file
class_5_min_df = pd.DataFrame(class_5_min_records, columns=class_5_min_columns)
class_5_min_csv_path = os.path.join(directory_UL, "activity_intraday_UL.csv")
class_5_min_df.to_csv(class_5_min_csv_path, index=False)
print(f"Class 5-minute activity data saved: {class_5_min_csv_path}")

Class 5-minute activity data saved: /Users/katherinetian/Downloads/Daily Activity/UL/activity_intraday_UL.csv


In [9]:
# Output schema for the MET table: one row per MET measurement
met_intraday_columns = ["user_id", "datetime", "met_intraday"]

# Rows accumulated across every UL user
met_intraday_records = []

# Walk through the daily activity file of each UL user
for user_id in user_ids_UL:
    file_path = os.path.join(directory_UL, f"daily_activity_{user_id}.json")

    # A file that cannot be read or processed is reported and skipped
    try:
        with open(file_path) as f:
            data = json.load(f)

        for entry in data.get("data", []):
            # The "met" dictionary carries the sampling interval (seconds),
            # the timestamp of the first value, and the list of values.
            met_data = entry.get("met", {})
            interval = met_data.get("interval", None)
            start_timestamp = met_data.get("timestamp", None)
            items = met_data.get("items", [])

            # Nothing to emit unless all three pieces are present
            if not (start_timestamp and interval and items):
                continue

            try:
                start_datetime = pd.to_datetime(start_timestamp)

                # The k-th value is stamped k intervals after the start
                for position, met_value in enumerate(items):
                    met_intraday_records.append({
                        "user_id": user_id,
                        "datetime": start_datetime + pd.Timedelta(seconds=interval * position),
                        "met_intraday": met_value
                    })

            except Exception as e:
                print(f"Error processing MET timestamp for participant {user_id}: {e}")

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Assemble the table and write it next to the source files
met_intraday_df = pd.DataFrame(met_intraday_records, columns=met_intraday_columns)
met_intraday_csv_path = os.path.join(directory_UL, "met_intraday_activity_UL.csv")
met_intraday_df.to_csv(met_intraday_csv_path, index=False)
print(f"MET intraday data combined and saved: {met_intraday_csv_path}")

MET intraday data combined and saved: /Users/katherinetian/Downloads/Daily Activity/UL/met_intraday_activity_UL.csv


#### SA

In [10]:
# Participant identifiers for the SA group. Each one is substituted into the
# "daily_activity_<id>.json" file name when the group's files are read.
user_ids_SA = [
    'SA_0071',
    'SA_0148',
    'SA_0243',
    'SA_0528',
    'SA_0585',
    'SA_0721',
    'SA_0762',
    'SA_0811',
    'SA_0820',
    'SA_1173',
    'SA_1207',
    'SA_1368',
]

In [11]:
# Directory holding the per-user daily activity JSON files for the SA group;
# the combined CSV is written back into the same directory.
directory_SA = '/Users/katherinetian/Downloads/Daily Activity/SA/'

# Fields read from the top level of each daily activity entry. They are
# spelled out in this cell so the extraction does not depend on whatever the
# generic name `columns` happens to hold at the time the cell runs.
daily_fields = [
    "id", "day", "score", "active_calories", "average_met_minutes",
    "high_activity_time", "medium_activity_time", "low_activity_time",
    "high_activity_met_minutes", "medium_activity_met_minutes", "low_activity_met_minutes",
    "inactivity_alerts", "steps", "target_calories", "meters_to_target",
    "total_calories", "equivalent_walking_distance", "non_wear_time", "resting_time",
    "sedentary_met_minutes", "sedentary_time"
]

# Fields stored inside each entry's nested "contributors" dictionary
contributor_fields = [
    "meet_daily_targets", "move_every_hour", "recovery_time",
    "stay_active", "training_frequency", "training_volume"
]

# "user_id" is listed explicitly: pd.DataFrame keeps only the columns it is
# given, so omitting it would drop the participant identifier from the CSV.
output_columns = ["user_id"] + daily_fields + contributor_fields

# One dict per (user, day) entry, accumulated across all SA users
all_records = []

# Process each SA user ID
for user_id in user_ids_SA:
    file_name = f"daily_activity_{user_id}.json"
    file_path = os.path.join(directory_SA, file_name)

    # Best effort: a file that cannot be read or parsed is reported and
    # skipped so the remaining users are still exported.
    try:
        with open(file_path) as f:
            data = json.load(f)

        # `or []` / `or {}` also cover keys that are present but null, which
        # would otherwise raise and discard the rest of this user's entries.
        for entry in data.get("data") or []:
            record = {"user_id": user_id}
            record.update({field: entry.get(field) for field in daily_fields})

            contributors = entry.get("contributors") or {}
            record.update({field: contributors.get(field) for field in contributor_fields})

            all_records.append(record)

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Create a DataFrame from all records and save as a single CSV file
combined_df = pd.DataFrame(all_records, columns=output_columns)
combined_csv_path = os.path.join(directory_SA, "daily_activity_SA.csv")
combined_df.to_csv(combined_csv_path, index=False)
print(f"All user data combined and saved: {combined_csv_path}")


All user data combined and saved: /Users/katherinetian/Downloads/Daily Activity/SA/daily_activity_SA.csv


In [12]:
# Columns for the class_5_min table. Kept under its own name so the
# daily-summary `columns` list defined earlier in the notebook is not
# overwritten by this cell.
class_5_min_columns = ["user_id", "day", "class_5_min"]

# One record per 5-minute interval, accumulated across all SA users
class_5_min_records = []

# Process each SA user ID
for user_id in user_ids_SA:
    file_name = f"daily_activity_{user_id}.json"
    file_path = os.path.join(directory_SA, file_name)

    # Best effort: a file that cannot be read or parsed is reported and
    # skipped so the remaining users are still exported.
    try:
        with open(file_path) as f:
            data = json.load(f)

        # Extract the class_5_min data and day (date) for each entry
        for entry in data.get("data", []):
            day = entry.get("day", None)
            class_5_min = entry.get("class_5_min", None)

            # Entries without a day or without any classifications add no rows
            if day and class_5_min:
                # class_5_min is iterated one element at a time; each element
                # becomes one row. Non-digit elements are stored as None.
                for activity_class in class_5_min:
                    record = {
                        "user_id": user_id,
                        "day": day,
                        "class_5_min": int(activity_class) if activity_class.isdigit() else None
                    }
                    class_5_min_records.append(record)

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Create a DataFrame from the records and save as a CSV file
class_5_min_df = pd.DataFrame(class_5_min_records, columns=class_5_min_columns)
class_5_min_csv_path = os.path.join(directory_SA, "activity_intraday_SA.csv")
class_5_min_df.to_csv(class_5_min_csv_path, index=False)
print(f"Class 5-minute activity data saved: {class_5_min_csv_path}")

Class 5-minute activity data saved: /Users/katherinetian/Downloads/Daily Activity/SA/activity_intraday_SA.csv


In [13]:
# Output schema for the MET table: one row per MET measurement
met_intraday_columns = ["user_id", "datetime", "met_intraday"]

# Rows accumulated across every SA user
met_intraday_records = []

# Walk through the daily activity file of each SA user
for user_id in user_ids_SA:
    file_path = os.path.join(directory_SA, f"daily_activity_{user_id}.json")

    # A file that cannot be read or processed is reported and skipped
    try:
        with open(file_path) as f:
            data = json.load(f)

        for entry in data.get("data", []):
            # The "met" dictionary carries the sampling interval (seconds),
            # the timestamp of the first value, and the list of values.
            met_data = entry.get("met", {})
            interval = met_data.get("interval", None)
            start_timestamp = met_data.get("timestamp", None)
            items = met_data.get("items", [])

            # Nothing to emit unless all three pieces are present
            if not (start_timestamp and interval and items):
                continue

            try:
                start_datetime = pd.to_datetime(start_timestamp)

                # The k-th value is stamped k intervals after the start
                for position, met_value in enumerate(items):
                    met_intraday_records.append({
                        "user_id": user_id,
                        "datetime": start_datetime + pd.Timedelta(seconds=interval * position),
                        "met_intraday": met_value
                    })

            except Exception as e:
                print(f"Error processing MET timestamp for participant {user_id}: {e}")

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Assemble the table and write it next to the source files
met_intraday_df = pd.DataFrame(met_intraday_records, columns=met_intraday_columns)
met_intraday_csv_path = os.path.join(directory_SA, "met_intraday_activity_SA.csv")
met_intraday_df.to_csv(met_intraday_csv_path, index=False)
print(f"MET intraday data combined and saved: {met_intraday_csv_path}")

MET intraday data combined and saved: /Users/katherinetian/Downloads/Daily Activity/SA/met_intraday_activity_SA.csv


In [14]:
# Per-group daily summary CSVs (inputs) and the stacked result (output)
sa_file_path = '/Users/katherinetian/Downloads/Daily Activity/SA/daily_activity_SA.csv'
ul_file_path = '/Users/katherinetian/Downloads/Daily Activity/UL/daily_activity_UL.csv'
combined_csv_path = '/Users/katherinetian/Downloads/Daily Activity/combined_daily_activity.csv'

# Load both groups, SA first so its rows come before UL's in the output
sa_df, ul_df = (pd.read_csv(csv_path) for csv_path in (sa_file_path, ul_file_path))

# Stack the rows, renumber the index from zero, and write without the index
combined_df = pd.concat([sa_df, ul_df], ignore_index=True)
combined_df.to_csv(combined_csv_path, index=False)

print(f"SA and UL data combined and saved: {combined_csv_path}")


SA and UL data combined and saved: /Users/katherinetian/Downloads/Daily Activity/combined_daily_activity.csv


In [15]:
# Per-group 5-minute classification CSVs (inputs) and the stacked result (output)
sa_file_path = '/Users/katherinetian/Downloads/Daily Activity/SA/activity_intraday_SA.csv'
ul_file_path = '/Users/katherinetian/Downloads/Daily Activity/UL/activity_intraday_UL.csv'
combined_csv_path = '/Users/katherinetian/Downloads/Daily Activity/combined_activity_intraday.csv'

# Load both groups, SA first so its rows come before UL's in the output
sa_df, ul_df = (pd.read_csv(csv_path) for csv_path in (sa_file_path, ul_file_path))

# Stack the rows, renumber the index from zero, and write without the index
combined_df = pd.concat([sa_df, ul_df], ignore_index=True)
combined_df.to_csv(combined_csv_path, index=False)

print(f"SA and UL data combined and saved: {combined_csv_path}")

SA and UL data combined and saved: /Users/katherinetian/Downloads/Daily Activity/combined_activity_intraday.csv


In [16]:
# Per-group MET CSVs (inputs) and the stacked result (output)
sa_file_path = '/Users/katherinetian/Downloads/Daily Activity/SA/met_intraday_activity_SA.csv'
ul_file_path = '/Users/katherinetian/Downloads/Daily Activity/UL/met_intraday_activity_UL.csv'
combined_csv_path = '/Users/katherinetian/Downloads/Daily Activity/met_intraday_activity.csv'

# Load both groups, SA first so its rows come before UL's in the output
sa_df, ul_df = (pd.read_csv(csv_path) for csv_path in (sa_file_path, ul_file_path))

# Stack the rows, renumber the index from zero, and write without the index
combined_df = pd.concat([sa_df, ul_df], ignore_index=True)
combined_df.to_csv(combined_csv_path, index=False)

print(f"SA and UL data combined and saved: {combined_csv_path}")

SA and UL data combined and saved: /Users/katherinetian/Downloads/Daily Activity/met_intraday_activity.csv
