# Necessary Imports

**Create the data frame for Step Counts:**

In [17]:
import pandas as pd

# Location of the Apple Health CSV export
health_data_path = "/content/apple_health_data.csv"  # Replace with your actual file path

# Read the whole export in one pass; low_memory=False avoids the DtypeWarning
# pandas emits for large files with mixed-type columns.
health_df = pd.read_csv(health_data_path, low_memory=False)

# Coerce 'value' to a numeric dtype; entries that cannot be parsed become NaN.
health_df['value'] = pd.to_numeric(health_df['value'], errors='coerce')

# Keep only the step count records, then discard rows whose 'value' is NaN
# (missing in the file or turned into NaN by the coercion above).
step_count_df = (
    health_df
    .loc[health_df['type'].eq('HKQuantityTypeIdentifierStepCount')]
    .dropna(subset=['value'])
)

# Preview the first rows of the step count frame
print("Step Count Data:")
print(step_count_df.head())

Step Count Data:
                                type      sourceName   unit  \
2  HKQuantityTypeIdentifierStepCount  Kadir’s iPhone  count   
3  HKQuantityTypeIdentifierStepCount  Kadir’s iPhone  count   
4  HKQuantityTypeIdentifierStepCount  Kadir’s iPhone  count   
5  HKQuantityTypeIdentifierStepCount  Kadir’s iPhone  count   
6  HKQuantityTypeIdentifierStepCount  Kadir’s iPhone  count   

                creationDate                  startDate  \
2  2023-01-21 22:09:08 +0300  2023-01-21 21:59:00 +0300   
3  2023-01-21 22:39:12 +0300  2023-01-21 22:28:09 +0300   
4  2023-01-22 00:23:38 +0300  2023-01-22 00:13:30 +0300   
5  2023-01-22 01:45:41 +0300  2023-01-22 01:11:31 +0300   
6  2023-01-22 02:06:38 +0300  2023-01-22 01:56:20 +0300   

                     endDate  value  
2  2023-01-21 21:59:18 +0300   31.0  
3  2023-01-21 22:28:12 +0300    8.0  
4  2023-01-22 00:13:35 +0300   13.0  
5  2023-01-22 01:16:19 +0300   36.0  
6  2023-01-22 01:58:50 +0300   87.0  


In [18]:
import pandas as pd
import json

# Path to your Spotify JSON file
spotify_data_path = "/content/spotify_history_all.json"  # Replace with your actual file path

# Load Spotify JSON.
# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which can garble or reject non-ASCII text
# (such as Turkish track names) on systems whose default is not UTF-8.
with open(spotify_data_path, 'r', encoding='utf-8') as f:
    spotify_data = json.load(f)

# Create a DataFrame from the loaded Spotify records
spotify_df = pd.DataFrame(spotify_data)

# Parse the 'ts' strings into datetimes and derive the calendar date.
# NOTE(review): the sample 'ts' values end in 'Z', so 'date' looks like a UTC
# calendar date, while the Apple Health timestamps shown in this notebook carry
# a +0300 offset — confirm the intended timezone before matching the two
# datasets by day.
spotify_df['endTime'] = pd.to_datetime(spotify_df['ts'])
spotify_df['date'] = spotify_df['endTime'].dt.date

# Ensure ms_played is numeric (astype raises if a value cannot be converted)
spotify_df['ms_played'] = spotify_df['ms_played'].astype(float)

# Display the Spotify DataFrame
print("Spotify Data:")
print(spotify_df.head())

Spotify Data:
                     ts                                           platform  \
0  2018-10-07T09:39:49Z        Android OS 5.1.1 API 22 (samsung, SM-J200H)   
1  2018-10-07T10:27:32Z        Android OS 5.1.1 API 22 (samsung, SM-J200H)   
2  2018-10-07T11:32:57Z        Android OS 5.1.1 API 22 (samsung, SM-J200H)   
3  2019-06-11T18:03:49Z  Android-tablet OS 8.0.0 API 26 (General Mobile...   
4  2019-06-11T18:03:54Z  Android-tablet OS 8.0.0 API 26 (General Mobile...   

   ms_played conn_country         ip_addr master_metadata_track_name  \
0    18553.0           TR  178.246.86.252                      Kibir   
1     3229.0           TR  178.246.86.252               Korkma Söyle   
2     1523.0           TR  178.246.86.252               Korkma Söyle   
3    18974.0           TR  178.246.30.204                     Mayday   
4     4758.0           TR  178.246.30.204                   Semt İşi   

  master_metadata_album_artist_name master_metadata_album_album_name  \
0           