# Data Analysis

### 1. Load the Google Sheet worksheet

In [95]:
# Import python library gspread to read the Google Sheet
import gspread

In [96]:
# Build the gspread client from the service-account key file
SERVICE_ACCOUNT_KEY_FILE = "extended-pagoda-419111-6c5d3c5c6b77.json"
sa = gspread.service_account(filename=SERVICE_ACCOUNT_KEY_FILE)

In [97]:
# Titles of the spreadsheet and of the worksheet inside it that gets opened
SPREADSHEET_TITLE = "2024: Body Composition Tracking"
WORKSHEET_TITLE = "Weighings"

# Open the spreadsheet through the sa gspread client, then pick the worksheet
sheet = sa.open(SPREADSHEET_TITLE)
work_sheet = sheet.worksheet(WORKSHEET_TITLE)

In [124]:
# Import pandas to extract the data
import pandas as pd
import numpy as np

# Fetch every record from the worksheet, then load the records into a dataframe
weighing_records = work_sheet.get_all_records()
df = pd.DataFrame(weighing_records)

### 2. Exploratory Data Analysis

In [125]:
# Preview the first 5 rows of the data
df.head(n=5)

Unnamed: 0,Unnamed: 1,DATE,TIME,WEIGHT,BMI,BODY FAT,FAT-FREE BODY WEIGHT,SUB FAT,VIS FAT,BODY WATER,SKE MUSCLE,MUSCLE MASS,BONE MASS,PROTEIN,BMR,AGE
0,,1 Nov,07:03,167.4,24.8,24.1%,127.1,16.2%,9.5,52.0%,52.2%,119.0,6.5,19.9%,1619,38
1,,2 Nov,07:03,166.2,24.6,23.8%,126.6,16.0%,9.4,52.2%,52.4%,118.6,6.6,20.0%,1614,38
2,,9 Nov,07:04,166.4,24.6,23.9%,126.6,16.1%,9.4,52.1%,52.3%,118.6,6.6,20.0%,1615,38
3,,16 Nov,07:04,166.6,24.7,24.0%,126.6,16.1%,9.5,52.1%,52.2%,118.6,6.5,20.0%,1616,38
4,,23 Nov,07:03,165.8,24.6,23.8%,126.3,16.0%,9.4,52.2%,52.4%,118.5,6.6,20.1%,1611,38


In [126]:
# Preview the last 5 rows of the data
df.tail(n=5)

Unnamed: 0,Unnamed: 1,DATE,TIME,WEIGHT,BMI,BODY FAT,FAT-FREE BODY WEIGHT,SUB FAT,VIS FAT,BODY WATER,SKE MUSCLE,MUSCLE MASS,BONE MASS,PROTEIN,BMR,AGE
185,,28 May,00:00,0.0,0.0,0.0%,0.0,0.0%,0.0,0.0%,0.0%,0.0,0.0,0.0%,0,0
186,,29 May,00:00,0.0,0.0,0.0%,0.0,0.0%,0.0,0.0%,0.0%,0.0,0.0,0.0%,0,0
187,,30 May,00:00,0.0,0.0,0.0%,0.0,0.0%,0.0,0.0%,0.0%,0.0,0.0,0.0%,0,0
188,,31 May,00:00,0.0,0.0,0.0%,0.0,0.0%,0.0,0.0%,0.0%,0.0,0.0,0.0%,0,0
189,,1 Jun,00:00,0.0,0.0,0.0%,0.0,0.0%,0.0,0.0%,0.0%,0.0,0.0,0.0%,0,0


In [129]:
# Treat placeholder readings as missing values. The trailing rows shown by
# df.tail() carry "00:00" times, "0.0%" percentages and numeric zeros.
# np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed
# (np.NaN raises AttributeError there).
PLACEHOLDER_VALUES = ["00:00", 0.0, "0.0%", 0]
df = df.replace(PLACEHOLDER_VALUES, np.nan)

# Drop every row that contains at least one missing value.
# Rebinding df (instead of inplace=True) keeps the steps chain-friendly and
# leaves the cell safe to re-run.
df = df.dropna(axis=0)
df.tail()

Unnamed: 0,Unnamed: 1,DATE,TIME,WEIGHT,BMI,BODY FAT,FAT-FREE BODY WEIGHT,SUB FAT,VIS FAT,BODY WATER,SKE MUSCLE,MUSCLE MASS,BONE MASS,PROTEIN,BMR,AGE
125,,29 Mar,07:03,158.8,23.5,22.0%,123.9,14.9%,8.6,53.2%,53.6%,116.0,6.5,20.7%,1571,36
126,,30 Mar,07:05,159.2,23.6,22.1%,124.0,14.9%,8.7,53.2%,53.5%,116.2,6.5,20.7%,1574,38
127,,31 Mar,09:47,160.8,23.8,22.6%,124.5,15.2%,8.9,52.9%,53.2%,116.7,6.4,20.5%,1583,38
128,,1 Apr,07:06,161.2,23.9,22.6%,124.8,15.2%,8.9,52.9%,53.2%,117.0,6.4,20.5%,1585,38
129,,2 Apr,08:27,160.2,23.7,22.4%,124.3,15.1%,8.8,53.0%,53.3%,116.4,6.4,20.6%,1580,38


In [130]:
# Show the dtype pandas holds for every column
column_dtypes = df.dtypes
print(column_dtypes)

                        object
DATE                    object
TIME                    object
WEIGHT                  object
BMI                     object
BODY FAT                object
FAT-FREE BODY WEIGHT    object
SUB FAT                 object
VIS FAT                 object
BODY WATER              object
SKE MUSCLE              object
MUSCLE MASS             object
BONE MASS               object
PROTEIN                 object
BMR                     object
AGE                     object
dtype: object


In [131]:
# Drop the blank-named column and rename every remaining column to lower case.
# errors="ignore" keeps the cell safe to re-run: once the blank column is gone,
# a second drop would otherwise raise KeyError.
df = df.drop(columns="", errors="ignore").rename(columns=str.lower)

In [134]:
# Print a concise summary of the data: index range, each column's non-null
# count and dtype, and the memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 130 entries, 0 to 129
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   date                  130 non-null    object
 1   time                  130 non-null    object
 2   weight                130 non-null    object
 3   bmi                   130 non-null    object
 4   body fat              130 non-null    object
 5   fat-free body weight  130 non-null    object
 6   sub fat               130 non-null    object
 7   vis fat               130 non-null    object
 8   body water            130 non-null    object
 9   ske muscle            130 non-null    object
 10  muscle mass           130 non-null    object
 11  bone mass             130 non-null    object
 12  protein               130 non-null    object
 13  bmr                   130 non-null    object
 14  age                   130 non-null    object
dtypes: object(15)
memory usage: 16.2+ KB


In [105]:
# Convert data types to correct format
# NOTE(review): every conversion below is commented out, so this cell is a
# no-op and all columns keep the object dtype reported by df.info().
# TODO: confirm the intended parsing before re-enabling (the date strings in
# the sheet, e.g. "1 Nov", carry no year).
#df['time']=pd.to_datetime(df['time'])
#df['date']=pd.to_datetime(df['date'])
#df.loc[df['weight'] == '', 'weight'] = 0.0
#df[['weight']] = df[['weight']].astype("float")
# df.head()

In [106]:
# Get a statistical summary of each column
# NOTE(review): the call is commented out, so this cell currently does nothing.
#df.describe()