In [2]:
from fitnick.base.base import get_df_from_db, create_spark_session
from pyspark.sql import functions as F

In [3]:
spark_session = create_spark_session()
df = get_df_from_db(spark_session=spark_session, database='fitbit', schema='heart', table='daily')

In [4]:
df.count()

160

In [5]:
df.show()

+------------+----------+----------+----------+------------------+
|        type|   minutes|      date|  calories|resting_heart_rate|
+------------+----------+----------+----------+------------------+
|Out of Range|1307.00000|2020-08-12|2316.92384|                64|
|    Fat Burn| 111.00000|2020-08-12| 803.28542|                64|
|      Cardio|   1.00000|2020-08-12|  10.68788|                64|
|        Peak|   0.00000|2020-08-12|   0.00000|                64|
|Out of Range|1256.00000|2020-08-16|2271.26376|                66|
|    Fat Burn|  97.00000|2020-08-16| 667.78308|                66|
|      Cardio|   0.00000|2020-08-16|   0.00000|                66|
|        Peak|   0.00000|2020-08-16|   0.00000|                66|
|Out of Range|  12.00000|2020-09-17|  18.83448|                 0|
|    Fat Burn|   0.00000|2020-09-17|   0.00000|                 0|
|      Cardio|   0.00000|2020-09-17|   0.00000|                 0|
|        Peak|   0.00000|2020-09-17|   0.00000|               

In [6]:
# given the above df, how would you show total calories burned for each date?
agg_df = df.groupBy(F.col('date')).agg(F.sum('calories')).alias('calories')
agg_df.orderBy('date').show(agg_df.count())

+----------+-------------+
|      date|sum(calories)|
+----------+-------------+
|2020-08-10|   2438.68243|
|2020-08-11|   2663.30764|
|2020-08-12|   3130.89714|
|2020-08-13|   2885.06340|
|2020-08-14|   3237.32772|
|2020-08-15|   3301.11950|
|2020-08-16|   2939.04684|
|2020-08-17|   2935.40828|
|2020-08-18|   2907.24035|
|2020-08-19|   3127.92480|
|2020-08-20|   3005.10044|
|2020-08-21|   3126.66437|
|2020-08-22|   2934.32613|
|2020-08-23|   3024.99679|
|2020-08-24|   3145.74174|
|2020-08-25|   2933.78016|
|2020-08-26|   3259.07660|
|2020-08-27|   2786.45880|
|2020-08-28|   2954.24080|
|2020-08-29|   2950.57896|
|2020-08-30|   3213.29694|
|2020-08-31|   3147.11566|
|2020-09-01|   3019.00800|
|2020-09-02|   3204.99759|
|2020-09-03|   3028.44855|
|2020-09-04|   2922.87960|
|2020-09-05|   2839.33824|
|2020-09-06|   2942.80329|
|2020-09-07|   2906.42344|
|2020-09-08|   2866.14761|
|2020-09-09|   2888.65216|
|2020-09-10|   2958.74490|
|2020-09-11|   3021.66144|
|2020-09-12|   3027.38814|
|

In [7]:
import datetime
import pyspark.sql.functions as F

def get_avg_calories_burned_for_period(period=30, compare_to_today=True):
    spark_session = create_spark_session()
    end_date = datetime.date.today() - datetime.timedelta(days=1)  # exclude today's data
    start_date = end_date - datetime.timedelta(days=period)
    df = get_df_from_db(spark_session=spark_session, database='fitbit', schema='heart', table='daily')
    
    filtered_df = df.where(df.date.between(start_date, end_date))
    agg_df = filtered_df.groupBy(F.col('date')).agg(F.sum('calories'))
    avg_for_period = agg_df.select('sum(calories)').agg(F.avg('sum(calories)').alias('calories'))
    
    if compare_to_today:
        todays_sum_calories = df.where(df.date == datetime.date.today()).agg(F.sum('calories').alias('calories')).take(1)[0].calories
        avg_sum_calories = avg_for_period.take(1)[0].calories
        print('TODAY\'S BURNED CALORIES: {}'.format(todays_sum_calories))
        print('{} DAY AVG DAY\'S BURNED CALORIES: {}'.format(period, avg_sum_calories))
        print('\n')
        
        if todays_sum_calories < avg_sum_calories:
            tomorrow = datetime.datetime.now() + datetime.timedelta(1)
            midnight = datetime.datetime(year=tomorrow.year, month=tomorrow.month, day=tomorrow.day, hour=0, minute=0, second=0)
            time_to_midnight = ((midnight - datetime.datetime.now()).seconds / 60) / 60
            calorie_difference = avg_sum_calories - todays_sum_calories
            print("You've burned {} less calories today compared to your {} day average.".format(calorie_difference, period))
            print(
                "There's still {} hours left to go today, so you'll need to burn {} calories per hour, on average.".format(
                time_to_midnight, float(calorie_difference) / time_to_midnight)
            )
            print(f"You'll burn {83.3 * time_to_midnight} just by existing - so the other {float(calorie_difference) - (83.3 * time_to_midnight)} will need to come from exercise!")
        return

    return

get_avg_calories_burned_for_period()

TODAY'S BURNED CALORIES: 18.83448
30 DAY AVG DAY'S BURNED CALORIES: 3012.394556774


You've burned 2993.560076774 less calories today compared to your 30 day average.
There's still 23.537777777777777 hours left to go today, so you'll need to burn 127.18108332215824 calories per hour, on average.
You'll burn 1960.6968888888887 just by existing - so the other 1032.8631878851115 will need to come from exercise!
