### General imports

In [1]:
# IMPORTS

import pandas as pd
import numpy as np
import json
from datetime import datetime

### Export poids (weight.json)

In [2]:
# Weight export configuration

# Input: CSV file holding the weight measurements, and the columns read from it
WEIGHT_FILE_PATH = '../data/poids.csv'
WEIGHT_DF_COLUMN_DATETIME = 'date'
WEIGHT_DF_COLUMN_WEIGHT = 'poids'

# Output: JSON file and the keys of the two lists it contains
JSON_FILE_PATH = '../docs/data/weight.json'
JSON_FIELD_DATE = 'dates'
JSON_FIELD_WEIGHT = 'values'

In [3]:
# Read the semicolon-separated weight CSV; the date column is parsed into datetimes
df_weight = pd.read_csv(
    WEIGHT_FILE_PATH,
    sep=';',
    parse_dates=[WEIGHT_DF_COLUMN_DATETIME],
)

In [4]:
# Weight conversion: build the frame to export, with dates formatted as
# YYYY-MM-DD strings next to the weight values.
df_json = pd.DataFrame({
    # Vectorised formatting instead of one Python-level strftime call per row
    JSON_FIELD_DATE: df_weight[WEIGHT_DF_COLUMN_DATETIME].dt.strftime("%Y-%m-%d"),
    JSON_FIELD_WEIGHT: df_weight[WEIGHT_DF_COLUMN_WEIGHT],
})
# Swap missing values for None so they are written as null in the JSON file.
# The frame is cast to object first so None is stored as-is, and where() is used
# because replace(np.nan, None) forward-fills the previous value on pandas < 1.4.
df_json = df_json.astype(object).where(df_json.notna(), None)

In [5]:
# OUTPUT
# The payload holds two parallel lists, one per exported column of df_json
data_object = {
    field: df_json[field].to_list()
    for field in (JSON_FIELD_DATE, JSON_FIELD_WEIGHT)
}

# Serialise with a 4-space indent and write the result to the JSON file
with open(JSON_FILE_PATH, 'w', encoding='utf-8') as f:
    f.write(json.dumps(data_object, indent=4))

# To inspect the payload instead of writing it: print(json.dumps(data_object))

### Export propreté

In [6]:
# Poop data configuration

# Columns read from the CSV
POOP_DF_COLUMN_DATE = 'date'
POOP_DF_COLUMN_HOUR = 'heure'
POOP_DF_COLUMN_NATURE = 'nature'
POOP_DF_COLUMN_LOCATION = 'lieu'

# Columns computed after loading (date + hour, and hour re-anchored on today's date)
POOP_DF_COLUMN_DATETIME = 'datetime'
POOP_DF_COLUMN_DATETIME_OF_DAY = 'datetime_ofday'

# Cell values that are matched against the columns above
POOP_DF_VALUE_NATURE_POOP = 'caca'
POOP_DF_VALUE_LOCATION_IN = 'in'

In [7]:
# Load the poop events and derive two datetime columns from the raw date / hour fields
_POOP_DATETIME_FORMAT = "%Y-%m-%dT%H:%M"


def _to_poop_datetime(day, hour):
    """Parse a day (YYYY-MM-DD) and an hour (HH:MM) into a single datetime."""
    return datetime.strptime('{}T{}'.format(day, hour), _POOP_DATETIME_FORMAT)


df_poop = pd.read_csv('../data/poop.csv', sep=';')

# Full timestamp of each event: its own date combined with its hour
df_poop[POOP_DF_COLUMN_DATETIME] = df_poop[[POOP_DF_COLUMN_DATE, POOP_DF_COLUMN_HOUR]].apply(
    lambda row: _to_poop_datetime(row[POOP_DF_COLUMN_DATE], row[POOP_DF_COLUMN_HOUR]),
    axis=1,
)

# Same hour placed on today's date, so events from different days can be
# compared on their time of day only
_today = datetime.now().strftime("%Y-%m-%d")
df_poop[POOP_DF_COLUMN_DATETIME_OF_DAY] = df_poop[[POOP_DF_COLUMN_HOUR]].apply(
    lambda row: _to_poop_datetime(_today, row[POOP_DF_COLUMN_HOUR]),
    axis=1,
)

In [8]:
# Keep only daytime events, then compute the per-day success rate, where a
# success is an event whose location differs from POOP_DF_VALUE_LOCATION_IN.

# The day bounds are anchored on _today, the same date used to build the
# datetime_ofday column, so the comparison stays valid even if this cell runs
# on a later day than the loading cell. Parsing "HH:MM" leaves no stray
# seconds/microseconds, so both bounds are strictly exclusive.
_start_of_day = datetime.strptime('{}T06:30'.format(_today), "%Y-%m-%dT%H:%M")
_end_of_day = datetime.strptime('{}T21:00'.format(_today), "%Y-%m-%dT%H:%M")

_time_of_day = df_poop[POOP_DF_COLUMN_DATETIME_OF_DAY]
df_poop_day = df_poop[(_time_of_day > _start_of_day) & (_time_of_day < _end_of_day)]

# Count events and successes in one groupby so both columns are aligned by date.
# A day without any success gets 0 instead of shifting later days' counts
# onto the wrong rows.
df_poop_day_efficiency = (
    df_poop_day
    .assign(success=(df_poop_day[POOP_DF_COLUMN_LOCATION] != POOP_DF_VALUE_LOCATION_IN).astype(int))
    .groupby(POOP_DF_COLUMN_DATE)['success']
    .agg(total='size', success='sum')
    .reset_index()
)
# Success rate as a percentage, rounded to one decimal
df_poop_day_efficiency['success_rate'] = (
    df_poop_day_efficiency['success'] * 100 / df_poop_day_efficiency['total']
).round(1)

In [9]:
# JSON payload for the daily success rate: the dates and the matching rates,
# stored under the same two keys as the weight export
data_object = dict(
    zip(
        (JSON_FIELD_DATE, JSON_FIELD_WEIGHT),
        (
            df_poop_day_efficiency[POOP_DF_COLUMN_DATE].to_list(),
            df_poop_day_efficiency['success_rate'].to_list(),
        ),
    )
)

# Display the serialised payload
data_json = json.dumps(data_object)
print(data_json)

{"dates": ["2024-04-04", "2024-04-05", "2024-04-06", "2024-04-07", "2024-04-08", "2024-04-09", "2024-04-10", "2024-04-11", "2024-04-12"], "values": [75.0, 88.9, 100.0, 91.7, 76.5, 100.0, 63.6, 76.9, 92.9]}
