### General import

In [76]:
# IMPORTS

import pandas as pd
import numpy as np
import json
from datetime import datetime

### Export poids (weight.json)

In [77]:
# Define Constants
WEIGHT_DF_COLUMN_DATETIME='date'
WEIGHT_DF_COLUMN_WEIGHT='poids'
WEIGHT_FILE_PATH='../data/poids.csv'

WEIGHT_JSON_FIELD_DATE='dates'
WEIGHT_JSON_FIELD_WEIGHT='values'
WEIGHT_JSON_FILE_PATH='../docs/data/weight.json'

In [78]:
# Loading weight data
df_weight=pd.read_csv(WEIGHT_FILE_PATH, sep=';', parse_dates=[WEIGHT_DF_COLUMN_DATETIME])

In [79]:
# Conversion Poids
df_json=pd.DataFrame()
df_json[WEIGHT_JSON_FIELD_DATE]=df_weight[[WEIGHT_DF_COLUMN_DATETIME]].apply(lambda x : x.iloc[0].strftime("%Y-%m-%d"), axis=1)
df_json[WEIGHT_JSON_FIELD_WEIGHT]=df_weight[[WEIGHT_DF_COLUMN_WEIGHT]]
# replace Nan with None, so it will be replaced by null in json
df_json = df_json.replace(np.nan, None)
#df_json

In [80]:
# OUTPUT 
weight_data_object = {
  WEIGHT_JSON_FIELD_DATE: df_json[WEIGHT_JSON_FIELD_DATE].to_list(),
  WEIGHT_JSON_FIELD_WEIGHT: df_json[WEIGHT_JSON_FIELD_WEIGHT].to_list(),
}

with open(WEIGHT_JSON_FILE_PATH, 'w', encoding='utf-8') as f:
    json.dump(weight_data_object, f, indent=4)

# Print
# data_json = json.dumps(weight_data_object)
# print(data_json)

### Export propreté

In [81]:
# Define Constants
POOP_DF_COLUMN_DATE='date'
POOP_DF_COLUMN_DATETIME='datetime'
POOP_DF_COLUMN_HOUR='heure'
POOP_DF_COLUMN_DATETIME_OF_DAY='datetime_ofday'
POOP_DF_COLUMN_NATURE='nature'
POOP_DF_VALUE_NATURE_POOP='caca'
POOP_DF_COLUMN_LOCATION='lieu'
POOP_DF_VALUE_LOCATION_IN='in'

POOP_JSON_FIELD_DATE='dates'
POOP_JSON_FIELD_DAY_SUCCESS_RATE='day_success_rate'
POOP_JSON_FIELD_DAY_NB_FAILURE='day_nb_failure'
POOP_JSON_FIELD_NIGHT_NB_FAILURE='night_nb_failure'
POOP_JSON_FILE_PATH='../docs/data/poop.json'

In [82]:
# Loading poop data
# data consolidation: Computing poop date and hour of day as datetime
df_poop=pd.read_csv('../data/poop.csv',sep=';')
df_poop[POOP_DF_COLUMN_DATETIME]=df_poop[[POOP_DF_COLUMN_DATE, POOP_DF_COLUMN_HOUR]].apply(lambda x : datetime.strptime('{}T{}'.format(x.iloc[0],x.iloc[1]), "%Y-%m-%dT%H:%M"), axis=1)
_today=datetime.now().strftime("%Y-%m-%d")
df_poop[POOP_DF_COLUMN_DATETIME_OF_DAY]=df_poop[[POOP_DF_COLUMN_HOUR]].apply(lambda x : datetime.strptime('{}T{}'.format(_today,x.iloc[0]), "%Y-%m-%dT%H:%M"), axis=1)

In [83]:
# filtering during the day data
_start_of_day=datetime.now().replace(hour=6, minute=30, second=0)
_end_of_day=datetime.now().replace(hour=21, minute=0, second=0)

df_poop_day=df_poop[df_poop.apply(lambda x: (x[POOP_DF_COLUMN_DATETIME_OF_DAY] > _start_of_day) & (x[POOP_DF_COLUMN_DATETIME_OF_DAY] < _end_of_day), axis=1)]
df_poop_day_efficiency=df_poop_day.groupby(POOP_DF_COLUMN_DATE).size().to_frame('total').reset_index()
# formule compliquée mais qui permet de compter les jours à zero
df_poop_day_efficiency['success']=df_poop_day.groupby([POOP_DF_COLUMN_DATE, POOP_DF_COLUMN_LOCATION]).size().to_frame('success').unstack(fill_value=0).stack()\
    .query(POOP_DF_COLUMN_LOCATION + ' !=  "' + POOP_DF_VALUE_LOCATION_IN + '"')\
    .groupby(POOP_DF_COLUMN_DATE).sum().reset_index()['success']
df_poop_day_efficiency[POOP_JSON_FIELD_DAY_SUCCESS_RATE]=round(df_poop_day_efficiency['success'] * 100 / df_poop_day_efficiency['total'], 1)
df_poop_day_efficiency[POOP_JSON_FIELD_DAY_NB_FAILURE]=df_poop_day_efficiency['total'] - df_poop_day_efficiency['success']

df_poop_night=df_poop[df_poop.apply(lambda x: (x[POOP_DF_COLUMN_DATETIME_OF_DAY] <= _start_of_day) | (x[POOP_DF_COLUMN_DATETIME_OF_DAY] >= _end_of_day), axis=1)]
df_poop_night_efficiency=df_poop_night.groupby(POOP_DF_COLUMN_DATE).size().to_frame('total').reset_index()
# formule compliquée mais qui permet de compter les jours à zero
df_poop_night_efficiency[POOP_JSON_FIELD_NIGHT_NB_FAILURE]=df_poop_night.groupby([POOP_DF_COLUMN_DATE, POOP_DF_COLUMN_LOCATION]).size().to_frame(POOP_JSON_FIELD_NIGHT_NB_FAILURE).unstack(fill_value=0).stack()\
    .query(POOP_DF_COLUMN_LOCATION + ' ==  "' + POOP_DF_VALUE_LOCATION_IN + '"')\
    .reset_index()[POOP_JSON_FIELD_NIGHT_NB_FAILURE]


  df_poop_day_efficiency['success']=df_poop_day.groupby([POOP_DF_COLUMN_DATE, POOP_DF_COLUMN_LOCATION]).size().to_frame('success').unstack(fill_value=0).stack()\
  df_poop_night_efficiency[POOP_JSON_FIELD_NIGHT_NB_FAILURE]=df_poop_night.groupby([POOP_DF_COLUMN_DATE, POOP_DF_COLUMN_LOCATION]).size().to_frame(POOP_JSON_FIELD_NIGHT_NB_FAILURE).unstack(fill_value=0).stack()\


In [84]:
# to Json
# OUTPUT 
poop_data_object = {
  POOP_JSON_FIELD_DATE: df_poop_day_efficiency[POOP_DF_COLUMN_DATE].to_list(),
  POOP_JSON_FIELD_DAY_SUCCESS_RATE: df_poop_day_efficiency[POOP_JSON_FIELD_DAY_SUCCESS_RATE].to_list(),
  POOP_JSON_FIELD_DAY_NB_FAILURE: df_poop_day_efficiency[POOP_JSON_FIELD_DAY_NB_FAILURE].to_list(),
  POOP_JSON_FIELD_NIGHT_NB_FAILURE: df_poop_night_efficiency[POOP_JSON_FIELD_NIGHT_NB_FAILURE].to_list(),
}

with open(POOP_JSON_FILE_PATH, 'w', encoding='utf-8') as f:
    json.dump(poop_data_object, f, indent=4)

# Print
#data_json = json.dumps(data_object)
#print(data_json)