<a href="https://colab.research.google.com/github/micheldion/colab-dev01/blob/main/depenses.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
from google.colab import drive
import pandas as pd 
import math
import glob

# Mount Google Drive under /content/gdrive so the expense files can be globbed.
drive.mount('/content/gdrive')

# Tax rates multiplied with a sub-total by computeTPS / computeTVQ.
# NOTE(review): presumably the federal GST (5%) and the Quebec QST (9.975%) — confirm.
TPS = 0.05
TVQ = 0.09975
# Calendar month number that closes the fiscal year: fiscalMonthClosure maps
# the month right after it to fiscal month 1 and this month to fiscal month 12.
FISCAL_MONTH_INDEX = 4

def fiscalMonthClosure(index):
    """Build a converter from calendar month number to fiscal month number.

    ``index`` is the calendar month that closes the fiscal year: the returned
    function maps month ``index + 1`` to 1 and month ``index`` to 12.
    """
    def to_fiscal_month(month):
        # Months after the closing month open the fiscal year.
        if month > index:
            return month - index
        # Every other month wraps around to the end of the fiscal year.
        return (12 - index) + month

    return to_fiscal_month

# Calendar-month -> fiscal-month converter for the configured FISCAL_MONTH_INDEX.
currentFiscalMonth = fiscalMonthClosure(FISCAL_MONTH_INDEX)

def fiscalYear(date, index=None):
    """Return the fiscal-year label (``"YYYY-YYYY"``) that contains ``date``.

    Args:
        date: Any object exposing ``year`` and ``month`` attributes.
        index: Calendar month number that closes the fiscal year. When
            omitted, the module-level ``FISCAL_MONTH_INDEX`` is used so the
            label always agrees with the fiscal-month conversion instead of
            relying on a second hard-coded copy of the closing month.

    Returns:
        ``"<year>-<year + 1>"`` when the month falls after the closing month,
        otherwise ``"<year - 1>-<year>"``.
    """
    if index is None:
        index = FISCAL_MONTH_INDEX
    # First calendar year of the fiscal year the date belongs to.
    start = date.year if date.month > index else date.year - 1
    return str(start) + "-" + str(start + 1)

def round_half_up(n):
    """Round ``n`` to two decimals, ties going up (toward +infinity).

    The computation is ``floor(n * 100 + 0.5) / 100`` carried out in exact
    decimal arithmetic on the shortest decimal representation of ``n``.
    Scaling in binary floating point mis-rounds ties: ``1.005 * 100`` is
    ``100.49999999999999``, which would yield ``1.0`` instead of ``1.01``.

    Args:
        n: A real number (anything ``float()`` accepts).

    Returns:
        The rounded value as a ``float``. NaN and infinities are returned
        unchanged instead of raising, so missing amounts propagate as NaN.
    """
    from decimal import ROUND_FLOOR, Decimal

    value = float(n)
    if not math.isfinite(value):
        return value
    multiplier = 10 ** 2
    # repr() gives the shortest decimal string that round-trips the float,
    # i.e. the amount as it was written ("1.005"), not its binary expansion.
    scaled = Decimal(repr(value)) * multiplier + Decimal("0.5")
    return float(scaled.to_integral_value(rounding=ROUND_FLOOR) / multiplier)

def computeTPS(subTotal):
    """Return ``subTotal * TPS`` rounded with ``round_half_up``."""
    tps_amount = subTotal * TPS
    return round_half_up(tps_amount)

def computeTVQ(subTotal):
    """Return ``subTotal * TVQ`` rounded with ``round_half_up``."""
    tvq_amount = subTotal * TVQ
    return round_half_up(tvq_amount)

def taxePeriod(month):
    """Return the tax period number (1-4) that calendar ``month`` falls in.

    Period 1 covers months 11, 12, 1; period 2 covers 2, 3, 4; period 3
    covers 5, 6, 7; period 4 covers 8, 9, 10.

    Raises:
        Exception: if ``month`` is not one of the twelve month numbers.
    """
    months_by_period = (
        (11, 12, 1),
        (2, 3, 4),
        (5, 6, 7),
        (8, 9, 10),
    )
    for period, months in enumerate(months_by_period, start=1):
        if month in months:
            return period
    raise Exception("Not in range "+str(month))


def process(glob, folder):
    """Build expense tables from the names of the files matching ``folder``.

    Each file name (directory and ``.pdf`` suffix removed) is split on ``_``
    into ``Date``, ``SubTotal``, ``TPS``, ``TVQ`` and ``Desc``. Values that
    cannot be parsed become NaT/NaN and propagate through the derived columns
    instead of aborting the whole run.

    Args:
        glob: Object exposing a ``glob(pattern)`` method; callers in this
            file pass the ``glob`` module.
        folder: Glob pattern handed to ``glob.glob``.

    Returns:
        A ``(summary, details)`` tuple. ``summary`` holds the columns Date,
        FiscalYear, FiscalMonth, TaxePeriod, Day, Desc, SubTotal, TPS, TVQ and
        Total, indexed by Date and sorted on that index. ``details`` is the
        full working frame, including the recomputed taxes and the
        AssertTPS / AssertTVQ / OK check columns.
    """
    fields = ['Date', 'SubTotal', 'TPS', 'TVQ', 'Desc']

    data_files = glob.glob(folder)
    V = [i.split("/")[-1].split(".pdf")[0] for i in data_files]
    print(V)

    # An explicit object Series keeps column 0 present (and `.str` usable)
    # even when no file matched.
    df = pd.DataFrame({0: pd.Series(V, dtype="object")})

    # Split at most len(fields) - 1 times so underscores inside the
    # description stay in Desc instead of producing extra columns.
    parts = df[0].str.split("_", n=len(fields) - 1, expand=True)
    # Guarantee exactly one column per field; missing ones are filled with NaN.
    parts = parts.reindex(columns=range(len(fields)))
    for position, field in enumerate(fields):
        df[field] = parts[position]

    df['Date'] = pd.to_datetime(df['Date'], errors="coerce")
    df['SubTotal'] = pd.to_numeric(df['SubTotal'], errors="coerce")
    df['TPS'] = pd.to_numeric(df['TPS'], errors="coerce")
    df['TVQ'] = pd.to_numeric(df['TVQ'], errors="coerce")

    df['Day'] = df['Date'].dt.day
    df['Month'] = df['Date'].dt.month

    df['Total'] = df.SubTotal + df.TPS + df.TVQ
    # na_action="ignore" leaves coerced NaN amounts as NaN rather than handing
    # them to the rounding helper.
    df['ComputedTPS'] = df.SubTotal.map(computeTPS, na_action="ignore")
    df['ComputedTVQ'] = df.SubTotal.map(computeTVQ, na_action="ignore")
    # A tax of exactly 0 in the file name is accepted without comparison.
    df['AssertTPS'] = (df.ComputedTPS == df.TPS) | (df.TPS == 0)
    df['AssertTVQ'] = (df.ComputedTVQ == df.TVQ) | (df.TVQ == 0)
    df['OK'] = (df.AssertTPS) & (df.AssertTVQ)
    df["FiscalMonth"] = df.Month.map(currentFiscalMonth)
    df['FiscalYear'] = df.Date.map(fiscalYear)
    # taxePeriod raises on anything that is not a month number, so rows with
    # an unparsable date (NaN month) are skipped.
    df['TaxePeriod'] = df.Month.map(taxePeriod, na_action="ignore")

    df2 = df[['Date','FiscalYear', 'FiscalMonth', 'TaxePeriod','Day','Desc','SubTotal','TPS','TVQ','Total']]
    return df2.set_index(df2.Date).sort_index(),df


def totalByMonth(glob, folder):
    """Sum Total, TPS and TVQ per calendar month, indexed by fiscal month.

    Args:
        glob: Object exposing a ``glob(pattern)`` method, forwarded to
            ``process``.
        folder: Glob pattern forwarded to ``process``.

    Returns:
        A DataFrame with the monthly sums of ``Total``, ``TPS`` and ``TVQ``
        plus ``year`` and ``Month`` columns taken from the month-end index.
        The index is replaced by the fiscal month number and named
        ``FiscalMonth``.
    """
    t = process(glob, folder)[0]
    # Several columns must be selected with a list: selecting with a bare
    # tuple (['Total','TPS','TVQ']) was removed in pandas 2.0 and raises.
    # NOTE(review): recent pandas versions prefer the 'ME' alias over 'M';
    # 'M' is kept for compatibility with older versions.
    t = t.groupby(pd.Grouper(freq='M'))[['Total', 'TPS', 'TVQ']].sum()
    t['year'] = t.index.year
    t['Month'] = t.index.month

    t.index = t.index.month.map(currentFiscalMonth)
    t.index.names = ['FiscalMonth']
    return t

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Load the expense table (first element of the pair returned by process).
a = process(glob, "/content/gdrive/My Drive/datazentrik/déboursés/2*/*.*")[0]
a
# Rows whose FiscalMonth equals 7 (the value the notebook displays for this cell).
a[a['FiscalMonth']==7]

In [None]:
# Sub-totals in ascending order.
a.SubTotal.sort_values()

In [None]:
# Monthly sums indexed by fiscal month; rebinds `a`.
a = totalByMonth(glob, "/content/gdrive/My Drive/datazentrik/déboursés/2*/*.*")
a

In [None]:
# Month groups: | May, June, July | Aug, Sept, Oct | Nov, Dec, Jan | Feb, March, April |

t = process(glob, "/content/gdrive/My Drive/datazentrik/déboursés/2*/*.*")[0]

# Disabled: sums per tax period (columns selected with a list, as pandas 2.0+ requires).
#t = t.groupby("TaxePeriod")[['Total','TPS','TVQ']].sum()
t