In [122]:
# Importing libraries
import pandas as pd
import numpy as np
import openpyxl
from openpyxl import load_workbook
from datetime import datetime
import os
import re

## Step 1: Data Loading

In [123]:
# This code will be used in order to hide sensitive information according to the GDPR
file = 'val.xlsx'
df1 = pd.read_excel(file)
# Accessing the wanted information
seller = df1.at[0, 'Unnamed: 1']

# Substitute the middle part of the CPF with asterisks
seller = re.sub(r'(\d{3})\.\d{3}\.\d{3}-(\d{2})', r'\1.***.***-\2', seller)
df1.at[0, 'Unnamed: 1'] = seller
# Assign the modified seller back to the dataframe
df1['SELLER'] = seller
df1.head(15)

Unnamed: 0,RELATÓRIO DE COMISSÕES,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,SELLER
0,Parceiro:,VALERIA MENDES (172.***.***-31),,,,,VALERIA MENDES (172.***.***-31)
1,Comissão:,10% (De 03/04/2023 até 15/04/2024),,,,,VALERIA MENDES (172.***.***-31)
2,Contato:,valeria@maxbot.com.br,,,,,VALERIA MENDES (172.***.***-31)
3,Período:,De 01/01/2018 até 30/06/2024,,,,,VALERIA MENDES (172.***.***-31)
4,Total:,R$ 18738.36,,,,,VALERIA MENDES (172.***.***-31)
5,DATA PAGTO,,CLIENTE,PRODUTO,VALOR PAGO,VALOR COMISSÃO,VALERIA MENDES (172.***.***-31)
6,,,,,,,VALERIA MENDES (172.***.***-31)
7,13/01/2022,eliasabdo,ELIAS ABDO,ATD,85.6,0,VALERIA MENDES (172.***.***-31)
8,13/01/2022,eliasabdo,ELIAS ABDO,ATD,80,0,VALERIA MENDES (172.***.***-31)
9,05/02/2022,eliasabdo,ELIAS ABDO,ATD,207,0,VALERIA MENDES (172.***.***-31)


In [124]:
#df1.info()
#df1.summary()

## Step 2: Data Transformation

In [125]:
# Creating a new column with the seller's name information
seller = df1.at[0, 'Unnamed: 1'] #accessing the wanted information
df1['SELLER'] = seller

#Creating a new column with the evaluated period
period = df1.at[3, 'Unnamed: 1']
df1['PERIOD'] = period

#utilizing the correct line as column names
df1.columns = df1.iloc[5]
df1.drop(df1.index[:7], inplace = True)

#Deleting a useless column 'VALOR COMISSÃO'
df1.drop('VALOR COMISSÃO', axis=1, inplace = True)

#renaming the second column as USERNAME instead of NaN
df1 = df1.rename(columns={np.nan: 'USERNAME'})
#renaming the index column


#reseting the index column
df1.reset_index(drop = True, inplace = True)
#Removing index name
df1 = df1.rename_axis(None, axis=1)

In [126]:
#Converting the data type of the 'DATA PAGTO' column to a date.

#Joining the two following steps into one.
#df['DATA PAGTO'] = pd.to_datetime(df['DATA PAGTO'], format='%d/%m/%Y')
#df['DATA PAGTO'] = df['DATA PAGTO'].dt.strftime('%Y/%m')

df1['DATA PAGTO'] = pd.to_datetime(df1['DATA PAGTO'], format='%d/%m/%Y').dt.strftime('%Y/%m')
df1.head()

Unnamed: 0,DATA PAGTO,USERNAME,CLIENTE,PRODUTO,VALOR PAGO,VALERIA MENDES (172.***.***-31),De 01/01/2018 até 30/06/2024
0,2022/01,eliasabdo,ELIAS ABDO,ATD,85.6,VALERIA MENDES (172.***.***-31),De 01/01/2018 até 30/06/2024
1,2022/01,eliasabdo,ELIAS ABDO,ATD,80.0,VALERIA MENDES (172.***.***-31),De 01/01/2018 até 30/06/2024
2,2022/02,eliasabdo,ELIAS ABDO,ATD,207.0,VALERIA MENDES (172.***.***-31),De 01/01/2018 até 30/06/2024
3,2022/03,eliasabdo,ELIAS ABDO,ATD,207.0,VALERIA MENDES (172.***.***-31),De 01/01/2018 até 30/06/2024
4,2023/05,pedrinele,ANDERSON PEDRINELE,ATD,27.13,VALERIA MENDES (172.***.***-31),De 01/01/2018 até 30/06/2024


In [127]:
#Obtaining the "MRR Agenda" by client
table1 = pd.pivot_table(df1, values = 'VALOR PAGO', index = 'USERNAME', columns = 'DATA PAGTO', aggfunc= 'sum', fill_value = 0)
table1

DATA PAGTO,2022/01,2022/02,2022/03,2023/05,2023/06,2023/07,2023/08,2023/09,2023/10,2023/11,2023/12,2024/01,2024/02,2024/03,2024/04,2024/05,2024/06
USERNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
12outubro,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,4090.8,0.00,0.0,0.00,0.00
admcesfa,0.0,0,0,0.0,0.0,0.0,0.0,0.0,4594.8,0.0,0.00,0.0,0.0,0.00,0.0,0.00,0.00
admglennda,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,245.32,0.0,0.0,0.00,0.0,0.00,0.00
adminbert,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,816.9,487.00,487.0,512.28,523.21
admmaster,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7173.00,0.0,0.0,0.00,0.0,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
upcred,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,751.8,771.84,751.8,790.82,1476.80
vania,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,4090.80,0.00
villasdistribuidora,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,5251.2,0.0,0.00,0.0,0.00,0.00
villereal,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,5251.2,0.0,0.00,0.0,0.00,0.00


In [128]:
#Adding a column to obtain the number of months each client has made a payment
table1['NUM_PAYMENTS'] = (table1>0).sum(axis=1)
table1

DATA PAGTO,2022/01,2022/02,2022/03,2023/05,2023/06,2023/07,2023/08,2023/09,2023/10,2023/11,2023/12,2024/01,2024/02,2024/03,2024/04,2024/05,2024/06,NUM_PAYMENTS
USERNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
12outubro,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,4090.8,0.00,0.0,0.00,0.00,1
admcesfa,0.0,0,0,0.0,0.0,0.0,0.0,0.0,4594.8,0.0,0.00,0.0,0.0,0.00,0.0,0.00,0.00,1
admglennda,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,245.32,0.0,0.0,0.00,0.0,0.00,0.00,1
adminbert,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,816.9,487.00,487.0,512.28,523.21,5
admmaster,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7173.00,0.0,0.0,0.00,0.0,0.00,0.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
upcred,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,751.8,771.84,751.8,790.82,1476.80,5
vania,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,4090.80,0.00,1
villasdistribuidora,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,5251.2,0.0,0.00,0.0,0.00,0.00,1
villereal,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,5251.2,0.0,0.00,0.0,0.00,0.00,1


In [129]:
#A function to get a list with X months prior to current month.
def last_months(x):
    now = pd.Timestamp(datetime.now())
    dates = []
    for i in range (x):
        date = now - pd.DateOffset(months=i+1)
        year_month = date.strftime('%Y/%m')
        dates.append(year_month)
    return dates

In [130]:
#Just checking if our function gets the results we need
last_month = (last_months(1))
last_3_months = last_months(3)
type((last_month))
print(last_3_months)

['2024/06', '2024/05', '2024/04']


In [131]:
#Creating a column that gives us the number of payments in the last three months.
#This is important so we do not get fooled by clients that buys 6 or 12 months for example.
table1['NUM_PAYMENTS_LAST3_MON'] = table1.loc[:,last_3_months].gt(0).sum(axis=1)

In [132]:
table1.sort_values(by='NUM_PAYMENTS_LAST3_MON', ascending=False).head(300)

DATA PAGTO,2022/01,2022/02,2022/03,2023/05,2023/06,2023/07,2023/08,2023/09,2023/10,2023/11,2023/12,2024/01,2024/02,2024/03,2024/04,2024/05,2024/06,NUM_PAYMENTS,NUM_PAYMENTS_LAST3_MON
USERNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
miguelunica1,0.0,0,0,0.0,0.0,0.0,0.00,0.0,427.84,576.65,576.65,576.65,971.20,971.20,971.20,971.20,971.20,9,3
annehage,0.0,0,0,0.0,0.0,0.0,547.00,547.0,547.00,547.00,547.00,547.00,547.00,574.10,625.31,657.47,711.66,11,3
rmaximo1,0.0,0,0,0.0,0.0,0.0,0.00,0.0,0.00,612.33,365.04,365.04,365.04,381.81,415.04,504.14,468.14,8,3
docladigital,0.0,0,0,0.0,0.0,0.0,0.00,0.0,0.00,0.00,0.00,0.00,0.00,577.00,577.00,577.00,577.00,4,3
precocerto,0.0,0,0,0.0,0.0,0.0,407.00,407.0,407.00,407.00,407.00,407.00,407.00,407.00,407.00,428.12,428.12,11,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
infinityinvest,0.0,0,0,0.0,0.0,0.0,94.26,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1,0
jaqueline.almeida,0.0,0,0,5579.4,0.0,0.0,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1,0
juliano.tp,0.0,0,0,6499.2,0.0,0.0,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1,0
admcesfa,0.0,0,0,0.0,0.0,0.0,0.00,0.0,4594.80,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1,0


In [133]:
table1

DATA PAGTO,2022/01,2022/02,2022/03,2023/05,2023/06,2023/07,2023/08,2023/09,2023/10,2023/11,2023/12,2024/01,2024/02,2024/03,2024/04,2024/05,2024/06,NUM_PAYMENTS,NUM_PAYMENTS_LAST3_MON
USERNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
12outubro,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,4090.8,0.00,0.0,0.00,0.00,1,0
admcesfa,0.0,0,0,0.0,0.0,0.0,0.0,0.0,4594.8,0.0,0.00,0.0,0.0,0.00,0.0,0.00,0.00,1,0
admglennda,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,245.32,0.0,0.0,0.00,0.0,0.00,0.00,1,0
adminbert,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,816.9,487.00,487.0,512.28,523.21,5,3
admmaster,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7173.00,0.0,0.0,0.00,0.0,0.00,0.00,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
upcred,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,751.8,771.84,751.8,790.82,1476.80,5,3
vania,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,4090.80,0.00,1,1
villasdistribuidora,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,5251.2,0.0,0.00,0.0,0.00,0.00,1,0
villereal,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,5251.2,0.0,0.00,0.0,0.00,0.00,1,0


In [134]:
table1[(table1[last_month[0]]>0) & (table1['NUM_PAYMENTS'] <=3) & (table1['NUM_PAYMENTS_LAST3_MON'] <=3)]

DATA PAGTO,2022/01,2022/02,2022/03,2023/05,2023/06,2023/07,2023/08,2023/09,2023/10,2023/11,2023/12,2024/01,2024/02,2024/03,2024/04,2024/05,2024/06,NUM_PAYMENTS,NUM_PAYMENTS_LAST3_MON
USERNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
closerscompany,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,546.67,0.0,400.0,2,2
dwa.ti,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3670.8,1,1
mayaraxavier,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1654.83,1169.7,2,2
mayaraxavier2109,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1977.35,1420.35,2,2
opcaovirtual,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4090.8,1,1
paulaklima,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,547.0,557.0,557.0,3,3
sartoriadmin,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4678.8,4561.92,2,2
sigapregao,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,594.13,501.3,501.3,3,3
unirittercanoas,0.0,0,0,0.0,4967.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4139.5,2,1


In [135]:
dict_map = {True: 'Elegible', False:'Ineligible'}
table1['COMISSION'] = ((table1['NUM_PAYMENTS'] <=3 ) & (table1['NUM_PAYMENTS'] == table1['NUM_PAYMENTS_LAST3_MON']) & (table1[last_month[0]] > 0)).map(dict_map)

In [136]:
table1 = table1.sort_values(by='COMISSION')
table1

DATA PAGTO,2022/01,2022/02,2022/03,2023/05,2023/06,2023/07,2023/08,2023/09,2023/10,2023/11,2023/12,2024/01,2024/02,2024/03,2024/04,2024/05,2024/06,NUM_PAYMENTS,NUM_PAYMENTS_LAST3_MON,COMISSION
USERNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
sigapregao,0.0,0,0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,594.13,501.3,501.30,3,3,Elegible
opcaovirtual,0.0,0,0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,4090.80,1,1,Elegible
closerscompany,0.0,0,0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,546.67,0.0,400.00,2,2,Elegible
sartoriadmin,0.0,0,0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00,4678.8,4561.92,2,2,Elegible
paulaklima,0.0,0,0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,547.00,557.0,557.00,3,3,Elegible
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
inecel,0.0,0,0,0.0,6694.8,231.0,420.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.00,3,0,Ineligible
infinityinvest,0.0,0,0,0.0,0.0,0.0,94.26,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.00,1,0,Ineligible
jaqueline.almeida,0.0,0,0,5579.4,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.00,1,0,Ineligible
ecomerce,0.0,0,0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,439.87,4675.2,0.0,0.00,0.0,0.00,2,0,Ineligible


# Comission Due

In [137]:
Comission_Value = (table1.loc[table1['COMISSION'] == 'Elegible', last_month[0]].sum())/10
Comission_Value

1637.1870000000001

## Saving the table of interest in the excel file

In [138]:
with pd.ExcelWriter(file, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'replace') as writer:
    table1.to_excel(writer, sheet_name='DadosComissoes')