In [1]:
import pandas as pd
from datetime import datetime

In [2]:
# Input files, resolved relative to the notebook's working directory.
TRANSACTIONS_CSV = 'PD 2023 Wk 1 Input.csv'
TARGETS_CSV = 'Targets.csv'
df = pd.read_csv(TRANSACTIONS_CSV)    # transaction-level rows
targets = pd.read_csv(TARGETS_CSV)    # targets table

In [3]:
# Preview the first rows of the raw transactions.
df.head()

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00


In [4]:
# Column names, non-null counts and dtypes of the transactions frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Transaction Code     365 non-null    object
 1   Value                365 non-null    int64 
 2   Customer Code        365 non-null    int64 
 3   Online or In-Person  365 non-null    int64 
 4   Transaction Date     365 non-null    object
dtypes: int64(3), object(2)
memory usage: 14.4+ KB


In [5]:
# Preview the targets table as loaded.
targets.head()

Unnamed: 0,Online or In-Person,Q1,Q2,Q3,Q4
0,Online,72500,70000,60000,60000
1,In-Person,75000,70000,70000,60000


In [6]:
# Keep only the transactions whose code starts with 'DSB'.
# `.str.startswith` is the vectorised form of the per-row lambda, and `.copy()` makes the
# result an independent frame, so the column assignments in later cells do not operate on a
# slice of the original frame (which triggers SettingWithCopyWarning).
df = df[df['Transaction Code'].str.startswith('DSB')].copy()

In [7]:
# Rename the values in the Online or In-Person field: Online for the 1 values and In-Person for the 2 values.
# An explicit mapping (rather than "anything that is not 1 is In-Person") keeps unexpected codes
# visible and makes the cell safe to re-run: labels that were already renamed are left untouched
# instead of all being turned into 'In-Person'.
# `assign` builds a new frame, so nothing is written into a filtered slice.
channel_labels = {1: 'Online', 2: 'In-Person'}
df = df.assign(**{'Online or In-Person': df['Online or In-Person'].replace(channel_labels)})

In [8]:
# Preview the parsed dates. The source strings are day-first (e.g. 20/03/2023), so state that
# explicitly; without it an ambiguous value such as 05/12/2023 may be read month-first.
pd.to_datetime(df['Transaction Date'], dayfirst=True)

2     2023-07-14
4     2023-08-26
5     2023-05-27
11    2023-12-05
12    2023-04-15
         ...    
350   2023-10-03
351   2023-09-08
353   2023-04-02
354   2023-06-28
359   2023-11-25
Name: Transaction Date, Length: 102, dtype: datetime64[ns]

In [9]:
# Change the date to be the quarter.
# The dates are day-first (DD/MM/YYYY). `.dt.quarter` extracts the quarter for the whole column at
# once; the cast to int64 keeps the key's dtype equal to the numeric Quarter of the targets table.
# assign/drop return a new frame instead of mutating `df` in place.
quarter = pd.to_datetime(df['Transaction Date'], dayfirst=True).dt.quarter.astype('int64')
df = df.assign(Quarter=quarter).drop(columns='Transaction Date')

In [10]:
# Total the numeric columns per quarter and channel; `as_index=False` returns the group keys as
# ordinary columns, so no separate reset_index step is needed.
# `numeric_only=True` keeps the text 'Transaction Code' column out of the sum: pandas >= 2.0 no
# longer drops such columns silently and would concatenate the strings instead.
# NOTE(review): 'Customer Code' is numeric, so it is summed as well; that total looks like an
# aggregate of identifiers rather than a meaningful measure — consider restricting the sum to 'Value'.
df_1 = df.groupby(by=['Quarter', 'Online or In-Person'], as_index=False).sum(numeric_only=True)

In [11]:
# Reshape the quarterly targets from wide to long: one row per type of transaction and quarter,
# naming the two new fields as part of the reshape.
targets = pd.melt(
    targets,
    id_vars=["Online or In-Person"],
    var_name="Quarter",
    value_name="Quarterly Target",
)

In [12]:
# Display the reshaped targets (one row per transaction type and quarter).
targets

Unnamed: 0,Online or In-Person,Quarter,Quarterly Target
0,Online,Q1,72500
1,In-Person,Q1,75000
2,Online,Q2,70000
3,In-Person,Q2,70000
4,Online,Q3,60000
5,In-Person,Q3,70000
6,Online,Q4,60000
7,In-Person,Q4,60000


In [13]:
# Strip the 'Q' from the quarter labels and store the remainder as integers.
targets['Quarter'] = targets['Quarter'].str.replace('Q', '', regex=False).astype('int64')

In [14]:
# Check the column dtypes of the targets table after the Quarter conversion.
targets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Online or In-Person  8 non-null      object
 1   Quarter              8 non-null      int64 
 2   Quarterly Target     8 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 320.0+ bytes


In [15]:
# Join the aggregated transactions to their targets, keeping only the
# quarter / transaction-type pairs that exist in both tables.
join_keys = ['Quarter', 'Online or In-Person']
output = pd.merge(df_1, targets, on=join_keys, how='inner')

In [16]:
# Variance to Target for each row: the summed value minus its quarterly target.
output['Variance to Target'] = output['Value'].sub(output['Quarterly Target'])

In [17]:
# Display the joined result, including the variance column.
output

Unnamed: 0,Quarter,Online or In-Person,Value,Customer Code,Quarterly Target,Variance to Target
0,1,In-Person,77576,1300032,75000,2576
1,1,Online,74562,1500075,72500,2062
2,2,In-Person,70634,1300061,70000,634
3,2,Online,69325,1600089,70000,-675
4,3,In-Person,74189,1400069,70000,4189
5,3,Online,59072,1200067,60000,-928
6,4,In-Person,43223,1000062,60000,-16777
7,4,Online,61908,900070,60000,1908
