In [None]:
import csv
import os
from dotenv import load_dotenv
import pandas as pd
import json
import numpy as np
from datetime import datetime


# Load variables from a .env file into the process environment (python-dotenv)
# before the lookups below. Each os.environ[...] lookup raises KeyError when
# the variable is not set.
load_dotenv()

# Input CSV path, plus the secret key and base URL used to build the QR-code URLs.
concatenated_db_filepath = os.environ['DB_CONCATENATED']
qr_code_secret_key = os.environ['BENEF_2024_QR_CODE_URL_SECRET']
qr_code_base_url = os.environ['BENEF_2024_QR_CODE_BASE_URL']

# Output CSV paths, one per campaign segment written at the end of the notebook:
#   B               -> new rows where allocataire and beneficiary first names match
#   B_AND_A         -> new rows where they differ
#   *_EXISTINGS     -> same two cases for rows created before the cut-off date
pathfile_campaign_csv_b = os.environ['CAMPAIGN_CSV_B']
pathfile_campaign_csv_b_and_a = os.environ['CAMPAIGN_CSV_B_AND_A']
pathfile_campaign_csv_b_existings = os.environ['CAMPAIGN_CSV_B_EXISTINGS']
pathfile_campaign_csv_b_and_a_existings = os.environ['CAMPAIGN_CSV_B_AND_A_EXISTINGS']

In [None]:
# Read only the columns the campaign needs; created_at is kept as a raw string.
# The first selected column is used as the index while parsing and is turned
# back into a regular column by reset_index().
df_db = pd.read_csv(
    concatenated_db_filepath,
    sep=',',
    index_col=0,
    usecols=[
        'nom',
        'prenom',
        'genre',
        'allocataire',
        'id_psp',
        'created_at',
        'date_naissance',
    ],
    dtype={'created_at': 'str'},
).reset_index()

In [None]:
# Expand the JSON-encoded 'allocataire' column into flat 'allocataire-*' columns.
df_json_allocataire = pd.json_normalize(
    df_db['allocataire'].map(json.loads)
).add_prefix('allocataire-')

# Positional index on df_db so the index-on-index merge pairs row i with row i.
df_db.index = pd.RangeIndex(len(df_db))

# Attach the flattened columns and drop the raw JSON column.
df_db_unwraped = df_db.merge(
    df_json_allocataire,
    left_index=True,
    right_index=True,
).drop(columns=['allocataire'])

In [None]:
# Drop rows that cannot be contacted at all: email AND phone are both empty
# (missing value or empty string). Rows with at least one of the two are kept.
mask_email_and_phone_empty = (
    (df_db_unwraped['allocataire-courriel'].isna() | df_db_unwraped['allocataire-courriel'].eq(''))
    & (df_db_unwraped['allocataire-telephone'].isna() | df_db_unwraped['allocataire-telephone'].eq(''))
)
df_db_unwraped_reachable = df_db_unwraped.loc[~mask_email_and_phone_empty]

print(f"{len(df_db_unwraped) - len(df_db_unwraped_reachable)} rows deleted because they are not reachable by email or phone")

In [None]:
# Source column name -> name used in the campaign files.
column_mapping = {
    'allocataire-courriel': 'email',
    'allocataire-qualite': 'allocataire_qualite',
    'allocataire-nom': 'allocataire_nom',
    'allocataire-prenom': 'allocataire_prenom',
    'allocataire-telephone': 'telephone',
    'prenom': 'beneficiaire_prenom',
    'nom': 'beneficiaire_nom',
    'genre': 'beneficiaire_genre',
    'date_naissance': 'beneficiaire_date_naissance',
    'id_psp': 'code',
}

# Relabel in place; columns absent from the mapping keep their current name.
df_db_unwraped_reachable.columns = [
    column_mapping.get(current_name, current_name)
    for current_name in df_db_unwraped_reachable.columns
]

In [None]:
# Keep only the columns the campaign needs.
# .copy() makes df_campaign an independent frame: the following cells add and
# overwrite columns on it, and doing that on a bare selection of
# df_db_unwraped_reachable raises SettingWithCopyWarning and leaves it unclear
# whether the parent frame is modified too.
df_campaign = df_db_unwraped_reachable[[
    'email',
    'allocataire_nom',
    'allocataire_prenom',
    'beneficiaire_prenom',
    'beneficiaire_nom',
    'beneficiaire_genre',
    'beneficiaire_date_naissance',
    'code',
    'telephone',
    'created_at',
]].copy()

In [None]:
# Reformat the birth date from 'YYYY-MM-DD...' to 'DD/MM/YYYY'.
# Only the first 10 characters are parsed. The vectorised .str slice (instead
# of a per-element lambda) propagates missing values as NaN rather than raising
# TypeError on the first missing birth date.
# NOTE: not idempotent - re-running this cell on already reformatted values
# fails the '%Y-%m-%d' parse.
df_campaign['beneficiaire_date_naissance'] = (
    pd.to_datetime(df_campaign['beneficiaire_date_naissance'].str[:10], format='%Y-%m-%d')
    .dt.strftime('%d/%m/%Y')
)

In [None]:
# Add a gender-agreeing "born on" label: 'Née le' when the beneficiary's gender
# is 'F', 'Né le' for every other value (including missing ones).
mask_girl = df_campaign['beneficiaire_genre'].eq('F')
df_campaign['neele'] = mask_girl.map({True: 'Née le', False: 'Né le'})

In [None]:
# Capitalize first names and surnames (first letter upper-case, rest lower-case).
# Missing values are kept missing: a plain astype(str) would turn NaN into the
# literal text 'nan', which capitalize() then renders as the name 'Nan' in the
# exported campaign files.
for name_column in (
    'allocataire_prenom',
    'allocataire_nom',
    'beneficiaire_prenom',
    'beneficiaire_nom',
):
    raw_names = df_campaign[name_column]
    # Stringify so non-string values are still capitalized, then blank out the
    # positions that were missing in the source column.
    df_campaign[name_column] = raw_names.astype(str).str.capitalize().where(raw_names.notna())

In [None]:
# Switch phone numbers to international notation: a leading '0' becomes '+33'.
# Values that do not start with '0' are left untouched.
df_campaign['telephone'] = df_campaign['telephone'].replace(
    to_replace='^0',
    value='+33',
    regex=True,
)

In [None]:
# Génération des URLs pour le QR code
import hmac
import hashlib
import urllib.parse
import base64

from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
from Crypto.Random import get_random_bytes

# Raw key bytes (decoded from the base64 secret) handed to AES.new in encrypt().
base_64_key = base64.b64decode(qr_code_secret_key)

# Campaign column -> short query-parameter name embedded in the QR-code URL.
key_mapping = {
    'beneficiaire_prenom': 'bp',
    'beneficiaire_nom': 'bn',
    'beneficiaire_genre': 'bg',
    'beneficiaire_date_naissance': 'bdn',
    'code': 'c',
}

def encrypt(data):
    """Encrypt a text payload with AES-CBC using the module-level `base_64_key`.

    The UTF-8 bytes of `data` are padded to the AES block size and encrypted.
    No IV is passed to `AES.new`; the IV is read back from the cipher object
    and prepended to the ciphertext.

    Returns:
        str: base64 text of `iv + ciphertext`.
    """
    padded_plaintext = pad(data.encode('utf-8'), AES.block_size)
    cbc_cipher = AES.new(base_64_key, AES.MODE_CBC)
    ciphertext = cbc_cipher.encrypt(padded_plaintext)
    return base64.b64encode(cbc_cipher.iv + ciphertext).decode('utf-8')


def generate_encrypted_url_column(row):
    """Build the QR-code URL for one campaign row.

    The columns of `df_campaign` that appear in `key_mapping` are collected
    under their short parameter names, URL-encoded as a query string, encrypted
    with `encrypt`, and appended (percent-quoted) after '#' to
    `qr_code_base_url`.

    Args:
        row: one row of `df_campaign`, indexable by column name.

    Returns:
        str: the full URL carrying the encrypted parameters in its fragment.
    """
    short_params = {
        key_mapping[column]: row[column]
        for column in df_campaign.columns
        if column in key_mapping
    }
    query_string = urllib.parse.urlencode(short_params)
    encrypted_fragment = urllib.parse.quote_plus(encrypt(query_string))
    return f"{qr_code_base_url}#{encrypted_fragment}"
    
# Discard the column left by a previous run of this cell before recomputing it.
if 'url_qr_code' in df_campaign.columns:
    df_campaign.pop('url_qr_code')

# One encrypted URL per row.
df_campaign['url_qr_code'] = df_campaign.apply(generate_encrypted_url_column, axis=1)


In [None]:
# Remove the phone number when an email is available.
# "Has an email" uses the same definition as the reachability filter earlier in
# the notebook: neither missing nor the empty string. Testing only for NaN
# would treat '' as an email and wipe the phone of rows whose sole contact
# channel is that phone, leaving them with no way to be reached.
mask_has_email = df_campaign['email'].notna() & df_campaign['email'].ne('')
df_campaign.loc[mask_has_email, 'telephone'] = None

In [None]:
# TODO: exclude RGPD or deceased users

In [None]:
# separate already existing from new beneficiaries
import pytz

# "Existing" = created strictly before 2024-07-15. Only the first 19 characters
# of created_at are parsed, and the comparison is made against a naive datetime.
# Rows whose created_at cannot be compared (missing) land in the "new" group.
mask_existing_benef = pd.to_datetime(df_campaign['created_at'].str[:19]).lt(datetime(2024, 7, 15))
df_campaign_existing = df_campaign.loc[mask_existing_benef]
df_campaign_new = df_campaign.loc[~mask_existing_benef]

# The two groups must partition df_campaign.
assert len(df_campaign) == len(df_campaign_new) + len(df_campaign_existing)

In [None]:
# Existing rows where the beneficiary's first name differs from the
# allocataire's (case-insensitive comparison on first names only).
mask_alloc_diff_benef = df_campaign_existing['beneficiaire_prenom'].str.lower().ne(
    df_campaign_existing['allocataire_prenom'].str.lower()
)
df_campaign_existing_alloc_diff_benef = df_campaign_existing.loc[mask_alloc_diff_benef]

In [None]:
# Existing rows where the beneficiary's first name equals the allocataire's
# (case-insensitive comparison on first names only).
mask_alloc_eq_benef = df_campaign_existing['beneficiaire_prenom'].str.lower().eq(
    df_campaign_existing['allocataire_prenom'].str.lower()
)
df_campaign_existing_alloc_eq_benef = df_campaign_existing.loc[mask_alloc_eq_benef]

In [None]:
# New rows where the beneficiary's first name differs from the allocataire's
# (case-insensitive comparison on first names only).
mask_alloc_diff_benef = df_campaign_new['beneficiaire_prenom'].str.lower().ne(
    df_campaign_new['allocataire_prenom'].str.lower()
)
df_campaign_new_alloc_diff_benef = df_campaign_new.loc[mask_alloc_diff_benef]

In [None]:
# New rows where the beneficiary's first name equals the allocataire's
# (case-insensitive comparison on first names only).
mask_alloc_eq_benef = df_campaign_new['beneficiaire_prenom'].str.lower().eq(
    df_campaign_new['allocataire_prenom'].str.lower()
)
df_campaign_new_alloc_eq_benef = df_campaign_new.loc[mask_alloc_eq_benef]

In [None]:
# The four segments must account for every campaign row exactly once.
assert sum(
    len(segment)
    for segment in (
        df_campaign_new_alloc_eq_benef,
        df_campaign_new_alloc_diff_benef,
        df_campaign_existing_alloc_eq_benef,
        df_campaign_existing_alloc_diff_benef,
    )
) == len(df_campaign)

# Write one CSV per segment (without the index column).
for segment, csv_path in (
    (df_campaign_new_alloc_eq_benef, pathfile_campaign_csv_b),
    (df_campaign_new_alloc_diff_benef, pathfile_campaign_csv_b_and_a),
    (df_campaign_existing_alloc_eq_benef, pathfile_campaign_csv_b_existings),
    (df_campaign_existing_alloc_diff_benef, pathfile_campaign_csv_b_and_a_existings),
):
    segment.to_csv(csv_path, index=False)