In [612]:
# Import libraries
import pandas as pd
import re
import json
import sys
sys.path.append("..")
from utils.filesystem import *
from utils.salesforce import *

In [613]:
# Config file path
config_file = '../config.json'

# Load the config file. JSON text is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's locale default encoding
# (which would corrupt non-ASCII credentials on non-UTF-8 systems).
with open(config_file, encoding='utf-8') as f:
    config = json.load(f)

# Salesforce credentials read from the config; a missing key raises KeyError
username = config['salesforce_username']
password = config['salesforce_password']
security_token = config["salesforce_token"]

In [614]:
# Build the Salesforce client from the credentials loaded from the config
sf_api = SalesForceAPI(username, password, security_token)

# Export the contacts to CSV only when authentication succeeds
authenticated = sf_api.authenticate()
if not authenticated:
    print("Authentication failed")
else:
    print("Authentication successful")

    # Destination of the raw contacts export
    file_path = '../data/raw/contacts_export.csv'
    sf_api.export_basic_contact_info_to_csv(file_path)
    print(f"Contacts exported to {file_path}")

Authentication successful
Exported 897 contacts to ../data/raw/contacts_export.csv
Contacts exported to ../data/raw/contacts_export.csv


In [615]:
# Read the exported contacts CSV into a DataFrame
contacts_df = pd.read_csv('../data/raw/contacts_export.csv', sep=',')

# Show how many contact rows were loaded
print(contacts_df.shape[0])


897


In [616]:
# Flag the contacts whose FirstName is identical to their LastName
# (rows where either value is missing never compare equal)
is_same_name = contacts_df['FirstName'].eq(contacts_df['LastName'])
same_name_contacts = contacts_df.loc[is_same_name]

# Blank out the duplicated FirstName on those rows
contacts_df.loc[is_same_name, 'FirstName'] = ""

In [617]:
# Remove Spanish legal-form suffixes (S.A., S.L., S.L.L., S.C.A. -- with or
# without dots, in any letter case, optionally preceded by a comma and/or
# whitespace) from the LastName and FirstName columns.
#
# A single pattern is used, with the longest forms first, so that "S.L.L." is
# removed as a whole instead of being cut to a stray "L." by a shorter "S.L."
# match. The optional trailing dot is consumed with the suffix, and the final
# look-ahead refuses to match when the token continues (e.g. "S.A.U." or
# "Salvador"), so unrelated text is left intact.
# NOTE(review): matching is case-insensitive to cover the lowercase variants
# ('s.a', 's.l.l'); a standalone word "sa"/"sl" in a company name is removed too.
COMPANY_SUFFIX_PATTERN = r"[\s,]*\bS\.?(?:L\.?L|C\.?A|A|L)\.?(?![\w.])"

# Apply the cleanup only to rows where 'ContactType__c' is "Donante empresa"
mask = contacts_df['ContactType__c'] == "Donante empresa"
for name_column in ('LastName', 'FirstName'):
    contacts_df.loc[mask, name_column] = contacts_df.loc[mask, name_column].str.replace(
        COMPANY_SUFFIX_PATTERN, "", regex=True, flags=re.IGNORECASE
    )



In [618]:
# Function to convert names to proper case, keeping common Spanish particles lowercase
def proper_case(name):
    """Return ``name`` converted to proper case.

    Words are split on whitespace and re-joined with single spaces, so
    leading, trailing and repeated whitespace is collapsed. The particles
    "de", "la", "del", "las", "los" and "y" are written in lowercase; every
    other word is capitalized. Each hyphen-separated part of a word is
    capitalized on its own, so compound names keep both capitals
    ("PÉREZ-GALDÓS" -> "Pérez-Galdós").

    Args:
        name: The name to convert. Null values (None/NaN) and the empty
            string are accepted.

    Returns:
        The converted name, or "" when ``name`` is null or empty.
    """
    if pd.isnull(name) or name == "":
        return ""
    exceptions = {"de", "la", "del", "las", "los", "y"}

    def _format_word(word):
        lowered = word.lower()
        if lowered in exceptions:
            return lowered
        # str.capitalize() lowercases everything after the first character,
        # so capitalize each hyphen-separated part independently.
        return '-'.join(part.capitalize() for part in word.split('-'))

    # Join the words back into a single string
    return ' '.join(_format_word(word) for word in name.split())

# Run proper_case over each of the three name columns
for name_column in ('FirstName', 'MiddleName', 'LastName'):
    contacts_df[name_column] = contacts_df[name_column].apply(proper_case)

# New flag column 'baja', initialised to False on every row
contacts_df['baja'] = False

# ContactCategory__c value assigned for each recognised ContactType__c
_CATEGORY_BY_CONTACT_TYPE = {
    'Baja': 'a1kMI000002OzqnYAC',
    'Socio ordinario': 'a1kMI000002OzqnYAC',
    'Socio colaborador': 'a1kMI000002OzsPYAS',
    'Donante particular': 'a1kMI000002OzvdYAC',
    'Donante empresa': 'a1kMI000002Ozu1YAC',
    'No socio': 'a1kMI000002P0YLYA0',
    'Pendiente documentación': 'a1kMI000002OzqnYAC',
}


# Set ContactCategory__c (and baja for 'Baja' contacts) from ContactType__c
def update_contact(row):
    """Update one contact row according to its ``ContactType__c``.

    When the contact type is one of the recognised values,
    ``ContactCategory__c`` is overwritten with the matching category value;
    for the type 'Baja', ``baja`` is additionally set to True. Rows with any
    other contact type are returned without modification.

    Args:
        row: A mapping-like row (e.g. a pandas Series) holding the keys
            'ContactType__c', 'ContactCategory__c' and 'baja'. It is
            modified in place.

    Returns:
        The same ``row`` object that was passed in.
    """
    contact_type = row['ContactType__c']
    if contact_type in _CATEGORY_BY_CONTACT_TYPE:
        row['ContactCategory__c'] = _CATEGORY_BY_CONTACT_TYPE[contact_type]
        if contact_type == 'Baja':
            row['baja'] = True
    return row

# Run update_contact on every row (axis="columns" hands the function one row at a time)
contacts_df = contacts_df.apply(update_contact, axis="columns")


In [619]:
# Strip surrounding whitespace from each of the three name columns
for name_column in ('FirstName', 'MiddleName', 'LastName'):
    contacts_df[name_column] = contacts_df[name_column].str.strip()

# Drop a "." from LastName when it is the final character
contacts_df['LastName'] = contacts_df['LastName'].str.replace(
    r'\.$', '', regex=True
)

In [620]:
# Select rows that have a LastName and a MiddleName but no FirstName
has_last_name = contacts_df['LastName'].notnull() & contacts_df['LastName'].ne("")
has_middle_name = contacts_df['MiddleName'].notnull() & contacts_df['MiddleName'].ne("")
lacks_first_name = contacts_df['FirstName'].isnull() | contacts_df['FirstName'].eq("")
filtered_result_df = contacts_df[has_last_name & has_middle_name & lacks_first_name]

# Print how many rows matched
print(len(filtered_result_df))

0


In [621]:
# Select "Donante empresa" rows where at least one of FirstName, MiddleName
# or LastName is null or empty
is_company_donor = contacts_df['ContactType__c'].eq('Donante empresa')
name_values = contacts_df[['FirstName', 'MiddleName', 'LastName']]
any_name_blank = (name_values.isnull() | name_values.eq("")).any(axis=1)
donante_empresa_missing_names_df = contacts_df[is_company_donor & any_name_blank]

# Print how many rows matched
print(len(donante_empresa_missing_names_df))


36


In [622]:
# Select rows where FirstName and MiddleName are both null or empty
given_names = contacts_df[['FirstName', 'MiddleName']]
both_given_names_blank = (given_names.isnull() | given_names.eq("")).all(axis=1)
missing_first_and_middle_name_df = contacts_df[both_given_names_blank]

# Print how many rows matched
print(len(missing_first_and_middle_name_df))


46


In [623]:
# Select rows whose LastName is null or empty
last_name_blank = contacts_df['LastName'].isnull() | contacts_df['LastName'].eq("")
missing_last_name_df = contacts_df[last_name_blank]
# Print the number of matching rows, then the rows themselves
print(len(missing_last_name_df))

print(missing_last_name_df)

0
Empty DataFrame
Columns: [attributes, Id, ContactType__c, ContactCategory__c, FirstName, MiddleName, LastName, Name, Email, baja]
Index: []


In [624]:
# Columns written to the updated export, in output order
export_columns = ['Id', 'ContactCategory__c', 'FirstName', 'MiddleName', 'LastName', 'baja']
result_df = contacts_df.loc[:, export_columns]

# Write the selected columns to a new CSV file, without the index column
result_df.to_csv('../data/raw/contacts_export_updated.csv', index=False)