In [None]:
import pandas as pd

In [None]:
# Read the raw contacts export (comma-separated) and preview its first rows
contacts_csv_path = '../data/raw/contacts_export.csv'
contacts_df = pd.read_csv(contacts_csv_path, sep=',')

print(contacts_df.head())


In [None]:
# Convert a name to proper case, keeping Spanish particles ("de", "la", ...) lowercase
def proper_case(name):
    """Return ``name`` in proper case.

    Each whitespace-separated word is capitalised, except the particles
    "de", "la", "del", "las", "los" and "y", which are lowercased.
    Hyphenated words are capitalised part by part, so "GARCÍA-LÓPEZ"
    becomes "García-López" rather than "García-lópez".

    Parameters
    ----------
    name : object
        The raw name. Null values (None, NaN, pd.NA) and the empty string
        are treated as missing. Other non-string values are converted with
        ``str()`` before casing.

    Returns
    -------
    str
        The re-cased name with words separated by single spaces, or "" when
        ``name`` is missing.
    """
    if pd.isnull(name) or name == "":
        return ""
    exceptions = {"de", "la", "del", "las", "los", "y"}

    def case_word(word):
        lowered = word.lower()
        if lowered in exceptions:
            return lowered
        # str.capitalize() lowercases everything after the first character,
        # so each hyphen-separated part has to be capitalised on its own.
        return '-'.join(part.capitalize() for part in word.split('-'))

    # str() guards against non-string cells (e.g. numbers), which have no .split()
    return ' '.join(case_word(word) for word in str(name).split())

# Normalise the capitalisation of every name component
for name_column in ('FirstName', 'MiddleName', 'LastName'):
    contacts_df[name_column] = contacts_df[name_column].apply(proper_case)

# New 'baja' flag column, False for every contact by default
contacts_df['baja'] = False

# Set ContactCategory__c (and baja) on a row according to its ContactType__c
def update_contact(row):
    """Fill in ``ContactCategory__c`` and ``baja`` from ``ContactType__c``.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series for one DataFrame row)
        Must provide the key 'ContactType__c'. It is modified in place.

    Returns
    -------
    The same ``row`` object. For a known contact type, 'ContactCategory__c'
    is set to the matching category id; for 'Baja', 'baja' is also set to
    True. Rows whose type is unknown, missing (None / NaN / pd.NA) or not a
    string are returned unchanged.
    """
    category_by_type = {
        'Baja': 'a1kMI000002OzqnYAC',
        'Socio ordinario': 'a1kMI000002OzqnYAC',
        'Socio colaborador': 'a1kMI000002OzsPYAS',
        'Donante particular': 'a1kMI000002OzvdYAC',
        'Donante empresa': 'a1kMI000002Ozu1YAC',
        'No socio': 'a1kMI000002P0YLYA0',
        'Pendiente documentación': 'a1kMI000002OzqnYAC',
    }

    contact_type = row['ContactType__c']
    # Only strings can match a known type. Checking the type first also avoids
    # the "boolean value of NA is ambiguous" error that `pd.NA == '...'` raises
    # when used in an `if`.
    if not isinstance(contact_type, str) or contact_type not in category_by_type:
        return row

    row['ContactCategory__c'] = category_by_type[contact_type]
    if contact_type == 'Baja':
        row['baja'] = True
    return row

# Run update_contact over every row (axis=1) of the DataFrame
contacts_df = contacts_df.apply(update_contact, axis=1)

# Contacts whose first name is identical to their last name
repeats_last_name = contacts_df['FirstName'].eq(contacts_df['LastName'])
same_name_contacts = contacts_df[repeats_last_name]
# Blank out the first name for those contacts
contacts_df.loc[same_name_contacts.index, 'FirstName'] = ""


In [None]:
# Contacts that have a last name and a middle name but no first name
has_last_name = contacts_df['LastName'].notna() & contacts_df['LastName'].ne("")
has_middle_name = contacts_df['MiddleName'].notna() & contacts_df['MiddleName'].ne("")
lacks_first_name = contacts_df['FirstName'].isna() | contacts_df['FirstName'].eq("")

filtered_result_df = contacts_df[has_last_name & has_middle_name & lacks_first_name]

# Show the matching rows
filtered_result_df


In [None]:
# "Donante empresa" contacts for which at least one of the three name fields is missing
is_donante_empresa = contacts_df['ContactType__c'].eq('Donante empresa')
first_name_missing = contacts_df['FirstName'].isna() | contacts_df['FirstName'].eq("")
middle_name_missing = contacts_df['MiddleName'].isna() | contacts_df['MiddleName'].eq("")
last_name_missing = contacts_df['LastName'].isna() | contacts_df['LastName'].eq("")

donante_empresa_missing_names_df = contacts_df[
    is_donante_empresa & (first_name_missing | middle_name_missing | last_name_missing)
]

# Show the matching rows
donante_empresa_missing_names_df


In [None]:
# Contacts with neither a first name nor a middle name
first_name_missing = contacts_df['FirstName'].isna() | contacts_df['FirstName'].eq("")
middle_name_missing = contacts_df['MiddleName'].isna() | contacts_df['MiddleName'].eq("")

missing_first_and_middle_name_df = contacts_df[first_name_missing & middle_name_missing]

# Show the matching rows
missing_first_and_middle_name_df


In [None]:
# Select the columns to export
export_columns = ['Id', 'ContactCategory__c', 'FirstName', 'MiddleName', 'LastName', 'baja']
result_df = contacts_df[export_columns]

# Write the selection to a new CSV file, leaving out the index column
result_df.to_csv('../data/raw/contacts_export_updated.csv', index=False)

In [None]:
import pandas as pd

# Small hand-written sample: two company donors and one individual donor
data = dict(
    ContactType__c=["Donante empresa", "Donante empresa", "Donante particular"],
    FirstName=["Empresa", "Corporation", None],
    MiddleName=[None, None, None],
    LastName=["S.L.", "S.a", "de los Santos"],
)

contacts_df = pd.DataFrame(data)
print(contacts_df)

# Company legal-form suffixes to strip from names, and their normalised variations
# (lowercase, without dots or spaces)
remove_strings = ["S.l", "Sl", "Sa", "S.a", "S.c.a.", "Slu", "S.l.", "SLL"]
remove_variations = [s.lower().replace(".", "").replace(" ", "") for s in remove_strings]

# Clean a name by dropping whole words that are one of the suffixes above
def clean_name(name):
    """Remove legal-form suffixes from ``name`` and return it in proper case.

    The name is split on whitespace. A word is dropped when, after
    lowercasing and removing dots and surrounding commas, it equals one of
    ``remove_variations``. Matching whole words (instead of substrings) keeps
    ordinary words that merely contain a suffix intact, e.g. "Empresa" or
    "Santos", and also catches dotted forms such as "S.L.".

    Parameters
    ----------
    name : str or null
        The raw name. Null values and the empty string are treated as missing.

    Returns
    -------
    str
        The remaining words passed through ``proper_case``, with leading and
        trailing spaces and commas removed; "" when ``name`` is missing.
    """
    if pd.isnull(name) or name == "":
        return ""
    kept_words = [
        word for word in name.lower().split()
        if word.replace(".", "").strip(",") not in remove_variations
    ]
    # A comma that preceded a removed suffix ("Empresa, S.L.") would otherwise dangle
    return proper_case(' '.join(kept_words)).strip(" ,")

# Concatenate the three name parts (missing parts become "") into LastName
name_parts = contacts_df[['FirstName', 'MiddleName', 'LastName']].fillna('')
contacts_df['LastName'] = name_parts.apply(lambda parts: ' '.join(parts).strip(), axis=1)

# The full name now lives in LastName, so empty the other two columns
for emptied_column in ('FirstName', 'MiddleName'):
    contacts_df[emptied_column] = ""

# Run clean_name over the merged LastName values
contacts_df['LastName'] = contacts_df['LastName'].apply(clean_name)

# Show the resulting DataFrame
print(contacts_df)