In [1]:
import os
import shutil
import datetime

def rename_file(directory):
    """Rename one incoming file in ``directory`` to ``input_data.csv``.

    The pipeline's own files (``master_data.csv``, ``final.csv``,
    ``input_data.csv`` and ``processed_input_data.csv``) are never candidates.
    Sub-directories and hidden dot-files (for example macOS ``.DS_Store``)
    are skipped as well, so only a regular, visible file can become the new
    input. Candidates are examined in sorted name order, which makes the
    choice repeatable; only the first one is renamed.

    Args:
        directory: Path of the folder to scan.

    Returns:
        None. A message is printed when a file is renamed; nothing is
        printed when no candidate exists.

    Raises:
        OSError: If the directory cannot be listed or the rename fails.
    """
    protected = {
        "master_data.csv",
        "final.csv",
        "input_data.csv",
        "processed_input_data.csv",
    }
    target_path = os.path.join(directory, "input_data.csv")

    # os.listdir() returns entries in arbitrary order; sort for a repeatable pick.
    for entry in sorted(os.listdir(directory)):
        # Pipeline files and hidden OS artefacts are never the incoming data file.
        if entry in protected or entry.startswith("."):
            continue

        entry_path = os.path.join(directory, entry)
        # A folder must not be turned into "input_data.csv".
        if not os.path.isfile(entry_path):
            continue

        # NOTE: on POSIX os.rename() silently replaces an existing input_data.csv.
        os.rename(entry_path, target_path)
        print(f'Renamed "{entry}" to "input_data.csv"')
        break  # Rename only one file

def clean_directory(directory):
    """Delete every file in ``directory`` that is not a pipeline file.

    ``master_data.csv``, ``final.csv``, ``input_data.csv`` and
    ``processed_input_data.csv`` are kept. Every other file (hidden files
    included) is removed and reported. Sub-directories are left untouched:
    ``os.remove()`` cannot delete a directory and would otherwise raise and
    abort the clean-up part-way through.

    Args:
        directory: Path of the folder to clean.

    Returns:
        None.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be removed.
    """
    allowed = {
        "master_data.csv",
        "final.csv",
        "input_data.csv",
        "processed_input_data.csv",
    }

    for entry in os.listdir(directory):
        if entry in allowed:
            continue

        entry_path = os.path.join(directory, entry)
        # Real directories are skipped; a symlink is still removed as a file.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            print(f'Skipped directory "{entry}"')
            continue

        os.remove(entry_path)
        print(f'Removed "{entry}"')

# Folder that holds the pipeline's CSV files
data_folder = "/Users/tony/Desktop/outbound/data"  # Change this to the correct path if needed

# Order matters: give the incoming file its canonical name first,
# then delete whatever else does not belong in the folder.
rename_file(directory=data_folder)
clean_directory(directory=data_folder)



In [2]:
# Merge master_data.csv and processed_input_data.csv, deduplicate the combined
# rows on lead_id, and save the result as final.csv.

import pandas as pd

# Locations of the two inputs and of the merged output
master_file, processed_file, output_file = (
    '/Users/tony/Desktop/outbound/data/master_data.csv',
    '/Users/tony/Desktop/outbound/data/processed_input_data.csv',
    '/Users/tony/Desktop/outbound/data/final.csv',
)

# Read the master file first, then the freshly processed input
master_df = pd.read_csv(master_file)
processed_df = pd.read_csv(processed_file)

def merge_and_deduplicate(master_df, processed_df, output_file):
    """Stack two lead tables, deduplicate on ``lead_id`` and write a CSV.

    ``processed_df`` is appended below ``master_df``. For every ``lead_id``
    that occurs more than once only the last occurrence is kept, so a row
    from ``processed_df`` wins over a row from ``master_df``. Rows whose
    ``lead_id`` is missing are never treated as duplicates of one another
    and are all kept.

    Args:
        master_df: DataFrame with the existing master records.
        processed_df: DataFrame with the newly processed records.
        output_file: Path of the CSV file to write (without the index).

    Returns:
        None. A confirmation message is printed after the file is written.

    Raises:
        KeyError: If a non-empty input has no ``lead_id`` column.
    """
    # A frame without the key column would get NaN ids after concat and lose
    # all but one of its rows during deduplication, so fail loudly instead.
    for label, frame in (("master_df", master_df), ("processed_df", processed_df)):
        if not frame.empty and 'lead_id' not in frame.columns:
            raise KeyError(f"{label} has no 'lead_id' column; cannot deduplicate")

    # Merge the dataframes
    merged_df = pd.concat([master_df, processed_df], ignore_index=True)

    # Keep the last occurrence of each lead_id. duplicated() considers all
    # missing ids equal, so rows with a null id are explicitly retained.
    is_superseded = merged_df.duplicated(subset='lead_id', keep='last')
    has_no_id = merged_df['lead_id'].isna()
    deduplicated_df = merged_df[~is_superseded | has_no_id]

    # Save the final deduplicated dataframe to CSV
    deduplicated_df.to_csv(output_file, index=False)

    print(f"Merged and deduplicated file saved as {output_file}")

# Build final.csv from the two frames loaded above
merge_and_deduplicate(
    master_df=master_df,
    processed_df=processed_df,
    output_file=output_file,
)



Merged and deduplicated file saved as /Users/tony/Desktop/outbound/data/final.csv


In [3]:
# Move master_data.csv and processed_input_data.csv to the backup folder
# "/Users/tony/Desktop/outbound/data_backup", adding a timestamp to each
# filename as it is moved.
# Remaining steps of the plan (done in the cells that follow):
#   - copy final.csv to the folder /Users/tony/Desktop/outbound/final_data
#   - rename final.csv to master_data.csv



def backup_files(source_folder, backup_folder,
                 files_to_move=("master_data.csv", "processed_input_data.csv")):
    """Move selected files into a backup folder under timestamped names.

    Each file ``<stem><ext>`` found in ``source_folder`` is moved to
    ``backup_folder`` as ``<stem>_<YYYYmmddHHMMSS><ext>``. All files moved by
    one call share the same timestamp. Files that do not exist are reported
    and skipped.

    Args:
        source_folder: Folder the files are taken from.
        backup_folder: Folder that receives the files; created if missing.
        files_to_move: Names of the files to back up. Defaults to the
            pipeline's master and processed-input files.

    Returns:
        None. One line is printed per file, moved or not found.

    Raises:
        OSError: If the backup folder cannot be created or a move fails.
    """
    # Ensure the backup folder exists
    os.makedirs(backup_folder, exist_ok=True)

    # One timestamp for the whole batch so related backups sort together
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    for file_name in files_to_move:
        source_path = os.path.join(source_folder, file_name)

        if not os.path.exists(source_path):
            print(f"File not found: {source_path}")
            continue

        # splitext() keeps dots inside the stem and preserves the real extension
        stem, extension = os.path.splitext(file_name)
        destination_path = os.path.join(backup_folder, f"{stem}_{timestamp}{extension}")

        # Move the file
        shutil.move(source_path, destination_path)
        print(f"Moved: {source_path} -> {destination_path}")

# Live data folder and the archive that receives the timestamped files
source_folder = "/Users/tony/Desktop/outbound/data"
backup_folder = "/Users/tony/Desktop/outbound/data_backup"

# Archive the current master and processed-input files
backup_files(source_folder=source_folder, backup_folder=backup_folder)





Moved: /Users/tony/Desktop/outbound/data/master_data.csv -> /Users/tony/Desktop/outbound/data_backup/master_data_20250203142658.csv
Moved: /Users/tony/Desktop/outbound/data/processed_input_data.csv -> /Users/tony/Desktop/outbound/data_backup/processed_input_data_20250203142658.csv


In [4]:
# Copy final.csv into the final_data folder (the original stays in place)


# Where the file comes from and which folder receives the copy
source_file = "/Users/tony/Desktop/outbound/data/final.csv"
destination_folder = "/Users/tony/Desktop/outbound/final_data"

# Create the target folder on first use
os.makedirs(destination_folder, exist_ok=True)

# Given a directory as target, copy2() keeps the source's base name,
# overwrites any existing copy and returns the full path it wrote to.
destination_path = shutil.copy2(source_file, destination_folder)

print(f"File successfully copied to: {destination_path}")



File successfully copied to: /Users/tony/Desktop/outbound/final_data/final.csv


In [5]:
# Promote final.csv to be the new master_data.csv

# Define the file paths
old_path = "/Users/tony/Desktop/outbound/data/final.csv"
new_path = "/Users/tony/Desktop/outbound/data/master_data.csv"

try:
    # Only used to choose the message below; nothing is deleted up front.
    replacing_existing = os.path.exists(new_path)

    # os.replace() swaps the file in one step and fails with FileNotFoundError
    # when final.csv is missing, leaving master_data.csv untouched. Deleting
    # master_data.csv before the move would destroy it whenever this cell is
    # re-run after final.csv has already been renamed.
    os.replace(old_path, new_path)

    if replacing_existing:
        print("Existing master_data.csv removed.")
    print(f"File successfully renamed to: {new_path}")

except FileNotFoundError:
    print("Error: The file 'final.csv' does not exist.")
except PermissionError:
    print("Error: Permission denied. Check file permissions.")
except OSError as e:
    # Any other filesystem failure (e.g. read-only volume)
    print(f"Error: {e}")


File successfully renamed to: /Users/tony/Desktop/outbound/data/master_data.csv


In [6]:
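# Recalculate the days_since_last_visit column of the final data: replace each
# existing value with the number of days between the current date and that
# row's last_visit_date.
# NOTE: data/final.csv was renamed to master_data.csv in the previous cell; a
# copy of final.csv remains in the final_data folder.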



