In [8]:
import pandas as pd
import numpy as np
from datetime import datetime

def combine_branch_data(file_paths):
    """Load every CSV in *file_paths* and stack the rows into one DataFrame.

    Args:
        file_paths: Iterable of paths (or anything ``pd.read_csv`` accepts),
            one per source file.

    Returns:
        A single DataFrame holding the rows of all inputs, in input order,
        re-indexed from 0 (the per-file indexes are discarded).

    Raises:
        ValueError: Propagated from ``pd.concat`` when *file_paths* is empty.
    """
    frames = [pd.read_csv(path) for path in file_paths]
    return pd.concat(frames, ignore_index=True)

def clean_data(df):
    """Return a cleaned copy of the combined transaction data.

    Cleaning steps, in order:
      1. Drop rows missing any of ``transaction_id``, ``date`` or
         ``customer_id``.
      2. Parse ``date`` into a real datetime column.
      3. De-duplicate on ``transaction_id``, keeping the row with the most
         recent ``date`` for each id.

    Args:
        df: DataFrame containing at least the columns ``transaction_id``,
            ``date`` and ``customer_id``. It is not modified.

    Returns:
        A new DataFrame sorted by ``date`` descending, one row per
        ``transaction_id``.

    Raises:
        KeyError: If one of the required columns is absent.
    """
    df_cleaned = df.dropna(subset=['transaction_id', 'date', 'customer_id']).copy()

    # Assign the column directly. ``df.loc[:, 'date'] = ...`` writes into the
    # existing column in place on pandas 2.x, which leaves it as ``object``
    # dtype instead of converting it to datetime64.
    df_cleaned['date'] = pd.to_datetime(df_cleaned['date'])

    # Newest first, so that keep='first' retains the latest record per id.
    df_cleaned = df_cleaned.sort_values('date', ascending=False)
    df_cleaned = df_cleaned.drop_duplicates(subset='transaction_id', keep='first')

    return df_cleaned

def calculate_total_sales(df):
    """Sum the sale value (``price`` * ``quantity``) for each branch.

    Args:
        df: DataFrame with ``branch``, ``price`` and ``quantity`` columns.
            It is not modified.

    Returns:
        A DataFrame with one row per branch and the columns ``branch`` and
        ``total``.
    """
    # assign() builds a new frame, so the caller's data is left untouched.
    with_sales = df.assign(total_sale=df['price'] * df['quantity'])
    per_branch = with_sales.groupby('branch')['total_sale'].sum()
    return per_branch.rename('total').reset_index()

def main():
    """Run the branch sales pipeline and print each stage to stdout.

    Prints the raw contents of every branch CSV, then combines the files,
    cleans the combined data, and prints the total sales per branch.
    Any exception raised along the way is reported as a one-line message
    instead of propagating.
    """
    branch_files = {
        'A': '/content/branch_a.csv',
        'B': '/content/branch_b.csv',
        'C': '/content/branch_c.csv',
    }
    file_paths = list(branch_files.values())

    try:
        # Load the raw frames here rather than relying on df_branch_* globals,
        # which are not defined in this file. The files are small, so reading
        # them once for display and again in combine_branch_data is acceptable.
        for label, path in branch_files.items():
            print(f"Branch {label} Raw data")
            print(pd.read_csv(path))

        print("Combining CSV files...")
        combined_df = combine_branch_data(file_paths)
        print(f"Combined data shape: {combined_df.shape}")

        print("\nCleaning data...")
        cleaned_df = clean_data(combined_df)
        print(f"Cleaned data shape: {cleaned_df.shape}")

        print("\nCalculating total sales...")
        total_sales_df = calculate_total_sales(cleaned_df)

        print("\nTotal sales per branch:")
        print(total_sales_df)

    except Exception as e:
        # Top-level boundary: report the failure rather than dumping a traceback.
        print(f"An error occurred: {e}")

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Branch A Raw data
   transaction_id branch        date  product_id  quantity  price customer_id
0               1      A  2023-01-01         101         2   50.0        C001
1               2      A  2023-01-02         102         1   20.0        C002
2               3      A  2023-01-03         103         3   15.0        C003
3               4      A  2023-01-04         101         2   50.0        C004
4               5      A         NaN         104         1   30.0        C005
5               6      A  2023-01-06         105         2   25.0         NaN
Branch B Raw data
   transaction_id branch        date  product_id  quantity  price customer_id
0               7      B  2023-01-01         101         1   50.0        C006
1               8      B  2023-01-02         106         2   40.0        C007
2               9      B  2023-01-03         107         3   25.0        C008
3              10      B  2023-01-04         108         1   30.0        C009
4              11      B  20