# In [None]:
import pandas as pd

def load_csv_to_dataframe(file_path: str) -> pd.DataFrame:
    """
    Read a CSV file and return its contents as a pandas DataFrame.

    The path is handed straight to ``pandas.read_csv`` with default options.
    """
    frame = pd.read_csv(file_path)
    return frame

def check_matching_columns(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[bool, set[str]]:
    """
    Report whether two DataFrames have the same set of column names.

    Column order is ignored. Returns ``(True, names)`` with the shared column
    names when the sets are equal, otherwise ``(False, names)`` with the names
    that appear in only one of the two DataFrames.
    """
    left_names, right_names = set(df1.columns), set(df2.columns)

    # The symmetric difference is empty exactly when both sets are equal.
    only_in_one = left_names ^ right_names
    if only_in_one:
        return False, only_in_one
    return True, left_names

def get_row_count(df: pd.DataFrame) -> int:
    """
    Return how many rows the DataFrame contains.
    """
    row_total = len(df.index)
    return row_total

def compare_column_values(df1: pd.DataFrame, df2: pd.DataFrame, primary_key: str) -> dict[str, float]:
    """
    Compute, per shared column, the percentage of key-matched rows whose values differ.

    The two DataFrames are inner-joined on ``primary_key``, so only keys that
    occur in both inputs are compared. Two missing values in the same position
    count as equal; a missing value against a present one counts as a mismatch.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame.
        primary_key: Name of the column used to pair rows of the two frames.

    Returns:
        Mapping of column name to mismatch percentage (0-100) for every
        non-key column of ``df1`` that also exists in ``df2``, in ``df1``
        column order. Every percentage is 0.0 when the frames share no keys.

    Raises:
        ValueError: If ``primary_key`` is missing from either DataFrame.
    """
    if primary_key not in df1.columns or primary_key not in df2.columns:
        raise ValueError(f"Primary key '{primary_key}' not found in both DataFrames.")

    # No pre-sort needed: the join pairs rows by key and only counts are reported.
    merged_df = pd.merge(df1, df2, on=primary_key, suffixes=('_df1', '_df2'))
    total_values = len(merged_df)

    comparison_result: dict[str, float] = {}

    for column in df1.columns:
        # Columns that exist in only one frame are not suffixed by the merge
        # and have no counterpart to compare against.
        if column == primary_key or column not in df2.columns:
            continue

        if total_values == 0:
            # Nothing was paired; avoid a 0/0 division that would yield NaN.
            comparison_result[column] = 0.0
            continue

        left_values = merged_df[f"{column}_df1"]
        right_values = merged_df[f"{column}_df2"]

        # NaN != NaN evaluates to True, so rows missing on both sides are excluded explicitly.
        both_missing = left_values.isna() & right_values.isna()
        mismatched_values = (left_values != right_values) & ~both_missing

        comparison_result[column] = float(mismatched_values.sum() / total_values * 100)

    return comparison_result

def get_mismatch_records_for_column(file1: str, file2: str, primary_key: str, column: str) -> pd.DataFrame:
    """
    Load two CSV files and return the rows where one column's values differ.

    Rows are paired by an inner join on ``primary_key`` after each file is
    sorted by that key. The result has two columns, ``<column>_df1`` and
    ``<column>_df2``, holding the differing values from each file. Rows where
    both values are missing are not reported.

    Raises ``ValueError`` if ``column`` or ``primary_key`` is absent from
    either file.
    """
    left = load_csv_to_dataframe(file1)
    right = load_csv_to_dataframe(file2)

    if not (column in left.columns and column in right.columns):
        raise ValueError(f"Column '{column}' not found in both DataFrames.")

    if not (primary_key in left.columns and primary_key in right.columns):
        raise ValueError(f"Primary key '{primary_key}' not found in both DataFrames.")

    # Order both sides by key, then pair the rows that share a key.
    left_ordered = left.sort_values(by=primary_key).reset_index(drop=True)
    right_ordered = right.sort_values(by=primary_key).reset_index(drop=True)
    joined = pd.merge(left_ordered, right_ordered, on=primary_key, suffixes=('_df1', '_df2'))

    left_name, right_name = f"{column}_df1", f"{column}_df2"
    left_values = joined[left_name]
    right_values = joined[right_name]

    # A row differs when the values are unequal and at least one side is present.
    both_missing = left_values.isna() & right_values.isna()
    differs = (left_values != right_values) & ~both_missing

    differing_rows = joined[differs]
    return differing_rows[[left_name, right_name]]

def count_columns_with_mismatches(comparison_result: dict[str, float]) -> int:
    """
    Return how many columns have a mismatch percentage greater than zero.
    """
    nonzero = [pct for pct in comparison_result.values() if pct > 0]
    return len(nonzero)

def main(file1: str, file2: str, primary_key: str) -> str:
    """
    Compare two CSV files and return a human-readable summary report.

    If the files do not have the same set of column names, the report is a
    single line listing the columns found in only one file (sorted so the
    output is reproducible between runs). Otherwise it lists the column count,
    the row count of each file, the mismatch percentage of every non-key
    column, and the number of columns whose mismatch percentage is above zero.

    Args:
        file1: Path to the first CSV file.
        file2: Path to the second CSV file.
        primary_key: Name of the column used to pair rows of the two files.

    Returns:
        The report text.

    Raises:
        ValueError: If the column sets match but ``primary_key`` is not one
            of the columns.
    """
    df1 = load_csv_to_dataframe(file1)
    df2 = load_csv_to_dataframe(file2)

    columns_match, column_info = check_matching_columns(df1, df2)

    if not columns_match:
        # Set iteration order is arbitrary; sort so repeated runs print the same line.
        mismatched_names = ', '.join(sorted(column_info))
        return f"No. of mismatching columns: {len(column_info)} ({mismatched_names})"

    row_count_df1 = get_row_count(df1)
    row_count_df2 = get_row_count(df2)

    comparison_result = compare_column_values(df1, df2, primary_key)
    columns_with_mismatches = count_columns_with_mismatches(comparison_result)

    report_lines = [
        f"Total number of matching columns: {len(column_info)}",
        f"Total number of rows in DataFrame 1: {row_count_df1}",
        f"Total number of rows in DataFrame 2: {row_count_df2}",
    ]
    report_lines.extend(
        f"{column} mismatching values: {mismatch_percentage:.2f}%"
        for column, mismatch_percentage in comparison_result.items()
    )
    # The empty entry produces the blank line that separates the final summary.
    report_lines.append("")
    report_lines.append(f"Number of columns with mismatching records: {columns_with_mismatches}")

    return "\n".join(report_lines) + "\n"
