In [1]:
import pandas as pd
import os

# Read the two CSV exports that will be compared.
# v1 is located via `path_local`; v2 is opened by bare filename, so it is
# resolved against the current working directory.
path_local, filename_local = "../_noaa_nclimgrid_local/", "df_compare_months_local.csv"
processed_csv_filename = os.path.join(path_local, filename_local)
filename_v2 = 'df_compare_months_local_v2.csv'

df_v1 = pd.read_csv(processed_csv_filename)
df_v2 = pd.read_csv(filename_v2)

# Translate df_v1's column names into the df_v2 naming scheme:
#   fips_full       -> fips
#   tmax_avg_<mon>  -> tmax_mNN
#   prcp_tot_<mon>  -> prcp_mNN
# where <mon> is the three-letter month abbreviation and NN the zero-padded
# month number. Insertion order matters below: 'fips_full' must stay first.
month_abbrevs = ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec')
rename_map = {'fips_full': 'fips'}
for month_number, month_abbrev in enumerate(month_abbrevs, start=1):
    rename_map[f'tmax_avg_{month_abbrev}'] = f'tmax_m{month_number:02d}'
    rename_map[f'prcp_tot_{month_abbrev}'] = f'prcp_m{month_number:02d}'

df_v1 = df_v1.rename(columns=rename_map)

# Reduce df_v1 to the key column plus the renamed monthly columns.
# The first mapped value is 'fips' itself, so it is skipped with [1:] to avoid
# listing the key twice.
target_columns = list(rename_map.values())
common_columns = ['fips'] + target_columns[1:]
df_v1 = df_v1[common_columns]

# Reduce df_v2 to the key column plus every column carrying a monthly prefix.
monthly_prefixes = ('tmax_m', 'prcp_m')
df_v2_monthly_columns = [name for name in df_v2.columns if name.startswith(monthly_prefixes)]
df_v2 = df_v2[['fips'] + df_v2_monthly_columns]

# Sort both dataframes by fips. mergesort is a stable sort, so rows sharing a
# fips code keep their original file order and pair up deterministically.
df_v1_sorted = df_v1.sort_values('fips', kind='mergesort').reset_index(drop=True)
df_v2_sorted = df_v2.sort_values('fips', kind='mergesort').reset_index(drop=True)

# Define tolerances (largest absolute difference still treated as a match)
tmax_tol = 0.001
prcp_tol = 0.01


def _row_keys(df):
    """Return one ``(fips, n-th occurrence of that fips)`` tuple per row of *df*.

    The occurrence counter makes the key unique within a frame even when a
    fips code repeats, so the n-th row for a code in one frame is matched with
    the n-th row for the same code in the other frame.
    """
    occurrence = df.groupby('fips').cumcount()
    return list(zip(df['fips'], occurrence))


# Pair rows by key rather than by position. Pairing by position silently
# compares unrelated rows as soon as one frame has a fips code the other
# lacks, and fails outright when the frames differ in length.
# Rows with a missing (NaN) fips never pair up because NaN != NaN.
_keys_v1 = _row_keys(df_v1_sorted)
_keys_v2 = _row_keys(df_v2_sorted)
_shared_keys = set(_keys_v1) & set(_keys_v2)
_paired_v1 = pd.Series([key in _shared_keys for key in _keys_v1],
                       index=df_v1_sorted.index, dtype=bool)
_paired_v2 = pd.Series([key in _shared_keys for key in _keys_v2],
                       index=df_v2_sorted.index, dtype=bool)

unpaired_v1_count = int((~_paired_v1).sum())
unpaired_v2_count = int((~_paired_v2).sum())
if unpaired_v1_count or unpaired_v2_count:
    print(
        f"WARNING: {unpaired_v1_count} row(s) only in df_v1 and "
        f"{unpaired_v2_count} row(s) only in df_v2 have no partner and were not compared."
    )

# Keep only the paired rows. Both frames are sorted the same way and now hold
# the same set of keys, so row i of one corresponds to row i of the other.
df_v1_sorted = df_v1_sorted[_paired_v1].reset_index(drop=True)
df_v2_sorted = df_v2_sorted[_paired_v2].reset_index(drop=True)

# Every non-key column of df_v1 is compared; selecting them from df_v2 raises
# KeyError up front if df_v2 lacks one of them.
value_columns = [col for col in df_v1_sorted.columns if col != 'fips']
tolerance_by_column = {
    col: tmax_tol if col.startswith('tmax') else prcp_tol
    for col in value_columns
}
v1_rows = df_v1_sorted[value_columns].to_numpy()
v2_rows = df_v2_sorted[value_columns].to_numpy()

# Collected as (fips, column, df_v1 value, df_v2 value), row by row.
mismatches = []

# Compare each paired row. fips is read from its own column so it keeps its
# dtype instead of being upcast to float alongside the measurements.
for fips, v1_vals, v2_vals in zip(df_v1_sorted['fips'], v1_rows, v2_rows):
    for col, val1, val2 in zip(value_columns, v1_vals, v2_vals):
        v1_missing = pd.isna(val1)
        v2_missing = pd.isna(val2)
        if v1_missing and v2_missing:
            # Missing on both sides counts as agreement.
            continue
        if v1_missing or v2_missing or abs(val1 - val2) > tolerance_by_column[col]:
            mismatches.append((fips, col, val1, val2))

from collections import Counter

# Headline totals
print(f"Total rows compared: {len(df_v1_sorted)}")
print(f"Total mismatches found: {len(mismatches)}")

# Tally mismatches by column name (second field of each mismatch tuple),
# listed in the order each column was first seen.
cols = [column for _fips, column, *_values in mismatches]
col_counts = Counter(cols)
print("\nMismatches per column:")
for column_name, n_mismatches in col_counts.items():
    print(f"  {column_name}: {n_mismatches}")

# Print the first few mismatches verbatim for eyeballing
print("\nSample mismatches (up to 10):")
for sample_fips, sample_col, v1_value, v2_value in mismatches[:10]:
    print(f"FIPS {sample_fips}, {sample_col}: df_v1={v1_value}, df_v2={v2_value}")


FileNotFoundError: [Errno 2] No such file or directory: '../_noaa_nclimgrid_local/df_compare_months_local.csv'