In [1]:
import re
import pandas as pd
import janitor               # brings .clean_names()
from pathlib import Path

def _extract_phone_records(csv_path):
    """Return the long-format phone records found in one export file.

    The CSV is read with every column as a string and its column names are
    normalized with ``.clean_names()``. A column named ``id`` is renamed to
    ``brizo_id`` when no ``brizo_id`` column is present already.

    Parameters
    ----------
    csv_path : pathlib.Path
        Path of the CSV export to read.

    Returns
    -------
    pandas.DataFrame or None
        One row per non-null phone value, with the columns ``brizo_id``,
        ``phone_number`` and ``list_source`` (the file name without its
        extension). ``None`` when the file has no ``brizo_id`` column or no
        phone/mobile column, so that the caller can skip it.
    """
    df = pd.read_csv(csv_path, dtype=str, skipinitialspace=True).clean_names()

    # Normalize 'id' → 'brizo_id'
    if 'id' in df.columns and 'brizo_id' not in df.columns:
        df = df.rename(columns={'id': 'brizo_id'})

    # Pick up only true phone/mobile columns (exclude any *_type)
    phone_cols = [
        c for c in df.columns
        if re.search(r'(phone|mobile)', c) and not c.endswith('_type')
    ]

    if 'brizo_id' not in df.columns or not phone_cols:
        return None

    # Wide → long: one row per (brizo_id, phone column), blanks dropped.
    df_melted = (
        df[['brizo_id'] + phone_cols]
          .melt(
             id_vars=['brizo_id'],
             value_vars=phone_cols,
             var_name='phone_field',
             value_name='phone_number'
          )
          .dropna(subset=['phone_number'])
    )
    df_melted['list_source'] = csv_path.stem
    return df_melted[['brizo_id', 'phone_number', 'list_source']]


folder = Path('list_exports') / 'brizo'
frames = []

# sorted() makes the processing order, and therefore the row order of
# `combined`, independent of the order in which the filesystem lists files.
for csv_path in sorted(folder.glob('*.csv')):
    records = _extract_phone_records(csv_path)
    if records is not None:
        frames.append(records)

if frames:
    combined = pd.concat(frames, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the expected columns so later cells still run.
    combined = pd.DataFrame(columns=['brizo_id', 'phone_number', 'list_source'])

print(f"Processed {len(frames)} files → {len(combined)} total phone records")

Processed 56 files → 1009111 total phone records


In [2]:
# Tally how many phone records were collected for each brizo_id.
records_per_id = combined.groupby('brizo_id').size()

# Turn the per-id Series into a two-column frame: brizo_id, count.
counts = records_per_id.reset_index(name='count')
display(counts)

Unnamed: 0,brizo_id,count
0,00007b5fcec398ab,3
1,000144264e8456f6,4
2,0001aa412e069ffa,1
3,00022a7453654b49,5
4,000243c264f11715,6
...,...,...
103209,fffd16a3a13966b8,11
103210,fffe113114e7c1e5,21
103211,fffebad4ae63da74,21
103212,fffedd63e21115a3,1


In [None]:
# Write the per-id counts and the combined phone records to CSV.
export_dir = Path('data') / 'exported_data'

# to_csv does not create missing parent directories, so make sure the
# target folder exists before writing.
export_dir.mkdir(parents=True, exist_ok=True)

counts.to_csv(export_dir / 'counts.csv', index=False)
combined.to_csv(export_dir / 'combined.csv', index=False)

