In [None]:
import os
import glob
import pandas as pd
import hashlib

def compute_uid(title, source):
    """Build a short deterministic identifier from a title and its source.

    The two values are formatted as ``"<title>_<source>"``, encoded as UTF-8
    and hashed with SHA-1.

    Args:
        title: Value placed before the underscore (formatted with ``str.format``).
        source: Value placed after the underscore.

    Returns:
        The first 10 hexadecimal characters of the SHA-1 digest.
    """
    hasher = hashlib.sha1()
    hasher.update("{}_{}".format(title, source).encode("utf-8"))
    # Only a 10-character prefix of the 40-character hex digest is kept.
    return hasher.hexdigest()[:10]

# Rewrite every RSS slice CSV in place: parse the publication dates, drop rows
# whose date cannot be parsed, sort by date, recompute `uid` and renumber
# `article_id` sequentially.

# Base path (adjust depending on whether the slices are hourly or full).
# glob returns files in arbitrary order; sorting keeps the run log reproducible.
csv_files = sorted(glob.glob('./data/rss_slices/*.csv'))

# Columns a slice must contain to be processed.
REQUIRED_COLUMNS = {'Title', 'Source', 'Published'}

for file in csv_files:
    filename = os.path.basename(file)
    print(f"⏳ Procesando: {filename}")

    try:
        df = pd.read_csv(file)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse. Treat it as a frame with no
        # columns so the validation below skips it instead of the exception
        # aborting the remaining files.
        df = pd.DataFrame()

    # Minimal validation
    if not REQUIRED_COLUMNS.issubset(df.columns):
        print(f"❌ Skipping {filename}: missing critical columns.")
        continue

    # Robust date parsing: unparseable values are coerced to NaT, then dropped.
    df['Published'] = pd.to_datetime(df['Published'], errors='coerce', utc=True)
    df = df.dropna(subset=['Published'])

    # Sort by date. A stable sort keeps rows with identical timestamps in their
    # existing order, so re-running on an already rewritten file yields the
    # same article_id assignment.
    df = df.sort_values('Published', kind='mergesort').reset_index(drop=True)

    # Recompute uids. Built as a plain list rather than a row-wise apply so the
    # assignment also works when the frame has no rows left.
    df['uid'] = [
        compute_uid(title, source)
        for title, source in zip(df['Title'], df['Source'])
    ]

    # Overwrite article_id with a 1-based sequence following the sorted order.
    df['article_id'] = df.index + 1

    # Save (overwrites the input file).
    df.to_csv(file, index=False, encoding='utf-8-sig')
    print(f"✅ Reescrito: {filename} con {len(df)} artículos.")


In [1]:
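# Disabled scratch cell kept for reference.
# def reverse_string(s):
#     reversed_s = ''.join(reversed(s))
#     return reversed_s  # was `return reversed`, which returned the built-in `reversed` class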

In [3]:
# string = 'hello'
# print(reverse_string(string))

<class 'reversed'>
