In [5]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# --------- Chemins de TES fichiers ----------
sp_path   = "/Users/martin/Documents/Projet pour tester si ca marche/chart_20250909T220305.csv"
gold_path = "/Users/martin/Documents/Projet pour tester si ca marche/chart_20250909T220317.csv"

def load_series_from_csv(path: str) -> pd.Series:
    """
    Charge une série temporelle (index datetime, valeurs float) depuis un CSV.
    Détection flexible des colonnes de date et conversion forcée en numérique.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"Fichier introuvable : {path}")

    df = pd.read_csv(path)
    if df.empty:
        raise ValueError(f"CSV vide : {path}")

    # Normaliser colonnes
    cols = [str(c).strip() for c in df.columns]
    df.columns = cols

    # Détecter colonne de date
    date_candidates = [c for c in cols if re.search(r"date|time|period|month|year", c, re.I)]
    date_col = None
    for c in ([cols[0]] + date_candidates):
        if c not in df.columns:
            continue
        try:
            df[c] = pd.to_datetime(df[c], errors="raise", infer_datetime_format=True, utc=False)
            date_col = c
            break
        except Exception:
            continue

    if date_col is None:
        raise ValueError(f"Impossible d'identifier une colonne de date dans {path}. Colonnes={cols}")

    # Conversion forcée en numérique pour toutes les colonnes sauf la date
    for c in cols:
        if c != date_col:
            df[c] = pd.to_numeric(df[c].astype(str).str.replace(",", "."), errors="coerce")

    # Prendre la dernière colonne contenant au moins une valeur numérique
    numeric_cols = [c for c in cols if c != date_col and df[c].notna().any()]
    if not numeric_cols:
        raise ValueError(f"Aucune colonne numérique trouvée dans {path}. Colonnes={cols}")

    val_col = numeric_cols[-1]

    ser = pd.Series(df[val_col].values, index=df[date_col])
    ser = ser.dropna().astype(float).sort_index()
    ser.name = val_col
    return ser

def guess_label(path: str, ser: pd.Series) -> str:
    name = os.path.basename(path).lower()
    if "gold" in name or "xau" in name or "or" in name:
        return "GOLD"
    if "gspc" in name or "sp500" in name or "s&p" in name:
        return "SP500"
    median_val = float(np.nanmedian(ser.tail(100)))
    return "SP500" if median_val > 1000 else "GOLD"

# --------- Chargement des deux séries ----------
ser1 = load_series_from_csv(sp_path)
ser2 = load_series_from_csv(gold_path)

label1 = guess_label(sp_path, ser1)
label2_guess = guess_label(gold_path, ser2)
label2 = "GOLD" if label1 == "SP500" else "SP500" if label2_guess == label1 else label2_guess

ser1.name = label1
ser2.name = label2

df = pd.concat([ser1, ser2], axis=1).dropna().sort_index()

# Renommage si nécessaire
if set(df.columns) != {"SP500", "GOLD"}:
    c0, c1 = df.columns
    med0, med1 = np.nanmedian(df[c0]), np.nanmedian(df[c1])
    if med0 > med1:
        df.columns = ["SP500", "GOLD"]
    else:
        df.columns = ["GOLD", "SP500"]
        df = df[["SP500", "GOLD"]]

df_m = df.resample("M").last().dropna()
df_m["RATIO"] = df_m["SP500"] / df_m["GOLD"]

# Base 100 en 1920 ou 1ère valeur dispo
base_candidates = df_m.loc["1920-01-01":"1920-12-31", "RATIO"].dropna()
base_value = base_candidates.iloc[0] if not base_candidates.empty else df_m["RATIO"].iloc[0]
df_m["RATIO_BASE100"] = (df_m["RATIO"] / base_value) * 100
df_m["MM7"] = df_m["RATIO_BASE100"].rolling(84, min_periods=1).mean()

plt.figure(figsize=(12, 6))
plt.plot(df_m.index, df_m["RATIO_BASE100"], label="Ratio S&P 500 / Or (Base 100 en 1920)")
plt.plot(df_m.index, df_m["MM7"], label="Moyenne mobile 7 ans")
plt.title("Ratio S&P 500 / Or (Base 100 en 1920) avec MM 7 ans")
plt.xlabel("Année")
plt.ylabel("Base 100")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig("ratio_sp_or_base100_1920_mm7.png", dpi=150)
plt.show()

df_m.to_csv("ratio_sp_or_base100_1920_mm7.csv")
print("✅ Exporté : ratio_sp_or_base100_1920_mm7.csv + PNG")


  df[c] = pd.to_datetime(df[c], errors="raise", infer_datetime_format=True, utc=False)


ValueError: Aucune colonne numérique trouvée dans /Users/martin/Documents/Projet pour tester si ca marche/chart_20250909T220305.csv. Colonnes=['Date;"Value"']