In [8]:
import pandas as pd

def parse_markdown_table(md_text: str) -> pd.DataFrame:
    # Берём только строки таблицы вида | ... |
    lines = [ln.strip() for ln in md_text.strip().splitlines() if ln.strip()]
    lines = [ln for ln in lines if ln.startswith("|") and ln.endswith("|")]
    if len(lines) < 2:
        raise ValueError("Не нашёл markdown-таблицу (нужно минимум header + separator).")

    header = [c.strip() for c in lines[0].strip("|").split("|")]
    data_lines = lines[2:]  # пропускаем separator line

    rows = []
    for ln in data_lines:
        cells = [c.strip() for c in ln.strip("|").split("|")]
        if len(cells) < len(header):
            cells += [""] * (len(header) - len(cells))
        elif len(cells) > len(header):
            cells = cells[:len(header)]
        rows.append(cells)

    df = pd.DataFrame(rows, columns=header)

    # Авто-конверсия чисел (поддержит и запятую как десятичный разделитель)
    for col in df.columns:
        ser = df[col].astype(str).str.replace(",", ".", regex=False)
        df[col] = pd.to_numeric(ser, errors="ignore")

    # Округление всех числовых столбцов до 3 знаков
    num_cols = df.select_dtypes(include="number").columns
    df[num_cols] = df[num_cols].round(4)

    return df



In [19]:
md_table = r"""
| atlas       | strategy_full        | gsr   | fc_type   | model   |   china2ihb_auc |   ihb2china_auc |   avg_cross_site_auc |
|:------------|:---------------------|:------|:----------|:--------|----------------:|----------------:|---------------------:|
| Brainnetome | 24P                  | GSR   | tangent   | logreg  |        0.908163 |        0.936198 |             0.922181 |
| Schaefer200 | AROMA Non-Aggressive | GSR   | tangent   | logreg  |        0.873299 |        0.955295 |             0.914297 |
| Schaefer200 | aCompCor(5)+12P      | noGSR | tangent   | logreg  |        0.883929 |        0.939236 |             0.911582 |
| HCPex       | aCompCor(5)+12P      | noGSR | tangent   | logreg  |        0.88648  |        0.936198 |             0.911339 |
| HCPex       | 24P                  | GSR   | tangent   | logreg  |        0.880244 |        0.940104 |             0.910174 |
| Brainnetome | 24P                  | noGSR | tangent   | logreg  |        0.892149 |        0.926649 |             0.909399 |
| HCPex       | aCompCor(5)+24P      | noGSR | tangent   | logreg  |        0.883078 |        0.93316  |             0.908119 |
| Schaefer200 | aCompCor(50%)+12P    | noGSR | tangent   | logreg  |        0.867772 |        0.945747 |             0.906759 |
| Schaefer200 | aCompCor(5)+24P      | noGSR | tangent   | logreg  |        0.876417 |        0.937066 |             0.906742 |
| Brainnetome | AROMA Non-Aggressive | GSR   | tangent   | logreg  |        0.87004  |        0.942274 |             0.906157 |
| Brainnetome | aCompCor(5)+12P      | noGSR | tangent   | logreg  |        0.882937 |        0.927517 |             0.905227 |
| Brainnetome | aCompCor(5)+12P      | GSR   | tangent   | logreg  |        0.875425 |        0.934896 |             0.905161 |
| Schaefer200 | AROMA Non-Aggressive | noGSR | tangent   | logreg  |        0.87004  |        0.940104 |             0.905072 |
| Schaefer200 | aCompCor(50%)+12P    | GSR   | tangent   | logreg  |        0.873016 |        0.936632 |             0.904824 |
| Schaefer200 | aCompCor(50%)+24P    | noGSR | tangent   | logreg  |        0.866922 |        0.942708 |             0.904815 |
| Brainnetome | aCompCor(5)+24P      | noGSR | tangent   | logreg  |        0.872874 |        0.936198 |             0.904536 |
| Brainnetome | aCompCor(5)+24P      | GSR   | tangent   | logreg  |        0.871173 |        0.9375   |             0.904337 |
| HCPex       | 24P                  | noGSR | tangent   | logreg  |        0.858135 |        0.945747 |             0.901941 |
| Brainnetome | aCompCor(50%)+24P    | GSR   | tangent   | logreg  |        0.86678  |        0.935764 |             0.901272 |
| HCPex       | aCompCor(5)+12P      | GSR   | tangent   | logreg  |        0.860119 |        0.940538 |             0.900329 |
"""

df = parse_markdown_table(md_table)

df.to_excel("auc_crossite_table.xlsx", index=False)
#df.to_csv("qc_fc_table.csv", index=False)

print(df.head())

         atlas         strategy_full    gsr  fc_type   model  china2ihb_auc  \
0  Brainnetome                   24P    GSR  tangent  logreg         0.9082   
1  Schaefer200  AROMA Non-Aggressive    GSR  tangent  logreg         0.8733   
2  Schaefer200       aCompCor(5)+12P  noGSR  tangent  logreg         0.8839   
3        HCPex       aCompCor(5)+12P  noGSR  tangent  logreg         0.8865   
4        HCPex                   24P    GSR  tangent  logreg         0.8802   

   ihb2china_auc  avg_cross_site_auc  
0         0.9362              0.9222  
1         0.9553              0.9143  
2         0.9392              0.9116  
3         0.9362              0.9113  
4         0.9401              0.9102  


  df[col] = pd.to_numeric(ser, errors="ignore")
  df[col] = pd.to_numeric(ser, errors="ignore")
  df[col] = pd.to_numeric(ser, errors="ignore")
  df[col] = pd.to_numeric(ser, errors="ignore")
  df[col] = pd.to_numeric(ser, errors="ignore")
  df[col] = pd.to_numeric(ser, errors="ignore")
  df[col] = pd.to_numeric(ser, errors="ignore")
  df[col] = pd.to_numeric(ser, errors="ignore")
