## Task 3: Parts T16 in Registered Vehicles (Adelshofen)

In [24]:
# =============== Task3: Count vehicles registered in Adelshofen containing T16 (final version) ===============
from pathlib import Path
import re
import pandas as pd

DATA = Path("Data")

# ---------- 工具：递归找文件 ----------
def find_one(patterns):
    if isinstance(patterns, str):
        patterns = [patterns]
    patterns = [p.lower() for p in patterns]
    for p in DATA.rglob("*"):
        if p.is_file():
            name = p.name.lower()
            if all(k in name for k in patterns):
                return p
    return None

# ---------- 工具：读 CSV/TXT（容错） ----------
def read_any(path: Path, nrows=None):
    encs = ["utf-8", "cp1252", "latin1", "iso-8859-1"]
    seps = [";", ",", "\t", "|"]
    suf = path.suffix.lower()
    if suf in [".csv", ".txt"]:
        for enc in encs:
            for sep in seps:
                try:
                    return pd.read_csv(path, sep=sep, encoding=enc, dtype=str,
                                       nrows=nrows, low_memory=False)
                except Exception:
                    pass
                try:
                    return pd.read_csv(path, sep=sep, encoding=enc, dtype=str,
                                       nrows=nrows, engine="python")
                except Exception:
                    continue
        return pd.read_csv(path, dtype=str, nrows=nrows, engine="python")
    elif suf in [".xls", ".xlsx"]:
        return pd.read_excel(path, dtype=str, nrows=nrows)
    else:
        raise ValueError(f"Unsupported format: {suf}")

# ---------- 0) 解析 Einzelteil_T16.txt -> 保存为 CSV ----------
t16_txt = find_one(["einzelteil", "t16", ".txt"]) or (DATA / "Einzelteil_T16.txt")
t16_csv = find_one(["einzelteil", "t16", ".csv"]) or (DATA / "Einzelteil_T16.csv")

if t16_txt and t16_txt.exists() and not t16_csv.exists():
    SEP_FIELDS = re.compile(r'\s*\|\s*\|\s*')
    SEP_RECORD = re.compile(r'[\t\r\n]+')
    text = t16_txt.read_text(encoding="utf-8", errors="ignore").lstrip("\ufeff").strip()
    segments = [seg for seg in SEP_RECORD.split(text) if seg.strip()]
    header_fields = SEP_FIELDS.split(segments[0].strip())
    header = [""] + header_fields
    rows = []
    for seg in segments[1:]:
        tokens = SEP_FIELDS.split(seg.strip())
        if len(tokens) < 23:
            tokens += [""] * (23 - len(tokens))
        elif len(tokens) > 23:
            tokens = tokens[:22] + [" | | ".join(tokens[22:])]
        rows.append(tokens)
    with (DATA / "Einzelteil_T16.csv").open("w", encoding="utf-8-sig", newline="") as f:
        f.write(";".join(header) + "\n")
        for r in rows:
            f.write(";".join(r) + "\n")
    print(f"[INFO] Converted TXT to CSV: {t16_csv}")

# ---------- 1) 读取 Zulassungen_alle_Fahrzeuge.* 并取 Adelshofen 的 ID ----------
reg_path = find_one(["zulassungen", "alle", "fahrzeuge"])
if not reg_path:
    raise FileNotFoundError("Zulassungen_alle_Fahrzeuge.* not found")
df_reg = read_any(reg_path)
df_reg = df_reg.loc[:, ~df_reg.columns.str.startswith("Unnamed")]
df_reg.columns = [c.replace('"','').replace(',','').strip() for c in df_reg.columns]

veh_col = next((c for c in ["IDNummer","ID_Fahrzeug","Fahrzeug_ID","Fahrzeugnummer"] if c in df_reg.columns), None)
place_col = next((c for c in ["Gemeinden","Zulassungsort","Ort","Zulassung"] if c in df_reg.columns), None)
if place_col is None:
    for c in df_reg.columns:
        try:
            if df_reg[c].astype(str).str.upper().str.contains("ADELSHOFEN", na=False).any():
                place_col = c; break
        except Exception:
            pass
assert veh_col and place_col, f"Missing key columns: veh={veh_col}, place={place_col}"

adel_ids = set(df_reg.loc[df_reg[place_col].astype(str).str.upper()=="ADELSHOFEN", veh_col].astype(str))
print(f"[INFO] Number of vehicles registered in Adelshofen: {len(adel_ids)}")

# ---------- 2) 使用 K2ST2 -> Typ21/Typ22 的数据建立映射 ----------
k2   = find_one(["k2st2",".csv"])
typ21= find_one(["typ21",".csv"])
typ22= find_one(["typ22",".csv"])

if not all([k2, typ21, typ22]):
    raise FileNotFoundError("K2ST2/Typ21/Typ22 files not found in Data")

def read_clean(path):
    df = read_any(path)
    return df.loc[:, ~df.columns.str.startswith("Unnamed")]

df_k2, df_21, df_22 = read_clean(k2), read_clean(typ21), read_clean(typ22)

id_k2_col = next((c for c in ["ID_K2ST2","ID_Sitze","Sitz_ID","ID_Sitz"] if c in df_k2.columns), None)
s21 = next((c for c in ["ID_Sitze","Sitz_ID","ID_Sitz"] if c in df_21.columns), None)
f21 = next((c for c in ["ID_Fahrzeug","Fahrzeug_ID","IDNummer"] if c in df_21.columns), None)
s22 = next((c for c in ["ID_Sitze","Sitz_ID","ID_Sitz"] if c in df_22.columns), None)
f22 = next((c for c in ["ID_Fahrzeug","Fahrzeug_ID","IDNummer"] if c in df_22.columns), None)

ids = pd.Index(df_k2[id_k2_col].astype(str).unique())
sel_21 = df_21[df_21[s21].astype(str).isin(ids)][[s21, f21]].copy(); sel_21.columns = ["ID_Sitze","ID_Fahrzeug"]
sel_22 = df_22[df_22[s22].astype(str).isin(ids)][[s22, f22]].copy(); sel_22.columns = ["ID_Sitze","ID_Fahrzeug"]
matches = pd.concat([sel_21, sel_22], ignore_index=True)

# ---------- 3) Adelshofen 交集 ----------
common_ids = set(matches["ID_Fahrzeug"].astype(str)).intersection(adel_ids)
print(f"[INFO] Total matched vehicles: {matches['ID_Fahrzeug'].nunique()}")
print(f"[INFO] Vehicles registered in Adelshofen with T16: {len(common_ids)}")

print("\n=== Final Result ===")
print("Number of vehicles registered in Adelshofen with T16:", len(common_ids))
print("Common vehicle IDs (first 50):", list(common_ids)[:50])
print("\nVehicle details (first 10 rows):")
print(df_reg[df_reg[veh_col].astype(str).isin(common_ids)].head(10))


[INFO] Number of vehicles registered in Adelshofen: 132
[INFO] Total matched vehicles: 655075
[INFO] Vehicles registered in Adelshofen with T16: 36

=== Final Result ===
Number of vehicles registered in Adelshofen with T16: 36
Common vehicle IDs (first 50): ['21-2-21-65888', '21-2-21-163320', '22-2-22-237411', '21-2-21-163319', '22-2-21-34238', '21-2-21-269819', '21-2-21-221942', '21-2-21-143953', '21-2-21-221943', '21-2-21-378774', '22-2-22-223225', '21-2-21-418854', '21-2-21-360428', '21-2-21-172112', '21-2-21-418853', '21-2-21-91656', '21-2-21-216726', '21-2-21-232184', '21-2-21-389484', '21-2-21-436924', '21-2-21-216727', '21-2-21-360429', '21-2-21-185383', '22-2-21-34237', '22-2-22-223224', '21-2-21-269818', '21-2-21-143954', '22-2-22-178084', '21-2-21-446290', '22-2-22-237410', '22-2-22-178083', '21-2-21-172111', '21-2-21-436923', '21-2-21-259509', '21-2-21-91655', '21-2-21-350288']

Vehicle details (first 10 rows):
               IDNummer   Gemeinden   Zulassung
437134    21-2-2