# 1. Imports and configuration

In [69]:
import os
import re

import pandas as pd
import requests

from config import CONFIG


In [89]:
# Input and output directories are read from the CONFIG mapping imported from config.
raw_data_path = CONFIG["paths"]["raw_data"]
# NOTE(review): the key is spelled "processsed" (three "s") — presumably it matches
# the key defined in config.py; confirm there before renaming it here.
processed_data_path = CONFIG["paths"]["processsed"]

In [71]:
# Load the API key from a local text file so it is not hard-coded in the notebook.
# "utf-8-sig" transparently drops a byte-order mark that some editors prepend;
# without it the BOM would become part of the key.
with open("API_KEY.txt", "r", encoding="utf-8-sig") as f:
    API_KEY = f.read().strip()

# Fail early: an empty key would otherwise only show up later as failed API requests.
if not API_KEY:
    raise ValueError("API_KEY.txt ist leer - es wurde kein API-Key gefunden.")

# 2. Load LEGO model (MPD/LDR file) into a DataFrame

In [72]:
def parse_model_to_df(filepath):
    """
    Read an LDraw model file and return all sub-file reference lines (line type 1)
    as a DataFrame.

    A type-1 line has the layout
    ``1 <color> x y z a b c d e f g h i <file>``. Lines of any other type and
    type-1 lines with fewer than 15 fields are skipped.

    Parameters
    ----------
    filepath : str
        Path to the .ldr/.mpd file.

    Returns
    -------
    df : pd.DataFrame
        One row per part line with the columns color, x, y, z, a-i (the 3x3
        transformation matrix, row by row) and part (the referenced file name).
        The columns are present even when the file contains no part lines.

    Raises
    ------
    FileNotFoundError
        If ``filepath`` does not exist.
    RuntimeError
        If the file cannot be read for any other reason.
    ValueError
        If a numeric field of a part line cannot be converted.
    """
    columns = ['color', 'x', 'y', 'z',
               'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
               'part']
    matrix_and_position = columns[1:14]
    token_rows = []

    # Read the file and keep only the tokenised part lines.
    try:
        # Explicit encoding keeps the result independent of the platform locale;
        # undecodable bytes are replaced instead of aborting the whole import.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                # Split at most 14 times so that the 15th token is the complete
                # file name, even if it contains spaces. Stripping first also
                # accepts indented lines and removes the trailing newline.
                tokens = line.strip().split(None, 14)
                if len(tokens) == 15 and tokens[0] == '1':
                    token_rows.append(tokens)
    except FileNotFoundError as e:
        raise FileNotFoundError(f"Datei nicht gefunden: {filepath}") from e
    except Exception as e:
        raise RuntimeError(f"Fehler beim Einlesen der Datei: {e}") from e

    # Convert the tokens into typed records (outside the try block so that a
    # malformed number still surfaces as ValueError, not as RuntimeError).
    data = []
    for tokens in token_rows:
        record = {'color': int(tokens[1])}
        for column, token in zip(matrix_and_position, tokens[2:14]):
            record[column] = float(token)
        record['part'] = tokens[14]
        data.append(record)

    # Passing the column list guarantees the schema for an empty result as well.
    df = pd.DataFrame(data, columns=columns)

    return df

In [73]:
# File name of the model to process, resolved relative to raw_data_path when loading;
# its stem is also used to name the exported CSV files.
model_name = "30050-1.mpd"

In [91]:
# Parse the model file, then reduce every referenced file name to its stem
# by dropping the last extension (e.g. "3020.dat" -> "3020").
model_dataframe = parse_model_to_df(os.path.join(raw_data_path, model_name))
model_dataframe["part"] = model_dataframe["part"].map(
    lambda file_name: os.path.splitext(file_name)[0]
)

# 3. Build Dataframe for parts

## 3.1 Extract part numbers from `model_dataframe`

In [75]:
# Keep only the text before the first "." of every part reference and de-duplicate
# AFTER that truncation, so two references that collapse to the same part number
# are requested from the API only once.
parts = model_dataframe["part"].str.split(".").str[0].unique()

## 3.2 Fetch part information from the Rebrickable API

In [76]:
# Collects one single-row DataFrame per part that the API could resolve.
results = []
# The authorization header is identical for every request, so build it once.
headers = {"Authorization": f"key {API_KEY}"}

for counter, part_num in enumerate(parts):
    url = f"https://rebrickable.com/api/v3/lego/parts/{part_num}/"

    # Without a timeout a stalled connection would block the notebook indefinitely.
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code == 200:
        # Flatten the (nested) JSON answer into a one-row DataFrame.
        results.append(pd.json_normalize(response.json()))
    else:
        # Unknown part numbers are reported and skipped instead of aborting the run.
        print(f"Fehler Part_NUM: {part_num}, Position: {counter}; {response.status_code};{response.text}")

print("Abgeschlossen!")

Fehler Part_NUM: 30050, Position: 0; 404;{"detail":"No Part matches the given query."}
Fehler Part_NUM: 2436a, Position: 8; 404;{"detail":"No Part matches the given query."}
Abgeschlossen!


In [77]:
# Stack the per-part frames into one table. Every fetched frame carries the index
# label 0, so the index is renumbered to keep row labels unique.
if not results:
    # pd.concat would fail on an empty list with a far less helpful message.
    raise ValueError("Keine Teiledaten vorhanden - alle API-Abfragen sind fehlgeschlagen.")
raw_parts_df = pd.concat(results, ignore_index=True)

## 3.3 Load part category information and merge with `raw_parts_df`

In [78]:
# Attach the category to every part via its category id. The left join keeps
# parts for which no category row exists.
part_category_df = pd.read_csv("./data/part_categories.csv")
parts_df = (
    raw_parts_df
    .merge(part_category_df, left_on="part_cat_id", right_on="id", how="left")
    # Give the suffixed name columns produced by the merge readable names.
    .rename(columns={"name_x": "part_name", "name_y": "category_name"})
    # Drop the category-table columns that are not needed afterwards.
    .drop(columns=["id", "part_count"])
)
parts_df.head()

Unnamed: 0,part_num,part_name,part_cat_id,year_from,year_to,part_url,part_img_url,prints,molds,alternates,print_of,external_ids.BrickLink,external_ids.BrickOwl,external_ids.Brickset,external_ids.LDraw,external_ids.LEGO,external_ids.Peeron,category_name
0,3020,Plate 2 x 4,14,1962,2025,https://rebrickable.com/parts/3020/plate-2-x-4/,https://cdn.rebrickable.com/media/parts/elemen...,[],[3020a],[3709],,[3020],[80025],"[3020, 5584]",[3020],"[3020, 5584]",,Plates
1,3021,Plate 2 x 3,14,1962,2025,https://rebrickable.com/parts/3021/plate-2-x-3/,https://cdn.rebrickable.com/media/parts/elemen...,[],[],[],,[3021],[960521],[3021],[3021],[3021],,Plates
2,4081b,"Plate Special 1 x 1 with Clip Light, Bar Hole ...",9,1984,2025,https://rebrickable.com/parts/4081b/plate-spec...,https://cdn.rebrickable.com/media/parts/elemen...,[],[],[4081a],,[4081b],[194287],"[4018, 4081, 41632]",[4081b],"[4081, 41632]",,Plates Special
3,60470a,Plate Special 1 x 2 with Clips Horizontal [Thi...,9,2007,2013,https://rebrickable.com/parts/60470a/plate-spe...,https://cdn.rebrickable.com/media/parts/ldraw/...,[],[60470b],[],,[60470],[20393],[60470],[60470a],[60470],[60470],Plates Special
4,3710,Plate 1 x 4,14,1976,2025,https://rebrickable.com/parts/3710/plate-1-x-4/,https://cdn.rebrickable.com/media/parts/elemen...,"[3710pr0003, 3710pr0001, 3710pr0002]",[],[],,[3710],[515555],[3710],[3710],[3710],,Plates


## 3.4 Function definitions for extracting structural information from `part_name` (applied in 3.5)

In [79]:
def extract_dimensions(name):
    """
    Extract the first "A x B" or "A x B x C" size specification from a part name.

    Parameters
    ----------
    name : str
        Part name, e.g. "Plate 2 x 4" or "Plate 1 x 1 x 2/3".

    Returns
    -------
    tuple
        Three entries (dim1, dim2, dim3) as strings; missing dimensions are None.
        The third dimension may be a fraction such as "2/3".
    """
    # Ordered from most to least specific: the 3D pattern must be tried first,
    # otherwise the 2D pattern would swallow the first two numbers of a 3D size.
    size_patterns = (
        r"(\d+)\s*x\s*(\d+)\s*x\s*([\d/]+)",
        r"(\d+)\s*x\s*(\d+)",
    )

    for pattern in size_patterns:
        found = re.search(pattern, name)
        if found is not None:
            dims = found.groups()
            # Pad a 2D result with None so the result always has three entries.
            return dims + (None,) * (3 - len(dims))

    return (None, None, None)

In [80]:
def extract_bracket_info(name):
    """
    Return the text inside the first pair of square brackets of a part name.

    Parameters
    ----------
    name : str
        Part name, e.g. "Plate Special 1 x 2 [Thick Open O Clips]".

    Returns
    -------
    str or None
        The bracket content without the brackets, or None if the name
        contains no "[...]" section.
    """
    found = re.search(r"\[(.*?)\]", name)
    if found is None:
        return None
    return found.group(1)

## 3.5 Apply extraction functions to `part_name`

In [81]:
# Build the three dimension columns in one step from the extracted tuples rather
# than creating a pd.Series per row. Reusing the frame's index keeps the rows
# aligned, and the explicit column list also works for an empty frame.
parts_df[["dim1", "dim2", "dim3"]] = pd.DataFrame(
    parts_df["part_name"].map(extract_dimensions).tolist(),
    columns=["dim1", "dim2", "dim3"],
    index=parts_df.index,
)
# The bracket text is a single scalar per row, so a plain element-wise map suffices.
parts_df["bracket_info"] = parts_df["part_name"].map(extract_bracket_info)

# 4. Results

In [92]:
# Name the exports after the model file (without its extension).
base_name = os.path.splitext(model_name)[0]

# to_csv does not create missing directories; make sure the target exists so the
# export also works on a fresh checkout.
if processed_data_path:
    os.makedirs(processed_data_path, exist_ok=True)

parts_df.to_csv(os.path.join(processed_data_path, f"{base_name}_parts.csv"), index=False)
model_dataframe.to_csv(os.path.join(processed_data_path, f"{base_name}_model.csv"), index=False)