In [None]:
# Install dependencies (only needed on the first run).
# NOTE(review): inside a notebook, `%pip install ...` installs into the running
# kernel's environment; consider it instead of `!pip` when uncommenting.
# !pip install -r ../requirements.txt

In [None]:
import sys
# Put the parent directory first on the module search path. The `src.*`
# imports below presumably rely on this (assumes the kernel's working
# directory sits one level below the project root -- TODO confirm).
sys.path.insert(0, '..')

# Project modules used throughout the notebook
from src.parser import parse_simai, generate_simai, Note, NoteType, ChartMeta
from src.audio import extract_features, detect_beats, OnsetDetector
from src.data import SimaiTokenizer, create_tokenizer

## 1. 解析现有谱面

In [None]:
# Sample chart in simai text form: `&key=value` header lines followed by the
# note data that comes after `&inote_5=`.
# NOTE(review): the simai syntax reference writes BREAK as a suffix (e.g. `1b`);
# confirm that the parser accepts the `b1/b5` spelling used near the end.
example_chart = """
&title=示例曲目
&wholebpm=150
&first=0.5
&lv_5=12+
&des_5=AI谱师
&inote_5=
(150){4}
1,2,3,4,
5,6,7,8,
1/5,2/6,3/7,4/8,
1h[4:2],,5h[4:2],,
1-5[4:1],
3^7[4:2],
b1/b5,
E
"""

# Turn the raw text into a chart object
chart = parse_simai(example_chart)

# Summary lines and the list heading, emitted with a single print call
print(
    f"曲名: {chart.meta.title}",
    f"BPM: {chart.meta.bpm}",
    f"音符数: {len(chart.notes)}",
    "\n音符列表:",
    sep="\n",
)

# One line per parsed note: time, note type name and position
for note in chart.notes:
    print(f"  {note.time:.3f}s: {note.note_type.name} @ {note.position}")

## 2. 创建和测试 Tokenizer

In [None]:
# Build a tokenizer and report its vocabulary size
tokenizer = create_tokenizer(max_seq_length=4096)
print(f"词表大小: {tokenizer.vocab_size}")

# Tokenize the chart parsed in the previous section and report the sizes
tokenized = tokenizer.tokenize(chart)
print(
    f"\nToken 数量: {len(tokenized.tokens)}",
    f"Token IDs 形状: {tokenized.token_ids.shape}",
    sep="\n",
)

# Preview: at most the first 20 tokens, one per line
print("\n前 20 个 Token:")
for token in tokenized.tokens[:20]:
    print(f"  {token}")

## 3. 音频特征提取 (需要音频文件)

In [None]:
# If you have an audio file, its features can be extracted in this cell.
audio_path = "path/to/your/audio.mp3"  # placeholder -- change to your own audio path

# Feature extraction (uncomment to run once `audio_path` points at a real file)
# features = extract_features(audio_path)
# print(f"音频时长: {features.duration:.2f}s")
# print(f"检测 BPM: {features.tempo:.1f}")
# print(f"节拍数: {len(features.beat_times)}")
# print(f"Mel 频谱形状: {features.mel_spectrogram.shape}")

## 4. 手动创建谱面

In [None]:
# These names are also imported by the notebook's first import cell.
from src.parser import Note, NoteType, ChartMeta, generate_simai

# Chart-level metadata
meta = ChartMeta(
    title="我的第一个AI谱面", bpm=140, offset=0.5,
    level="10+", designer="AI", difficulty=5,
)

# Assemble the note list one entry at a time, in ascending time order.
notes = []
notes.append(Note(time=0.5, position='1', note_type=NoteType.TAP))
notes.append(Note(time=1.0, position='5', note_type=NoteType.TAP))
# Tap flagged as a break note
notes.append(Note(time=1.5, position='3', note_type=NoteType.TAP, is_break=True))
# NOTE(review): this HOLD passes no duration-like argument -- confirm what
# Note / generate_simai do in that case.
notes.append(Note(time=2.0, position='1', note_type=NoteType.HOLD))
# Slide described by both a path string and an end position
notes.append(
    Note(time=3.0, position='5', note_type=NoteType.SLIDE, slide_path='5-1', slide_end='1')
)

# Render the notes as simai text and show the result under a heading
simai_output = generate_simai(notes, meta, bpm=140, divisor=4)
print("生成的谱面:", simai_output, sep="\n")

## 5. 使用训练好的模型生成谱面 (需要先训练)

In [None]:
# from src.generation import ChartGenerator, GenerationConfig

# # Load the trained model
# model_path = "models/best.pt"
# generator = ChartGenerator(model_path)

# # Configure the generation (sampling) parameters
# config = GenerationConfig(
#     temperature=0.8,
#     top_k=50,
#     top_p=0.9
# )

# # Generate a chart from an audio file
# audio_path = "path/to/your/audio.mp3"
# simai_text = generator.generate(audio_path, config)
# print(simai_text)

## 6. 导出到 Majdata 格式

In [None]:
# from src.generation import export_for_majdata

# # Export the chart (uses `simai_output` produced in section 4)
# export_for_majdata(
#     simai_text=simai_output,
#     audio_path="path/to/audio.mp3",
#     output_dir="output/charts",
#     song_name="我的AI谱面"
# )

## 下一步

1. **收集数据**: 将大量的 simai 谱面放入 `data/raw/`，对应音频放入 `data/audio/`
2. **预处理数据**: 运行 `python -m src.data.preprocess`
3. **训练模型**: 运行 `python -m src.training.train`
4. **生成谱面**: 使用训练好的模型生成新谱面
5. **在 MajdataView 中预览**: 导出并预览生成的谱面