論文  
https://arxiv.org/abs/2211.00895<br>
<br>
GitHub  
https://github.com/sweetcocoa/pop2piano<br>
<br>
<a href="https://colab.research.google.com/github/kaz12tech/ai_demos/blob/master/pop2piano_demo.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 環境セットアップ

## GPU確認

In [None]:
# Print the status of the NVIDIA GPU attached to this runtime.
!nvidia-smi

## GitHubからコード取得

In [None]:
!git clone https://github.com/sweetcocoa/pop2piano/

# Commits on Nov 3, 2022
!cd /content/pop2piano
!git checkout e79dcf312d436fecf2a065f6d5a5f65c31b03479

## ライブラリのインストール

In [None]:
%cd /content/pop2piano

# Install the FluidSynth system package.
!apt-get install -y fluidsynth
# Python dependencies, pinned to fixed versions except pytorch-lightning.
# NOTE(review): pytorch-lightning is unpinned, so pip installs whatever release
# is current at run time — consider pinning it like the others.
# NOTE(review): youtube-dl is installed here, but the download cell uses yt-dlp.
!pip install pretty-midi==0.2.9 omegaconf==2.1.1 youtube-dl==2021.12.17 transformers==4.16.1 pytorch-lightning essentia==2.1b6.dev609 note-seq==0.0.3 pyFluidSynth==1.3.0

# moviepy is used by the later cells to cut the clip and extract its audio.
# NOTE(review): presumably moviepy/imageio are pinned for compatibility with
# the Colab image — confirm before changing the versions.
!pip install moviepy==0.2.3.5 imageio==2.4.1
# yt-dlp provides the `YoutubeDL` class imported by this notebook.
!pip install yt-dlp

## ライブラリのインポート

In [None]:
%cd /content/pop2piano

import os 
import sys

import glob
import random

import torch
import torchaudio
import librosa
import numpy as np
import pandas as pd
import IPython.display as ipd
import soundfile as sf
from google.colab import files

from tqdm.auto import tqdm
from omegaconf import OmegaConf
import note_seq

from utils.dsp import get_stereo
from utils.demo import download_youtube
from transformer_wrapper import TransformerWrapper
from midi_tokenizer import MidiTokenizer, extrapolate_beat_times
from preprocess.beat_quantizer import extract_rhythm, interpolate_beat_times

from yt_dlp import YoutubeDL
from moviepy.video.fx.resize import resize
from moviepy.editor import VideoFileClip

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("using device:", device)

# 学習済みモデルのセットアップ

## 学習済みモデルのダウンロード

In [None]:
%cd /content/pop2piano

# Download the released checkpoint into the repository directory.
# `-c` resumes a partially downloaded file; `-O` fixes the local file name,
# which is the same name the model-loading cell passes to `load_from_checkpoint`.
!wget -c https://github.com/sweetcocoa/pop2piano/releases/download/dpi_2k_epoch/model-1999-val_0.67311615.ckpt \
      -O model-1999-val_0.67311615.ckpt

## 学習済みモデルのロード

In [None]:
%cd /content/pop2piano

config = OmegaConf.load("./config.yaml")
wrapper = TransformerWrapper(config)
wrapper = wrapper.load_from_checkpoint("model-1999-val_0.67311615.ckpt", config=config).to(device)
model = "dpipqxiy"
wrapper.eval()

## 入力音声取得

In [None]:
# Source video for the demo (Colab form field).
video_url = 'https://www.youtube.com/watch?v=Qd01-6xVSHk' #@param {type:"string"}

# Clip range in seconds (Colab form fields). The form text below asks the user
# to specify the range and warns that 30 seconds or more is likely to cause OOM.
#@markdown 動画の切り抜き範囲(秒)を指定してください。\
#@markdown 30秒以上の場合OOM発生の可能性が高いため注意
start_sec = 56 #@param {type:"integer"}
end_sec = 88 #@param {type:"integer"}

# Names read by the clipping cell.
start_pt = start_sec
end_pt = end_sec

In [None]:
%cd /content/pop2piano

!mkdir -p test_video test_audio

download_resolution = 720
full_video_path = './test_video/full_video.mp4'
input_clip_path = './test_video/clip_video.mp4'
input_audio_path = './test_audio/clip_audio.mp3'

# 動画ダウンロード
ydl_opts = {'format': f'best[height<={download_resolution}]', 'overwrites': True, 'outtmpl': full_video_path}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download([video_url])

# 指定区間切り抜き
with VideoFileClip(full_video_path) as video:
    subclip = video.subclip(start_pt, end_pt)
    subclip.write_videofile(input_clip_path)

In [None]:
# Extract the audio track of the clipped video into `input_audio_path`.
# The context manager and the `finally` close both clips even when writing
# the audio file fails, matching the `with VideoFileClip(...)` usage above.
with VideoFileClip(input_clip_path) as videoclip:
    audioclip = videoclip.audio
    if audioclip is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"no audio track found in {input_clip_path}")
    try:
        audioclip.write_audiofile(input_audio_path)
    finally:
        audioclip.close()

# pop2piano

In [None]:
# Colab form field: the selected name is passed as `composer=` to
# `wrapper.generate` in the generation cell.
# NOTE(review): presumably each name selects an arranger style known to the
# model — confirm against the repository config.
# @markdown set Arranger
composer = "composer1" #@param['composer1', 'composer2', 'composer3', 'composer4', 'composer5', 'composer6', 'composer7', 'composer8', 'composer9', 'composer10', 'composer11', 'composer12', 'composer13', 'composer14', 'composer15', 'composer16', 'composer17', 'composer18', 'composer19', 'composer20', 'composer21']

In [None]:
# Run the model on the extracted audio clip. The call returns four values,
# unpacked below; `composer` is rebound to the returned value.
# NOTE(review): presumably `pm` is a PrettyMIDI object and the two paths point
# at the saved mix and MIDI files — confirm against `TransformerWrapper.generate`.
pm, composer, mix_path, midi_path = wrapper.generate(
    audio_path=input_audio_path,
    composer=composer,
    model=model,
    show_plot=True,
    save_midi=True,
    save_mix=True,
)

# Convert the generated MIDI to a NoteSequence and plot it.
generated_sequence = note_seq.midi_to_note_sequence(pm)
note_seq.plot_sequence(generated_sequence)