# A sample to fine-tune Wav2Lip on SageMaker

In [None]:
## Update sagemaker python sdk version
!pip install -U sagemaker

In [None]:
import sagemaker
import boto3
from sagemaker import get_execution_role

# SageMaker session, execution role and the session's default S3 bucket.
sess = sagemaker.Session()
role = get_execution_role()
sagemaker_default_bucket = sess.default_bucket()

# Region and account id of the underlying boto3 session; both are used
# later in the notebook to build the ECR image URI.
boto_session = sess.boto_session
region = boto_session.region_name
account = boto_session.client("sts").get_caller_identity()["Account"]

In [None]:
## download training script from github
!rm -rf ./wav2lip
!git clone https://github.com/Rudrabha/Wav2Lip.git

## Download the pretrained models (expert discriminator & face detector) and upload them to S3

In [None]:
!cd ./wav2lip/face_detection && wget https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth
!cd ./wav2lip/models && wget https://iiitaphyd-my.sharepoint.com/:u:/g/personal/radrabha_m_research_iiit_ac_in/EQRvmiZg-HRAjvI6zqN9eTEBP74KefynCwPWVmF57l-AYA?e=ZRPHKP

In [None]:
!chmod +x ./s5cmd
!./s5cmd sync ./wav2lip/ s3://${sagemaker_default_bucket}/models/wav2lip/ 

## Prepare docker image

In [None]:
%%writefile Dockerfile
## Change the region code below to the region you are using; this sample uses us-west-2.
FROM 763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04
#FROM pytorch/pytorch:1.5-cuda10.1-cudnn7-runtime

ENV LANG=C.UTF-8
ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE

## Refresh the package index before installing, and pass -y so apt-get does not
## stop at a confirmation prompt during the (non-interactive) image build.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*


In [None]:
## Log Docker in to the ECR registry that hosts the base image referenced in the Dockerfile.
## Change the region code (both occurrences) to the region you are using; this sample uses us-west-2.
!aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com

**Build image and push to ECR.**

In [None]:
## Name of the ECR repository (also used as the local image name by the build script below).
## The name should contain *sagemaker*.
repo_name = "sagemaker-wav2lip-demo"

In [None]:
%%script env repo_name=$repo_name bash

#!/usr/bin/env bash

# Build the Docker image from the Dockerfile in the current directory and push it
# to ECR so that SageMaker can use it.
#
# Input: the environment variable `repo_name`, used both as the local image name
# and, combined with the account and region, as the ECR repository name.

# Stop at the first failing command so that a failed build is never followed by
# tagging and pushing a stale image.
set -eo pipefail

# The name of our algorithm
algorithm_name="${repo_name}"

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined).
# `aws configure get` exits non-zero when the value is unset, hence the `|| true`.
region=$(aws configure get region || true)
region="${region:-us-west-2}"

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to our own registry (the registry host, not the image name).
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

## Train!

Use an S3 dataset path whose layout matches the `data_root` expected by Wav2Lip. It should look like this:

```
data_root (mvlrs_v1)
├── main, pretrain (we use only main folder in this work)
|	├── list of folders
|	│   ├── five-digit numbered video IDs ending with (.mp4)
```

In [None]:
%%writefile train.sh
#!/bin/bash
# Training entry point: preprocess the dataset, train the expert discriminator,
# train Wav2Lip, then upload the Wav2Lip checkpoints to S3.
# Requires the MODEL_S3_BUCKET environment variable (destination bucket).

# Abort on the first failing step instead of continuing with missing inputs.
set -eo pipefail

chmod +x ./s5cmd
#./s5cmd sync  s3://${sagemaker_default_bucket}/models/wav2lip/ ./wav2lip/
pip install -r ./wav2lip/requirements.txt

# SageMaker makes each input channel available under /opt/ml/input/data/<channel_name>;
# the channel passed to estimator.fit() in the notebook is named "data_root".
DATA_ROOT=/opt/ml/input/data/data_root/main
PREPROCESSED_ROOT=/tmp/lrs2_preprocessed/
SYNCNET_DIR=/tmp/trained_syncnet/
WAV2LIP_DIR=/tmp/trained_wav2lip/

###data process
python ./wav2lip/preprocess.py --data_root "${DATA_ROOT}" --preprocessed_root "${PREPROCESSED_ROOT}"
###train the expert discriminator
python ./wav2lip/color_syncnet_train.py --data_root "${PREPROCESSED_ROOT}" --checkpoint_dir "${SYNCNET_DIR}"

# --syncnet_checkpoint_path must point at a checkpoint file, not at the directory.
# Pick the last checkpoint by name.
# NOTE(review): assumes color_syncnet_train.py writes zero-padded *.pth files into
# --checkpoint_dir and terminates on its own -- confirm against the cloned Wav2Lip version.
syncnet_checkpoint=$(ls "${SYNCNET_DIR}"*.pth | sort | tail -n 1)
if [ -z "${syncnet_checkpoint}" ]
then
    echo "No expert discriminator checkpoint found in ${SYNCNET_DIR}" >&2
    exit 1
fi

###train wav2lip 
python ./wav2lip/hq_wav2lip_train.py --data_root "${PREPROCESSED_ROOT}" --checkpoint_dir "${WAV2LIP_DIR}" --syncnet_checkpoint_path "${syncnet_checkpoint}"

./s5cmd sync "${WAV2LIP_DIR}" "s3://${MODEL_S3_BUCKET}/models/wav2lip/output/$(date +%Y-%m-%d-%H-%M-%S)/"

In [None]:
## URI of the image that was built and pushed above
image_uri = f"{account}.dkr.ecr.{region}.amazonaws.com/{repo_name}:latest"
image_uri

In [None]:
## S3 prefix of the training dataset; set this to your own dataset path
train_data_path = 's3://{}/wav2lip/train_data/'.format(sagemaker_default_bucket)

## A single input channel named "data_root"
inputs = dict(data_root=train_data_path)

In [None]:
import time
from sagemaker.estimator import Estimator

# Environment variables passed to the training container.
# MODEL_S3_BUCKET: the bucket to store pretrained model and fine-tune model.
environment = {"MODEL_S3_BUCKET": sagemaker_default_bucket}

base_job_name = "wav2lip-demo"
instance_type = "ml.p4d.24xlarge"

# Upper bound on the training job's run time: two days, in seconds.
max_run_seconds = 2 * 24 * 60 * 60

estimator_kwargs = dict(
    role=role,
    entry_point="train.sh",
    source_dir="./",
    base_job_name=base_job_name,
    instance_count=1,
    instance_type=instance_type,
    image_uri=image_uri,
    environment=environment,
    disable_profiler=True,
    debugger_hook_config=False,
    max_run=max_run_seconds,
)
estimator = Estimator(**estimator_kwargs)

# Launch the training job with the input channels defined earlier.
estimator.fit(inputs)

You can find the S3 path of the trained model in the logs above.