In [3]:
from model import dynamic_vae

In [5]:
import torch.nn.utils.prune as prune
import torch
import torch.nn as nn

In [23]:
model_fold0 = torch.load("model_fold0.torch", weights_only=False, map_location='cpu')
model_fold1 = torch.load("model_fold1.torch", weights_only=False, map_location='cpu')
model_fold2 = torch.load("model_fold2.torch", weights_only=False, map_location='cpu')
model_fold3 = torch.load("model_fold3.torch", weights_only=False, map_location='cpu')
model_fold4 = torch.load("model_fold4.torch", weights_only=False, map_location='cpu')

In [24]:
import torch

# 모델 파일 리스트
model_files = [f"model_fold{i}.torch" for i in range(5)]

# 양자화 및 저장
for i, model_file in enumerate(model_files):
    print(f"🔄 Fold {i} 모델 불러오는 중...")
    model = torch.load(model_file, weights_only=False, map_location='cpu')

    print(f"⚙️  Fold {i} 모델 양자화 중...")
    quantized_model = torch.quantization.quantize_dynamic(
        model,
        {torch.nn.Linear, torch.nn.GRU},
        dtype=torch.qint8
    )

    quant_model_path = f"quantized_model_fold{i}.torch"
    torch.save(quantized_model, quant_model_path)
    print(f"✅ Fold {i} 양자화 완료 및 저장: {quant_model_path}\n")


🔄 Fold 0 모델 불러오는 중...
⚙️  Fold 0 모델 양자화 중...
✅ Fold 0 양자화 완료 및 저장: quantized_model_fold0.torch

🔄 Fold 1 모델 불러오는 중...
⚙️  Fold 1 모델 양자화 중...
✅ Fold 1 양자화 완료 및 저장: quantized_model_fold1.torch

🔄 Fold 2 모델 불러오는 중...
⚙️  Fold 2 모델 양자화 중...
✅ Fold 2 양자화 완료 및 저장: quantized_model_fold2.torch

🔄 Fold 3 모델 불러오는 중...
⚙️  Fold 3 모델 양자화 중...
✅ Fold 3 양자화 완료 및 저장: quantized_model_fold3.torch

🔄 Fold 4 모델 불러오는 중...
⚙️  Fold 4 모델 양자화 중...
✅ Fold 4 양자화 완료 및 저장: quantized_model_fold4.torch



In [25]:
import torch
import torch.nn as nn

def is_quantized_model(model):
    for name, module in model.named_modules():
        if 'quantized' in str(type(module)):
            print(f"[✓] Quantized layer found: {name} → {type(module)}")
            return True
    print("[✗] No quantized layers found.")
    return False


In [26]:
from torch.ao.nn.quantized.dynamic import GRU as QuantizedGRU

def check_quantized(model):
    for name, module in model.named_modules():
        if isinstance(module, QuantizedGRU):
            print(f"Quantized GRU found: {name}")
        elif 'quantized' in str(type(module)):
            print(f"Quantized layer found: {name} → {type(module)}")


In [28]:
# 파일 리스트
original_model_files = [f"model_fold{i}.torch" for i in range(5)]
quantized_model_files = [f"quantized_model_fold{i}.torch" for i in range(5)]

# 원본 모델 테스트
print("💡 [Original Models 테스트 시작]")
for i, model_file in enumerate(original_model_files):
    print(f"\n[Original Model Fold {i}]")
    model = torch.load(model_file, map_location='cpu', weights_only=False)
    is_quantized_model(model)
    check_quantized(model)


💡 [Original Models 테스트 시작]

[Original Model Fold 0]
[✗] No quantized layers found.

[Original Model Fold 1]
[✗] No quantized layers found.

[Original Model Fold 2]
[✗] No quantized layers found.

[Original Model Fold 3]
[✗] No quantized layers found.

[Original Model Fold 4]
[✗] No quantized layers found.


In [33]:
print("[Quantized Model Fold 0]")
model = torch.load("quantized_model_fold0.torch", map_location='cpu', weights_only=False)
is_quantized_model(model)
check_quantized(model)


[Quantized Model Fold 0]
[✓] Quantized layer found: encoder_rnn → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.GRU'>
Quantized GRU found: encoder_rnn
Quantized layer found: encoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: encoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized GRU found: decoder_rnn
Quantized layer found: decoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: decoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: hidden2mean → <class 'torch.ao.nn.quantized.dynamic.modules.linear.Linear'>
Quantized layer found: hidden2mean._packed_params → <class 'torch.ao.nn.quantized.modules.linear.LinearPackedParams'>
Quantized layer found: hidden2log_v → <class 'torch.ao.nn.quantized.dynamic.modules.linear.

In [29]:
print("[Quantized Model Fold 1]")
model = torch.load("quantized_model_fold1.torch", map_location='cpu', weights_only=False)
is_quantized_model(model)
check_quantized(model)


[Quantized Model Fold 1]
[✓] Quantized layer found: encoder_rnn → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.GRU'>
Quantized GRU found: encoder_rnn
Quantized layer found: encoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: encoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized GRU found: decoder_rnn
Quantized layer found: decoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: decoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: hidden2mean → <class 'torch.ao.nn.quantized.dynamic.modules.linear.Linear'>
Quantized layer found: hidden2mean._packed_params → <class 'torch.ao.nn.quantized.modules.linear.LinearPackedParams'>
Quantized layer found: hidden2log_v → <class 'torch.ao.nn.quantized.dynamic.modules.linear.

In [30]:
print("[Quantized Model Fold 2]")
model = torch.load("quantized_model_fold2.torch", map_location='cpu', weights_only=False)
is_quantized_model(model)
check_quantized(model)


[Quantized Model Fold 2]
[✓] Quantized layer found: encoder_rnn → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.GRU'>
Quantized GRU found: encoder_rnn
Quantized layer found: encoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: encoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized GRU found: decoder_rnn
Quantized layer found: decoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: decoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: hidden2mean → <class 'torch.ao.nn.quantized.dynamic.modules.linear.Linear'>
Quantized layer found: hidden2mean._packed_params → <class 'torch.ao.nn.quantized.modules.linear.LinearPackedParams'>
Quantized layer found: hidden2log_v → <class 'torch.ao.nn.quantized.dynamic.modules.linear.

In [31]:
print("[Quantized Model Fold 3]")
model = torch.load("quantized_model_fold3.torch", map_location='cpu', weights_only=False)
is_quantized_model(model)
check_quantized(model)


[Quantized Model Fold 3]
[✓] Quantized layer found: encoder_rnn → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.GRU'>
Quantized GRU found: encoder_rnn
Quantized layer found: encoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: encoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized GRU found: decoder_rnn
Quantized layer found: decoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: decoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: hidden2mean → <class 'torch.ao.nn.quantized.dynamic.modules.linear.Linear'>
Quantized layer found: hidden2mean._packed_params → <class 'torch.ao.nn.quantized.modules.linear.LinearPackedParams'>
Quantized layer found: hidden2log_v → <class 'torch.ao.nn.quantized.dynamic.modules.linear.

In [32]:
print("[Quantized Model Fold 4]")
model = torch.load("quantized_model_fold4.torch", map_location='cpu', weights_only=False)
is_quantized_model(model)
check_quantized(model)


[Quantized Model Fold 4]
[✓] Quantized layer found: encoder_rnn → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.GRU'>
Quantized GRU found: encoder_rnn
Quantized layer found: encoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: encoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized GRU found: decoder_rnn
Quantized layer found: decoder_rnn._all_weight_values.0 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: decoder_rnn._all_weight_values.1 → <class 'torch.ao.nn.quantized.dynamic.modules.rnn.PackedParameter'>
Quantized layer found: hidden2mean → <class 'torch.ao.nn.quantized.dynamic.modules.linear.Linear'>
Quantized layer found: hidden2mean._packed_params → <class 'torch.ao.nn.quantized.modules.linear.LinearPackedParams'>
Quantized layer found: hidden2log_v → <class 'torch.ao.nn.quantized.dynamic.modules.linear.

In [34]:
import torch
import torch.nn as nn
from torch.ao.nn.quantized.dynamic import GRU as QuantizedGRU
import pandas as pd
from IPython.display import display

def compare_models(model_fp32, model_quantized):
    fp32_layers = {name: type(module).__name__ for name, module in model_fp32.named_modules()}
    quant_layers = {name: type(module).__name__ for name, module in model_quantized.named_modules()}

    all_keys = sorted(set(fp32_layers.keys()) | set(quant_layers.keys()))
    rows = []

    for key in all_keys:
        fp32_type = fp32_layers.get(key, "-")
        quant_type = quant_layers.get(key, "-")
        changed = "✅" if fp32_type != quant_type else ""
        rows.append([key, fp32_type, quant_type, changed])

    df = pd.DataFrame(rows, columns=["Layer Name", "Original", "Quantized", "Changed"])
    display(df)

# 사용 예시
# model_fp32 = ... # 원래 모델 (양자화 전)
# model_quantized = ... # 양자화한 모델

# compare_models(model_fp32, model_quantized)


In [36]:
quantized_model_fold0 = torch.load("quantized_model_fold0.torch", weights_only=False, map_location='cpu')
quantized_model_fold1 = torch.load("quantized_model_fold1.torch", weights_only=False, map_location='cpu')
quantized_model_fold2 = torch.load("quantized_model_fold2.torch", weights_only=False, map_location='cpu')
quantized_model_fold3 = torch.load("quantized_model_fold3.torch", weights_only=False, map_location='cpu')
quantized_model_fold4 = torch.load("quantized_model_fold4.torch", weights_only=False, map_location='cpu')

In [37]:
compare_models(model_fold0, quantized_model_fold0)


Unnamed: 0,Layer Name,Original,Quantized,Changed
0,,DynamicVAE,DynamicVAE,
1,decoder_rnn,GRU,GRU,
2,decoder_rnn._all_weight_values,-,ModuleList,✅
3,decoder_rnn._all_weight_values.0,-,PackedParameter,✅
4,decoder_rnn._all_weight_values.1,-,PackedParameter,✅
5,encoder_rnn,GRU,GRU,
6,encoder_rnn._all_weight_values,-,ModuleList,✅
7,encoder_rnn._all_weight_values.0,-,PackedParameter,✅
8,encoder_rnn._all_weight_values.1,-,PackedParameter,✅
9,hidden2log_v,Linear,Linear,


In [38]:
compare_models(model_fold1, quantized_model_fold1)


Unnamed: 0,Layer Name,Original,Quantized,Changed
0,,DynamicVAE,DynamicVAE,
1,decoder_rnn,GRU,GRU,
2,decoder_rnn._all_weight_values,-,ModuleList,✅
3,decoder_rnn._all_weight_values.0,-,PackedParameter,✅
4,decoder_rnn._all_weight_values.1,-,PackedParameter,✅
5,encoder_rnn,GRU,GRU,
6,encoder_rnn._all_weight_values,-,ModuleList,✅
7,encoder_rnn._all_weight_values.0,-,PackedParameter,✅
8,encoder_rnn._all_weight_values.1,-,PackedParameter,✅
9,hidden2log_v,Linear,Linear,


In [39]:
compare_models(model_fold2, quantized_model_fold2)


Unnamed: 0,Layer Name,Original,Quantized,Changed
0,,DynamicVAE,DynamicVAE,
1,decoder_rnn,GRU,GRU,
2,decoder_rnn._all_weight_values,-,ModuleList,✅
3,decoder_rnn._all_weight_values.0,-,PackedParameter,✅
4,decoder_rnn._all_weight_values.1,-,PackedParameter,✅
5,encoder_rnn,GRU,GRU,
6,encoder_rnn._all_weight_values,-,ModuleList,✅
7,encoder_rnn._all_weight_values.0,-,PackedParameter,✅
8,encoder_rnn._all_weight_values.1,-,PackedParameter,✅
9,hidden2log_v,Linear,Linear,


In [40]:
compare_models(model_fold3, quantized_model_fold3)


Unnamed: 0,Layer Name,Original,Quantized,Changed
0,,DynamicVAE,DynamicVAE,
1,decoder_rnn,GRU,GRU,
2,decoder_rnn._all_weight_values,-,ModuleList,✅
3,decoder_rnn._all_weight_values.0,-,PackedParameter,✅
4,decoder_rnn._all_weight_values.1,-,PackedParameter,✅
5,encoder_rnn,GRU,GRU,
6,encoder_rnn._all_weight_values,-,ModuleList,✅
7,encoder_rnn._all_weight_values.0,-,PackedParameter,✅
8,encoder_rnn._all_weight_values.1,-,PackedParameter,✅
9,hidden2log_v,Linear,Linear,


In [41]:
compare_models(model_fold4, quantized_model_fold4)


Unnamed: 0,Layer Name,Original,Quantized,Changed
0,,DynamicVAE,DynamicVAE,
1,decoder_rnn,GRU,GRU,
2,decoder_rnn._all_weight_values,-,ModuleList,✅
3,decoder_rnn._all_weight_values.0,-,PackedParameter,✅
4,decoder_rnn._all_weight_values.1,-,PackedParameter,✅
5,encoder_rnn,GRU,GRU,
6,encoder_rnn._all_weight_values,-,ModuleList,✅
7,encoder_rnn._all_weight_values.0,-,PackedParameter,✅
8,encoder_rnn._all_weight_values.1,-,PackedParameter,✅
9,hidden2log_v,Linear,Linear,


In [42]:
import torch
import os
import tempfile

def get_model_size(model):
    """모델을 임시 파일로 저장하고 크기(바이트 단위)를 반환"""
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        torch.save(model.state_dict(), tmp.name)
        size = os.path.getsize(tmp.name)
    os.remove(tmp.name)
    return size

def compare_model_sizes(model_fp32, model_quantized):
    """모델 용량 비교 결과 출력"""
    size_fp32 = get_model_size(model_fp32)
    size_quant = get_model_size(model_quantized)
    reduction = 100 * (size_fp32 - size_quant) / size_fp32

    print(f" Float32 모델 크기     : {size_fp32 / 1e6:.2f} MB")
    print(f" Quantized 모델 크기   : {size_quant / 1e6:.2f} MB")
    print(f" 크기 감소율           : {reduction:.2f}%")

# 사용 예시
# compare_model_sizes(model_fp32, model_quantized)


In [43]:
compare_model_sizes(model_fold0, quantized_model_fold0)

 Float32 모델 크기     : 51.38 MB
 Quantized 모델 크기   : 12.94 MB
 크기 감소율           : 74.82%


In [44]:
compare_model_sizes(model_fold1, quantized_model_fold1)

 Float32 모델 크기     : 51.38 MB
 Quantized 모델 크기   : 12.94 MB
 크기 감소율           : 74.82%


In [45]:
compare_model_sizes(model_fold2, quantized_model_fold2)

 Float32 모델 크기     : 51.38 MB
 Quantized 모델 크기   : 12.94 MB
 크기 감소율           : 74.82%


In [46]:
compare_model_sizes(model_fold3, quantized_model_fold3)

 Float32 모델 크기     : 51.38 MB
 Quantized 모델 크기   : 12.94 MB
 크기 감소율           : 74.82%


In [47]:
compare_model_sizes(model_fold4, quantized_model_fold4)

 Float32 모델 크기     : 51.38 MB
 Quantized 모델 크기   : 12.94 MB
 크기 감소율           : 74.82%
