In [1]:
import pandas as pd
import numpy as np

# Generate one million synthetic year values in [1900, 2025).
# A seeded Generator makes the cell reproducible on Restart & Run All.
# The dtype is pinned to int64 because np.random.randint's default integer
# type follows the platform (e.g. 32-bit on Windows with NumPy 1.x), which
# would silently invalidate the hard-coded "int64" label printed below.
rng = np.random.default_rng(42)
years = rng.integers(1900, 2025, size=1_000_000, dtype=np.int64)
df = pd.DataFrame({'year': years})


def series_megabytes(series):
    """Return the deep memory footprint of ``series`` in MiB.

    The figure comes from ``Series.memory_usage(deep=True)``, so it includes
    the index as well as the values.
    """
    return series.memory_usage(deep=True) / 1024 / 1024


# Compare memory usage of the baseline column with its narrower casts.
print("Original (int64):", series_megabytes(df['year']), "MB")

# 'Int16' / 'Int32' (capital "I") are pandas *nullable* integer dtypes.
# Besides the integer values they keep a one-byte-per-row missing-value mask,
# so they cost 3 and 5 bytes per row rather than the 2 and 4 bytes of the
# plain NumPy 'int16' / 'int32' dtypes.
for dtype_name, column in (("Int16", "year_int16"), ("Int32", "year_int32")):
    df[column] = df['year'].astype(dtype_name)
    print(f"{dtype_name}:", series_megabytes(df[column]), "MB")

Original (int64): 7.629520416259766 MB
Int16: 2.8611488342285156 MB
Int32: 4.768497467041016 MB


In [2]:
import pandas as pd
import numpy as np

# Place the year column between columns of other dtypes (text and float)
# to see how a cast of one column affects the frame's total footprint.
# A seeded Generator keeps the data reproducible, and the year dtype is
# pinned to int64 because the default integer type of np.random.randint
# depends on the platform / NumPy version.
rng = np.random.default_rng(42)
n_rows = 1_000_000
df = pd.DataFrame({
    'text_before': ['some_text'] * n_rows,
    'float_before': rng.random(n_rows),
    'year': rng.integers(1900, 2025, size=n_rows, dtype=np.int64),
    'float_after': rng.random(n_rows),
    'text_after': ['other_text'] * n_rows,
})


def report_memory(frame, heading):
    """Print ``heading``, then each column's dtype and deep memory use, then the total.

    Per-column figures come from ``Series.memory_usage(deep=True)`` and so
    include the index each time; the total comes from
    ``DataFrame.memory_usage(deep=True).sum()``. All values are printed in MiB.
    """
    print(heading)
    for col in frame.columns:
        print(f"{col}: {frame[col].dtype},", frame[col].memory_usage(deep=True) / 1024 / 1024, "MB")
    print("Total:", frame.memory_usage(deep=True).sum() / 1024 / 1024, "MB")


# Initial state
report_memory(df, "\n1. 초기 상태 (컬럼별):")

# Cast year to the nullable Int16 dtype (values plus a one-byte-per-row mask).
df['year'] = df['year'].astype('Int16')
report_memory(df, "\n2. year를 Int16으로 변경 후 (컬럼별):")

# Cast year to the nullable Int32 dtype; df keeps this dtype after the cell.
df['year'] = df['year'].astype('Int32')
report_memory(df, "\n3. year를 Int32로 변경 후 (컬럼별):")


1. 초기 상태 (컬럼별):
text_before: object, 55.313236236572266 MB
float_before: float64, 7.629520416259766 MB
year: int64, 7.629520416259766 MB
float_after: float64, 7.629520416259766 MB
text_after: object, 56.266910552978516 MB
Total: 134.46820449829102 MB

2. year를 Int16으로 변경 후 (컬럼별):
text_before: object, 55.313236236572266 MB
float_before: float64, 7.629520416259766 MB
year: Int16, 2.8611488342285156 MB
float_after: float64, 7.629520416259766 MB
text_after: object, 56.266910552978516 MB
Total: 129.69983291625977 MB

3. year를 Int32로 변경 후 (컬럼별):
text_before: object, 55.313236236572266 MB
float_before: float64, 7.629520416259766 MB
year: Int32, 4.768497467041016 MB
float_after: float64, 7.629520416259766 MB
text_after: object, 56.266910552978516 MB
Total: 131.60718154907227 MB
