In [15]:
import pandas as pd
import numpy as np

In [16]:
# Example 1: Generate a large DataFrame with random data
np.random.seed(42)
num_rows = 10000
data = {
    'A': np.random.randint(1, 100, num_rows),
    'B': np.random.rand(num_rows),
    'C': np.random.choice(['Category1', 'Category2', 'Category3'], num_rows),
    'D': pd.date_range(start='2023-01-01', periods=num_rows, freq='D')
}
df = pd.DataFrame(data)
print("Example 1:")
print(df.memory_usage(deep=True).sum() / (1024 ** 2))  # Memory usage in MB

Example 1:
0.8202857971191406


In [17]:
# Example 2: Convert data types to reduce memory usage
df['A'] = df['A'].astype('int32')
df['B'] = df['B'].astype('int16')
print("\nExample 2:")
print(df.memory_usage(deep=True).sum() / (1024 ** 2))  # Memory usage in MB



Example 2:
0.7630653381347656


In [18]:
# Example 3: Use 'parse_dates' parameter when reading CSV
df.to_csv('large_data.csv', index=False)
df_csv = pd.read_csv('large_data.csv', parse_dates=['D'])
print("\nExample 3:")
print(df_csv.memory_usage(deep=True).sum() / (1024 ** 2))  # Memory usage in MB



Example 3:
0.8584327697753906


In [19]:
# Example 4: Reduce memory usage with chunking
chunk_size = 1000000
df_chunk = pd.read_csv('large_data.csv', parse_dates=['D'], chunksize=chunk_size)
total_memory_usage = 0
for chunk in df_chunk:
    total_memory_usage += chunk.memory_usage(deep=True).sum()
print("\nExample 4:")
print(total_memory_usage / (1024 ** 2))  # Memory usage in MB



Example 4:
0.8584327697753906
