In [64]:
import psutil, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Memory Usage

Memory function that returns memory being used by a Python Process

In [3]:
def memory_footprint():
    '''Returns memory (in MB) being used by Python process'''
    mem = psutil.Process(os.getpid()).memory_info().rss
    return (mem / 1024 ** 2)

In [4]:
before = memory_footprint()

In [6]:
N = (1024 ** 2) // 8 # Number of floats that fill 1 MB
x = np.random.randn(50*N) # Random array filling 50 MB

In [7]:
after = memory_footprint()

In [8]:
print('Memory before: {} MB'.format(before))

Memory before: 61.86328125 MB


In [9]:
print('Memory after: {} MB'.format(after))

Memory after: 112.5625 MB


In [10]:
before = memory_footprint()

In [11]:
x ** 2 # Computes, but doesn't bind results to a variable

array([0.33918565, 0.03886872, 0.23503756, ..., 0.19850324, 0.03636918,
       0.24840848])

In [12]:
after = memory_footprint()

In [13]:
print('Extra memory obtained: {} MB'.format(after - before))

Extra memory obtained: 50.296875 MB


In [14]:
x.nbytes

52428800

In [15]:
x.nbytes // (1024 ** 2)

50

In [17]:
df = pd.DataFrame(x)

In [18]:
df.memory_usage(index=False)

0    52428800
dtype: int64

In [20]:
df.memory_usage(index=False) // (1024 ** 2)

0    50
dtype: int64

### Data in Chunks

In [23]:
dfs = []

In [53]:
for chunk in pd.read_csv('WDIData.csv', chunksize=1000):
    is_urban = chunk['Indicator Name'] == 'Urban population (% of tatal)'
    is_AUS = chunk['Country Code'] == 'AUS'
    filtered = chunk.loc[is_AUS & is_urban]
    dfs.append(filtered)

In [67]:
len(dfs)

1692

In [68]:
print(df[:])

      Country Name Country Code  \
91143    Australia          AUS   
91144    Australia          AUS   
91145    Australia          AUS   
91146    Australia          AUS   
91147    Australia          AUS   
91148    Australia          AUS   
91149    Australia          AUS   
91150    Australia          AUS   
91151    Australia          AUS   
91152    Australia          AUS   
91153    Australia          AUS   
91154    Australia          AUS   
91155    Australia          AUS   
91156    Australia          AUS   
91157    Australia          AUS   
91158    Australia          AUS   
91159    Australia          AUS   
91160    Australia          AUS   
91161    Australia          AUS   
91162    Australia          AUS   
91163    Australia          AUS   
91164    Australia          AUS   
91165    Australia          AUS   
91166    Australia          AUS   
91167    Australia          AUS   
91168    Australia          AUS   
91169    Australia          AUS   
91170    Australia  

In [69]:
df = pd.concat(dfs)

In [70]:
df.plot.line(x='Year', y='value')
plt.ylabel('% Urban population')
plt.show()

KeyError: 'Year'