<a href="https://colab.research.google.com/github/plus2net/numpy/blob/main/numpy_1_file_io.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

![plus2net](https://www.plus2net.com/images/top2.jpg) Read more on [File I/O](https://www.plus2net.com/python/numpy-file-io.php) | [NumPy](https://www.plus2net.com/python/numpy.php)

In [None]:
import numpy as np

X = np.arange(12).reshape(3,4)

# Save a single array in NumPy's binary .npy format
np.save('data.npy', X)

# Load it back; shape and dtype are restored from the file
X2 = np.load('data.npy')
print(X2.shape, X2.dtype)

# Save multiple arrays to one .npz archive; keyword names become the keys
y = np.linspace(0, 1, 5)
w = np.array([1.0, 0.5, 2.0, 1.5])
np.savez('bundle.npz', features=X, target=y, weights=w)

# Or compressed
np.savez_compressed('bundle_compressed.npz', X=X, y=y, w=w)

# Load .npz (acts like a dict-like object).
# The returned archive keeps the file open, so read it inside a `with`
# block to make sure the handle is closed once the arrays have been used.
with np.load('bundle.npz') as data:
    print(list(data.keys()))     # ['features', 'target', 'weights']
    print(data['features'])

(3, 4) int64
['features', 'target', 'weights']
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [None]:
# Small 2x3 float matrix used for the text round trip.
A = np.array([
    [1.5, 2.0, 3.25],
    [4.0, 5.0, 6.125],
])

# Write it as CSV: comma separated, three decimals per value, one header row.
# comments='' keeps savetxt from putting its default '# ' in front of the header.
np.savetxt(
    'matrix.csv',
    A,
    fmt='%.3f',
    delimiter=',',
    header='col1,col2,col3',
    comments='',
)

# Read the values back, skipping the single header line.
B = np.loadtxt('matrix.csv', delimiter=',', skiprows=1)
print(B)

[[1.5   2.    3.25 ]
 [4.    5.    6.125]]


In [None]:
# Write a small comma-separated sample file whose first line is a '#' comment.
sample_lines = (
    '# This is a comment\n',
    '1,2,3\n',
    '4,5,6\n',
    '7,8,9\n',
)
with open('data_utf8.txt', 'w', encoding='utf-8') as f:
    f.writelines(sample_lines)

print("Created data_utf8.txt")

Created data_utf8.txt


In [None]:
from io import StringIO
import numpy as np

# In-memory CSV: row 2 has an empty height and uses -1 as a "missing" weight.
csv = StringIO("""# id, height, weight
1,170,65
2,,-1
3,160,58
""")

# Empty fields are treated as missing and filled with NaN here.
# A list passed as missing_values is matched to the columns in order, so
# ['', '-1'] never applied '-1' to the weight column (the third one) and the
# sentinel came through as a regular -1.0. It is replaced explicitly below.
arr = np.genfromtxt(
    csv, delimiter=',', skip_header=1,
    dtype=[('id','i4'),('height','f8'),('weight','f8')],
    filling_values=np.nan
)
# Field access on a structured array is a view, so this updates arr in place.
arr['weight'][arr['weight'] == -1] = np.nan
print(arr['height'])  # [170.  nan 160.]
print(arr['weight'])  # [65. nan 58.]

[170.  nan 160.]
[65. -1. 58.]


In [None]:
# Back a (10000, 1000) float32 array with a file on disk and fill part of it.
m = np.memmap(
    'bigdata.dat',
    mode='w+',
    dtype='float32',
    shape=(10000, 1000),
)
# Only the first 1000 rows are assigned in this cell.
m[:1000] = np.random.rand(1000, 1000).astype('float32')
del m  # flush to disk

# Map the same file again later, this time read-only.
m2 = np.memmap(
    'bigdata.dat',
    mode='r',
    dtype='float32',
    shape=(10000, 1000),
)
print(m2[5000:5005, 100:110])

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [None]:
# Build a sample CSV whose height and weight columns carry unit suffixes.
rows = (
    'id,height,weight\n',
    '1,170cm,65kg\n',
    '2,180cm,75kg\n',
    '3,165cm,60kg\n',
)
with open('heights.csv', 'w') as f:
    for row in rows:
        f.write(row)

print("Created heights.csv")

Created heights.csv


In [None]:
# Encoding & comments: decode the file as UTF-8 and ignore lines starting with '#'
arr = np.loadtxt('data_utf8.txt', delimiter=',', comments='#', encoding='utf-8')

# Custom converters (strip units like 'cm' and 'kg').
# Keys are column indices; each converter turns the raw field into a float.
conv = {1: lambda s: float(s.replace('cm','')),
        2: lambda s: float(s.replace('kg',''))}
# encoding is passed explicitly so the converters receive str. On older NumPy
# releases whose default was encoding='bytes' they would be handed bytes, and
# the str-based replace() calls above would fail.
arr2 = np.loadtxt('heights.csv', delimiter=',', converters=conv, skiprows=1,
                  encoding='utf-8')

In [None]:
# 1) Save a (100, 5) float array to 'scores.npy' and load it back.
X = np.random.rand(100, 5)
np.save('scores.npy', X)
print(np.load('scores.npy').shape)

# 2) Write a (3,3) matrix to CSV with 2 decimals and a header row, then load it.
M = np.arange(9).reshape(3,3) / 3
np.savetxt('M.csv', M, fmt='%.2f', delimiter=',', header='a,b,c', comments='')
print(np.loadtxt('M.csv', delimiter=',', skiprows=1))

# 3) Use genfromtxt to read a CSV with missing values into floats, fill NaNs with column means.
# The sample input is written here so this cell runs on a fresh kernel
# without relying on a cell further down having been executed first.
with open('gaps.csv', 'w') as f:
    f.write('col1,col2,col3\n')
    f.write('1.0,,3.0\n')
    f.write('4.0,5.0,6.0\n')
    f.write('7.0,8.0,\n')
    f.write(',10.0,11.0\n')
    f.write('13.0,14.0,15.0\n')
arr = np.genfromtxt('gaps.csv', delimiter=',', skip_header=1)
col_means = np.nanmean(arr, axis=0)      # per-column mean, ignoring NaNs
inds = np.where(np.isnan(arr))           # (row indices, column indices) of the gaps
arr[inds] = np.take(col_means, inds[1])  # each gap gets the mean of its own column
print(arr)

# 4) Create a memmap to a large file, write a slice, and read it back.
mm = np.memmap('blk.dat', dtype='float64', mode='w+', shape=(2000, 2000))
mm[100:200] = 1.23
del mm  # dropping the writable map flushes the changes to the file
mm2 = np.memmap('blk.dat', dtype='float64', mode='r', shape=(2000, 2000))
print(mm2[150, 150])

(100, 5)
[[0.   0.33 0.67]
 [1.   1.33 1.67]
 [2.   2.33 2.67]]
[[ 1.    9.25  3.  ]
 [ 4.    5.    6.  ]
 [ 7.    8.    8.75]
 [ 6.25 10.   11.  ]
 [13.   14.   15.  ]]
1.23


In [None]:
# Write a demonstration CSV in which some fields are left empty.
gap_rows = [
    'col1,col2,col3\n',
    '1.0,,3.0\n',
    '4.0,5.0,6.0\n',
    '7.0,8.0,\n',
    ',10.0,11.0\n',
    '13.0,14.0,15.0\n',
]
with open('gaps.csv', 'w') as f:
    f.write(''.join(gap_rows))

print("Created gaps.csv")

Created gaps.csv


In [None]:
from pathlib import Path

# Path objects can be passed straight to np.save / np.load.
p = Path('data') / 'sample_data.npy'
# np.save does not create missing folders; without this the cell fails with
# FileNotFoundError when 'data/' does not exist yet.
p.parent.mkdir(parents=True, exist_ok=True)
np.save(p, np.arange(5))
print(np.load(p))

FileNotFoundError: [Errno 2] No such file or directory: 'data/sample_data.npy'