# Reading the magnetic anomaly data
We want to read the MAGIGRF column, which contains the IGRF-reduced, microleveled magnetic field.

In [1]:
import zipfile

In [2]:
# Open the raw survey archive read-only; it stays open until the explicit data.close() further down.
data = zipfile.ZipFile(r'../data/raw/1113_XYZ.zip', mode='r')

In [3]:
import numpy as np

In [4]:
def iter_loadtxt(filename, comment='/', archive=None):
    """Read the X, Y and MAGIGRF columns of an XYZ text file stored in a ZIP archive.

    The archive member is streamed one line at a time and the kept values
    are fed straight to ``np.fromiter``, so no intermediate list of rows is
    built.

    Parameters
    ----------
    filename : str
        Name of the member inside the archive, e.g. ``'XYZ/1113_MagTie.XYZ'``.
    comment : str, optional
        Lines starting with this prefix are skipped.
    archive : zipfile.ZipFile, optional
        Open archive to read from. When omitted, the notebook-level ``data``
        archive is used.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_points, 3)`` holding fields 0, 1 and 13
        (X, Y and MAGIGRF) of every kept line.

    Raises
    ------
    ValueError
        If one of the selected fields of a kept line is not a valid float.
    """
    columns = (0, 1, 13)           # X, Y and MAGIGRF
    flag_column = 13               # '*' in this field marks bad data
    min_fields = max(columns) + 1  # shortest line that can be indexed safely

    if archive is None:
        # Fall back to the ZipFile opened at notebook level.
        archive = data

    def iter_func():
        with archive.open(filename) as f:
            for raw in f:
                line = raw.decode("utf-8")

                if line.startswith(comment):
                    continue

                # Skip the records carrying the line number
                # (they contain 'Tie' or 'Line').
                if 'Tie' in line or 'Line' in line:
                    continue

                # split() with no argument splits on runs of whitespace
                # and ignores leading/trailing blanks.
                fields = line.split()

                # Blank or truncated lines have no measurement to read;
                # indexing them would raise IndexError.
                if len(fields) < min_fields:
                    continue

                # '*' is the flag value for bad data
                if '*' in fields[flag_column]:
                    continue

                for index in columns:
                    # Builtin float: the np.float alias no longer exists
                    # in current NumPy releases.
                    yield float(fields[index])

    # Kept as a function attribute only for backward compatibility with any
    # code that reads it after a call.
    iter_loadtxt.rowlength = len(columns)

    mag = np.fromiter(iter_func(), dtype=float)
    return mag.reshape((-1, len(columns)))

In [5]:
%%time
# These files are quite big, so they may take a couple minutes to read.
mag =  iter_loadtxt('XYZ/1113_MagTie.XYZ')
mag =  np.append(mag, iter_loadtxt('XYZ/1113_MagLine.XYZ'), axis=0)

CPU times: user 3min 14s, sys: 1.81 s, total: 3min 16s
Wall time: 3min 18s


In [6]:
# Checking the array: one row per kept measurement, columns are X, Y and MAGIGRF.
# NumPy abbreviates the repr of large arrays, so only the first and last rows are shown.
mag

array([[6.078430e+05, 8.232313e+06, 1.025800e+01],
       [6.078370e+05, 8.232313e+06, 1.056800e+01],
       [6.078310e+05, 8.232313e+06, 1.089400e+01],
       ...,
       [6.091890e+05, 8.506503e+06, 1.440700e+01],
       [6.091890e+05, 8.506496e+06, 1.464200e+01],
       [6.091890e+05, 8.506490e+06, 1.488400e+01]])

In [7]:
# Report how many rows were loaded and the array's in-memory size in MB (bytes / 1e6).
print(
    'Number of points: {}. Size in memory: {:.2f} MB'.format(
        len(mag), mag.nbytes / 1e6
    )
)

Number of points: 31515249. Size in memory: 756.37 MB


In [8]:
# Both XYZ members have been read, so release the ZIP archive handle opened earlier.
data.close()

## Saving a converted file to disk
For simplicity, I'll write an interim file using [NumPy's native NPY format](https://www.numpy.org/devdocs/reference/generated/numpy.lib.format.html).

In [9]:
import os

In [10]:
# Create the output directory (and any missing parents) if it does not exist.
# exist_ok=True makes this a no-op when the directory is already there and
# avoids the race between a separate existence check and the creation; it
# still raises if the path exists but is not a directory.
directory = '../data/interim/'
os.makedirs(directory, exist_ok=True)

In [11]:
# Save the (X, Y, MAGIGRF) array in NPY format. The path is built from
# `directory` so the file always lands in the folder that was just created
# instead of relying on a second copy of the same literal path.
np.save(os.path.join(directory, 'MAGIGRF.npy'), mag)