# Reading and Writing Data

## A) mat files (scipy.io)

import numpy as np
from scipy.io import loadmat as loadmat #this is the scipy module that loads .mat files
from scipy.io import savemat as savemat #this is the scipy module that saves .mat files

# Remote copy of the sample file. Kept for reference only: the author could
# not get loadmat to work with a URI, so the local copy is read below.
uri = 'http://www.karensmith.squarespace.com/storage/python_test.mat'

# Load the .mat file; the result is indexed by variable name.
matfile = loadmat('python_test.mat')

# Pull both stored variables out in a single pass.
array1, array2 = (matfile[key] for key in ('array1', 'array2'))

# Notebook-style inspection: these bare expressions only display a value
# when run interactively.
array1.shape
type(array1)

# Save the numpy array back to a .mat file under the same variable name.
savemat('python_test_save.mat', dict(array1=array1))

## B) csv, txt, xls with pandas

# In [None]:
import pandas as pd #great for reading .csv and .txt files

# Sources used in this section, gathered in one place.
uri1 = 'http://www.ldeo.columbia.edu/~rpa/usgs_earthquakes_2014.csv'  # example from Ryan's workshop
uri2 = 'http://karensmith.squarespace.com/storage/python_test.csv'
uri3 = 'http://karensmith.squarespace.com/storage/python_test.xls'

# --- CSV, using the 'time' column as the row index -------------------------
# read_csv takes many more options; see the pandas documentation for details.
d1 = pd.read_csv(uri1, index_col='time')

# Notebook-style inspection (only displayed when run interactively).
type(d1)
d1.head()

# --- CSV, default behaviour (no extra arguments) ---------------------------
d2 = pd.read_csv(uri2)
d2.head()

# Same file again, this time using the column at position 1 as the row index.
d2 = pd.read_csv(uri2, index_col=1)
print(d2)

# --- Excel -----------------------------------------------------------------
# pandas can read .xls workbooks as well.
d3 = pd.read_excel(uri3)
d3.head()

# Write the d1 DataFrame to a .csv file.
d1.to_csv('earthquakes_test.csv')

# Write the d2 DataFrame to an Excel workbook.
# The target is .xlsx rather than .xls: pandas 2.0 removed the xlwt writer,
# so a '.xls' target raises "No engine for filetype: 'xls'" on current
# pandas, whereas .xlsx is handled by the default writer.
d2.to_excel('new_python_test.xlsx', sheet_name='Sheet1')

## C) netcdf4

# In [None]:
from netCDF4 import Dataset

# Remote dataset to read (the path names the NOAA NCDC ERSST version4 'sst'
# anomaly field). NOTE: this rebinds `uri`, which section A also assigned.
uri = 'http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCDC/.ERSST/.version4/anom/.sst/T/(days%20since%201960-01-01)/streamgridunitconvert/dods'

# Use 'Dataset' to open the source as netCDF4.
nc = Dataset(uri)

# Notebook-style inspection: displays the dataset summary when run interactively.
nc

# Slicing a variable copies the selected values into memory, so SST, Lat and
# Lon remain usable after the dataset is closed below.
# ['sst'][:,0] is the same as ['sst'][:,0,:,:] -> it selects index 0 along the
# second axis and drops that (degenerate) dimension, like squeeze in MATLAB.
SST = nc.variables['sst'][:,0]
Lat = nc.variables['Y'][:]
Lon = nc.variables['X'][:]

# Release the handle now that the data has been copied; the other file
# handles in this file are closed explicitly as well.
nc.close()

# Write a new netCDF file.

# Build the coordinate axes first so the dimension sizes below can be taken
# from them instead of being hard-coded (72 and 144 for a 2.5-degree step).
lats = np.arange(-90, 90, 2.5)
lons = np.arange(-180, 180, 2.5)

# The with-block closes the file even if one of the writes below raises.
with Dataset('python_test.nc', 'w', format='NETCDF3_CLASSIC') as new_nc:
    new_nc.description = 'Example data'

    # define dimensions
    new_nc.createDimension('time', None)  # None -> unlimited (record) dimension
    new_nc.createDimension('lat', len(lats))
    new_nc.createDimension('lon', len(lons))

    # define variables
    # NOTE: 'time' is created but never filled here, as in the original.
    times = new_nc.createVariable('time', 'f8', ('time',))
    latitudes = new_nc.createVariable('latitude', 'f4', ('lat',))
    longitudes = new_nc.createVariable('longitude', 'f4', ('lon',))
    tmp = new_nc.createVariable('tmp', 'f4', ('time', 'lat', 'lon',))

    # fill the coordinate variables
    latitudes[:] = lats
    longitudes[:] = lons

    # write five records along the unlimited 'time' dimension
    for i in range(5):
        # default uniform distribution between 0 and 1
        tmp[i, :, :] = np.random.uniform(size=(len(lats), len(lons)))

## D) netcdf3

# In [None]:
from scipy.io import netcdf #scipy.io can only read/write netcdf3

# Read back 'python_test.nc' (the file created above with netCDF4), this time
# through scipy.io's netCDF reader, opened read-only.
f = netcdf.netcdf_file('python_test.nc', 'r')
f  # bare expression: only displays the object when run interactively


print(f.description)  # the 'description' attribute set when the file was written
# NOTE(review): `lat` is the file's variable object, presumably still backed by
# the open file rather than an in-memory copy - see the note after close().
lat = f.variables['latitude']
print(lat.shape)
print(lat[:])

# Closing while `lat` is still referenced is the point of this demo; it is
# what produces the message mentioned below.
f.close()
# Data has to be copied to main memory (e.g. lat[:].copy()) if we want to
# process it after the netCDF file is closed (see message below).

## E) hdf5

# In [None]:
import h5py

## Using numpy to import regular columns of data from .CSV file
# `file_location_and_name` is a placeholder: set it to the path of the CSV
# file before running this line.
# numpy is imported as `np` in this file, so the call must go through that
# alias (the bare name `numpy` is never bound and would raise NameError).
signal = np.loadtxt(file_location_and_name, delimiter=',')