

# Read files == np.loadtxt
# import pandas as pd, try pd.read_(click Tab)
##### Keywords: read files, matrix

In [2]:
%matplotlib inline
import numpy as np


## Read a .txt file. You can access it directly by its file name when it is in the same folder as the ipynb file you are currently running

## numpy.loadtxt is a very simple reader. There are ways to make it do some advanced things like handle missing data or read non-numeric columns but they are all a bit tedious so this function is best used with well behaved tables.



## numpy.loadtxt has a couple of useful keywords. 
1. skiprows  -- keyword to skip header lines.
1. delimiter -- by default numpy.loadtxt splits columns on white space; use the delimiter keyword to specify another separator, depending on the given file
1. usecols   -- if you want to select only certain columns from the table, use the usecols keyword.

## First let's try to read file: weather_space.txt  

## this file gives the nicest table you can expect, and you can just simply load it:


In [3]:
## weather_space.txt is a plain whitespace-separated table, so the defaults of np.loadtxt are enough
weather = np.loadtxt('weather_space.txt')

## show the values, the container type and the shape, one per line
print(weather, type(weather), weather.shape, sep='\n')

[[ 0.5   0.25  0.25]
 [ 0.5   0.    0.5 ]
 [ 0.25  0.25  0.5 ]]
<class 'numpy.ndarray'>
(3, 3)


In [4]:
## Pass usecols=[1, 2] to read only columns 1 and 2 of weather_space.txt
weather1 = np.loadtxt('weather_space.txt', usecols=[1, 2])
print(weather1, type(weather1), sep='\n')
print(weather1.shape, '\n')  # the extra '\n' leaves a blank line before the next result

## Pass usecols=[0] to read only column 0 of weather_space.txt;
## with a single column selected, the result is just a 1-d array
weather1 = np.loadtxt('weather_space.txt', usecols=[0])
print(weather1, type(weather1), weather1.shape, sep='\n')

[[ 0.25  0.25]
 [ 0.    0.5 ]
 [ 0.25  0.5 ]]
<class 'numpy.ndarray'>
(3, 2) 

[ 0.5   0.5   0.25]
<class 'numpy.ndarray'>
(3,)


## Now let's try to load another file weather_comma.txt
## np.loadtxt('weather_comma.txt') will NOT work!

In [5]:
## np.loadtxt('weather_comma.txt') on its own does not work, because the default
## is to split columns on white space and this file separates them with commas.
## Passing delimiter=',' makes the same call work:
weather = np.loadtxt('weather_comma.txt', delimiter=',')
print(weather, type(weather), weather.shape, sep='\n')

[[ 0.5   0.25  0.25]
 [ 0.5   0.    0.5 ]
 [ 0.25  0.25  0.5 ]]
<class 'numpy.ndarray'>
(3, 3)


## Now let's try to load another file funny_weather.txt


In [6]:
## np.loadtxt('funny_weather.txt', delimiter=',') does not work because the file contains strings.
## Reading only columns 1, 2 and 3 works -- presumably the strings sit in column 0 (check the file).
weather = np.loadtxt('funny_weather.txt', usecols=[1, 2, 3], delimiter=',')
print(weather, type(weather), weather.shape, sep='\n')

[[ 0.5   0.25  0.25]
 [ 0.5   0.    0.5 ]
 [ 0.25  0.25  0.5 ]]
<class 'numpy.ndarray'>
(3, 3)


# You can also skip rows

In [7]:
## skiprows=2 skips the top two rows of the file before the columns are read;
## only one row is left here, so the result comes back as a 1-d array
weather = np.loadtxt('funny_weather.txt', skiprows=2, usecols=[1, 2, 3], delimiter=',')
print(weather, type(weather), weather.shape, sep='\n')

[ 0.25  0.25  0.5 ]
<class 'numpy.ndarray'>
(3,)


## Normally, people keep their data files in a separate folder, so you will want to make a subfolder inside your working directory to store them

## In Python projects, data files are commonly stored in a subfolder of the current working directory. You can run the following code to find out what your current working directory is,
## i.e. the folder path that relative file names are resolved against

In [8]:
import os

## store the current working directory (the folder that relative file names are resolved against)
cwd = os.getcwd()


## a common convention is to store all data files in a subfolder
## called "data" or "raw_data" or "blabla_data"

## Build the path with os.path.join so the separator is correct on every operating system.
## The empty last component keeps a trailing separator, so datapath + 'some_file.txt' still works.
datapath = os.path.join(cwd, 'data', '')

# Now to access data stored in the subfolder data
## Now create a folder called "data" in your working folder (i.e. where this file is stored)
## Then move all .txt data files into that folder
## You cannot access houston_weather.txt simply by :
## np.loadtxt('houston_weather.txt', delimiter=',')
## But by np.loadtxt(datapath+'houston_weather.txt', delimiter=',')


In [10]:
## The data files now live in the data subfolder, so the bare name in
## np.loadtxt('houston_weather.txt', delimiter=',') can no longer be used;
## prefix every file name with datapath instead
houston_weather = np.loadtxt(fname=datapath + 'houston_weather.txt', delimiter=',')
weather = np.loadtxt(fname=datapath + 'weather_space.txt')

## np.matrix -- make an N by N 2d array into a matrix type so that you can truly do matrix operations!

In [11]:
## (re)load the whitespace-separated weather table from the data subfolder as a plain ndarray
weather = np.loadtxt(fname=datapath + 'weather_space.txt')

In [12]:
## On a plain ndarray, ** and * act on every entry separately -- none of these is a matrix operation
print("This is NOT a matrix product, but entrywise squared!!!\n", weather**2)
print("\nThis is NOT a matrix product, but entrywise product!!!\n", weather*weather)

## weather has a 0 entry, so the entrywise reciprocal divides by zero there and yields inf.
## That inf is the point of the demonstration, so the expected divide-by-zero
## RuntimeWarning is silenced for this one expression only.
with np.errstate(divide='ignore'):
    print("\nThis is NOT a matrix inverse, but entrywise inverse!!!\n", weather**(-1))

This is NOT a matrix product, but entrywise squared!!!
 [[ 0.25    0.0625  0.0625]
 [ 0.25    0.      0.25  ]
 [ 0.0625  0.0625  0.25  ]]

This is NOT a matrix product, but entrywise product!!!
 [[ 0.25    0.0625  0.0625]
 [ 0.25    0.      0.25  ]
 [ 0.0625  0.0625  0.25  ]]

This is NOT a matrix inverse, but entrywise inverse!!!
 [[  2.   4.   4.]
 [  2.  inf   2.]
 [  4.   4.   2.]]


  This is separate from the ipykernel package so we can avoid doing imports until


In [16]:
## Wrap the 2d array in np.matrix: on a matrix, ** means repeated matrix multiplication
## NOTE(review): the NumPy documentation no longer recommends np.matrix for new code;
## with plain arrays the same product can be written weather @ weather -- confirm before switching
P = np.matrix(weather)

## matrix power: P times P
print("true matrix product: P times P\n", P ** 2)

## array power, for comparison: every entry is squared on its own
print("\nweather times weather\n", weather ** 2)

true matrix product: P times P
 [[ 0.4375  0.1875  0.375 ]
 [ 0.375   0.25    0.375 ]
 [ 0.375   0.1875  0.4375]]

weather times weather
 [[ 0.25    0.0625  0.0625]
 [ 0.25    0.      0.25  ]
 [ 0.0625  0.0625  0.25  ]]
