In [1]:
from nose.tools import assert_equal, assert_true
from numpy.testing import assert_array_equal, assert_almost_equal
import numpy as np

In [2]:
# Preview the first lines of the raw CSV to see its header, columns and quoting.
!head ~/airports.csv

"iata","airport","city","state","country","lat","long"
"00M","Thigpen ","Bay Springs","MS","USA",31.95376472,-89.23450472
"00R","Livingston Municipal","Livingston","TX","USA",30.68586111,-95.01792778
"00V","Meadow Lake","Colorado Springs","CO","USA",38.94574889,-104.5698933
"01G","Perry-Warsaw","Perry","NY","USA",42.74134667,-78.05208056
"01J","Hilliard Airpark","Hilliard","FL","USA",30.6880125,-81.90594389
"01M","Tishomingo County","Belmont","MS","USA",34.49166667,-88.20111111
"02A","Gragg-Wade ","Clanton","AL","USA",32.85048667,-86.61145333
"02C","Capitol","Brookfield","WI","USA",43.08751,-88.17786917
"02G","Columbiana County","East Liverpool","OH","USA",40.67331278,-80.64140639


In [3]:
# Load only the airport name (col 1), state (col 3) and latitude (col 5).
# skiprows=1 drops the header line so it is not stored as a bogus airport record.
# Fields are read as bytes and converted to unicode text; every value is kept
# as a string here (latitude included) and still carries the CSV's double quotes.
# NOTE(review): the preview cell reads ~/airports.csv while this reads
# 'airports.csv' relative to the working directory -- confirm both are the same file.
# NOTE(review): the delimiter splits on every comma, so a quoted field that
# contains a comma would shift the selected columns -- verify against the data.
my_array = np.loadtxt('airports.csv', dtype=bytes, delimiter=",",
                      usecols=(1, 3, 5), skiprows=1).astype("U")

In [4]:
# Remove the surrounding double quotes left over from the CSV fields.
# np.char is the public home of the string routines; np.core is a private
# namespace and accessing it is deprecated in recent NumPy releases.
my_array = np.char.strip(my_array, '"')

In [27]:
# Scratch check: print the name of the southernmost California airport.
new_array=my_array[my_array[:,1]=='CA']
# The latitude column holds text; convert it to float so argmin compares
# numbers rather than strings character by character.
print(new_array[np.argmin(new_array[:, 2].astype(float)),0])

Brown  Municipal


## Problem 1

In [43]:
def north_south_airports(state, airports=None):
    '''
    Finds the northernmost and southernmost airports of a state.

    Parameters
    ----------
    state: the two-letter state code as given in the airports.csv file
    airports: optional 2-d array of strings whose columns are
        (airport name, state code, latitude); when omitted, the
        module-level ``my_array`` loaded from airports.csv is used

    Returns
    -------
    a 1-d numpy array of the form array(['Northernmost', 'Southernmost'])

    Raises
    ------
    ValueError: if no row of the table matches ``state``
    '''

    table = my_array if airports is None else np.asarray(airports)
    in_state = table[table[:, 1] == state]
    if in_state.shape[0] == 0:
        raise ValueError('no airports found for state {!r}'.format(state))

    # The latitude column is stored as text. Comparing text is lexicographic
    # ('9.5' would rank above '31.9'), so convert to float before ranking.
    latitudes = in_state[:, 2].astype(float)
    north = in_state[np.argmax(latitudes), 0]
    south = in_state[np.argmin(latitudes), 0]
    return np.array([north, south])

In [49]:
# Spot check for Illinois; the cell displays the returned [north, south] array.
north_south_airports('IL')

array(['Waukegan Regional', 'Cairo'], 
      dtype='<U17')

In [51]:
# Spot check for Hawaii; the cell displays the returned [north, south] array.
north_south_airports('HI')

array(['Princeville', 'Hilo International'], 
      dtype='<U18')

In [52]:
# assert tests for California
# The result must be a 1-d array of length 2: [northernmost, southernmost].
CA_airpts = north_south_airports("CA")
assert_equal(CA_airpts.shape, (2, ))
# Strip surrounding whitespace from each returned name, then match by substring.
CA_airpts_list = [x.strip() for x in CA_airpts]
assert_true('Tulelake Municipal' in CA_airpts_list[0])
# Note: the expected name below contains two consecutive spaces.
assert_true('Brown  Municipal' in CA_airpts_list[1])
# assert tests for Missouri (same shape and substring checks as above)
MO_airpts = north_south_airports("MO")
assert_equal(MO_airpts.shape, (2, ))
MO_airpts_list = [x.strip() for x in MO_airpts]
assert_true('Memphis Memorial' in MO_airpts_list[0])
assert_true('Caruthersville Memorial' in MO_airpts_list[1])

## Problem 2

In [50]:
def create_design_matrix(x):
    '''
    Builds the two-column design matrix used for simple linear regression.

    Parameters
    ----------
    x: a 1-d numpy array of predictor values

    Returns
    -------
    a 2-d numpy array of shape (len(x), 2): column 0 is all 1's (the
    intercept term) and column 1 holds the values of x
    '''

    n_obs = x.shape[0]
    # Both pieces are shaped (n_obs, 1) so they can be joined side by side.
    intercept_col = np.ones((n_obs, 1))
    predictor_col = x.reshape(n_obs, 1)
    return np.concatenate((intercept_col, predictor_col), axis=1)

In [52]:
# Each row of the design matrix should be [1, x_i] for the three inputs.
des_mtx = create_design_matrix(np.array([1.1,2.2,3.3]))
assert_array_equal(des_mtx, np.array([[1., 1.1], [1., 2.2], [1., 3.3]]))

## Problem 3

In [64]:
def slr(x, y):
    '''
    Estimates beta coefficients for a simple linear regression

    Solves the normal equations (X'X) beta = X'y, where X is the design
    matrix built from x by create_design_matrix.

    Parameters
    ----------
    x: a 1-d numpy array
    y: a 1-d numpy array

    Returns
    -------
    a 1-d numpy array with 2 elements, [beta0, beta1]

    Raises
    ------
    numpy.linalg.LinAlgError: if X'X is exactly singular
    '''

    design = create_design_matrix(x)
    gram = np.dot(design.T, design)      # X'X, a 2x2 matrix
    moment = np.dot(design.T, y)         # X'y
    # Solve the linear system directly instead of forming the explicit
    # inverse of X'X: same solution, with less rounding error and less work.
    return np.linalg.solve(gram, moment)

In [66]:
# Seed the generator so the noisy sample, and therefore the report printed
# below, is the same on every run.
np.random.seed(0)
x = np.linspace(-20, 60, 10000)
# create true y values as a function of x and add random noise
y = 5 + 5/22 * x + np.random.randn(len(x))
# create the beta coefficient from the data
beta=slr(x, y)
# true and predicted response at x=50, computed once and reused below
true_sales = 5 + 5/22 * 50
pred_sales = np.dot(beta, np.array([1, 50.]))
print('Regression Report')
print('-'*30)
print('True beta0 = ', 5)
print("Pred beta0 = ", beta[0])
print('True beta1 = ', 5/22)
print("Pred beta1 = ", beta[1])
print('True ice cream sales for x=50: ', true_sales)
print('Pred ice cream sales for x=50: ', pred_sales)
print('Error:', true_sales - pred_sales)

Regression Report
------------------------------
True beta0 =  5
Pred beta0 =  4.9828157012
True beta1 =  0.22727272727272727
Pred beta1 =  0.228116394422
True ice cream sales for x=50:  16.363636363636363
Pred ice cream sales for x=50:  16.3886354223
Error: -0.0249990586625


In [68]:
# create x and y with random noise; the fixed seed makes the draws, and so
# the assertions below, deterministic
np.random.seed(1000)
x = np.random.randn(10000)
y = 3.4 + 1.3 * x + np.random.randn(10000)
# compute the beta vector
beta = slr(x, y)
# check that the true beta is close to the estimated beta (to one decimal place)
assert_almost_equal(np.array([3.4, 1.3]), beta, decimal=1)

# try another b0 and b1; x is reused and only a fresh noise vector is drawn
y = 101.4 + 1176.1 * x + np.random.randn(10000)
# compute the beta vector
beta = slr(x, y)
# check that the true beta is close to the estimated beta (to one decimal place)
assert_almost_equal(np.array([101.4, 1176.1]), beta, decimal=1)