# Numpy Reference Guide 

Sources: 

http://www.engr.ucsb.edu/~shell/che210d/numpy.pdf

https://github.com/hallr/DAT_SF_19/blob/master/code/04_numpy.py

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

From here on, np can be used for numpy and plt for matplotlib.pyplot

# Lists, Arrays, List of Lists

In [2]:
# A plain Python list; it may mix ints and floats.
data1 = [1, 2, 2.5, 4]
print(data1)

[1, 2, 2.5, 4]


In [3]:
data1[2]  # indices start at 0, so index 2 refers to the 3rd element of data1

2.5

In [4]:
array1 = np.array(data1)      # we just created a 1-D NumPy array from the list
print(array1)

[ 1.   2.   2.5  4. ]


In [5]:
array1[1]       # index 1 refers to the second element of array1

2.0

In [6]:
data2 = 2*data1     # multiplying a list by 2 repeats it; the elements are not doubled
print(data2)  # pay attention to the output: the list appears twice

[1, 2, 2.5, 4, 1, 2, 2.5, 4]


In [8]:
array2 = 2*array1   # multiplying an array by 2 doubles every element
print(array2)   # pay attention to the output: compare it with the list result of 2*data1

[ 2.  4.  5.  8.]


In [9]:
ArrayToList = array2.tolist()  # tolist() converts an array into a plain Python list
print(ArrayToList) # pay attention to the commas: a list prints with commas, an array does not

[2.0, 4.0, 5.0, 8.0]


In [10]:
# List of lists. range() is wrapped in list() so the second inner sequence is
# a real list on Python 3 as well; there range() returns a lazy range object
# that would print as "range(1, 4)" instead of [1, 2, 3].
data3 = [[1, 4, 7], list(range(1, 4))]
print(data3)

[[1, 4, 7], [1, 2, 3]]


In [11]:
data3[1][2]  # 3rd element of the 2nd inner list

3

In [12]:
data3[0]  # the first inner list

[1, 4, 7]

In [13]:
data3[1:]  # a slice keeps the outer list, so the result is still a list of lists

[[1, 2, 3]]

In [14]:
y = data3[1:]  # y is a list holding the one remaining inner list
print(y[0][1])  # second element of that inner list

2


In [15]:
array2 = np.array(data3)  # rebinds array2 to a 2-D array built from the list of lists
print(array2)

[[1 4 7]
 [1 2 3]]


In [16]:
array2[1][2]   # row 1, column 2 (zero-based); the equivalent form is array2[1,2]

3

In [17]:
print(2*data3)  # pay attention to the output: the outer list is repeated, nothing is doubled

[[1, 4, 7], [1, 2, 3], [1, 4, 7], [1, 2, 3]]


In [18]:
print(2*array2) # look at the array output: every element is doubled

[[ 2  8 14]
 [ 2  4  6]]


In [19]:
print(array2[1,2])  # element at row 1, column 2 (zero-based)

3


In [20]:
array3 = array2[:,0]   # all rows, column 0: the first column is returned
print(array3)

[1 1]


In [21]:
array4 = array2[0,:]   # row 0, all columns: the first row is returned
print(array4)

[1 4 7]


In [22]:
array5 = array2[0, 0:2] # the first two elements of the first row are returned
print(array5)

[1 4]


# Examining Arrays

In [23]:
print(array2.dtype)   # element type; int64 in the recorded run, since all elements of the array are integers
print(array2.ndim)    # 2: it is a 2-dimensional array - rows and columns
array1d = np.array([1,2,3])
print(array1d.ndim)   # this is a 1-dimensional array, thus the output is 1
print(array2.shape)   # (2, 3): there are 2 rows and 3 columns; array2.shape[0] is the number of rows
print(array2.size)    # 6: the total number of elements in this array (2*3 = 6)
print(len(array2))    # 2: the number of rows - this is usually the number of observations

int64
2
1
(2, 3)
6
2


# Some handy short-cuts to create special arrays

In [24]:
# np.zeros(n): a one-dimensional array with n elements, all equal to zero
x1 = np.zeros(5)
print("x1 = ")
print(x1)
# a tuple shape gives a two-dimensional array: 2 rows and 4 columns, all zeros
x2 = np.zeros((2, 4))
print("x2 = ")
print(x2)
# np.ones(n): a one-dimensional array with n elements (6 here), all equal to one
x3 = np.ones(6)
print("x3 = ")
print(x3)
# 4 equally spaced points from 0 to 6 (inclusive) - i.e. 0, 2, 4, 6
x4 = np.linspace(0, 6, num=4)
print("x4 = ")
print(x4)
# 4 points equally spaced on a logarithmic scale from 10^1 to 10^3 (inclusive)
x5 = np.logspace(1, 3, num=4)
print("x5 = ")
print(x5)
# a one-dimensional integer array with 5 elements, starting from 0
x6 = np.arange(5)
print("x6 = ")
print(x6)
# same values, with the array type changed from integer to float
x7 = x6.astype(float)
print("x7 = ")
print(x7)

x1 = 
[ 0.  0.  0.  0.  0.]
x2 = 
[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
x3 = 
[ 1.  1.  1.  1.  1.  1.]
x4 = 
[ 0.  2.  4.  6.]
x5 = 
[   10.            46.41588834   215.443469    1000.        ]
x6 = 
[0 1 2 3 4]
x7 = 
[ 0.  1.  2.  3.  4.]


# Logical expressions and Boolean Arrays

In [25]:
cities = np.array(['SF', 'Seattle', 'SF', 'NY'])

# `==` compares element by element and yields a boolean array; `=` only assigns.
Boolean1 = (cities == 'SF')
print("Boolean1 = ")
print(Boolean1)
# `~` negates the mask: every city that is not SF - i.e. 'Seattle' and 'NY'
print(cities[~Boolean1])

# `|` is the element-wise OR: True where the city is either 'SF' or 'NY'
Boolean2 = Boolean1 | (cities == 'NY')
print("Boolean2 = ")
print(Boolean2)
# only 'Seattle' is neither 'SF' nor 'NY'
print(cities[~Boolean2])

# sorted unique values of the array - i.e. 'NY', 'SF' and 'Seattle'
print(np.unique(cities))

Boolean1 = 
[ True False  True False]
['Seattle' 'NY']
Boolean2 = 
[ True False  True  True]
['Seattle']
['NY' 'SF' 'Seattle']


# Mathematical and Statistical Operations 

In [26]:
# Arithmetic on an array is applied element by element.
ArrayTest = np.arange(10)
# every element multiplied by 10
print(10 * ArrayTest)
# every element raised to the power of 2.5 (the result is a float array)
ArrayTest = ArrayTest ** 2.5
print(ArrayTest)

[ 0 10 20 30 40 50 60 70 80 90]
[   0.            1.            5.65685425   15.58845727   32.
   55.90169944   88.18163074  129.64181424  181.01933598  243.        ]


In [28]:
np.rint(ArrayTest)     # rounds each element to the nearest integer (the result is still a float array)

array([   0.,    1.,    6.,   16.,   32.,   56.,   88.,  130.,  181.,  243.])

In [29]:
np.ceil(ArrayTest)   # rounds each element up

array([   0.,    1.,    6.,   16.,   32.,   56.,   89.,  130.,  182.,  243.])

In [30]:
np.floor(ArrayTest)  # rounds each element down

array([   0.,    1.,    5.,   15.,   32.,   55.,   88.,  129.,  181.,  243.])

In [31]:
Boolean3 = np.isnan(ArrayTest) # True where an element is NaN - very handy when cleaning data
print(Boolean3)  # all False here, since every element has a value

[False False False False False False False False False False]


In [32]:
np.argmax(ArrayTest) # returns the (zero-based) index of the maximum element in the array. Also try argmin

9

In [33]:
np.max(ArrayTest)  # returns the maximum value of the array. Also try min

243.0

In [34]:
# np.random.randn(4, 2) returns a 2-D array with 4 rows and 2 columns.
# Each element is a sample from the standard normal distribution,
# which has mean zero and standard deviation 1.
rnd = np.random.randn(4, 2)
print(rnd)

[[-0.30041561 -0.29990378]
 [ 0.76307283  0.13678016]
 [ 0.02411134  0.74315973]
 [ 0.39283294  0.29063517]]


In [35]:
rnd.mean() # returns the mean of all elements

0.2187840971764537

In [36]:
rnd.std() # returns the standard deviation of all elements

0.38566552709798702

In [37]:
rnd.var() # returns the variance of all elements

0.14873789879176816

In [38]:
rnd.sum() # returns the sum of all elements

1.7502727774116296

In [39]:
rnd.sum(axis=0) # sums down the rows (axis 0): one total per column

array([ 0.8796015 ,  0.87067128])

In [40]:
rnd.sum(axis=1) # sums across the columns (axis 1): one total per row

array([-0.60031939,  0.89985299,  0.76727107,  0.68346811])

# Scatter Plots

In [27]:
# Simulate a noisy linear relationship y = -2 + 3*x + noise and plot it.
SampleSize = 20
x = np.arange(SampleSize)
print(x)
# Draw one noise value per x as a 1-D array. randn(1, SampleSize) would make
# error - and therefore y - a 2-D array of shape (1, SampleSize), which does
# not match the 1-D x.
error = np.random.randn(SampleSize)
y = -2 + 3*x + 10*error
plt.scatter(x, y)
plt.show()

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [41]:
from IPython.display import Image
# NOTE(review): this absolute path looks machine-specific; the recorded run failed with
# "No such file or directory", so point filename at an image that exists before running.
Image(filename = "/Users/hamed/Desktop/Test1/figure_1.png", width=500, height=500)

IOError: [Errno 2] No such file or directory: u'/Users/hamed/Desktop/Test1/figure_1.png'

# Conditional Logic

In [None]:
Logical = np.where(rnd > 0, 2, -2)   # element-wise: 2 where the condition is true, -2 where it is false
print(Logical)

In [None]:
Logical = np.where(rnd > 0, rnd, -2) # element-wise: keeps the rnd value where the condition is true, -2 where it is false
print(Logical)

In [None]:
# Each result is printed explicitly: a notebook cell only echoes the value of
# its last expression, so the first two results would otherwise be discarded.
print((rnd > 1).sum())  # counts the number of elements that are greater than 1
print((rnd > 1).any())  # True if at least one value is greater than 1, False if all values are <= 1
print((rnd > 1).all())  # True only if every value is greater than 1, otherwise False

# Random Numbers

In [None]:
# Each draw is bound to a name and printed; as bare expressions only the last
# one in the cell would be displayed.
np.random.seed(213)   # fixing the seed (213 here) makes the draws below reproducible
uniform_sample = np.random.rand(2, 4)   # a 2 by 4 array of random numbers, each in [0, 1)
print(uniform_sample)
normal_sample = np.random.randn(5)   # 5 random numbers from the standard normal distribution
print(normal_sample)
# With no argument the generator is re-seeded from fresh OS entropy when it is
# available (falling back to the clock), so the next draw is not reproducible.
np.random.seed()
unseeded_sample = np.random.rand(2, 4)
print(unseeded_sample)

# Reshaping, Transposing, and Flattening arrays

In [None]:
# Start from a one-dimensional array holding 0..19.
Initial_1D_Array = np.arange(20)
print(" Initial_1D_Array = ")
print(Initial_1D_Array)

# Reshape the original array into a 5 by 4 two-dimensional array.
ReShaped_2D = Initial_1D_Array.reshape((5, 4))
print("ReShaped_2D =")
print(ReShaped_2D)

# flatten() collapses the 2-D array back into one dimension.
Flatten_Array = ReShaped_2D.flatten()
print("Flatten_Array = ")
print(Flatten_Array)

# .T transposes the 5 by 4 array into a 4 by 5 array.
Transposed_array = ReShaped_2D.T
print("Transposed_array  = ")
print(Transposed_array)

# Using IPython
### Review Python Basics

Test your skills by answering the following questions:

In [None]:
### Insert your code here and then print A when you are ready to test it.
# Plain int/int division: Python 2 truncates this to 0, Python 3 returns 0.5.
A = 10/20
print(A)

In [None]:
#### If you did not get a float (decimals), alter your equation to get the desired result (0.5)
# Making one operand a float gives true division on every Python version.
A = 10.0/20
print(A)

#### Question 2. Create a function called division that divides any two numbers and prints the result (with decimals). 
Call your function and confirm that the results are as expected.

In [None]:
# Remember functions start with def
def division(numerator, denominator):
    """Divide numerator by denominator and print the result as a float.

    The numerator is converted to float first, so the division is never
    truncated, even for two ints on Python 2.

    Raises:
        ZeroDivisionError: if denominator is 0.
    """
    result = float(numerator) / denominator
    print(result)


division(20, 10)
division(10, 20)

#### Question 3. Using .split(), split my_string into separate words and store them in a variable named words

In [None]:
my_string = "the cow jumped over the moon"
# split() with no argument splits the string on whitespace;
# it returns ['the', 'cow', 'jumped', 'over', 'the', 'moon']
words = my_string.split()
print(words)

#### Question 4. How many words are in my_string?


In [None]:
# len() of the list returns the number of words - 6
word_count = len(words)
print(word_count)

#### Question 5. Use a list comprehension to find the length of each word

result: [3, 3, 6, 4, 3, 4]

In [None]:
# one len() per word, in the same order as the words
length_of_each_word = [len(word) for word in words]
print(length_of_each_word)

#### Question 6. Put the words back together in a variable called sentence using .join()
result:
the cow jumped over the moon

In [None]:
# put them back together with join, using a single space as the separator
# (the variable is named sentence, as the question asks)
sentence = " ".join(words)
print(sentence)

#### Bonus: Add a "||" between each word
result: 
the||cow||jumped||over||the||moon

In [None]:
# the string that join() is called on is placed between the words;
# here it is "||" instead of a space, but it could be anything else
alternate_sentance = "||".join(words)
print(alternate_sentance)