# Numpy

In [1]:
fixed_salary = [100,200,300]
variable_salary = [10,20,30]
total_salary = fixed_salary + variable_salary
print(total_salary)

[100, 200, 300, 10, 20, 30]


In [2]:
type(fixed_salary)

list

In [3]:
import numpy as np
fixed = np.array(fixed_salary)
variable = np.array(variable_salary)
total = fixed + variable
print(total)

[110 220 330]


In [4]:
type(fixed)

numpy.ndarray

In [5]:
fixed

array([100, 200, 300])

In [6]:
variable

array([10, 20, 30])

In [7]:
type(total)

numpy.ndarray

In [8]:
total

array([110, 220, 330])

In [9]:
#Broadcasting
total+10

array([120, 230, 340])

In [10]:
#Broadcasting
total>200

array([False,  True,  True])

In [11]:
#Boolean Indexing
total[total>200]

array([220, 330])

In [12]:
total[total>200][1]

330

Numpy is great for doing vector arithmetic. If you compare its functionality with regular Python lists, however, some things have changed.
First of all, numpy arrays cannot contain elements with different types. If you try to build such a list, some of the elements' types are changed to end up with a homogeneous list. This is known as type coercion.
Second, the typical arithmetic operators, such as +, -, * and / have a different meaning for regular Python lists and numpy arrays.

In [13]:

np.array([True, 1, 2]) + np.array([3, 4, False])

array([4, 5, 2])

In [14]:
# Create baseball, a list of lists
baseball = [[180, 78.4],
            [215, 102.7],
            [210, 98.5],
            [188, 75.2]]

# Import numpy
import numpy as np

# Create a 2D numpy array from baseball: np_baseball
np_baseball = np.array(baseball)

# Print out the type of np_baseballn 
print(type(np_baseball))

# Print out the shape of np_baseball
print(np_baseball.shape)
print(np_baseball)

<class 'numpy.ndarray'>
(4, 2)
[[180.   78.4]
 [215.  102.7]
 [210.   98.5]
 [188.   75.2]]


In [15]:
print(np_baseball[1][0])
print(np_baseball[1,0])

215.0
215.0


In [16]:
#Upcasting:

In [17]:
np.array([1, 2, 3.0])

array([1., 2., 3.])

In [18]:
#two dimensions

In [19]:
 np.array([[1, 2], [3, 4]])

array([[1, 2],
       [3, 4]])

In [20]:
print("A series of numbers:",np.arange(5,16)) # A series of numbers from low to high

A series of numbers: [ 5  6  7  8  9 10 11 12 13 14 15]


array([ 5,  7,  9, 11, 13, 15, 17, 19, 21])

In [21]:
print("Numbers spaced apart by 2:",np.arange(0,11,2)) # Numbers spaced apart by 2

Numbers spaced apart by 2: [ 0  2  4  6  8 10]


In [22]:
print("21 linearly spaced numbers between 1 and 5\n--------------------------------------------")
print(np.linspace(1,5,21))

21 linearly spaced numbers between 1 and 5
--------------------------------------------
[1.  1.2 1.4 1.6 1.8 2.  2.2 2.4 2.6 2.8 3.  3.2 3.4 3.6 3.8 4.  4.2 4.4
 4.6 4.8 5. ]


In [23]:
import numpy as np
my_mat = [[1,2,3],[4,5,6],[7,8,9]]
mat = np.array(my_mat)
print("Type/Class of this object:",type(mat))
print("Here is the matrix\n----------\n",mat,"\n----------")
print("Dimension of this matrix: ",mat.ndim,sep='') #ndim gives the dimensison, 2 for a matrix, 1 for a vector
print("Size of this matrix: ", mat.size,sep='') #size gives the total number of elements
print("Shape of this matrix: ", mat.shape,sep='') #shape gives the number of elements along each axes (dimension)
print("Data type of this matrix: ", mat.dtype,sep='') #dtype gives the data type contained in the array


Type/Class of this object: <class 'numpy.ndarray'>
Here is the matrix
----------
 [[1 2 3]
 [4 5 6]
 [7 8 9]] 
----------
Dimension of this matrix: 2
Size of this matrix: 9
Shape of this matrix: (3, 3)
Data type of this matrix: int32


# Matrix creation

In [24]:
print("Vector of zeroes\n---------------------")
print(np.zeros(5))

Vector of zeroes
---------------------
[0. 0. 0. 0. 0.]


In [25]:
print("Matrix of zeroes\n--------------------")
print(np.zeros((3,4))) # Notice Tuples

Matrix of zeroes
--------------------
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [26]:
print("Vector of ones\n---------------------")
print(np.ones(5))

Vector of ones
---------------------
[1. 1. 1. 1. 1.]


In [27]:
print("Matrix of ones\n---------------------")
print(np.ones((5,2))) # Note matrix dimension specified by Tuples


Matrix of ones
---------------------
[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]


In [28]:
print("Matrix of 5's\n---------------------")
print(5*np.ones((3,5)))

Matrix of 5's
---------------------
[[5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5.]]


In [29]:
#Return numbers spaced evenly on a log scale.

In [30]:
np.logspace(2.0, 3.0, num=4)

array([ 100.        ,  215.443469  ,  464.15888336, 1000.        ])

In [31]:
np.logspace(2.0, 3.0, num=4, base=2.0)

array([4.        , 5.0396842 , 6.34960421, 8.        ])

In [32]:
x = np.arange(9).reshape((3,3))

In [33]:
x

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [34]:
np.diag(x)

array([0, 4, 8])

# Reshaping

In [35]:
from numpy.random import randint as ri
a = ri(1,100,30)
b = a.reshape(2,3,5)
c = a.reshape(6,5)

In [36]:
print ("Shape of a:", a.shape)
print ("Shape of b:", b.shape)
print ("Shape of c:", c.shape)

Shape of a: (30,)
Shape of b: (2, 3, 5)
Shape of c: (6, 5)


In [37]:
print("\na looks like\n",'-'*20,"\n",a,"\n",'-'*20)
print("\nb looks like\n",'-'*20,"\n",b,"\n",'-'*20)
print("\nc looks like\n",'-'*20,"\n",c,"\n",'-'*20)



a looks like
 -------------------- 
 [67 81  6 63 42 11  5 92 48 84 34  9 56 65 67 90 79  4 63 47 45  5 84 21
 18 19 27 54  9  2] 
 --------------------

b looks like
 -------------------- 
 [[[67 81  6 63 42]
  [11  5 92 48 84]
  [34  9 56 65 67]]

 [[90 79  4 63 47]
  [45  5 84 21 18]
  [19 27 54  9  2]]] 
 --------------------

c looks like
 -------------------- 
 [[67 81  6 63 42]
 [11  5 92 48 84]
 [34  9 56 65 67]
 [90 79  4 63 47]
 [45  5 84 21 18]
 [19 27 54  9  2]] 
 --------------------


In [38]:
A = ri(1,100,10) # Vector of random interegrs
print("\nVector of random integers\n",'-'*50,"\n",A)
print("\nHere is the sorted vector\n",'-'*50,"\n",np.sort(A, kind='mergesort'))



Vector of random integers
 -------------------------------------------------- 
 [16 83 13 81 69 36 16 66 23 54]

Here is the sorted vector
 -------------------------------------------------- 
 [13 16 16 23 36 54 66 69 81 83]


In [39]:
M = ri(1,100,25).reshape(5,5) # Matrix of random interegrs
print("\n5x5 Matrix of random integers\n",'-'*50,"\n",M)
print("\nHere is the sorted matrix along each row\n",'-'*50,"\n",np.sort(M, kind='mergesort')) # Default axis =1
print("\nHere is the sorted matrix along each column\n",'-'*50,"\n",np.sort(M, axis=0, kind='mergesort'))


5x5 Matrix of random integers
 -------------------------------------------------- 
 [[11 89 51 18 22]
 [61 92 25  1 74]
 [67  5 29 37 23]
 [73 69 66 20 69]
 [ 9 73 16 52 25]]

Here is the sorted matrix along each row
 -------------------------------------------------- 
 [[11 18 22 51 89]
 [ 1 25 61 74 92]
 [ 5 23 29 37 67]
 [20 66 69 69 73]
 [ 9 16 25 52 73]]

Here is the sorted matrix along each column
 -------------------------------------------------- 
 [[ 9  5 16  1 22]
 [11 69 25 18 23]
 [61 73 29 20 25]
 [67 89 51 37 69]
 [73 92 66 52 74]]


In [40]:
print("Max of a:", a.max())
print("Max of b:", b.max()) 

Max of a: 92
Max of b: 92


In [41]:
print("Max of a location:", a.argmax())
print("Max of b location:", b.argmax())
print("Max of c location:", c.argmax())

Max of a location: 7
Max of b location: 7
Max of c location: 7


# Indexing and slicing

In [42]:
arr = np.arange(0,11)
print("Array:",arr) 

Array: [ 0  1  2  3  4  5  6  7  8  9 10]


In [43]:
print("Element at 7th index is:", arr[7])


Element at 7th index is: 7


In [44]:
print("Elements from 3rd to 5th index are:", arr[3:6])


Elements from 3rd to 5th index are: [3 4 5]


In [45]:
print("Elements up to 4th index are:", arr[:4])


Elements up to 4th index are: [0 1 2 3]


In [46]:
arr = np.arange(0,21,2)
print("New array:",arr)

New array: [ 0  2  4  6  8 10 12 14 16 18 20]


In [47]:
print("Elements at 2nd, 4th, and 9th index are:", arr[[2,4,9]]) # Pass a list as a index to subset

Elements at 2nd, 4th, and 9th index are: [ 4  8 18]


In [48]:
mat = np.array(ri(10,100,15)).reshape(3,5)
print("Matrix of random 2-digit numbers\n--------------------------------\n",mat)

Matrix of random 2-digit numbers
--------------------------------
 [[88 63 30 45 24]
 [75 23 65 82 90]
 [16 44 51 83 79]]


In [49]:
print("\nDouble bracket indexing\n------------------------")
print("Element in row index 1 and column index 2:", mat[1][2])


Double bracket indexing
------------------------
Element in row index 1 and column index 2: 65


In [50]:
print("\nSingle bracket with comma indexing\n----------------------------------")
print("Element in row index 1 and column index 2:", mat[1,2])
print("\nRow or column extract\n----------------------")


Single bracket with comma indexing
----------------------------------
Element in row index 1 and column index 2: 65

Row or column extract
----------------------


In [51]:
print("Entire row at index 2:", mat[2])
print("Entire column at index 3:", mat[:,3])

Entire row at index 2: [16 44 51 83 79]
Entire column at index 3: [45 82 83]


In [52]:
print("\nSubsetting sub-matrices\n--------------------------")
print("Matrix with row indices 1 and 2 and column indices 3 and 4\n", mat[1:3,3:5])



Subsetting sub-matrices
--------------------------
Matrix with row indices 1 and 2 and column indices 3 and 4
 [[82 90]
 [83 79]]


In [53]:
print("Matrix with row indices 0 and 1 and column indices 1 and 3\n", mat[0:2,[1,3]])

Matrix with row indices 0 and 1 and column indices 1 and 3
 [[63 45]
 [23 82]]


# Subseting

In [54]:
mat = np.array(ri(10,100,15)).reshape(3,5)
print("Matrix of random 2-digit numbers\n--------------------------------\n",mat) 

Matrix of random 2-digit numbers
--------------------------------
 [[42 60 91 64 34]
 [43 49 13 60 69]
 [33 30 64 58 38]]


In [55]:
print ("Elements greater than 50\n", mat[mat>50])

Elements greater than 50
 [60 91 64 60 69 64 58]


# Slicing

In [56]:
mat = np.array([[11,12,13],[21,22,23],[31,32,33]])
print("Original matrix")
print(mat) 

Original matrix
[[11 12 13]
 [21 22 23]
 [31 32 33]]


In [57]:
mat_slice = mat[:2,:2]
print ("\nSliced matrix")
print(mat_slice)
print ("\nChange the sliced matrix")


Sliced matrix
[[11 12]
 [21 22]]

Change the sliced matrix


In [58]:
mat_slice[0,0] = 1000
print (mat_slice)

[[1000   12]
 [  21   22]]


# Universal Functions

In [59]:
from numpy.random import randint as ri
mat1 = np.array(ri(1,10,9)).reshape(3,3)
mat2 = np.array(ri(1,10,9)).reshape(3,3)
print("\n1st Matrix of random single-digit numbers\n----------------------------------------\n",mat1)
print("\n2nd Matrix of random single-digit numbers\n----------------------------------------\n",mat2) 


1st Matrix of random single-digit numbers
----------------------------------------
 [[2 7 6]
 [9 1 9]
 [9 6 3]]

2nd Matrix of random single-digit numbers
----------------------------------------
 [[8 6 7]
 [3 2 1]
 [5 4 2]]


In [60]:
print("\nAddition\n------------------\n", mat1+mat2)
print("\nMultiplication\n------------------\n", mat1*mat2) 


Addition
------------------
 [[10 13 13]
 [12  3 10]
 [14 10  5]]

Multiplication
------------------
 [[16 42 42]
 [27  2  9]
 [45 24  6]]


In [None]:
print("\nDivision\n------------------\n", mat1/mat2)
print("\nLineaer combination: 3*A - 2*B\n-----------------------------\n", 3*mat1-2*mat2) 

In [None]:
print("\nAddition of a scalar (100)\n-------------------------\n", 100+mat1) 

In [None]:
print("\nExponentiation, matrix cubed here\n----------------------------------------\n", mat1**3)
print("\nExponentiation, sq-root using pow function\n-------------------------------------------\n",pow(mat1,0.5))

# Array Math

In [None]:
mat1 = np.array(ri(1,10,9)).reshape(3,3)
mat2 = np.array(ri(1,10,9)).reshape(3,3)
print("\n1st Matrix of random single-digit numbers\n----------------------------------------\n",mat1)
print("\n2nd Matrix of random single-digit numbers\n----------------------------------------\n",mat2) 

In [None]:
print("\nSq-root of 1st matrix using np\n------------------\n", np.sqrt(mat1)) 

In [None]:
#Exponential with e (2.71)
print("\nExponential power of 1st matrix using np\n",'-'*50,"\n", np.exp(mat1)) 

In [None]:
print("\n10-base logarithm on 1st matrix using np\n",'-'*50,"\n", np.log10(mat1)) 

# basic stats

In [1]:
import numpy as np

In [4]:
from numpy.random import randint as ri
mat1 = np.array(ri(1,10,9)).reshape(3,3)
mat2 = np.array(ri(1,10,9)).reshape(3,3)

mat4 = np.array(ri(1,10,9)).reshape(3,3)
mat5 = np.array(ri(1,10,9)).reshape(3,3)
print("\n1st Matrix of random single-digit numbers\n","-"*50,"\n",mat1)
print("\n2nd Matrix of random single-digit numbers\n","-"*50,"\n",mat2) 

print("\n1st Matrix of random single-digit numbers\n","-"*50,"\n",mat4)
print("\n2nd Matrix of random single-digit numbers\n","-"*50,"\n",mat5) 


1st Matrix of random single-digit numbers
 -------------------------------------------------- 
 [[3 6 6]
 [1 2 9]
 [7 5 9]]

2nd Matrix of random single-digit numbers
 -------------------------------------------------- 
 [[8 9 1]
 [1 5 3]
 [2 2 6]]

1st Matrix of random single-digit numbers
 -------------------------------------------------- 
 [[4 8 6]
 [8 4 2]
 [1 9 4]]

2nd Matrix of random single-digit numbers
 -------------------------------------------------- 
 [[5 7 9]
 [8 6 6]
 [7 6 7]]


In [3]:
print("\nSum of all numbers in 1st matrix\n","-"*50,"\n",np.sum(mat1))
print("\nSum of all numbers in columns of 1st matrix\n","-"*50,"\n",np.sum(mat1,axis=0)) 


Sum of all numbers in 1st matrix
 -------------------------------------------------- 
 45

Sum of all numbers in columns of 1st matrix
 -------------------------------------------------- 
 [ 8 20 17]


In [7]:
print("\nSum of all numbers in rows of 1st matrix\n","-"*50,"\n",np.sum(mat1,axis=1))
print("\nProduct of all numbers in rows of 1st matrix\n","-"*50,"\n",np.prod(mat1,axis=1))

print("\nSum of all numbers in columns of 1st matrix\n","-"*50,"\n",np.sum(mat1,axis=0))
print("\nProduct of all numbers in columns of 1st matrix\n","-"*50,"\n",np.prod(mat1,axis=0)) 

print("\nSum of all numbers of 1st matrix\n","-"*50,"\n",np.sum(mat1))
print("\nProduct of all numbers of 1st matrix\n","-"*50,"\n",np.prod(mat1)) 


Sum of all numbers in rows of 1st matrix
 -------------------------------------------------- 
 [15 12 21]

Product of all numbers in rows of 1st matrix
 -------------------------------------------------- 
 [108  18 315]

Sum of all numbers in columns of 1st matrix
 -------------------------------------------------- 
 [11 13 24]

Product of all numbers in columns of 1st matrix
 -------------------------------------------------- 
 [ 21  60 486]

Sum of all numbers of 1st matrix
 -------------------------------------------------- 
 48

Product of all numbers of 1st matrix
 -------------------------------------------------- 
 612360


In [8]:
print("\nProduct of all numbers in columns of 2nd matrix\n","-"*50,"\n",np.prod(mat2,axis=0))
print("\nMean of all numbers in 1st matrix\n","-"*50,"\n",np.mean(mat1)) 


Product of all numbers in columns of 2nd matrix
 -------------------------------------------------- 
 [16 90 18]

Mean of all numbers in 1st matrix
 -------------------------------------------------- 
 5.333333333333333


In [9]:
print("\nStandard deviation of all numbers in 1st matrix\n","-"*50,"\n",np.std(mat1))  


Standard deviation of all numbers in 1st matrix
 -------------------------------------------------- 
 2.70801280154532


In [10]:
print("\n Variance of all numbers in 1st matrix\n","-"*50,"\n",np.var(mat1))



 Variance of all numbers in 1st matrix
 -------------------------------------------------- 
 7.333333333333333


In [11]:
print("\n50th percentile of all numbers in the modified matrix\n","-"*60,"\n",np.percentile(mat1,50))



50th percentile of all numbers in the modified matrix
 ------------------------------------------------------------ 
 6.0


In [12]:
print("\n90th percentile of all numbers in the modified matrix\n","-"*60,"\n",np.percentile(mat1,90))


90th percentile of all numbers in the modified matrix
 ------------------------------------------------------------ 
 9.0


In [13]:
print("\nMedian of all numbers in the modified matrix\n","-"*60,"\n",np.median(mat1))



Median of all numbers in the modified matrix
 ------------------------------------------------------------ 
 6.0


In [14]:
# Create arrays
import numpy as np
my_house = np.array([18.0, 20.0, 10.75, 9.50])
your_house = np.array([14.0, 24.0, 14.25, 9.0])

# my_house greater than 18.5 or smaller than 10
print(np.logical_or(my_house > 18.5, my_house < 10))

# Both my_house and your_house smaller than 11
print(np.logical_and(my_house < 11, your_house < 11))

[False  True False  True]
[False False False  True]


In [15]:
(my_house > 18.5) | (my_house < 10)

array([False,  True, False,  True])

In [16]:
(my_house < 11) & (your_house < 11)

array([False, False, False,  True])

In [17]:
np_baseball = np.array([[ 74, 180],[ 74, 215],[ 72, 210],[ 75, 205],[ 75, 190],[ 73, 195]])
np_height = np.array([74, 74, 72, 75, 75, 73])

In [18]:
for x in np_baseball:
    print(x)

[ 74 180]
[ 74 215]
[ 72 210]
[ 75 205]
[ 75 190]
[ 73 195]


In [19]:
# Import numpy as np
import numpy as np

# For loop over np_height
for x in np_height:
    print(str(x) + " inches")

# For loop over np_baseball
for x in np.nditer(np_baseball):
    print(str(x))


74 inches
74 inches
72 inches
75 inches
75 inches
73 inches
74
180
74
215
72
210
75
205
75
190
73
195


## Pandas

In [1]:
import pandas as pd
import numpy as np
# Build cars DataFrame
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr =  [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]
dict = { 'country':names, 'drives_right':dr, 'cars_per_cap':cpc }
cars = pd.DataFrame(dict)
print(cars)

# Definition of row_labels
row_labels = ['US', 'AUS', 'JAP', 'IN', 'RU', 'MOR', 'EG']

# Specify row labels of cars
cars.index = row_labels

# Print cars again
print(cars)

         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45
           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JAP          Japan         False           588
IN           India         False            18
RU          Russia          True           200
MOR        Morocco          True            70
EG           Egypt          True            45


In [21]:
#Renamed
cars.columns = ['Cars Per Capita','Country Name','Drives Right']
cars

Unnamed: 0,Cars Per Capita,Country Name,Drives Right
US,United States,True,809
AUS,Australia,False,731
JAP,Japan,False,588
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


In [22]:
#Building DataFrames with broadcasting

# Make a string with the value 'PA': state
planet = 'Earth'
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
# Construct a dictionary: data
data = {'Country':names, 'Planet':planet}

# Construct a DataFrame from dictionary data: df
df = pd.DataFrame(data)

# Print the DataFrame
print(df)

         Country Planet
0  United States  Earth
1      Australia  Earth
2          Japan  Earth
3          India  Earth
4         Russia  Earth
5        Morocco  Earth
6          Egypt  Earth


In [23]:
import pandas as pd

# Build cars DataFrame
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr =  [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]
dict = { 'country':names, 'drives_right':dr, 'cars_per_cap':cpc }
cars = pd.DataFrame(dict)
print(cars)

# Definition of row_labels
row_labels = ['US', 'AUS', 'JAP', 'IN', 'RU', 'MOR', 'EG']

# Specify row labels of cars
cars.index = row_labels

# Print cars again
print(cars)

         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45
           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JAP          Japan         False           588
IN           India         False            18
RU          Russia          True           200
MOR        Morocco          True            70
EG           Egypt          True            45


In [24]:
# Print out country column as Pandas Series
cars['country']

US     United States
AUS        Australia
JAP            Japan
IN             India
RU            Russia
MOR          Morocco
EG             Egypt
Name: country, dtype: object

In [25]:
cars[['country']]

Unnamed: 0,country
US,United States
AUS,Australia
JAP,Japan
IN,India
RU,Russia
MOR,Morocco
EG,Egypt


In [27]:
# Print out first 3 observations
print(cars[0:3])

# Print out fourth, fifth and sixth observation
print(cars[3:6])

           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JAP          Japan         False           588
     country  drives_right  cars_per_cap
IN     India         False            18
RU    Russia          True           200
MOR  Morocco          True            70


In [33]:
# Print out observation for Japan
print(cars.iloc[2])
print('***'*20)
# Print out observations for Australia and Egypt
print(cars.loc[['AUS', 'EG']])

country         Japan
drives_right    False
cars_per_cap      588
Name: JAP, dtype: object
************************************************************
       country  drives_right  cars_per_cap
AUS  Australia         False           731
EG       Egypt          True            45


In [37]:
# Print out DataFrame with country and drives_right columns
cars[['country', 'drives_right']]

Unnamed: 0,country,drives_right
US,United States,True
AUS,Australia,False
JAP,Japan,False
IN,India,False
RU,Russia,True
MOR,Morocco,True
EG,Egypt,True


In [28]:
cars

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JAP,Japan,False,588
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


In [51]:
# Print out drives_right value of Morocco
print(cars.iloc[5, 2])

print('***'*20)
# Print sub-DataFrame
print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])

print('***'*20)
# Print sub-DataFrame
print(cars.loc[['RU', 'MOR'], ])

print('***'*20)
# Print sub-DataFrame
print(cars.loc[:,['country', 'drives_right']])


70
************************************************************
     country  drives_right
RU    Russia          True
MOR  Morocco          True
************************************************************
     country  drives_right  cars_per_cap
RU    Russia          True           200
MOR  Morocco          True            70
************************************************************
           country  drives_right
US   United States          True
AUS      Australia         False
JAP          Japan         False
IN           India         False
RU          Russia          True
MOR        Morocco          True
EG           Egypt          True


In [52]:
# Print out drives_right column as Series
print(cars.iloc[:, 2])

# Print out drives_right column as DataFrame
print(cars.iloc[:, [2]])

# Print out cars_per_cap and drives_right as DataFrame
print(cars.loc[:, ['cars_per_cap', 'drives_right']])

US     809
AUS    731
JAP    588
IN      18
RU     200
MOR     70
EG      45
Name: cars_per_cap, dtype: int64
     cars_per_cap
US            809
AUS           731
JAP           588
IN             18
RU            200
MOR            70
EG             45
     cars_per_cap  drives_right
US            809          True
AUS           731         False
JAP           588         False
IN             18         False
RU            200          True
MOR            70          True
EG             45          True


In [53]:
# Import numpy, you'll need this
import numpy as np

# Create medium: observations with cars_per_cap between 100 and 500
cpc = cars['cars_per_cap']
between = np.logical_and(cpc > 100, cpc < 500)
medium = cars[between]

# Print medium
medium

Unnamed: 0,country,drives_right,cars_per_cap
RU,Russia,True,200


In [54]:
cpc > 100

US      True
AUS     True
JAP     True
IN     False
RU      True
MOR    False
EG     False
Name: cars_per_cap, dtype: bool

In [55]:
cpc < 500

US     False
AUS    False
JAP    False
IN      True
RU      True
MOR     True
EG      True
Name: cars_per_cap, dtype: bool

In [56]:
np.logical_and(cpc > 100, cpc < 500)

US     False
AUS    False
JAP    False
IN     False
RU      True
MOR    False
EG     False
Name: cars_per_cap, dtype: bool

In [57]:
# Iterate over rows of cars
for lab, row in cars.iterrows() :
    print(lab)
    print(row)

US
country         United States
drives_right             True
cars_per_cap              809
Name: US, dtype: object
AUS
country         Australia
drives_right        False
cars_per_cap          731
Name: AUS, dtype: object
JAP
country         Japan
drives_right    False
cars_per_cap      588
Name: JAP, dtype: object
IN
country         India
drives_right    False
cars_per_cap       18
Name: IN, dtype: object
RU
country         Russia
drives_right      True
cars_per_cap       200
Name: RU, dtype: object
MOR
country         Morocco
drives_right       True
cars_per_cap         70
Name: MOR, dtype: object
EG
country         Egypt
drives_right     True
cars_per_cap       45
Name: EG, dtype: object


In [58]:
# Adapt for loop
for lab, row in cars.iterrows() :
    print(lab + ": " + str(row['cars_per_cap']))

US: 809
AUS: 731
JAP: 588
IN: 18
RU: 200
MOR: 70
EG: 45


In [None]:
# Code for loop that adds COUNTRY column
for lab, row in cars.iterrows() :
    cars.loc[lab, "COUNTRY"] = row["country"].upper()
    
# Print cars
print(cars)

In [None]:
# Use .apply(str.upper)
cars["COUNTRY"] = cars["country"].apply(str.upper)

In [None]:
cars["name_length"] = cars["country"].apply(len)