# Numpy

In [1]:
fixed_salary = [100,200,300]
variable_salary = [10,20,30]
total_salary = fixed_salary + variable_salary
print(total_salary)

[100, 200, 300, 10, 20, 30]


In [2]:
import numpy as np
fixed = np.array(fixed_salary)
variable = np.array(variable_salary)
total = fixed + variable
print(total)

[110 220 330]


In [3]:
type(total)

numpy.ndarray

In [4]:
total

array([110, 220, 330])

In [5]:
total+10

array([120, 230, 340])

In [6]:
total[total>200][1]

330

Numpy is great for doing vector arithmetic. If you compare its functionality with regular Python lists, however, some things have changed.
First of all, numpy arrays cannot contain elements with different types. If you try to build such a list, some of the elements' types are changed to end up with a homogeneous list. This is known as type coercion.
Second, the typical arithmetic operators, such as +, -, * and / have a different meaning for regular Python lists and numpy arrays.

In [7]:
np.array([True, 1, 2]) + np.array([3, 4, False])

array([4, 5, 2])

In [8]:
# Create baseball, a list of lists
baseball = [[180, 78.4],
            [215, 102.7],
            [210, 98.5],
            [188, 75.2]]

# Import numpy
import numpy as np

# Create a 2D numpy array from baseball: np_baseball
np_baseball = np.array(baseball)

# Print out the type of np_baseball
print(type(np_baseball))

# Print out the shape of np_baseball
print(np_baseball.shape)
print(np_baseball)

<class 'numpy.ndarray'>
(4, 2)
[[180.   78.4]
 [215.  102.7]
 [210.   98.5]
 [188.   75.2]]


In [9]:
print(np_baseball[1][0])
print(np_baseball[1,0])

215.0
215.0


# Indexing and slicing

In [10]:
arr = np.arange(0,11)
print("Array:",arr)


Array: [ 0  1  2  3  4  5  6  7  8  9 10]


In [11]:
print("Element at 7th index is:", arr[7])


Element at 7th index is: 7


In [12]:
print("Elements from 3rd to 5th index are:", arr[3:6])


Elements from 3rd to 5th index are: [3 4 5]


# Universal Functions

In [13]:
from numpy.random import randint as ri
mat1 = np.array(ri(1,10,9)).reshape(3,3)
mat2 = np.array(ri(1,10,9)).reshape(3,3)
print("\n1st Matrix of random single-digit numbers\n----------------------------------------\n",mat1)
print("\n2nd Matrix of random single-digit numbers\n----------------------------------------\n",mat2)



1st Matrix of random single-digit numbers
----------------------------------------
 [[7 9 3]
 [8 3 2]
 [3 8 1]]

2nd Matrix of random single-digit numbers
----------------------------------------
 [[1 2 5]
 [9 2 4]
 [8 8 5]]


In [14]:
print("\nAddition\n------------------\n", mat1+mat2)
print("\nMultiplication\n------------------\n", mat1*mat2)



Addition
------------------
 [[ 8 11  8]
 [17  5  6]
 [11 16  6]]

Multiplication
------------------
 [[ 7 18 15]
 [72  6  8]
 [24 64  5]]


In [15]:
print("\nDivision\n------------------\n", mat1/mat2)
print("\nLineaer combination: 3*A - 2*B\n-----------------------------\n", 3*mat1-2*mat2)



Division
------------------
 [[7.         4.5        0.6       ]
 [0.88888889 1.5        0.5       ]
 [0.375      1.         0.2       ]]

Lineaer combination: 3*A - 2*B
-----------------------------
 [[19 23 -1]
 [ 6  5 -2]
 [-7  8 -7]]


In [16]:
print("\nAddition of a scalar (100)\n-------------------------\n", 100+mat1)



Addition of a scalar (100)
-------------------------
 [[107 109 103]
 [108 103 102]
 [103 108 101]]


In [17]:
print("\nExponentiation, matrix cubed here\n----------------------------------------\n", mat1**3)
print("\nExponentiation, sq-root using pow function\n-------------------------------------------\n",pow(mat1,0.5))


Exponentiation, matrix cubed here
----------------------------------------
 [[343 729  27]
 [512  27   8]
 [ 27 512   1]]

Exponentiation, sq-root using pow function
-------------------------------------------
 [[2.64575131 3.         1.73205081]
 [2.82842712 1.73205081 1.41421356]
 [1.73205081 2.82842712 1.        ]]


In [18]:
print("\n10-base logarithm on 1st matrix using np\n",'-'*50,"\n", np.log10(mat1))



10-base logarithm on 1st matrix using np
 -------------------------------------------------- 
 [[0.84509804 0.95424251 0.47712125]
 [0.90308999 0.47712125 0.30103   ]
 [0.47712125 0.90308999 0.        ]]


# basic stats

In [19]:
from numpy.random import randint as ri
mat1 = np.array(ri(1,10,9)).reshape(3,3)
mat2 = np.array(ri(1,10,9)).reshape(3,3)
print("\n1st Matrix of random single-digit numbers\n","-"*50,"\n",mat1)
print("\n2nd Matrix of random single-digit numbers\n","-"*50,"\n",mat2)



1st Matrix of random single-digit numbers
 -------------------------------------------------- 
 [[7 8 4]
 [3 9 1]
 [2 3 8]]

2nd Matrix of random single-digit numbers
 -------------------------------------------------- 
 [[7 7 7]
 [7 3 6]
 [2 8 2]]


In [20]:
print("\nSum of all numbers in 1st matrix\n","-"*50,"\n",np.sum(mat1))
print("\nSum of all numbers in columns of 1st matrix\n","-"*50,"\n",np.sum(mat1,axis=0))



Sum of all numbers in 1st matrix
 -------------------------------------------------- 
 45

Sum of all numbers in columns of 1st matrix
 -------------------------------------------------- 
 [12 20 13]


In [21]:
print("\nSum of all numbers in rows of 1st matrix\n","-"*50,"\n",np.sum(mat1,axis=1))
print("\nProduct of all numbers in rows of 1st matrix\n","-"*50,"\n",np.prod(mat1,axis=1))



Sum of all numbers in rows of 1st matrix
 -------------------------------------------------- 
 [19 13 13]

Product of all numbers in rows of 1st matrix
 -------------------------------------------------- 
 [224  27  48]


In [22]:
print("\nProduct of all numbers in columns of 2nd matrix\n","-"*50,"\n",np.prod(mat2,axis=0))
print("\nMean of all numbers in 1st matrix\n","-"*50,"\n",np.mean(mat1))



Product of all numbers in columns of 2nd matrix
 -------------------------------------------------- 
 [ 98 168  84]

Mean of all numbers in 1st matrix
 -------------------------------------------------- 
 5.0


In [23]:
print("\nStandard deviation of all numbers in 1st matrix\n","-"*50,"\n",np.std(mat1))



Standard deviation of all numbers in 1st matrix
 -------------------------------------------------- 
 2.8284271247461903


In [24]:
print("\n Variance of all numbers in 1st matrix\n","-"*50,"\n",np.var(mat1))



 Variance of all numbers in 1st matrix
 -------------------------------------------------- 
 8.0


In [25]:
print("\n50th percentile of all numbers in the modified matrix\n","-"*60,"\n",np.percentile(mat1,50))



50th percentile of all numbers in the modified matrix
 ------------------------------------------------------------ 
 4.0


In [26]:
print("\n90th percentile of all numbers in the modified matrix\n","-"*60,"\n",np.percentile(mat1,90))


90th percentile of all numbers in the modified matrix
 ------------------------------------------------------------ 
 8.2


In [27]:
print("\nMedian of all numbers in the modified matrix\n","-"*60,"\n",np.median(mat1))



Median of all numbers in the modified matrix
 ------------------------------------------------------------ 
 4.0


In [28]:
import pandas as pd

# Build cars DataFrame
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr =  [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]
dict = { 'country':names, 'drives_right':dr, 'cars_per_cap':cpc }
cars = pd.DataFrame(dict)
print(cars)

# Definition of row_labels
row_labels = ['US', 'AUS', 'JAP', 'IN', 'RU', 'MOR', 'EG']

# Specify row labels of cars
cars.index = row_labels

# Print cars again
print(cars)

         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45
           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JAP          Japan         False           588
IN           India         False            18
RU          Russia          True           200
MOR        Morocco          True            70
EG           Egypt          True            45


In [29]:
#Renamed
cars.columns = ['Cars Per Capita','Country Name','Drives Right']
cars

Unnamed: 0,Cars Per Capita,Country Name,Drives Right
US,United States,True,809
AUS,Australia,False,731
JAP,Japan,False,588
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


In [30]:
#Building DataFrames with broadcasting

# Make a string with the value 'PA': state
planet = 'Earth'
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
# Construct a dictionary: data
data = {'Country':names, 'Planet':planet}

# Construct a DataFrame from dictionary data: df
df = pd.DataFrame(data)

# Print the DataFrame
print(df)

         Country Planet
0  United States  Earth
1      Australia  Earth
2          Japan  Earth
3          India  Earth
4         Russia  Earth
5        Morocco  Earth
6          Egypt  Earth


In [31]:
import pandas as pd

# Build cars DataFrame
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr =  [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]
dict = { 'country':names, 'drives_right':dr, 'cars_per_cap':cpc }
cars = pd.DataFrame(dict)
print(cars)

# Definition of row_labels
row_labels = ['US', 'AUS', 'JAP', 'IN', 'RU', 'MOR', 'EG']

# Specify row labels of cars
cars.index = row_labels

# Print cars again
print(cars)

         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45
           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JAP          Japan         False           588
IN           India         False            18
RU          Russia          True           200
MOR        Morocco          True            70
EG           Egypt          True            45


In [32]:
# Print out country column as Pandas Series
cars['country']

US     United States
AUS        Australia
JAP            Japan
IN             India
RU            Russia
MOR          Morocco
EG             Egypt
Name: country, dtype: object

In [33]:
cars[['country']]

Unnamed: 0,country
US,United States
AUS,Australia
JAP,Japan
IN,India
RU,Russia
MOR,Morocco
EG,Egypt


In [34]:
# Print out DataFrame with country and drives_right columns
cars[['country', 'drives_right']]

Unnamed: 0,country,drives_right
US,United States,True
AUS,Australia,False
JAP,Japan,False
IN,India,False
RU,Russia,True
MOR,Morocco,True
EG,Egypt,True


In [35]:
# Print out first 3 observations
print(cars[0:3])

# Print out fourth, fifth and sixth observation
print(cars[3:6])

           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JAP          Japan         False           588
     country  drives_right  cars_per_cap
IN     India         False            18
RU    Russia          True           200
MOR  Morocco          True            70


In [36]:
# Print out observation for Japan
print(cars.iloc[2])

# Print out observations for Australia and Egypt
print(cars.loc[['AUS', 'EG']])

country         Japan
drives_right    False
cars_per_cap      588
Name: JAP, dtype: object
       country  drives_right  cars_per_cap
AUS  Australia         False           731
EG       Egypt          True            45


In [37]:
# Print out drives_right value of Morocco
print(cars.iloc[5, 2])

# Print sub-DataFrame
print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])

70
     country  drives_right
RU    Russia          True
MOR  Morocco          True


In [38]:
# Print out drives_right column as Series
print(cars.iloc[:, 2])

# Print out drives_right column as DataFrame
print(cars.iloc[:, [2]])

# Print out cars_per_cap and drives_right as DataFrame
print(cars.loc[:, ['cars_per_cap', 'drives_right']])

US     809
AUS    731
JAP    588
IN      18
RU     200
MOR     70
EG      45
Name: cars_per_cap, dtype: int64
     cars_per_cap
US            809
AUS           731
JAP           588
IN             18
RU            200
MOR            70
EG             45
     cars_per_cap  drives_right
US            809          True
AUS           731         False
JAP           588         False
IN             18         False
RU            200          True
MOR            70          True
EG             45          True


In [39]:
# Import numpy, you'll need this
import numpy as np

# Create medium: observations with cars_per_cap between 100 and 500
cpc = cars['cars_per_cap']
between = np.logical_and(cpc > 100, cpc < 500)
medium = cars[between]

# Print medium
medium

Unnamed: 0,country,drives_right,cars_per_cap
RU,Russia,True,200


In [40]:
# Use .apply(str.upper)
cars["COUNTRY"] = cars["country"].apply(str.upper)

In [41]:
cars["name_length"] = cars["country"].apply(len)

In [42]:
cars

Unnamed: 0,country,drives_right,cars_per_cap,COUNTRY,name_length
US,United States,True,809,UNITED STATES,13
AUS,Australia,False,731,AUSTRALIA,9
JAP,Japan,False,588,JAPAN,5
IN,India,False,18,INDIA,5
RU,Russia,True,200,RUSSIA,6
MOR,Morocco,True,70,MOROCCO,7
EG,Egypt,True,45,EGYPT,5
