In [None]:

#######################################################
#######################################################
############    COPYRIGHT - DATA SOCIETY   ############
#######################################################
#######################################################

## DAY 2 DATA WRANGLING WITH PYTHON ##

## NOTE: To run individual pieces of code, select the line of code and
##       press ctrl + enter for PCs or command + enter for Macs



In [None]:
#=================================================-
#### Slide 5: Creating arrays  ####

# Importing numpy as 'np' sets 'np' as the shortcut/alias for the package.
import numpy as np

# Build a NumPy array out of a plain Python list of numbers.
arr = np.array(
    [17, -10, 16.8, 11]
)
print(arr)

# Inspect the class of the object that np.array() handed back.
print(type(arr))



In [None]:
#=================================================-
#### Slide 6: Dtype in arrays  ####

# Check the data type stored in the array.
# `dtype` is a property of the array as a whole, not of an individual element.
print(arr.dtype)



In [None]:
#=================================================-
#### Slide 7: Using ndarray  ####

# Create an array from a list of four integers and display it.
x = np.array([3, 19, 7, 11])
print(x)



In [None]:
#=================================================-
#### Slide 9: Building an array with linspace  ####

# Ask for 25 evenly spaced values that begin at -2 and finish at -1.
y = np.linspace(start=-2, stop=-1, num=25)
print(y)



In [None]:
#=================================================-
#### Slide 10: Alternative ways of accessing functions  ####

# Importing functions by name makes them callable without the `np.` prefix.
from numpy import array, linspace

x = array([0.01, 0.45, -0.3])          #<- same function as np.array
y = linspace(start=0, stop=1, num=50)  #<- same function as np.linspace



In [None]:
#=================================================-
#### Slide 13: Arrays vs Lists  ####

# Numbers and strings in one list: the printed dtype shows the single
# element type NumPy settled on for all of them.
mixed_array = np.array([1, 2, "apple", "XYZ", 5.5])
print(mixed_array, mixed_array.dtype, sep="\n")

# Integers and a float in one list: again, check the resulting dtype.
mixed_array = np.array([3, 12, 5.56])
print(mixed_array, mixed_array.dtype, sep="\n")



In [None]:
#=================================================-
#### Slide 15: Arrays from sequences (cont'd)  ####

# Consecutive integers 0 through 50; the stop value (51) itself is left out.
rng = np.arange(51)  #<- start defaults to 0
print(rng)



In [None]:
#=================================================-
#### Slide 16: Arrays from sequences - using a step size  ####

# Every second integer, starting at 0 and stopping before 23.
evens = np.arange(0, 23, step=2)
print(evens)

# The step size may also be a fraction.
quarters = np.arange(0, 1, step=0.25)  #<- contains 0 to 0.75
print(quarters)



In [None]:
#=================================================-
#### Slide 17: Helper functions: min, max, and sum  ####

# Generate 5 evenly spaced numbers from 15 to 19.
x = np.linspace(15, 19, 5)
# A notebook cell only displays the value of its last expression, so each
# result is printed explicitly to make all three visible.
# Find the min of x.
print(np.amin(x))
# Find the max of x.
print(np.amax(x))
# Find the sum of x.
print(np.sum(x))



In [None]:
#=================================================-
#### Slide 18: Convert an array to a list  ####

# Convert the array to a regular Python list.
# `tolist()` returns native Python numbers; `list(evens)` would instead give a
# list of NumPy scalar objects, which newer NumPy versions print as
# `np.int64(0), np.int64(2), ...`.
print(evens.tolist())



In [None]:
#=================================================-
#### Slide 19: Operations on arrays  ####

# Two arrays of equal length to combine with each other.
a = np.array([1] * 4)
b = np.array([2] * 4)

# Each arithmetic operator works on the arrays position by position.
print(a + b)  #<- addition

print(a - b)  #<- subtraction
print(a * b)  #<- multiplication
print(a / b)  #<- division



In [None]:
#=================================================-
#### Slide 20: Mathematical functions on lists   ####

# The built-in abs() expects a single number, so handing it a whole list
# raises a TypeError. The error is the point of this example: catch it and
# show its message so that the remaining cells still run under "Run All".
try:
    abs([-2, -7, 1])
except TypeError as error:
    print("TypeError:", error)



In [None]:
#=================================================-
#### Slide 21: Mathematical functions on arrays  ####

# np.abs accepts a single number ...
print(np.abs(-3))
# ... and a whole list, which it handles element by element.
print(np.abs([-2, -7, 1]))
# Values from 20 up to (not including) 30, spaced 0.5 apart.
nums = np.arange(20, 30, 0.5)
print(len(nums))



In [None]:
#=================================================-
#### Slide 22: User-defined functions on arrays  ####

# Define a function that triples its input and then adds 1.
def some_calculation(arr):
    """Return ``3 * arr + 1``.

    When `arr` is a NumPy array the arithmetic is applied to every
    element, so the result is an array of the same length.
    """
    tripled = 3 * arr
    return tripled + 1

# Run the function on the whole `nums` array in a single call and show the result.
print(some_calculation(arr=nums))



In [None]:
#=================================================-
#### Slide 24: Exercise 1  ####





In [None]:
#=================================================-
#### Slide 26: Accessing array values  ####

# Importing numpy as 'np' sets 'np' as the shortcut/alias for the package.
import numpy as np

nums = np.arange(20, 30, 0.5)  #<- create the array
print(len(nums))               #<- number of elements in the array
print(nums[1])                 #<- second element (indexing starts at 0)

print(nums[:3])                #<- first three elements (indices 0, 1 and 2)



In [None]:
#=================================================-
#### Slide 27: Logical filtering  ####

# Display the complete array first for comparison.
print(nums)

# Comparing the array with a number produces one True/False per element;
# indexing with that result keeps only the elements marked True.
large_nums = nums[(nums > 26)]
print(large_nums)



In [None]:
#=================================================-
#### Slide 28: Logical filtering (cont'd)  ####

# Full array, shown again so the filtered result can be checked against it.
print(nums)

# Keep only the values strictly greater than 26.
large_nums = nums[(nums > 26)]
print(large_nums)



In [None]:
#=================================================-
#### Slide 29: Two-dimensional arrays  ####

# A list of equally long lists becomes a two-dimensional array;
# every inner list is one row.
mat = np.array([[8, 2, 6, 8],
                [4, 5, 7, 2],
                [3, 9, 7, 1]])
print(mat)



In [None]:
#=================================================-
#### Slide 31: Two-dimensional arrays - shape (cont'd)  ####

# `shape` gives the size of each dimension as a tuple: (rows, columns).
print(mat.shape)  #<- 3 rows and 4 columns
# Unpack the tuple so that each size gets its own name.
(nrows, ncols) = mat.shape
print(nrows)      #<- number of rows



In [None]:
#=================================================-
#### Slide 32: Two-dimensional arrays - extracting elements  ####

# Extract a single value by giving its position as [row, column].
print(mat[1, 3]) #<- 2nd row 4th column - remember that indexing starts at 0!



In [None]:
#=================================================-
#### Slide 33: Two-dimensional arrays - rows  ####

# The first index selects the row; `:` keeps every column of that row.
print(mat[0, :])   #<- first row
# A slice in the column position limits which columns come back.
print(mat[0, :2])  #<- first row and just first 2 columns



In [None]:
#=================================================-
#### Slide 34: Two-dimensional arrays - columns  ####

# Select a column by putting `:` (all rows) in the row position and the
# column index in the second position.
print(mat[:, 2]) #<- 3rd column
print(mat[1:3, 2]) #<- 3rd column but skipping over the first row
# Using a slice (2:3) instead of a single index (2) for the column keeps
# the result two-dimensional.
print(mat[1:3, 2:3]) #<- same as previous, but maintains the vertical structure of the column



In [None]:
#=================================================-
#### Slide 35: Reshaping arrays  ####

# Twelve consecutive integers, 1 through 12, in a flat array.
arr = np.arange(1, 13)
print(arr)
# Show the same values arranged as 3 rows and 4 columns.
print(arr.reshape((3, 4)))



In [None]:
#=================================================-
#### Slide 36: Reshaping arrays (cont'd)  ####

# Passing -1 for one dimension asks NumPy to work out that dimension from the
# size of the array and the other dimension. -1 is the only documented
# placeholder, and the dimension that is given must divide the number of
# elements evenly: `arr` holds 12 values, so 2 or 4 rows work while 5 rows
# would raise a ValueError.
print(arr.reshape(2,      #<- specify number of rows=2
                  -1))    #<- number of columns=-1 lets NumPy infer it
print(arr.reshape(4,      #<- specify number of rows=4
                  -1))    #<- number of columns=-1 lets NumPy infer it



In [None]:
#=================================================-
#### Slide 41: Import Pandas and os  ####

import pandas as pd
import os



In [None]:
#=================================================-
#### Slide 42: Directory settings  ####

# Set `main_dir` to the location of your `skill-soft` folder.
# The folder is expected on the Desktop of the current user. Expanding "~"
# yields the right home directory on Linux (/home/<username>), Mac
# (/Users/<username>) and Windows (C:\Users\<username>) alike, so there is no
# placeholder to edit and no platform-specific line that overrides the others.
# If your `skill-soft` folder lives somewhere else, replace this path.
main_dir = os.path.join(os.path.expanduser("~"), "Desktop", "skill-soft")
# Make `data_dir` from the `main_dir` and
# remainder of the path to data directory.
# os.path.join inserts the separator that is correct for the current platform.
data_dir = os.path.join(main_dir, "data")




In [None]:
#=================================================-
#### Slide 43: Working directory  ####

# Set working directory to `data_dir` (built in the directory settings cell).
# os.chdir raises an error if that folder does not exist.
os.chdir(data_dir)
# Check working directory by printing the current one.
print(os.getcwd())

