A module is a file ending with _.py_, for example one located in the same directory as your notebook or one installed on Python's import path. Importing a module allows you to access the functions and variables defined in that file.

### Datetime

In [1]:
from datetime import datetime, date, time

# Keyword arguments make it explicit which component each number sets
dt = datetime(year=2021, month=12, day=9, hour=11, minute=43, second=23)
print("Day ", dt.day)
print("Hour", dt.hour)
print("Date", dt.date())  # date() returns only the calendar-date part

Day  9
Hour 11
Date 2021-12-09


In [4]:
# print() shows the human-readable str() form of the datetime ...
print(dt)
# ... while a bare expression at the end of a cell displays its repr()
dt

2021-12-09 11:43:23


datetime.datetime(2021, 12, 9, 11, 43, 23)

In [5]:
# Format a datetime as a string using strftime directives
dt.strftime('%m/%d/%Y %H:%M') 

# %m is the zero-padded month, %d the zero-padded day, %M the minute
# %Y is the 4-digit year; %y is the 2-digit year
# %H is the hour on a 24-hour clock; %I is the hour on a 12-hour clock

'12/09/2021 11:43'

In [6]:
# Parse a string into a datetime object; the format string describes how the
# text is laid out. Time components absent from the format default to 0.
datetime.strptime('20211010', '%Y%m%d')

datetime.datetime(2021, 10, 10, 0, 0)

In [7]:
dt.replace(minute=33)

# datetime objects are immutable: replace() returns a new object with the
# given field(s) changed and leaves dt itself untouched.

datetime.datetime(2021, 12, 9, 11, 33, 23)

In [8]:
dt # remains unchanged: replace() returned a new object and did not modify dt

datetime.datetime(2021, 12, 9, 11, 43, 23)

In [9]:
# Time difference: subtracting one datetime from another gives a timedelta

dt2 = dt.replace(minute=33)  # copy of dt with only the minute changed to 33
delta = dt - dt2 
delta # the difference between the two datetimes, a datetime.timedelta object

datetime.timedelta(seconds=600)

### CSV

In [7]:
# We are going to be using a lot of CSV files
# though we will use Pandas mainly, it is handy to learn to use the csv module
import csv 

# magic command to maintain floating point precision up to two decimals
%precision 2 

with open('insurance.csv') as csvfile:
    my_csv = list(csv.DictReader(csvfile))

my_csv[:2]

[{'age': '19',
  'sex': 'female',
  'bmi': '27.9',
  'children': '0',
  'smoker': 'yes',
  'region': 'southwest',
  'charges': '16884.924'},
 {'age': '18',
  'sex': 'male',
  'bmi': '33.77',
  'children': '1',
  'smoker': 'no',
  'region': 'southeast',
  'charges': '1725.5523'}]

In [9]:
# my_csv is a list with one dict per data row, so len() is the row count
print("Number of rows in my csv file: ", len(my_csv))

# each row is a dict, so the keys of the first row are the column names
print("Keys in the dict read from the csv file:")
my_csv[0].keys()

Number of rows in my csv file:  1338
Keys in the dict read from the csv file:


dict_keys(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'])

In [10]:
# Let's find the unique regions in this dataset.
# A set comprehension keeps each distinct value only once.
region_set = {row['region'] for row in my_csv}
region_set

{'northeast', 'northwest', 'southeast', 'southwest'}

### Numpy
NumPy provides fast processing of data stored as arrays, which can be built from lists or lists of lists.

In [5]:
import numpy as np 

# Build a 2-D array from a list of rows
np_array = np.array([[1, 2], [7, 8]])
print("The array\n", np_array)
print("Its dimensions", np_array.ndim)  # number of axes
print("Its shape", np_array.shape)  # length along each axis

The array
 [[1 2]
 [7 8]]
Its dimensions 2
Its shape (2, 2)


In [15]:
# some common numpy methods
# notice the argument formats: zeros/ones take the shape as a single argument
# (an int or a tuple), while random.rand takes each dimension separately
%precision 2

zeros_arr = np.zeros(4)
print(zeros_arr)
ones_arr = np.ones((2, 3))
print(ones_arr)
# NOTE(review): no seed is set in this cell, so these values will presumably
# differ on every run unless a seed was set earlier in the notebook
rand_arr = np.random.rand(4, 4)
print(rand_arr)

# comparing an array with a scalar gives a boolean array: True where the
# condition holds, element by element
print(rand_arr > 0.3)

# another common method is reshape; the new shape must hold the same number
# of elements (4 * 4 == 2 * 2 * 4)
reshaped_rand_arr = rand_arr.reshape(2, 2, 4) # pass the desired dimensions
print(reshaped_rand_arr)

[0. 0. 0. 0.]
[[1. 1. 1.]
 [1. 1. 1.]]
[[0.72 0.05 0.11 0.28]
 [0.09 0.58 0.42 0.57]
 [0.   0.09 0.13 0.47]
 [0.95 0.57 0.44 0.44]]
[[ True False False False]
 [False  True  True  True]
 [False False False  True]
 [ True  True  True  True]]
[[[0.72 0.05 0.11 0.28]
  [0.09 0.58 0.42 0.57]]

 [[0.   0.09 0.13 0.47]
  [0.95 0.57 0.44 0.44]]]


In [19]:
# a tiny thing to remember when it comes to array multiplications:
# np.dot on 2-D arrays is matrix multiplication, while * works element by element
array1 = np.full((2, 2), 2.0)
array2 = np.full((2, 2), 3.0)
print("Array 1\n", array1)
print("Array 2\n", array2)

print("Dot multiplication:\n", np.dot(array1, array2))
print("Elementwise multiplication:\n", array1 * array2)
# np.multiply(array1, array2) is another way to perform
# element-wise multiplication

Array 1
 [[2. 2.]
 [2. 2.]]
Array 2
 [[3. 3.]
 [3. 3.]]
Dot multiplication:
 [[12. 12.]
 [12. 12.]]
Elementwise multiplication:
 [[6. 6.]
 [6. 6.]]


In [32]:
# slicing an array works much like slicing a list, with one slice per axis
array3 = np.array([[1, 1], [2, 2], [3, 3]])
print("Array 3\n", array3)
array3[:2, :1] # first two rows, first column; the slice is a view, so modifying it also changes array3

Array 3
 [[1 1]
 [2 2]
 [3 3]]


array([[1],
       [2]])