# Data Manipulation with Python

Dates and times with Python

In [3]:
import datetime as dt
import time as tm

In [4]:
tm.time()

1686010368.4746933

In [5]:
dtnow = dt.datetime.fromtimestamp(tm.time())
dtnow

datetime.datetime(2023, 6, 6, 7, 13, 18, 747761)

In [8]:
dtnow.year, dtnow.month, dtnow.day, dtnow.hour, dtnow.minute, dtnow.second, dtnow.microsecond

(2023, 6, 6, 7, 13, 18, 747761)

In [15]:
delta = dt.timedelta(days=1,weeks=4)
delta

datetime.timedelta(days=29)

In [12]:
today = dt.date.today()
today

datetime.date(2023, 6, 6)

In [16]:
today+delta

datetime.date(2023, 7, 5)

Advanced Python Objects, map(), lambda and List Comprehension

In [4]:
class Bag:
    """A named bag that holds a list of items.

    Once an object is instantiated it has access to every attribute and
    method defined on the class.
    """

    # Class attribute: a single value shared by all instances.
    brand = "Degrey"

    def __init__(self, name, stuff):
        """Store the bag's name and its initial items."""
        self.stuff = stuff
        self.name = name

    def set_name(self, new_name):
        """Replace the bag's name with ``new_name``."""
        self.name = new_name

    def set_stuffs(self, new_stuffs):
        """Add ``new_stuffs`` to the bag by concatenating it onto ``stuff``."""
        combined = self.stuff + new_stuffs
        self.stuff = combined

In [5]:
bag1 = Bag("hello", ["pens", "ruler", "calculator"])

In [6]:
bag1.name

'hello'

In [7]:
bag1.set_name("world")

In [8]:
bag1.set_stuffs(["eraser", "books"])

In [13]:
# Functional Programming with map()
store1 = [1, 2, 3, 9, 8]
store2 = [1, 6, 9, 2, 1]
cheapest = list(map(min, store1, store2))
cheapest

[1, 2, 3, 2, 1]

In [15]:
# A lambda is an anonymous (unnamed) function whose body is a single expression; the lambda expression itself evaluates to a function object. Note that the parameter c below is accepted but never used.
my_function = lambda a,b,c : a+b
my_function(1,2,3)

3

In [16]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    """Return the first and last whitespace-separated words of ``person``, joined by one space."""
    words = person.split()
    title, last_name = words[0], words[-1]
    return title + ' ' + last_name

#option 1
for person in people:
    print(split_title_and_name(person) == (lambda x: x.split()[0] + ' ' + x.split()[-1])(person))

#option 2
list(map(split_title_and_name, people)) == list(map(lambda person: person.split()[0] + ' ' + person.split()[-1], people))

True
True
True
True


True

In [17]:
li_com = [number for number in range(100) if number % 7 == 0]
li_com

[0, 7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91, 98]

In [18]:
def times_tables():
    """Return the 100 products i*j for i and j in 0..9, ordered by i and then by j."""
    products = []
    for row in range(10):
        # Add one full row of the table (row*0 ... row*9) at a time.
        products.extend(row * col for col in range(10))
    return products

times_tables() == [i*j for i in range(10) for j in range(10)]

True

In [20]:
lowercase = 'abcdefghijklmnopqrstuvwxyz'
digits = '0123456789'

# Every 4-character ID made of two lowercase letters followed by two digits.
# a, b, c and d are already strings, so the concatenation needs no str() wrapper.
answer = [a + b + c + d for a in lowercase for b in lowercase for c in digits for d in digits]

In [22]:
answer[:10]

['aa00',
 'aa01',
 'aa02',
 'aa03',
 'aa04',
 'aa05',
 'aa06',
 'aa07',
 'aa08',
 'aa09']

# Numerical Python Library (Numpy)

## Array Creation

In [1]:
import numpy as np
import math as m

In [2]:
a = np.array([[1,2,3],[4,5,6]])
print(a)
print(a.ndim) # ndim is the number of axes (dimensions), not the number of rows; a is 2-D so this prints 2 (the row count is a.shape[0])

[[1 2 3]
 [4 5 6]]
2


In [3]:
print(a.shape)
print(a.dtype)

(2, 3)
int32


In [4]:
# If we know the shape of the array we want to create, these built-in functions create it pre-filled with a placeholder value (all ones or all zeros)
b = np.ones((2,3))
print(b)
c = np.zeros((2,3))
print(c)

print(c+b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]


In [5]:
np.random.rand(2,3)

array([[0.82457959, 0.26463149, 0.6675414 ],
       [0.29660439, 0.13215296, 0.44560319]])

In [6]:
# arange() function
ara = np.arange(1,10,2)
ara

array([1, 3, 5, 7, 9])

In [7]:
# linspace(a, b, c) function => create an array of c evenly spaced values from a to b (both endpoints included)
np.linspace(0,2,15)

array([0.        , 0.14285714, 0.28571429, 0.42857143, 0.57142857,
       0.71428571, 0.85714286, 1.        , 1.14285714, 1.28571429,
       1.42857143, 1.57142857, 1.71428571, 1.85714286, 2.        ])

## Array Operations

In [8]:
# Elementwise manipulation
x = np.array([10,20,30,40])
y = np.array([1,2,3,4])
print(x-y)
print(x+y)
print(x/y)
print(x*y)

[ 9 18 27 36]
[11 22 33 44]
[10. 10. 10. 10.]
[ 10  40  90 160]


In [9]:
# Boolean array
x > 10

array([False,  True,  True,  True])

In [10]:
# Matrix manipulation
X = np.array([[1,2],[3,4]])
Y = np.array([[5,6],[7,8]])
print(X*Y) # elementwise product
print(X@Y) # matrix product. We can also use the dot() function

[[ 5 12]
 [21 32]]
[[19 22]
 [43 50]]


In [14]:
X.dtype.name

'int32'

In [15]:
# Aggregation function like sum() max() min() mean()
print(X.sum())
print(X.max())
print(X.min())
print(X.mean())

10
4
1
2.5


In [29]:
# reshape() function
v = np.arange(1,21,1).reshape(5,4)
print(v.astype(float))

u = np.full(v.shape,1111)
print(u.astype(str))

[[ 1.  2.  3.  4.]
 [ 5.  6.  7.  8.]
 [ 9. 10. 11. 12.]
 [13. 14. 15. 16.]
 [17. 18. 19. 20.]]
[['1111' '1111' '1111' '1111']
 ['1111' '1111' '1111' '1111']
 ['1111' '1111' '1111' '1111']
 ['1111' '1111' '1111' '1111']
 ['1111' '1111' '1111' '1111']]


In [31]:
# indexing - slicing and iterating
v[:2,:4]

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [17]:
# Remember to use the copy() function when you need a separate array: indexing such as X[1] returns a view, so writing to it also changes the original (as shown below)
X_new = X[1]
X_new[0] = 9
print(X)
print(X_new)

[[1 2]
 [9 4]]
[9 4]


In [19]:
X_new = X.copy()
X_new[1,0] = 777
print(X)
print(X_new)

[[1 2]
 [9 4]]
[[  1   2]
 [777   4]]


In [34]:
X_indexed = [X[0,0], X[1,1], X[0,1]]
X_indexed  

[1, 4, 2]

In [37]:
# Boolean indexing 
print(X >= 2)
print(X[X>=2])

[[False  True]
 [ True  True]]
[2 9 4]


In [39]:
slie = np.array([[1,2,3,4],[5,6,7,8]])
slie[:2, 1:3]

array([[2, 3],
       [6, 7]])

In [41]:
slie[:, 1:4]

array([[2, 3, 4],
       [6, 7, 8]])

# Regular Expression for Pattern Matching

## Common RegEx built-in functions

In [1]:
# importing the re modules for using regular expressions in python
import re

In [3]:
text = "This is a very good day"
# search() looks for a match anywhere in the string and returns a Match object (truthy) for the first match, or None if there is no match
if re.search("good", text):
    print("Wonderful!")
else:
    print("Atlas :(")

Wonderful!


In [4]:
# Other functions work by segmenting the string (tokenizing): the string is divided into substrings based on a pattern, which is a common first step in NLP
text = "I'm Tien. Tien live positively. Tien is learning new things. Tien is proud of him"
# split() uses the pattern as a delimiter and returns the substrings found between its occurrences
re.split("Tien", text)

["I'm ",
 '. ',
 ' live positively. ',
 ' is learning new things. ',
 ' is proud of him']

In [5]:
# findall() returns a list of all non-overlapping occurrences of the pattern in the string
re.findall("Tien", text)

['Tien', 'Tien', 'Tien', 'Tien']

In [22]:
# Anchors: ^, $. Clustering with ()
 
# With a leading ^ anchor, search() behaves like match(): both can only match at the start of the string
re.search("^.+(Tien)", text)
re.match("^.+(Tien)", text)

<re.Match object; span=(0, 65), match="I'm Tien. Tien live positively. Tien is learning >

## Patterns and Character Classes

In [49]:
grades = "AA ABBCC DDAC BC"
re.findall('[AB]', grades)

['A', 'A', 'A', 'B', 'B', 'A', 'B']

In [50]:
print(re.findall('(A)(B|C)', grades))
print(re.findall('[A][BC]', grades))
print(re.findall('AB|AC', grades))

[('A', 'B'), ('A', 'C')]
['AB', 'AC']
['AB', 'AC']


In [51]:
# ^ has two meanings: at the start of a pattern it anchors to the beginning of the string; as the first character inside [...] it negates the character class
print(re.findall('^A+', grades)) # => anchor: one or more 'A' at the very start of the string
print(re.findall('[^A]', grades)) # => negated class: every single character that is not 'A'

['AA']
[' ', 'B', 'B', 'C', 'C', ' ', 'D', 'D', 'C', ' ', 'B', 'C']


In [52]:
print(re.match('^[^A]', grades))

None


## Quantifiers

In [55]:
re.findall('A{2,}', grades)

['AA']

In [71]:
# * => 0 or more
# + => 1 or more
# ? => 0 or 1 (placed after another quantifier, as in .+?, it makes that quantifier lazy)
text = '128njjfoaabc9184jjdjn8972pacnu22qnx'
# Raw string: \d is passed to the regex engine as-is instead of being an invalid string escape
re.findall(r"\d+[a-z].+?", text)

['128nj', '9184jj', '8972pa', '22qn']

## Groups

In [73]:
# In RegEx, we group with () to choose which parts of each match are returned;
# with several groups, findall() returns one tuple per match.
# Raw string: \d is passed to the regex engine as-is instead of being an invalid string escape
re.findall(r"(\d+)([a-z]).+?", text)

[('128', 'n'), ('9184', 'j'), ('8972', 'p'), ('22', 'q')]

In [78]:
# group() function
# finditer() returns an iterator of Match objects, one per non-overlapping match
# (raw string so that \d is not treated as an invalid string escape)
for item in re.finditer(r"(\d+)([a-z]).+?", text):
    print(item.groups())   # tuple of all captured groups
    print(item.group(2))   # the second captured group only

('128', 'n')
n
('9184', 'j')
j
('8972', 'p')
p
('22', 'q')
q


In [79]:
# The syntax (?P<name>...) names a group; groupdict() then returns a dictionary of name-value pairs
# (raw string so that \d is not treated as an invalid string escape)
for item in re.finditer(r"(?P<digits>\d+)(?P<letter>[a-z]).+?", text):
    print(item.groupdict())

{'digits': '128', 'letter': 'n'}
{'digits': '9184', 'letter': 'j'}
{'digits': '8972', 'letter': 'p'}
{'digits': '22', 'letter': 'q'}


In [88]:
# (?=...) is a lookahead assertion: the pattern inside must follow the match, but it is
# not consumed, so it does not appear in the matched text
# (raw string so that \d is not treated as an invalid string escape)
for item in re.finditer(r"(\d+)(?=[a-z].+?)", text):
    print(item)

<re.Match object; span=(0, 3), match='128'>
<re.Match object; span=(12, 16), match='9184'>
<re.Match object; span=(21, 25), match='8972'>
<re.Match object; span=(30, 32), match='22'>
