# Collections Module

### Counter
`Counter` is a `dict` subclass for counting hashable objects. Elements are stored as dictionary keys, and the number of times each element occurs is stored as the corresponding value.

In [1]:
from collections import Counter

In [2]:
lst = [1,2,2,2,2,3,3,3,1,2,1,12,3,2,32,1,21,1,223,1]

Counter(lst)

Counter({1: 6, 2: 6, 3: 4, 12: 1, 32: 1, 21: 1, 223: 1})

In [3]:
Counter('aabsbsbsbhshhbbsbs')

Counter({'b': 7, 's': 6, 'h': 3, 'a': 2})

In [4]:
s = 'How many times does each word show up in this sentence word times each each word'

words = s.split()

Counter(words)

Counter({'each': 3,
         'word': 3,
         'times': 2,
         'How': 1,
         'many': 1,
         'does': 1,
         'show': 1,
         'up': 1,
         'in': 1,
         'this': 1,
         'sentence': 1})

# Opening and Reading Files

In [5]:
# Create practice.txt (or truncate it if it already exists) and write to it.
# The with-block closes the file automatically, even if write() raises.
with open('practice.txt', 'w+') as f:
    f.write('test')

In [6]:
# Getting Directories
import os

In [7]:
pwd

'c:\\Users\\Estudos\\OneDrive\\TECH\\Python\\Udemy'

In [8]:
os.getcwdb()

b'c:\\Users\\Estudos\\OneDrive\\TECH\\Python\\Udemy'

In [9]:
os.listdir() # List the names of the entries in the current directory

['01 - Objects and Data Structures Assessment Test.ipynb',
 '02 - Statements (Guessing Game Challenge).ipynb',
 '02 - Statements Assessment Test.ipynb',
 '03 - Function Practice Exercises.ipynb',
 '03 - Functions and Methods Homework.ipynb',
 '04 - Exercises Warm Up Project.ipynb',
 '04 - Milestone Project 1 - TIC TAC TOE.ipynb',
 '05 - Object Oriented Programming Homework.ipynb',
 '06 - Object Oriented Programming Challenge.ipynb',
 '07 - Errors and Exceptions Homework.ipynb',
 '08 - Exercises 2 Warmup Project.ipynb',
 '08 - Milestone Project 2 - Blackjack.ipynb',
 '09 - Iterators and Generators Homework.ipynb',
 '10 - Advanced Python Modules.ipynb',
 'practice.txt',
 'unzip_me_for_instructions.zip']

In [10]:
os.listdir("C:\\Users")

['All Users',
 'Default',
 'Default User',
 'desktop.ini',
 'Estudos',
 'Nicolas Souza',
 'Public',
 'Todos os Usuários',
 'Usuário Padrão']

In [11]:
import shutil # Move files to different locations

In [12]:
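# Usage: shutil.move(src, dst) moves a file or directory to a new location.
# Left commented out because 'file' and 'path' are placeholders:
# shutil.move('file', 'path')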

# Datetime

In [13]:
import datetime

In [14]:
# Capture today's date once, then print several views of the same object.
today = datetime.date.today()
print(today)
for label, shown in (
    ('ctime:', today.ctime()),
    ('tuple:', today.timetuple()),
    ('ordinal:', today.toordinal()),
    ('Year :', today.year),
    ('Month:', today.month),
    ('Day  :', today.day),
):
    print(label, shown)

2024-05-25
ctime: Sat May 25 00:00:00 2024
tuple: time.struct_time(tm_year=2024, tm_mon=5, tm_mday=25, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=5, tm_yday=146, tm_isdst=-1)
ordinal: 739031
Year : 2024
Month: 5
Day  : 25


In [15]:
# Build a time-of-day value (no date attached) and print each of its fields.
t = datetime.time(hour=5, minute=25, second=1)
print(t)
for label, shown in (
    ('hour  :', t.hour),
    ('minute:', t.minute),
    ('second:', t.second),
    ('microsecond:', t.microsecond),
    ('tzinfo:', t.tzinfo),
):
    print(label, shown)

05:25:01
hour  : 5
minute: 25
second: 1
microsecond: 0
tzinfo: None


# Math

In [16]:
import math

In [17]:
value = 4.35
# Apply, in order: round down, round up, round to nearest (built-in round).
for rounding in (math.floor, math.ceil, round):
    print(rounding(value))

4
5
4


# Random

In [18]:
import random

In [19]:
mylist = list(range(0,20))

### Sample with Replacement
Draw a sample of a given size, allowing the same element to be picked more than once. Imagine a bag of numbered lottery balls: you reach in and grab a random ball, mark down its number, place it back in the bag, and then pick again.

In [20]:
random.choices(population=mylist,k=10)

[2, 8, 7, 9, 19, 6, 4, 4, 4, 19]

### Sample without Replacement
Once an item has been randomly picked, it can't be picked again. Imagine a bag of numbered lottery balls: you reach in and grab a random ball, mark down its number, leave it out of the bag, and then pick again.

In [21]:
random.sample(population=mylist,k=10)

[19, 15, 6, 1, 7, 17, 4, 11, 0, 14]

# Regular Expressions

In [22]:
import re

In [23]:
text = "The person's phone number is 408-555-1234. Call soon!"

In [24]:
pattern = 'phone'
re.search(pattern,text)

<re.Match object; span=(13, 18), match='phone'>

In [25]:
matches = re.findall("phone",text)
matches

['phone']

In [26]:
text2 = "My telephone number is 408-555-1234"
phone = re.search(r'\d\d\d-\d\d\d-\d\d\d\d',text2)
# group() with no argument (same as group(0)) returns the entire matched text.
# group(1), group(2), ... select parenthesised capture groups; this pattern
# defines none, so those indices are not available here.
phone.group()

'408-555-1234'

In [27]:
re.search(r'\d{3}-\d{3}-\d{4}',text2).group()

'408-555-1234'

### Starts with and Ends With
We can use the ^ to signal starts with, and the $ to signal ends with:

In [28]:
# Ends with a number
re.findall(r'\d$','This ends with a number 2')

['2']

In [29]:
# Starts with a number
re.findall(r'^\d','1a is the loneliest number.')

['1']

### Exclusion
To exclude characters, place the ^ symbol immediately after the opening bracket of a character set, as in [^...]. The set then matches any character that is NOT listed inside the brackets. For example:

In [30]:
phrase = "there are 3 numbers 34 inside 5 this sentence."
# [^\d] matches one non-digit character; the + extends each match over a run
# of consecutive non-digits, so whole chunks come back instead of single characters.
re.findall(r'[^\d]+',phrase)

['there are ', ' numbers ', ' inside ', ' this sentence.']

In [31]:
# Remove punctuation: collect the runs of characters that are not '!', '.',
# '?' or a space, then re-join those runs with single spaces.
test_phrase = 'This is a string! But it has punctuation. How can we remove it?'
# findall is called once; a bare call placed before this line would have its
# result discarded, because only a cell's last expression is displayed.
clean = ' '.join(re.findall('[^!.? ]+',test_phrase))
clean

'This is a string But it has punctuation How can we remove it'

## Timing your code

In [32]:
import time

In [33]:
def func_one(n):
    """Return a list holding str(num) for every num in range(n), built with a list comprehension."""
    return [str(num) for num in range(n)]

In [34]:
# Time a single call. perf_counter() is a monotonic, high-resolution clock
# intended for measuring intervals; time.time() is wall-clock time and can
# jump if the system clock is adjusted.
start_time = time.perf_counter()
result = func_one(1000000)
end_time = time.perf_counter() - start_time  # despite the name, this is the elapsed duration in seconds
end_time

0.64198899269104

In [35]:
def func_two(n):
    """Return a list holding str(num) for every num in range(n), built with map()."""
    return list(map(str,range(n)))

In [36]:
# Time a single call. perf_counter() is a monotonic, high-resolution clock
# intended for measuring intervals; time.time() is wall-clock time and can
# jump if the system clock is adjusted.
start_time = time.perf_counter()
result = func_two(1000000)
end_time = time.perf_counter() - start_time  # despite the name, this is the elapsed duration in seconds
end_time

0.5680272579193115

In [37]:
import timeit

In [38]:
%%timeit
func_one(1000)

477 µs ± 31 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [39]:
%%timeit
func_two(1000)

461 µs ± 30.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# Unzipping and Zipping Files

In [40]:
import shutil

In [41]:
# Extract the archive into the current working directory (the default target
# when extract_dir is omitted). The archive format is passed by keyword so it
# cannot land in the wrong positional slot.
shutil.unpack_archive('unzip_me_for_instructions.zip', format='zip')

In [None]:
# Build the path with os.path.join so the separator is correct on every OS,
# not only on Windows.
with open(os.path.join("extracted_content", "Instructions.txt")) as f:
    print(f.read())


Good work on unzipping the file!
You should now see 5 folders, each with a lot of random .txt files.
Within one of these text files is a telephone number formated ###-###-#### 
Use the Python os module and regular expressions to iterate through each file, open it, and search for a telephone number.
Good luck!


In [None]:
import re
pattern = r'\d{3}-\d{3}-\d{4}'

In [None]:
test_string = 'Here is a test phone number 123-123-4444'
re.findall(pattern, test_string)

['123-123-4444']

In [None]:
def search(file,pattern= r'\d{3}-\d{3}-\d{4}'):
    """Return the first match of ``pattern`` in the text of ``file``.

    Parameters
    ----------
    file : path of the text file to read.
    pattern : regular expression to look for; the default matches a
        ###-###-#### style phone number.

    Returns
    -------
    The ``re.Match`` object for the first occurrence, or the empty string
    ``''`` when the pattern does not occur in the file.
    """
    # The with-block closes the handle as soon as the text has been read.
    with open(file, 'r') as f:
        text = f.read()

    # Search once and reuse the result instead of scanning the text twice.
    match = re.search(pattern, text)
    return match if match else ''

In [None]:
import os

In [None]:
# Walk every folder below extracted_content and run search() on each file,
# collecting one result per file. os.path.join supplies the correct path
# separator for the host OS instead of a hard-coded backslash.
results = []
for folder, sub_folders, files in os.walk(os.path.join(os.getcwd(), "extracted_content")):
    for f in files:
        full_path = os.path.join(folder, f)
        results.append(search(full_path))

In [None]:
# Show the matched text for every file that produced a hit.
for found in results:
    if found == '':
        continue  # '' marks a file in which nothing matched
    print(found.group())