# Chapter 3: Built-In Data Structures, Functions, and Files

## 3.1 Data Structures and Sequences

### Tuple

In [1]:
tup = (1, 2, 3)
type(tup)

tuple

In [3]:
tup = 4, 5, 6
type(tup)

tuple

In [68]:
list_no = [3, 6, 9]
tuple(list_no)

(3, 6, 9)

In [10]:
tup_str = tuple("hello_world")

In [12]:
tup_str[:4]

('h', 'e', 'l', 'l')

In [14]:
nested_tup = (4, 5, 6), (7, 8)
nested_tup

((4, 5, 6), (7, 8))

In [15]:
nested_tup[0]

(4, 5, 6)

In [16]:
nested_tup[1]

(7, 8)

In [17]:
tup = tuple(["foo", [1, 2], True])
tup[2] = False

TypeError: 'tuple' object does not support item assignment

In [20]:
tup[1].append(3)
tup

('foo', [1, 2, 3], True)

In [21]:
tup + (False, [7, 8]) + ("bar",)

('foo', [1, 2, 3], True, False, [7, 8], 'bar')

In [25]:
tup * 3

('foo',
 [1, 2, 3],
 True,
 'foo',
 [1, 2, 3],
 True,
 'foo',
 [1, 2, 3],
 True)

In [26]:
tup = [1, 2, 3]
a, b, c = tup

In [28]:
b

2

In [29]:
# swapping the values of two variables

a, b = 1, 2
a, b = b, a

In [36]:
# A common use of variable unpacking is iterating over sequences of tuples or lists:

seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]

for a, b, c in seq:
    print(f"a={a}, b={b}, c={c}")

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [53]:
# "pluck" a few elements from the beginning

values = 1, 2, 3, 4, 5, 6
a, b, *rest = values

In [54]:
a, b

(1, 2)

In [55]:
rest

[3, 4, 5, 6]

In [57]:
# tuple methods:

a = (1, 2, 2, 2, 3, 4, 2)

a.count(2)

4

In [59]:
a.index(3)

4

### List

In [72]:
a_list = [2, 3, 4, None]

tup = ("wu", "shang", "clan")

b_list = list(tup)

In [77]:
# Mutable

b_list[1] = "chang"

In [78]:
b_list

['wu', 'chang', 'clan']

In [82]:
# The list built-in function is frequently used in data processing as a way to materialize an iterator or generator expression:

gen = range(10)
list(gen)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [84]:
b_list.append("teri")

In [89]:
b_list

['wu', 'chang', 'clan', 'tera', 'baap']

In [86]:
b_list.insert(4, "baap")

In [88]:
b_list[3] = "tera"

In [91]:
b_list.pop(4)

'baap'

In [92]:
b_list

['wu', 'chang', 'clan', 'tera']

In [93]:
b_list.remove("tera")

In [94]:
b_list

['wu', 'chang', 'clan']

In [98]:
# slower than dict and sets

"chang" in b_list

True

In [99]:
"chang" not in b_list

False

In [100]:
b_list.extend([7, 14, (49, 56)])

In [114]:
b_list

['chang', 'clan', 'wu']

In [None]:
# preferable to concatenating (everything = everything + chunk), which builds a new list each time

everything = []
for chunk in list_of_lists:
    everything.extend(chunk)

In [137]:
a = [7, 21, 49, 35, 56, 14, 28, 42]

a.sort()
a

[7, 14, 21, 28, 35, 42, 49, 56]

In [117]:
# sorts a collection of strings by their lengths

b_list.sort(key=len)

In [118]:
b_list

['wu', 'clan', 'chang']

In [128]:
# list[start:stop]

a[1:5]

[14, 21, 28, 35]

In [133]:
a[3:5] = [63, 70]

In [138]:
a

[7, 14, 21, 28, 35, 42, 49, 56]

In [140]:
a[1:8:2]

[14, 28, 42, 56]

In [154]:
# starts from 5th idx

a[5:]

[42, 49, 56]

In [155]:
# stops before index 3 (returns the elements at indices 0, 1 and 2)

a[:3]

[7, 14, 21]

In [161]:
a[-4:-1]

[35, 42, 49]

In [164]:
a[:-1]

[7, 14, 21, 28, 35, 42, 49]

In [166]:
a[1::2]

[14, 28, 42, 56]

In [169]:
# reversing the list

a[::-1]

[56, 49, 42, 35, 28, 21, 14, 7]

### Dictionary

In [170]:
empty_dict = {}

In [174]:
d1 = {"a": "some value", "b": [1, 2, 3]}
d1

{'a': 'some value', 'b': [1, 2, 3]}

In [175]:
# same as list for mutating and accessing

d1[7] = "an integer"

In [177]:
d1["c"] = "a string"

In [180]:
d1["b"]

[1, 2, 3]

In [185]:
# same `in` syntax as for a list or tuple, but on a dict it checks the keys (not the values)

"c" in d1

True

In [194]:
# adding two key-value pairs that will be deleted in the next cells

d1[5] = "some value"
d1["dummy"] = "another value"

In [195]:
d1

{'a': 'some value',
 'b': [1, 2, 3],
 7: 'an integer',
 'c': 'a string',
 5: 'some value',
 'dummy': 'another value'}

In [196]:
del d1[5]

In [197]:
d1.pop("dummy")

'another value'

In [199]:
list(d1.keys())

['a', 'b', 7, 'c']

In [201]:
list(d1.values())

['some value', [1, 2, 3], 'an integer', 'a string']

In [204]:
list(d1.items())

[('a', 'some value'), ('b', [1, 2, 3]), (7, 'an integer'), ('c', 'a string')]

In [205]:
list(d1.items())[2]

(7, 'an integer')

In [206]:
d1.update({"wu": "shang clan", "d": "an alphabet"})

In [207]:
d1

{'a': 'some value',
 'b': [1, 2, 3],
 7: 'an integer',
 'c': 'a string',
 'wu': 'shang clan',
 'd': 'an alphabet'}

In [None]:
# two sequences(lists) that you want to pair up element-wise in a dictionary

# og way:
for key, value in zip(key_list, value_list):
    mapping[key] = value

In [217]:
# dict way:
tuples = zip(range(5), reversed(range(5)))
tuples

<zip at 0x1a6f3cda640>

In [218]:
# the dict function accepts a list of 2-tuples:

dict(tuples)

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

In [228]:
# categorizing a list of words by their first letters as a dictionary of lists:

words = ["apple", "bat", "bar", "atom", "book", "ass"]

by_letter = {}

for word in words:
    letter = word[0]
    by_letter.setdefault(letter, []).append(word)

In [229]:
by_letter

{'a': ['apple', 'atom', 'ass'], 'b': ['bat', 'bar', 'book']}

In [235]:
# collections module way:
from collections import defaultdict

by_letter = defaultdict(list)

for word in words:
    by_letter[word[0]].append(word)

In [238]:
by_letter

defaultdict(list, {'a': ['apple', 'atom', 'ass'], 'b': ['bat', 'bar', 'book']})

In [239]:
# hash function:

hash("string")

-7291658651132774742

In [241]:
hash((1, 2, 3, (4, 5)))

-6766371705598827417

In [242]:
hash((1, 2, 3, [4, 5]))

TypeError: unhashable type: 'list'

In [243]:
# making list as a key:

d = {}

d[tuple([1, 2, 3])] = 4

In [244]:
d

{(1, 2, 3): 4}

### Set

In [1]:
# One way

set([1, 2, 3, 3, 2, 4, 4, 5, 6, 6])

{1, 2, 3, 4, 5, 6}

In [2]:
# Another way

{2, 2, 1, 2, 3, 5, 4, 5}

{1, 2, 3, 4, 5}

In [18]:
# The union of these two sets is the set of distinct elements occurring in either set:

a = {1, 2, 3, 4, 5, 6}
b = {3, 4, 5, 6, 7, 8}

In [7]:
a.union(b)

{1, 2, 3, 4, 5, 6, 7, 8}

In [9]:
# Another way

a | b

{1, 2, 3, 4, 5, 6, 7, 8}

In [10]:
# elements occurring in both sets:

a.intersection(b)

{3, 4, 5, 6}

In [11]:
a & b

{3, 4, 5, 6}

In [16]:
a.intersection_update(b)

In [17]:
a

{3, 4, 5, 6}

In [19]:
c = a.copy()

In [26]:
# union
c |= b

In [22]:
c

{1, 2, 3, 4, 5, 6, 7, 8}

In [27]:
# intersection
d = a.copy()
d &= b

In [28]:
d

{3, 4, 5, 6}

In [34]:
# converting a list into tuple for storing it in a set:

my_list = [1, 2, 3, 4]

my_set = {tuple(my_list)}

In [46]:
a_set = {1, 2, 3, 4, 5}

In [47]:
# is contained in or not:

{1, 2, 3}.issubset(a_set)

True

In [48]:
# contains all the elements or not:

a_set.issuperset({1, 2, 3})

True

In [53]:
# Sets are equal if and only if their contents are equal:

{1, 2, 3, 2} == {3, 2, 1, 1}

True

### Built-In Sequence Functions

#### enumerate

In [58]:
# when you want to keep track of the index along with the values (index, values):

collection = [0, 7, 14, 21, 28, 35, 42, 49]

for index, value in enumerate(collection):
    print(index, value)

0 0
1 7
2 14
3 21
4 28
5 35
6 42
7 49


#### sorted

In [59]:
# The sorted function returns a new sorted list from the elements of any sequence:

sorted("race_car")

['_', 'a', 'a', 'c', 'c', 'e', 'r', 'r']

In [60]:
sorted([11, 55, 22, 44, 33])

[11, 22, 33, 44, 55]

#### zip

In [70]:
# pairs up the elements of several sequences, position by position, into tuples:

seq1 = [1, 2, 3, 4, 5]
seq2 = ["one", "two", "three", "four", "five"]
seq3 = ["I", "II", "III", "IV", "V"]
seq4 = ["ek", "do", "teen", "chaar"]

zipped = zip(seq1, seq2, seq3)

In [71]:
list(zipped)

[(1, 'one', 'I'),
 (2, 'two', 'II'),
 (3, 'three', 'III'),
 (4, 'four', 'IV'),
 (5, 'five', 'V')]

In [72]:
# The result is determined by the shortest sequence:

list(zip(seq1, seq2, seq3, seq4))

[(1, 'one', 'I', 'ek'),
 (2, 'two', 'II', 'do'),
 (3, 'three', 'III', 'teen'),
 (4, 'four', 'IV', 'chaar')]

In [75]:
# Iterating over multiple sequences with enumerate:

for index, (a,b,c,d) in enumerate(zip(seq1, seq2, seq3, seq4)):
    print(f"{index}: {a, b, c, d}")

0: (1, 'one', 'I', 'ek')
1: (2, 'two', 'II', 'do')
2: (3, 'three', 'III', 'teen')
3: (4, 'four', 'IV', 'chaar')


#### reversed

In [85]:
# Iterates over the elements of a sequence in reverse order:

list(reversed(range(2, 21, 2)))

[20, 18, 16, 14, 12, 10, 8, 6, 4, 2]

### List, Set, and Dictionary Comprehensions

In [105]:
strings = ["a", "as", "bat", "car", "dove", "python"]

[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [20]:
# List of squares from 1 to 10:

[x*x for x in range(1, 11)]

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [18]:
# List of even numbers from 0 to 20:

[x for x in range(21) if x % 2 == 0]

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

In [23]:
# Convert to uppercase:

words = ["hello", "world", 'python']

[x.upper() for x in words]

['HELLO', 'WORLD', 'PYTHON']

In [27]:
# Extract digits from a string:

text = "a1b2c3d4"

[x for x in text if x.isdigit()]

['1', '2', '3', '4']

In [46]:
# Get lengths of each word in a sentence:

sentence = "List comprehensions are powerful"

[len(x) for x in sentence.split()]

[4, 14, 3, 8]

In [55]:
# Filter out vowels from a string:

string = "beaUtiFul"

vowels = "aeiou"

[x for x in string if x.lower() not in vowels]

['b', 't', 'F', 'l']

In [58]:
# Flatten a matrix using list comprehension:

matrix = [[1, 2], [3, 4], [5, 6]]

[x for sublist in matrix for x in sublist]

[1, 2, 3, 4, 5, 6]

In [69]:
# Numbers from 1 to 100 that are divisible by 3 and 5:

[x for x in range(1, 101) if x % 3 == 0 and x % 5 == 0]

[15, 30, 45, 60, 75, 90]

In [85]:
# Get words that start with a vowel:

words = ["apple", "banana", "orange", "umbrella", "grape"]

[x for x in words if x[0].lower() in vowels]

['apple', 'orange', 'umbrella']

In [93]:
# Create a dictionary using a dict comprehension:

items = ['a', 'b', 'c']

{x: x.upper() for x in items}

{'a': 'A', 'b': 'B', 'c': 'C'}

In [103]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]

# Creating flattened list:
numbers = [x for numbers in some_tuples for x in numbers]

# Creating nested_list:
[[x for x in numbers] for numbers in some_tuples]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [104]:
# Extracting names starting with vowels from a nested list:

data = [["John", "Emily", "Michael", "Mary", "Steven"],
       ["Maria", "Juan", "Javier", "Natalia", "Pilar"]]

[name for names in data for name in names if name[0].lower() in vowels]

['Emily']

## 3.2 Functions

In [106]:
def add(x, y):
    """Return the result of ``x + y``."""
    result = x + y
    return result

In [107]:
add(34, 35)

69

In [108]:
def func_without_return(x):
    """Print ``x`` and return nothing, so a call evaluates to ``None``."""
    print(x)
    return None

In [109]:
func_without_return("hello_world")

hello_world


In [110]:
print(func_without_return("hello_world"))

hello_world
None


### Positional arguments and Keyword arguments

In [111]:
def my_function(x, y, z=1.5):
    """Combine the sum ``x + y`` with ``z``.

    ``x`` and ``y`` are positional arguments; ``z`` is a keyword argument
    with a default of 1.5 (it can also be passed positionally).

    Returns ``z * (x + y)`` when ``z > 1``, otherwise ``z / (x + y)``.
    """
    # Guard clause for the "z is not greater than 1" case.
    if not z > 1:
        return z / (x + y)
    return z * (x + y)

In [118]:
my_function(12, 13, z=1.2)

30.0

In [119]:
my_function(14, 23, 0.5)

0.013513513513513514

In [120]:
my_function(34, 35)

103.5

### Namespaces

In [6]:
# The local namespace is created when the function is called and is immediately populated by the function’s arguments. 
# After the function is finished, the local namespace is destroyed.

def func():
    """Build a list of 0..4 in the function's local namespace.

    ``a`` is local: nothing is returned and no outer variable is touched.
    """
    a = [i for i in range(5)]

func()

In [20]:
# Each call to func will modify list a if it's outside the function:

a = []

def func():
    """Append the integers 0..4 to the module-level list ``a``.

    ``a`` is not assigned inside the function, so the name resolves to the
    enclosing (global) list and every call mutates it in place.
    """
    a.extend(range(5))

func()

func()

a

[0, 1, 2, 3, 4, 0, 1, 2, 3, 4]

### Returning Multiple Values

In [33]:
def f():
    """Return three values at once; they are packed into a single tuple."""
    a, b, c = 14, 28, 42
    return (a, b, c)

In [28]:
x, y, z = f()
x, y, z

(14, 28, 42)

In [29]:
return_value = f()
return_value

(14, 28, 42)

In [30]:
# Alternative in a dict form:

def f():
    """Return the three values as a dict keyed by ``"a"``, ``"b"`` and ``"c"``."""
    a, b, c = 7, 14, 21
    return {"a": a, "b": b, "c": c}

In [31]:
f()

{'a': 7, 'b': 14, 'c': 21}

### Functions Are Objects

In [35]:
states = ["   Alabama ", "Georgia!", "Georgia", "georgia", "FlOrIda", "south   carolina##", "West virginia?"]

In [82]:
def clean_strings(strings):
    """Return a new list with each string trimmed and title-cased.

    For every item, any run of the characters space, ``[``, ``!``, ``#``,
    ``?`` and ``]`` is stripped from both ends (characters in the middle,
    including repeated spaces, are left alone), then ``str.title`` capitalizes
    the first letter of each word and lowercases the rest.
    """
    return [raw.strip(" [!#?]").title() for raw in strings]

In [83]:
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

### Anonymous (Lambda) Functions

In [99]:
add_ten = lambda x: x + 10

add_ten(59)

69

In [98]:
multiply_nos = lambda x, y: x * y

multiply_nos(10,20)

200

In [100]:
square = lambda x: x*x

square(7)

49

In [111]:
nums = [1, 2, 3, 4, 5, 6, 7, 8]

list(filter(lambda x: x % 2 == 0, nums))

[2, 4, 6, 8]

In [129]:
pairs = [(1, 3), (3, 2), (5, 1)]

pairs.sort(key=lambda x: x[1])
pairs

[(5, 1), (3, 2), (1, 3)]

In [131]:
celsius = [0, 10, 20, 30]

list(map(lambda c: c*(9/5) + 32, celsius))

[32.0, 50.0, 68.0, 86.0]

In [133]:
s = "ab1c3d4"

list(filter(lambda x: x.isdigit(), s))

['1', '3', '4']

In [135]:
words = ['madam', 'racecar', 'python', 'noon']

list(filter(lambda x: x == x[::-1], words))

['madam', 'racecar', 'noon']

In [156]:
students = [
    {"name": "Alice", "score": 88},
    {"name": "Bob", "score": 95},
    {"name": "Charlie", "score": 91}
]

max(students, key=lambda x: x["score"])

{'name': 'Bob', 'score': 95}

In [158]:
odd_even = lambda x: "Even" if x % 2 == 0 else "Odd"

In [161]:
odd_even(11)

'Odd'

In [163]:
strings = ["foo", "card", "bar", "aaaa", "abab"]

strings.sort(key=lambda x: len(set(x)))

In [164]:
strings

['aaaa', 'foo', 'abab', 'bar', 'card']

### Generators

#### iter()

In [14]:
# Iterating over a dictionary yields the dictionary keys:

some_dict = {"a": 1, "b": 2, "c": 3}

for key in some_dict:
    print(key)

a
b
c


In [15]:
# The Python interpreter first creates an iterator out of some_dict:

list(iter(some_dict))

['a', 'b', 'c']

In [20]:
list(iter("hello_world"))

['h', 'e', 'l', 'l', 'o', '_', 'w', 'o', 'r', 'l', 'd']

#### Generator expressions

In [41]:
# To create one, enclose what would otherwise be a list comprehension within parentheses instead of brackets:
# Generator expressions can be used instead of list comprehensions

list(x*x for x in range(1, 11))

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [39]:
dict((x, x*x*x) for x in range(1, 21))

{1: 1,
 2: 8,
 3: 27,
 4: 64,
 5: 125,
 6: 216,
 7: 343,
 8: 512,
 9: 729,
 10: 1000,
 11: 1331,
 12: 1728,
 13: 2197,
 14: 2744,
 15: 3375,
 16: 4096,
 17: 4913,
 18: 5832,
 19: 6859,
 20: 8000}

#### itertools module

In [61]:
import itertools

names = ["Alan", "Adam", "Wes", "Will", "Albert", "Steven", "Jyoti", "Jatin", "Sachin"]
# groupby only groups *consecutive* items with the same key, so sort first.
names.sort()

# Use a separate loop variable for each group: reusing `names` here would
# rebind the list to an exhausted group iterator and break re-running the cell.
for letter, group in itertools.groupby(names, key=lambda x: x[0]):
    print(f"{letter}: {list(group)}")     # group is a lazy iterator

A: ['Adam', 'Alan', 'Albert']
J: ['Jatin', 'Jyoti']
S: ['Sachin', 'Steven']
W: ['Wes', 'Will']


### Errors and Exception Handling

In [78]:
# Returns float(x); if x cannot be parsed as a number (ValueError), returns x unchanged.
# You might want to suppress only ValueError, since a TypeError might indicate a legitimate bug in your program.
# The code in the except part will only be executed if float(x) raises an exception:

def attempt_float(x):
    """Convert ``x`` to ``float`` when possible, otherwise return ``x`` as is.

    Only ``ValueError`` is handled; any other exception raised by
    ``float(x)`` propagates to the caller.
    """
    try:
        converted = float(x)
    except ValueError:     # several types can be caught with a tuple: (ValueError, TypeError)
        return x
    return converted

In [68]:
attempt_float("21")

21.0

In [76]:
attempt_float("hello_world")

'hello_world'

In [75]:
attempt_float((1,2))

TypeError: float() argument must be a string or a real number, not 'tuple'

In [None]:
# When you want some code to run whether or not the try block succeeds, use finally (the exception itself is not suppressed):

f = open(path, mode="w")

try:
    write_to_file(f)
finally:
    f.close()     # file f will always get closed

In [None]:
# Use else for code that should run only if the try block raised no exception;
# the finally block still runs in every case:

f = open(path, mode="w")

try:
    write_to_file(f)
except Exception:     # not a bare except, which would also trap KeyboardInterrupt/SystemExit
    print("failed")
else:
    print("succeeded")
finally:
    f.close()

## 3.3 Files and the Operating System

In [96]:
f = open("project_report.txt", encoding="utf-8")

In [98]:
for line in f:
    print(line)

f.close()

In [99]:
# An EOL-free list of lines in a file:

# Bind the handle to a name so that close() acts on the file opened here;
# opening it inside the comprehension would leave it unclosed.
f = open("project_report.txt", encoding="utf-8")
try:
    lines = [x.rstrip() for x in f]
finally:
    f.close()

In [1]:
# This will automatically close the file f when exiting the with block:

with open("project_report.txt", encoding="utf-8") as f:
    lines = [line.rstrip() for line in f]

In [2]:
# Commonly used methods:

f1 = open("star_wars.txt")

f2 = open("star_wars.txt", mode="rb")

In [3]:
# The "read" method advances the file object position by the number of bytes read. "tell" gives you the current pointer position:

f1.read()

'may the force be with you'

In [227]:
f2.read()

b'may \r\nthe \r\nforce \r\nbe \r\nwith \r\nyou'

In [6]:
f1.tell()

10

In [228]:
f2.tell()

35

In [229]:
# Checking the default encoder:

import sys

sys.getdefaultencoding()

'utf-8'

In [5]:
# "seek" changes the file position to the indicated byte in the file:

f1.seek(10)

10

In [244]:
f1.read(2)

'\nf'

In [245]:
f1.tell()

13

In [7]:
# Closing the files:

f1.close()
f2.close()

In [256]:
# Writing:

# Both files are managed by the with statement so the source handle is closed
# too; lines of length 1 (just a newline) are skipped.
with open("new_file.txt", "w") as handle, open("star_wars.txt") as source:
    handle.writelines(x for x in source if len(x) > 1)

with open("new_file.txt") as f:
    lines = f.readlines()

lines

['may \n', 'the \n', 'force \n', 'be \n', 'with \n', 'you']

### Bytes and Unicode with Files

In [270]:
# The default behavior for Python files is text mode (Python string i.e. the Unicode):
# This contrasts with binary mode, which you can obtain by appending b to the file mode:

with open("star_wars.txt") as f:
    chars = f.read(25)

len(chars)

25

In [272]:
chars

'may the force be with you'

In [279]:
# In "rb" (binary) mode, read requests that exact number of bytes:

with open("star_wars.txt", "rb") as f:
    data = f.read(25)

data

b'may the force be with you'

In [286]:
# decoding the bytes to a str object (possible only if each of the encoded Unicode characters is fully formed):

data.decode("utf-8")

'may the force be with you'

In [289]:
data[8:13].decode("utf-8")

'force'

In [304]:
f = open("star_wars.txt")

In [305]:
# Beware using seek when opening files in any mode other than binary:

f.read(5)

'may t'

In [306]:
f.seek(4)

4

In [307]:
f.read(1)

't'