# Data Types in Python

## 1 Fundamental Sequence Data Types

### Introduction and Lists

In [None]:
''' 
Lists are mutable, and their elements are usually homogeneous and are accessed by iterating over the list.
'''

In [1]:
# lists
cookies = ['chocolate chip', 'peanut butter', 'sugar']
print(cookies)
print(cookies[0])

# combine lists
cookies = cookies + ['gingersnap', 'shortbread']
print(cookies)

# append to list
cookies.append('biscotti')
print(cookies)

# using extend
more_cookies = ['cinnamon', 'chocolate']
cookies.extend(more_cookies)
print(cookies)

# insert into list
cookies.insert(0, 'biscotti')
print(cookies)

# remove from list
cookies.remove('biscotti')
print(cookies)

# pop from list
cookies.pop()
print(cookies)

# finding elements using index
print(cookies.index('sugar'))

# iterate over list
for cookie in cookies:
    print(cookie)

# sorting list
cookies.sort()
print(cookies)


['chocolate chip', 'peanut butter', 'sugar']
chocolate chip
['chocolate chip', 'peanut butter', 'sugar', 'gingersnap', 'shortbread']
['chocolate chip', 'peanut butter', 'sugar', 'gingersnap', 'shortbread', 'biscotti']
['chocolate chip', 'peanut butter', 'sugar', 'gingersnap', 'shortbread', 'biscotti', 'cinnamon', 'chocolate']
['biscotti', 'chocolate chip', 'peanut butter', 'sugar', 'gingersnap', 'shortbread', 'biscotti', 'cinnamon', 'chocolate']
['chocolate chip', 'peanut butter', 'sugar', 'gingersnap', 'shortbread', 'biscotti', 'cinnamon', 'chocolate']
['chocolate chip', 'peanut butter', 'sugar', 'gingersnap', 'shortbread', 'biscotti', 'cinnamon']
2
chocolate chip
peanut butter
sugar
gingersnap
shortbread
biscotti
cinnamon
['biscotti', 'chocolate chip', 'cinnamon', 'gingersnap', 'peanut butter', 'shortbread', 'sugar']


### Meet the tuples

In [2]:
''' Tuples are immutable, and usually contain a heterogeneous sequence of elements that are accessed via unpacking or indexing. '''
# tuples
cookies = ('chocolate chip', 'peanut butter', 'sugar')
print(cookies)

# unpacking
cookie1, cookie2, cookie3 = cookies

# zip creates a list of tuples by zipping two lists
us_cookies = ['chocolate chip', 'peanut butter', 'sugar']
uk_cookies = ['hobnob', 'digestive', 'rich tea']
cookies = list(zip(us_cookies, uk_cookies))
print(cookies)

# enumerate provides the index and value of an iterable as a tuple
for index, cookie in enumerate(cookies):
    print(index, cookie)

# tuples are made using (), zip(), or enumerate()

('chocolate chip', 'peanut butter', 'sugar')
[('chocolate chip', 'hobnob'), ('peanut butter', 'digestive'), ('sugar', 'rich tea')]
0 ('chocolate chip', 'hobnob')
1 ('peanut butter', 'digestive')
2 ('sugar', 'rich tea')


### Strings

In [4]:
# Creating formatted strings
cookie_name = 'chocolate chip'
cookie_price = 2

# old way
print('The %s cookie costs %d dollars.' % (cookie_name, cookie_price))

# new way - python 3.6+ 
print('The {} cookie costs {} dollars.'.format(cookie_name, cookie_price))

# f-string - python 3.6+ but looks cleaner and better
print(f'The {cookie_name} cookie costs {cookie_price} dollars.')

The chocolate chip cookie costs 2 dollars.
The chocolate chip cookie costs 2 dollars.
The chocolate chip cookie costs 2 dollars.


In [9]:
# join strings
cookies = ['chocolate chip', 'peanut butter', 'sugar']
print(', '.join(cookies))

# matching parts of a string
boy_names = ['james', 'john', 'robert']
print([name for name in boy_names if name.startswith('j')])

# Searching for things in strings
'long' in 'Life is a long journey', 'long'  in 'Life is a Long journey'

chocolate chip, peanut butter, sugar
['james', 'john']


(True, False)

In [7]:
# an approach to being case insensitive
'long' in 'Life is a long journey'.lower()

True

## 2 Dictionaries - The Root of Python

In [11]:
# Dictionaries are mutable, unordered collections of key-value pairs. {} or dict() can be used to create dictionaries.

art_galleries = {}
for name, zip_code in [('Gagosian', '10007'), ('David Zwirner', '10011')]:
    art_galleries[name] = zip_code
print(art_galleries)

# safely finding the dictionary key
print(art_galleries.get('Pace', 'No gallery found'))

{'Gagosian': '10007', 'David Zwirner': '10011'}
No gallery found


### Altering Dictonaries

In [13]:
# update dictionary
art_galleries.update({'Pace': '10025'})
print(art_galleries)

# pop from dictionary
art_galleries.pop('Pace')
print(art_galleries)

# delete from dictionary
del art_galleries['David Zwirner']
print(art_galleries)

{'Gagosian': '10007', 'David Zwirner': '10011', 'Pace': '10025'}
{'Gagosian': '10007', 'David Zwirner': '10011'}
{'Gagosian': '10007'}


### Pythonically using dictionaries

In [None]:
# Working with dictionaries more pythonically
art_galleries = {'Gagosian': '10007', 'David Zwirner': '10011'}
for name, zip_code in art_galleries.items():
    print(name, zip_code)

# in operator
print('Gagosian' in art_galleries)

### Mixed data types in dictionaries

In [14]:
# Working with nested dictionaries
art_galleries = {'Gagosian': {'zip': '10007', 'neighborhood': 'Tribeca'}, 'David Zwirner': {'zip': '10011', 'neighborhood': 'Chelsea'}}
print(art_galleries['Gagosian']['neighborhood'])

Tribeca


## 3 Numeric Data Types, Booleans, and Sets

### Built-in Numeric Types

In [21]:
# Integer - whole numbers and Large numbers
# Float - decimal numbers and scientific notation
int(123456789123456789), float(123456789123456789)

(123456789123456789, 1.2345678912345678e+17)

In [24]:
# Decimal - floating point arithmetic, Excat decimal representation, Currency calculations
from decimal import Decimal
Decimal('10.1') + Decimal('0.2')

# printing Floats
print(0.000001)
print(f"{0.0000011:f}")
print(f"{0.0000001:.7f}")

1e-06
0.000001
0.0000001


In [25]:
# Python division types
print(5/2, 5//2) # float division

2.5 2


In [15]:
float1 = 0.0001
float2 = 1e-05
float3 = 1e-07

# Print floats 1, 2, and 3
print(float1)
print(float2)
print(float3)

# Print floats 2 and 3 using the f string formatter
print(f"{float2:f}")
print(f"{float3:f}")

# Print float 3 with a 7 f string precision
print(f"{float3:.7f}")

0.0001
1e-05
1e-07
0.000010
0.000000
0.0000001


In [16]:
# Print the result of 2/1 and 1/2
print(2/1)
print(1/2)

# Print the floored division result of 2//1 and 1//2
import math
print(math.floor(2//1))
print(math.floor(1//2))

# Print the type of 2/1 and 2//1
print(type(2/1))
print(type(2//1))

2.0
0.5
2
0
<class 'float'>
<class 'int'>


### Booleans - the logical data type

In [26]:
# Booleans as a data type
out_of_cookies = True
if out_of_cookies:
    print('Get more cookies!')

Get more cookies!


In [19]:
# Boolean Data Type
out_of_cookies = True
if out_of_cookies:
    print('Get more cookies!')

# Truthy and Falsy
print(bool(0))
print(bool(1))
# any non-zero number is truthy, any zero number is falsy

# Boolean evaluation
print(bool(''))
print(bool('I have cookies'))


Get more cookies!
False
True
False
True


In [28]:
# Floats are approximately an issue
print(0.1 + 0.1 + 0.1 == 0.3) # False because of floating point arithmetic
print(0.1 + 0.1 + 0.1)


False
0.30000000000000004


In [20]:
# Create an empty list
my_list = []

# Check the truthiness of my_list
print(bool(my_list))

# Append the string 'cookies' to my_list
my_list.append('cookies')

# Check the truthiness of my_list
print(bool(my_list))

False
True


#### Sets (unordered data with optimized logic operations)

In [31]:
# Set is unique and unordered collection of elements, mutable, and can be created using set() or {}

# Create a set of cookie flavors
cookie_flavors = {'chocolate chip', 'peanut butter', 'chocolate chip', 'oatmeal'}
print(cookie_flavors)

# modify set
cookie_flavors.add('shortbread')
print(cookie_flavors)

# updating set
more_flavors = {'chocolate chip', 'mint chocolate chip'}
cookie_flavors.update(more_flavors)
print(cookie_flavors)

# remove from set
cookie_flavors.remove('mint chocolate chip')
print(cookie_flavors)
# pop from set
cookie_flavors.pop()
print(cookie_flavors)

# union and intersection
us_cookies = {'chocolate chip', 'oatmeal', 'sugar'}
uk_cookies = {'hobnob', 'digestive', 'rich tea'}
print(us_cookies.union(uk_cookies))
print(us_cookies.intersection(uk_cookies))

# set difference
print(us_cookies.difference(uk_cookies))


{'chocolate chip', 'oatmeal', 'peanut butter'}
{'shortbread', 'chocolate chip', 'oatmeal', 'peanut butter'}
{'mint chocolate chip', 'chocolate chip', 'oatmeal', 'peanut butter', 'shortbread'}
{'chocolate chip', 'oatmeal', 'peanut butter', 'shortbread'}
{'oatmeal', 'peanut butter', 'shortbread'}
{'sugar', 'chocolate chip', 'oatmeal', 'rich tea', 'digestive', 'hobnob'}
set()
{'chocolate chip', 'sugar', 'oatmeal'}


## 4 Advanced Data Types

### Counting made easy

In [33]:
# collections module
# Counter
from collections import Counter
cookie_counts = Counter(['chocolate chip', 'peanut butter', 'chocolate chip', 'oatmeal'])
print(cookie_counts)

cookie_counts.update(['chocolate chip', 'shortbread'])
print(cookie_counts)

# most common
print(cookie_counts.most_common(1))


Counter({'chocolate chip': 2, 'peanut butter': 1, 'oatmeal': 1})
Counter({'chocolate chip': 3, 'peanut butter': 1, 'oatmeal': 1, 'shortbread': 1})
[('chocolate chip', 3)]


### Dictionaries of unknown structure - defaultdict

In [35]:
### Dictionary Handling
# defaultdict
from collections import defaultdict
cookie_counts = defaultdict(int)
cookie_counts['chocolate chip'] += 1
print(cookie_counts)

defaultdict(<class 'int'>, {'chocolate chip': 1})


### Named Tuple

In [37]:
# Whats a namedtuple
''' A named tuple is a tuple where each position has a name. '''

from collections import namedtuple
Cookie = namedtuple('Cookie', ['name', 'quantity'])
cookie = Cookie('chocolate chip', 1)
print(cookie.name)
print(cookie.quantity)

chocolate chip
1


### Dataclasses

In [40]:
# Why user Dataclasses
# Dataclasses are a way to create classes that primarily store data. They provide a concise way to create classes that mainly store attributes without any methods.
# Dataclasses are mutable, and their attributes are accessed using dot notation.
# Dataclasses are created using the @dataclass decorator.
# They are easy to convert to tuple or dictionary.
# Frozen dataclasses are immutable.

from dataclasses import dataclass
from decimal import Decimal
@dataclass
class Cookie:
    name: str
    cost: Decimal
    quantity: int

    @property
    def total_cost(self):
        return self.cost * self.quantity

cookie = Cookie('chocolate chip', Decimal("1.20"), 1)
print(cookie.name)
print(cookie.quantity)
print(cookie.total_cost)


chocolate chip
1
1.20


In [46]:
# Frozen Dataclasses
@dataclass(frozen=True)
class Cookie:
    name: str
    cost: Decimal
    quantity: int

    @property
    def total_cost(self):
        return self.cost * self.quantity

cookie = Cookie('chocolate chip', Decimal("1.20"), 1)
print(cookie.name)
print(cookie.quantity)
print(cookie.total_cost)

# error check
from dataclasses import FrozenInstanceError
try:
    cookie.quantity = 2
except FrozenInstanceError as e:
    print(e) # FrozenInstanceError

chocolate chip
1
1.20
cannot assign to field 'quantity'
