In [280]:
import numpy as np
import pandas as pd

## List

In [281]:
# Create a list
a = [1, 2, 3]

In [282]:
# A bare expression on the last line of a cell displays its value (similar to print(a))
a

[1, 2, 3]

In [283]:
# Remove element from list
a.remove(2)

In [284]:
a

[1, 3]

In [285]:
# Show object properties
dir(a)

['__add__',
 '__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rmul__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'append',
 'clear',
 'copy',
 'count',
 'extend',
 'index',
 'insert',
 'pop',
 'remove',
 'reverse',
 'sort']

In [286]:
## Show help
a.append?

In [287]:
# Append an element
a.append(5)

In [288]:
a

[1, 3, 5]

In [289]:
# Concatenation
b = [3, 4]

In [290]:
c = a + b

In [291]:
c

[1, 3, 5, 3, 4]

In [292]:
# Shorthand for
# a = a + [100, 200]
# (except that += extends the existing list in place instead of creating a new one)
a += [100, 200]

In [293]:
a

[1, 3, 5, 100, 200]

In [294]:
# Reverse list
list(reversed(a))

[200, 100, 5, 3, 1]

In [295]:
# Reverse using indexing
a[::-1]

[200, 100, 5, 3, 1]

In [296]:
# Check length
len(a)

5

## String

In [297]:
s = 'abcdefg'

In [298]:
len(s)

7

In [299]:
s[::-1]

'gfedcba'

In [300]:
s.upper()

'ABCDEFG'

In [301]:
s.upper().lower()

'abcdefg'

In [302]:
s.capitalize()

'Abcdefg'

## Dictionary

In [307]:
d = {'a': 1, 'b': 2}
d

{'a': 1, 'b': 2}

In [None]:
d = {'a': 1, 'b': 2}

In [308]:
## Iterating over a dictionary yields its keys, not its values
for dd in d:
    print(dd)

a
b


In [309]:
## Access values
for key in d:
    print(d[key])

1
2


In [312]:
# KeyError when key is absent
d['c']

KeyError: 'c'

In [313]:
# get() returns None instead of raising when the key is absent
result = d.get('c')
result is None

True

In [317]:
# Giving a default value other than None
result = d.get('c', 100)
result

100

## Set

In [318]:
s1 = {1, 2, 3}  # set literal

In [319]:
s2 = {3, 4, 5}  # set literal

In [320]:
len(s1)

3

In [323]:
s3 = {1, 1, 2, 2, 3}  # duplicate elements are discarded

In [324]:
s3

{1, 2, 3}

In [325]:
## Intersection
s1 & s2

{3}

In [326]:
## Union
s1 | s2

{1, 2, 3, 4, 5}

## Tuple

In [332]:
# Create a tuple
b = (1, 2)
b

(1, 2)

In [333]:
# Comma-separated expressions without parentheses also create a tuple
1, 2

(1, 2)

In [334]:
# Tuple is immutable (cannot be modified)
b.append(5)

AttributeError: 'tuple' object has no attribute 'append'

In [335]:
# Only hashable objects can be dictionary keys; an immutable tuple like b qualifies, a mutable list does not
{b: 100}

{(1, 2): 100}

In [336]:
{[1, 2]: 100}

TypeError: unhashable type: 'list'

In [337]:
# Extra positional arguments (*args) are collected into a tuple
def func(*args):
    """Print the type of the object that collects the positional arguments."""
    args_type = type(args)
    print(args_type)

func()

<class 'tuple'>


## f-string

In [338]:
num = 12.3456
num

12.3456

In [339]:
f'number = {num:.2f}'

'number = 12.35'

In [340]:
f'number = {num:.10f}'  # Show 10 digits after the decimal point

'number = 12.3456000000'

In [341]:
f'number = {num:10.2f}' # Add spaces so that the total length is 10 chars

'number =      12.35'

In [342]:
num = 123

In [343]:
f'number = {num:5d}'  

'number =   123'

In [344]:
f'number = {num:05d}'  # Pad with zeros so that the total length is 5 chars

'number = 00123'

In [345]:
# A useful case
for i in range(10):
    print(f'file_{i:05}.txt')

file_00000.txt
file_00001.txt
file_00002.txt
file_00003.txt
file_00004.txt
file_00005.txt
file_00006.txt
file_00007.txt
file_00008.txt
file_00009.txt


## Range

In [352]:
## range function creates a range object
range(10)

range(0, 10)

In [353]:
## list function converts it to a list
list(range(10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [354]:
for i in range(10):
    print(i)

0
1
2
3
4
5
6
7
8
9


In [355]:
# Step 1
list(range(10, 20, 1))

[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [356]:
# Step 2
list(range(10, 20, 2))

[10, 12, 14, 16, 18]

## Enumeration in for loop

In [357]:
# A string is iterable, so enumerate can walk its characters directly
for i, x in enumerate('abcd'):
    print(f'i = {i}, x = {x}')

i = 0, x = a
i = 1, x = b
i = 2, x = c
i = 3, x = d


## Ternary operator

In [358]:
x = 1
a = 'foo' if x > 3 else 'bar'
a

'bar'

In [359]:
1, 2

(1, 2)

## List comprehension

In [360]:
# Build a new list holding each element of x1 raised to the 10th power
x1 = [1, 2, 3]
x2 = [value ** 10 for value in x1]
x2

[1, 1024, 59049]

In [361]:
# Build a new list with '_X' appended to every string of x1
x1 = ['a', 'b', 'c']
x2 = [letter + '_X' for letter in x1]
x2

['a_X', 'b_X', 'c_X']

In [362]:
x2 = []
for x in x1:
    x2.append(x + '_X')
x2

['a_X', 'b_X', 'c_X']


## File I/O

In [383]:
# Open a file in write mode and write to it
text = '''a,b,c
1,2,3
4,5,6
'''
print(text)
with open('test.csv', 'w') as f:
    f.write(text)

a,b,c
1,2,3
4,5,6



In [364]:
# Equivalent to the with-block above, written out by hand.
# The with-statement is still the better practice: it closes the file for you, even on error.
f = open('test.csv', 'w')
try:
    f.write(text)
finally:
    # Without try/finally an exception in write() would skip close() and leak the handle.
    f.close()  # If you forget to close, the content may not be saved, or the file may stay locked

In [365]:
# Check if file is actually there
!ls test.csv

test.csv


In [366]:
pd.read_csv('test.csv')

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6


In [367]:
# Read file
with open('test.csv', 'r') as f:
    text = f.read()
print(text)

a,b,c
1,2,3
4,5,6



In [368]:
# file object is properly closed
f.closed

True

In [369]:
f = open('test.csv', 'r')

In [370]:
# Not closed!
f.closed

False

In [371]:
f.close()
f.closed

True

In [372]:
# text contains newlines; [:-1] drops the trailing one
text[:-1]

'a,b,c\n1,2,3\n4,5,6'

## Debug

In [373]:
a = 0; b = 1
b / a

ZeroDivisionError: division by zero

In [376]:
# %debug opens a post-mortem debugger on the most recent exception
# Do not forget to quit, otherwise the cell stays stuck
%debug

> [0;32m/home/mknz/dev/ds-tutorials/env/lib/python3.9/site-packages/pandas/core/ops/array_ops.py[0m(165)[0;36m_na_arithmetic_op[0;34m()[0m
[0;32m    163 [0;31m[0;34m[0m[0m
[0m[0;32m    164 [0;31m    [0;32mtry[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 165 [0;31m        [0mresult[0m [0;34m=[0m [0mfunc[0m[0;34m([0m[0mleft[0m[0;34m,[0m [0mright[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    166 [0;31m    [0;32mexcept[0m [0mTypeError[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    167 [0;31m        [0;32mif[0m [0;32mnot[0m [0mis_cmp[0m [0;32mand[0m [0;34m([0m[0mis_object_dtype[0m[0;34m([0m[0mleft[0m[0;34m.[0m[0mdtype[0m[0;34m)[0m [0;32mor[0m [0mis_object_dtype[0m[0;34m([0m[0mright[0m[0;34m)[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> quit


In [375]:
# Sometimes you want to dive into the code where the error happened, and inspect variables there
s =  pd.Series([1, 2, 3])
s + 'a'

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('int64'), dtype('<U1')) -> None

## Exercise

1. Create a list of integers from 1 to 30 with a step of 1
2. Reverse the list, subtract 5 from each element, and print the result

In [None]:
## step 1
x1 = ...

In [None]:
## step 2
x2 = ...
print(x2)

## Class

In [377]:
class MyClass:
    """Minimal example class that stores one value and can print it."""

    def __init__(self, a):
        """Store ``a`` on the new instance."""
        self.a = a

    def show(self):
        """Print the value stored in ``self.a``."""
        stored = self.a
        print(stored)

In [378]:
# You first need to instantiate the class to use the method
MyClass.show()

TypeError: show() missing 1 required positional argument: 'self'

In [379]:
myclass1 = MyClass(10)
myclass2 = MyClass(20)

In [380]:
myclass1.show()
myclass2.show()

10
20


In [381]:
# Type of a class (MyClass) is type
type(MyClass)

type

In [382]:
# Type of an instance (myclass1) is MyClass
# (the name `myclass` is never defined in this notebook, so it raises NameError on a fresh kernel)
type(myclass1)

__main__.MyClass