# Python basics

### List comprehension

In [1]:
l1 = ['a', 'b', 'c']
l2 = ['d', 'e', 'a']

l = [i+j for i in l1 for j in l2 if not(i==j)]
l

['ad', 'ae', 'bd', 'be', 'ba', 'cd', 'ce', 'ca']

In [2]:
l1 = ['a1', 'a2', 'a3']
l2 = ['b1', 'b2', 'b3']

for a, b in zip(l1, l2):
    print(a, b)

a1 b1
a2 b2
a3 b3


### lambda

In [5]:
f = lambda x, y: x+y

In [4]:
def f(x, y):
    return x+y

In [6]:
print(f(1, 4))

5


### map

In [7]:
l = [1, 2, 3, 4, 5]
f = lambda x: x**2
list(map(f, l))

# map(function, sequence)
# sequence형 데이터에 mapping을 해줌

[1, 4, 9, 16, 25]

### 하지만 map은 사용 권장 ㄴㄴ

In [8]:
def f(x):
    return x**2

[f(x) for x in l]

[1, 4, 9, 16, 25]

In [9]:
list(map(lambda x: x**2 if x%2==0 else x, l))

[1, 4, 3, 16, 5]

In [10]:
[x**2 if x%2==0 else x for x in l]

[1, 4, 3, 16, 5]

### Asterisk stuff

In [11]:
# function passing arguments

# variable-length arguments
# 입력된 값은 tuple type

def args_test(a, b, *args):
    return a + b + sum(args)

print(args_test(1, 2, 3, 4, 5))

15


In [12]:
# keyword variable-length arguments
# 입력된 값은 dict type

def kwargs_test(one, two, *args, **kwargs):
    print(args)
    print(kwargs)
    print(kwargs['first'])
    
kwargs_test(1, 2, 3, 4, 5, first=6, second=7, third=8)

(3, 4, 5)
{'first': 6, 'second': 7, 'third': 8}
6


In [13]:
# asterisk unpacking

def unpacking_1(a, *args):
    print(a, args)
    print(type(args))
    
unpacking_1(1, *(2, 3, 4, 5, 6))

1 (2, 3, 4, 5, 6)
<class 'tuple'>


In [14]:
def unpacking_2(a, b, c, d):
    print(a, b, c, d)
    
data = {'b': 1, 'c': 2, 'd': 3}

unpacking_2(10, **data)

10 1 2 3


# OOP

In [15]:
# attribute(variable) and action(method)

# class: blueprint, instance: realization

# attribute 추가하려면 __init__

# method에 self를 추가해야 class의 함수로 인정됨

class SoccerPlayer(object):
    def __init__(self, name: str, back_num: int):
        self.name = name
        self.back_num = back_num
        
    def __str__(self):
        return f'name: {self.name}, back number: {self.back_num}'
    
    def change_back_num(self, new_num):
        self.back_num = new_num

son = SoccerPlayer('Son', 7)

print(son)

son.change_back_num(10)
print(son)

son.back_num = 20
print(son)

name: Son, back number: 7
name: Son, back number: 10
name: Son, back number: 20


In [21]:
# Inheritance
# 부모 클래스로부터 attribute와 method를 물려받음

class Person(object):
    def __init__(self, name, age):
        self.name = name
        self.age = age
        
    def about_me(self):
        print(f'name: {self.name}, age: {self.age}')
        
class Employee(Person):
    def __init__(self, name, age, salary):
        super().__init__(name, age) # 부모 객체 사용
        self.salary = salary
        
    def about_me(self):
        super().about_me()
        print(f'salary: {self.salary}')

e1 = Employee('Lee', 22, 200)

e1.about_me()

name: Lee, age: 22
salary: 200


# File handling

### read, readlines, readline

In [37]:
# read()

f = open('a.txt', 'r')
l = f.read() # returns everything within the file as a single string
print(len(l))
print(l)
f.close()

20
aaaa
bbbb

cccc
dddd


In [38]:
# with 

with open('a.txt', 'r') as f:
    l = f.read()
    print(len(l))
    print(l)

20
aaaa
bbbb

cccc
dddd


In [39]:
# readlines()

with open('a.txt', 'r') as f:
    l = f.readlines() # all data into a list
    print(len(l))
    print(l)

5
['aaaa\n', 'bbbb\n', '\n', 'cccc\n', 'dddd']


In [48]:
# readline()

with open('a.txt', 'r') as f:
    for i in range(5):
        l = f.readline().strip() # single line separated by newline
        print(len(l))
        print(l)

4
aaaa
4
bbbb
0

4
cccc
4
dddd


In [53]:
with open('a.txt', 'r') as f:
    for l in f.readlines():
        l = l.strip()
        print(len(l))
        print(l)

4
aaaa
4
bbbb
0

4
cccc
4
dddd


In [58]:
with open('b.txt', 'w', encoding='utf8') as f:
    for i in range(10):
        f.write(str(i)+'\n')

### directory

In [143]:
import os

os.mkdir('d')

In [144]:
try:
    os.mkdir('d')
except FileExistsError as e:
    print('File already exists')

File already exists


In [145]:
os.path.exists('d')

True

In [146]:
os.path.isdir('d')

True

In [147]:
import shutil

source = 'b.txt'
dest = os.path.join('.', 'c.txt')
shutil.copy(source, dest)

'./c.txt'

In [149]:
import pathlib

cwd = pathlib.Path.cwd()
cwd

In [69]:
cwd.parent

### pickle

In [150]:
import pickle

l = [1, 2, 3, 4, 5]

with open('d/pickle_example.pickle', 'wb') as f:
    pickle.dump(l, f)

In [151]:
with open('d/pickle_example.pickle', 'rb') as f:
    p = pickle.load(f)

print(p)

[1, 2, 3, 4, 5]


# Data handling

### csv

In [166]:
header_flag = True
data_header = []
customer_list = []

with open('d/csv_example.csv') as f:
    while True:
        data = f.readline()
        
        if not data:
            break
        
        if header_flag:
            data_header = data.split(',')
        
        else:
            customer_list.append(data.split(','))
        
        header_flag = False
        
print(data_header)
for i in range(5):
    print(customer_list[i])
print(len(customer_list))

['customerNumber', 'customerName', 'contactLastName', 'contactFirstName', 'phone', 'addressLine1', 'addressLine2', 'city', 'state', 'postalCode', 'country', 'salesRepEmployeeNumber', 'creditLimit\n']
['103', '"Atelier graphique"', 'Schmitt', '"Carine "', '40.32.2555', '"54', ' rue Royale"', 'NULL', 'Nantes', 'NULL', '44000', 'France', '1370', '21000\n']
['112', '"Signal Gift Stores"', 'King', 'Jean', '7025551838', '"8489 Strong St."', 'NULL', '"Las Vegas"', 'NV', '83030', 'USA', '1166', '71800\n']
['114', '"Australian Collectors', ' Co."', 'Ferguson', 'Peter', '"03 9520 4555"', '"636 St Kilda Road"', '"Level 3"', 'Melbourne', 'Victoria', '3004', 'Australia', '1611', '117300\n']
['119', '"La Rochelle Gifts"', 'Labrune', '"Janine "', '40.67.8555', '"67', ' rue des Cinquante Otages"', 'NULL', 'Nantes', 'NULL', '44000', 'France', '1370', '118200\n']
['121', '"Baane Mini Imports"', 'Bergulfsen', '"Jonas "', '"07-98 9555"', '"Erling Skakkes gate 78"', 'NULL', 'Stavern', 'NULL', '4110', 'Norw

In [167]:
customer_USA_only_list = []

with open('d/csv_example.csv', 'r') as f:
    while True:
        data = f.readline()
        
        if not data:
            break
        
        else:
            if data.split(',')[10]=='USA':
                customer_USA_only_list.append(data.split(','))
                
for i in range(5):
    print(customer_USA_only_list[i])
print(len(customer_USA_only_list))

with open('d/csv_w_example.csv', 'w') as f:
    for customer in customer_USA_only_list:
        f.write(','.join(customer))

['112', '"Signal Gift Stores"', 'King', 'Jean', '7025551838', '"8489 Strong St."', 'NULL', '"Las Vegas"', 'NV', '83030', 'USA', '1166', '71800\n']
['124', '"Mini Gifts Distributors Ltd."', 'Nelson', 'Susan', '4155551450', '"5677 Strong St."', 'NULL', '"San Rafael"', 'CA', '97562', 'USA', '1165', '210500\n']
['129', '"Mini Wheels Co."', 'Murphy', 'Julie', '6505555787', '"5557 North Pendale Street"', 'NULL', '"San Francisco"', 'CA', '94217', 'USA', '1165', '64600\n']
['131', '"Land of Toys Inc."', 'Lee', 'Kwai', '2125557818', '"897 Long Airport Avenue"', 'NULL', 'NYC', 'NY', '10022', 'USA', '1323', '114900\n']
['151', '"Muscle Machine Inc"', 'Young', 'Jeff', '2125557413', '"4092 Furth Circle"', '"Suite 400"', 'NYC', 'NY', '10022', 'USA', '1286', '138500\n']
34


### html

In [78]:
import re
import urllib.request

url = 'http://goo.gl/U7mSQl'
html = urllib.request.urlopen(url)
html_contents = str(html.read())
# regular expression
id_results = re.findall(r'([A-Za-z0-9]+\*\*\*)', html_contents)

id_results

['codo***',
 'outb7***',
 'dubba4***',
 'multicuspi***',
 'crownm***',
 'triformo***',
 'spania***',
 'magazin***',
 'presby***',
 'trophody***',
 'nontr***',
 'enranck***',
 'canc***',
 'uncanker***',
 'wrymo***',
 'non***',
 'luminat***',
 'oblig***',
 'anna***',
 'hyperth***',
 'toplabl***',
 'dolce0***',
 'rudals2***',
 'jjw980***',
 'elvlz***',
 'skmid***',
 'qkep***',
 'kisslov***',
 'maskman***',
 'sungt***']

In [169]:
url = 'http://finance.naver.com/item/main.nhn?code=005930'
html = urllib.request.urlopen(url)
html_contents = str(html.read().decode('ms949'))

# <dl class="blind"> 에서 시작해서 </dl> 사이에 아무 글자나
stock_results = re.findall('(\<dl class=\"blind\"\>)([\s\S]+?)(\<\/dl\>)', html_contents)
samsung_stock = stock_results[0]
samsung_index = samsung_stock[1]
# <dd> 에서 시작해서 </dd> 사이에 아무 글자나
index_list = re.findall('(\<dd\>)([\s\S]+?)(\<\/dd\>)', samsung_index)

for index in index_list:
    print(index[1])

2022년 02월 03일 16시 11분 기준 장마감
종목명 삼성전자
종목코드 005930 코스피
현재가 73,300 전일대비 보합 0  0.00 퍼센트
전일가 73,300
시가 74,900
고가 74,900
상한가 95,200
저가 73,300
하한가 51,400
거래량 17,400,508
거래대금 1,289,052백만


### xml

In [171]:
from bs4 import BeautifulSoup

with open('d/xml_example.xml', 'r', encoding='utf8') as f:
    books_xml = f.read()

soup = BeautifulSoup(books_xml, 'html')

for book_info in soup.find_all('author'):
    print(book_info)
    print(book_info.get_text())

<author>Carson</author>
Carson
<author>Sungchul</author>
Sungchul


### json

In [154]:
import json

with open('d/json_example.json', 'r', encoding='utf8') as f:
    contents = f.read()
    json_data = json.loads(contents)
    print(json_data["employees"])

[{'firstName': 'John', 'lastName': 'Doe'}, {'firstName': 'Anna', 'lastName': 'Smith'}, {'firstName': 'Peter', 'lastName': 'Jones'}]
