# Python snippets 常用语法语句示例

# String 字符串

## 即使是从外部文件获取的字符串，%s的格式化也能使用

In [None]:
# GitHub "list issues" endpoint; the two %s slots are filled with owner and repository.
url = 'https://api.github.com/repos/%s/%s/issues'

# print() with a single argument behaves the same on Python 2 and Python 3.
print(url % ('solomonxie', 'gitissues'))
# `config` is not defined in this cell; it must already exist in the session.
print(url % (config['username'], config['repos'][0]))

# List 数组

### Superscript 将所有数字变成上标

In [6]:
content = '1第一句话 2第二句话 3第三句话'

# Row 0 holds the ASCII digits, row 1 the superscript glyph at the same index.
numbers = [
    ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
    ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'],
]

# Walk both rows in lockstep and substitute one digit at a time.
for digit, raised in zip(numbers[0], numbers[1]):
    content = content.replace(digit, raised)

print(content)

¹第一句话 ²第二句话 ³第三句话


# Set 集合

## Set集的对比和运算

In [None]:
# Goal: work out what changed between two snapshots of article ids.
# Expected: {3.1, 6, 7} must be fetched and {3, 4} must be removed locally.
old = [1,2,3,4,5]           # 5 articles in total
new = [1,2,3.1,5,6,7,3.1]   # updated no.3 (now 3.1), deleted no.4, added no.6 and no.7

olds = set(old)
news = set(new)             # the duplicated 3.1 collapses into one element

print(news - olds)   # items only in `new`: added or updated
print(olds - news)   # items only in `old`: deleted, plus the stale version of updated ones

print('These need to be retrieved from internet: %s'%str(news-olds))
print('These need to be deleted from local: %s'%str(olds-news))

# RegularExpression 正则表达式

In [3]:
import re
s = '![image](https://user-images.githubusercontent.com/14041622/40041663-65775634-5851-11e8-9382-e6de982dc496.png)'

# Markdown image syntax is ![alt](url). Both parts are matched lazily so that
# several images on one line yield one URL each; a greedy pattern would span
# from the first "![" to the last ")" and return a single wrong match.
# The alt text may be empty. URLs that themselves contain ")" are cut short.
pattern = re.compile(r'!\[.*?\]\((.+?)\)')
result = pattern.findall(s)

print(result)

['https://user-images.githubusercontent.com/14041622/40041663-65775634-5851-11e8-9382-e6de982dc496.png']


# JSON数据

In [16]:
import json

# Parse the JSON document straight from the open file handle.
with open('dataset/baidu-ocr-acurate.json', 'r') as f:
    data = json.load(f)
results = data['words_result']

# Only the first entry of the result list is inspected.
line = results[0]
words, loc_block, loc_chars = (
    line['words'],
    line['min_finegrained_vertexes_location'],
    line['finegrained_vertexes_location'],
)

print(len(words))
print(len(loc_chars))

41
68


## JSON对象的对比和运算

In [19]:
j1 = dict(a="1", b="2", c="3", d="4")
j2 = dict(d="5", e="6", f="7", g="8")
arr = [j1, j2]

# list.remove() drops the first element that compares equal to its argument.
arr.remove(j1)
arr

[{'d': '5', 'e': '6', 'f': '7', 'g': '8'}]

## 不同类型的字符串list列表join()后出来的字符串格式

In [None]:
# Join two unprefixed string literals with a newline, then show the value and its type.
# NOTE(review): the result type presumably depends on the interpreter version (Python 2 vs 3) — confirm.
s1 = '\n'.join( ['你好','世界'] )
s1, type(s1)

In [None]:
# Same join, but both items carry the u'' prefix; show the value and its type.
# NOTE(review): the u'' prefix presumably only changes the type under Python 2 — confirm.
s2 = '\n'.join( [u'你好',u'世界'] )
s2, type(s2)

In [None]:
# Same join with one u''-prefixed item and one unprefixed item; show the value and its type.
# NOTE(review): mixing the two kinds presumably only matters under Python 2 — confirm.
s3 = '\n'.join( [u'你好','世界'] )
s3, type(s3)

# Exception 异常捕获

In [None]:
try:
    hello('hi')    # there's no such thing as hello(), so this raises NameError
except Exception:  # ordinary errors only; a bare except would also trap KeyboardInterrupt/SystemExit
    print('something wrong.')

In [None]:
try:
    hello('hi')
except Exception as e:     # bind the exception so its text can be inspected
    # Same output as the old `print str(e), '\n'`: the text, a space, a blank line.
    print('%s \n' % str(e))

    # The .message attribute exists only on Python 2 exceptions; fall back to
    # str(e) where it is missing instead of raising AttributeError.
    detail = getattr(e, 'message', str(e))
    print('e.message:\n' + detail)

In [None]:
try:
    hello('hi')
except Exception:
    pass          # does anything run after "pass"? yes: pass is a no-op
    print('something wrong.')

In [None]:
i = 0   # attempts made
j = 0   # attempts that got past hello()
while i < 10:
    try:
        i += 1
        hello('hi')   # undefined name: raises NameError on every pass
        j += 1        # therefore never reached
    except Exception:
        continue    # `continue` inside except moves on to the next iteration

# Formatted explicitly so the output is "10 0" on both Python 2 and Python 3.
print('%s %s' % (i, j))

# Python sys 系统参数

## 获取脚本参数

In [None]:
import sys

# The whole argument list: the script path first, then any arguments.
print(sys.argv)

# The first real argument, if one was supplied (sys.argv[0] is the script
# itself); guarded so a run without arguments does not raise IndexError.
if len(sys.argv) > 1:
    print(sys.argv[1])

## 执行命令行获取输出内容

In [None]:
# Method 1: subprocess
import subprocess

# Pipe both streams so each can be read separately once the process has ended.
p = subprocess.Popen(['git', 'push'], stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)
out, err = p.communicate()   # waits for exit and returns (stdout, stderr)

print(out)
print(err)
# Conclusion: both stdout and stderr can be captured this way

In [None]:
# Method 2: os.popen()
import os

with os.popen('git status') as p:
    print(p.read())

# Conclusion: only stdout is captured

In [None]:
# Method 2 with a trick: os.popen()
import os

# `2>&1` makes the shell send stderr into stdout before popen reads it.
with os.popen('git push 2>&1') as p:
    print(p.read())

# Conclusion: because 2>&1 merges the streams, stdout and stderr are both captured

# Python转图片为Base64编码

In [3]:
import base64

# Read the image as raw bytes and encode them while the file is still open.
with open('dataset/pic.png', 'rb') as image_file:
    pic = image_file.read()
    output = base64.b64encode(pic)

# Show only the first 20 bytes of the encoded payload.
print(output[:20])

b'iVBORw0KGgoAAAANSUhE'


In [None]:
# The simplest possible logging: call the module-level helpers directly
import logging

logging.info("打印信息")
logging.error("出现了错误")
logging.warning("警告信息")

# The info() record is below the WARNING level and is therefore ignored

In [None]:
# Set the log level (controls which records get emitted)
import logging

# basicConfig() silently does nothing when the root logger already has
# handlers, which is the case as soon as anything has logged earlier in the
# session. Drop the old handlers first so the new level really takes effect.
root_logger = logging.getLogger()
for stale_handler in root_logger.handlers[:]:
    root_logger.removeHandler(stale_handler)
    stale_handler.close()

logging.basicConfig(level=logging.INFO)
logging.info("打印信息")
logging.error("出现了错误")
logging.warning("警告信息")

# Every record at INFO level or above is now emitted

In [None]:
# Write the log records to a file
import logging

# basicConfig() silently does nothing when the root logger already has
# handlers (e.g. after any earlier logging call in the same session), so the
# file would never be created. Remove the old handlers before configuring.
root_logger = logging.getLogger()
for stale_handler in root_logger.handlers[:]:
    root_logger.removeHandler(stale_handler)
    stale_handler.close()

logging.basicConfig(level=logging.INFO,filename='test.log')
logging.error("出现了错误")
logging.info("打印信息")
logging.warning("警告信息")

# The records go to test.log instead of the console
!cat test.log

In [None]:
import logging

# basicConfig() is a no-op once the root logger has handlers, so clear them
# first; otherwise the INFO level requested here is ignored on a re-run.
root_logger = logging.getLogger()
for stale_handler in root_logger.handlers[:]:
    root_logger.removeHandler(stale_handler)
    stale_handler.close()

logging.basicConfig(level=logging.INFO)

# A named logger sets no level of its own and inherits the root logger's.
logger = logging.getLogger('example')

logger.error("出现了错误")
logger.info("打印信息")
logger.warning("警告信息")

# Python Logging 日志记录

# Python操作图片 PIL & Pillow

## 在图片上画框

In [1]:
from PIL import Image, ImageDraw

# Open the source image and create a drawing object bound to it.
im = Image.open('./dataset/pic2.jpg')
draw = ImageDraw.Draw(im)

# start position
x, y = 1615, 739

# box dimensions
width, height = 2610, 92

# Number of rectangle outlines to stack; together they form a thick border.
line = 20

# As i grows, the first corner moves from (x+line-1, y+line-1) out to (x, y)
# and the opposite corner from (x+width+1, y+height+1) out to
# (x+width+line, y+height+line).
# NOTE(review): the first corner starts inside the box, so part of the border
# overlaps the box area — confirm this is intended.
for i in range(1, line + 1):
    draw.rectangle((x +(line-i), y+(line-i), x+width+i, y+height+i), outline='red')

# The result is written to a new file; the source image file is not overwritten.
im.save("./dataset/pic2-drawed.jpg")
print('OK.')

OK.


# 文件路径操作 Path operation

In [1]:
from os import path

s1 = '/Users/me/movie'

In [2]:
path.realpath(s1)

'/Users/me/movie'

In [3]:
path.basename(s1)

'movie'

In [4]:
path.dirname(s1)

'/Users/me'

In [5]:
# A trailing "/" changes what the path functions return
s2 = '/Users/me/movie/'

In [6]:
path.realpath(s2)

'/Users/me/movie'

In [7]:
path.basename(s2)

''

In [8]:
path.dirname(s2)

'/Users/me/movie'

In [9]:
# Now let's try files

In [10]:
s3 = '/Users/me/movie/abc.mp4'

In [11]:
path.basename(s3)

'abc.mp4'

In [12]:
path.splitext(s3)

('/Users/me/movie/abc', '.mp4')

In [13]:
# File name without its extension: strip the extension first, then take the base name
path.basename(path.splitext(s3)[0])

'abc'

## 获取当前脚本目录

In [None]:
import os
import sys

print(os.getcwd())                     # current working directory of the process

print(sys.argv[0])                     # script path as it was passed to the interpreter (may be relative)

# dirname() of a bare "script.py" is the empty string, so make the path
# absolute before taking its directory.
script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
print(script_dir)                      # directory that contains the script

# Python Sending Email 发送邮件 

## Sending Email with HTML format

In [None]:
import getpass
import smtplib
from email.mime.text import MIMEText

# Settings of sender's server
host = 'smtp.aliyun.com'
sender = 'solomonxie@aliyun.com'
user = 'solomonxie@aliyun.com'
# getpass() keeps the password off the screen; input() would echo it.
password = getpass.getpass('Please type your password: ')
to = ['solomonxie@outlook.com']

# Content of email
subject = 'Python send html email test'
# Assumes the HTML file is saved as UTF-8, matching the charset declared below — confirm.
with open('./dataset/out.html', 'r', encoding='utf-8') as f:
    content = f.read()

# Settings of the email string
email = MIMEText(content,'html','utf-8')
email['Subject'] = subject
email['From'] = sender
email['To'] = to[0]
msg = email.as_string()

# Log in to the sender's server. The with-block sends QUIT and closes the
# socket even when login or sending raises.
# NOTE(review): port 25 without STARTTLS sends the credentials unencrypted —
# confirm whether the server supports smtpObj.starttls().
print('Logging with server...')
with smtplib.SMTP(host, 25) as smtpObj:
    smtpObj.login(user, password)
    print('Login successful.')

    # Send email
    smtpObj.sendmail(sender, to, msg)
print('Email has been sent')

## Sending email with attachments

In [None]:
import getpass
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.image import MIMEImage

# Settings of sender's server
host = 'smtp.aliyun.com'
sender = 'solomonxie@aliyun.com'
user = 'solomonxie@aliyun.com'
# getpass() keeps the password off the screen; input() would echo it.
password = getpass.getpass('Please type your password: ')
to = ['solomonxie@outlook.com']

# Make content of email
subject = 'Python send email with attachments'
# Assumes the text files are saved as UTF-8, matching the declared charset — confirm.
with open('./dataset/out.html', 'r', encoding='utf-8') as f:
    # MIMEText already emits "Content-Type: text/html"; assigning the header
    # again would append a second, conflicting Content-Type line.
    content = MIMEText(f.read(),'html','utf-8')
    print('Loaded content.')

# Make txt attachment
with open('./dataset/in.md', 'r', encoding='utf-8') as f:
    txt = MIMEText(f.read(),'plain','utf-8')
    # Content-Disposition alone marks the part as a downloadable attachment.
    txt.add_header('Content-Disposition', 'attachment', filename='in.md')
    print('Loaded txt attachment file.')

# Make image attachment
with open('./dataset/pic.png', 'rb') as f:
    img = MIMEImage(f.read())
    img.add_header('Content-Disposition', 'attachment', filename='pic.png')
    print('Loaded image attachment file.')

# Attach content & attachments to email
email = MIMEMultipart()
email.attach(content)
email.attach(txt)
email.attach(img)

# Settings of the email string
email['Subject'] = subject
email['From'] = sender
email['To'] = to[0]
msg = email.as_string()

# Log in to the sender's server. The with-block sends QUIT and closes the
# socket even when login or sending raises.
# NOTE(review): port 25 without STARTTLS sends the credentials unencrypted —
# confirm whether the server supports smtpObj.starttls().
print('Logging with server...')
with smtplib.SMTP(host, 25) as smtpObj:
    smtpObj.login(user, password)
    print('Login successful.')

    # Send email
    smtpObj.sendmail(sender, to, msg)
print('Email has been sent')

## Receive email (POP3)

In [None]:
import getpass
import poplib

host = 'pop3.sina.com'
# 995 is the standard POP3-over-SSL port; 993 belongs to IMAP over SSL.
port_ssl = 995
email = 'xie_xiaobo_vip@sina.com'
# getpass() keeps the password off the screen; input() would echo it.
password = getpass.getpass('Please type your password: ')

# Make instance of an email server
server = poplib.POP3_SSL(host, port_ssl)
server.set_debuglevel(2)   # level 2 prints the protocol traffic

print(server.getwelcome())

# Log in
server.user(email)
server.pass_(password)

## Receive Email (IMAP)

In [None]:
import getpass
import imaplib
import email

# NOTE(review): this is a POP3-style host name used for an IMAP connection —
# confirm the provider's IMAP host.
host = 'pop3.sina.com'
port_ssl = 993   # IMAP over SSL; passed as an integer port number
addr = 'xie_xiaobo_vip@sina.com'
# getpass() keeps the password off the screen; input() would echo it.
password = getpass.getpass('Please type your password: ')

server = imaplib.IMAP4_SSL(port=port_ssl,host=host)
print('Connected with server.')

server.login(addr, password)
print('Logged in server.')

In [None]:
# Choose a Folder on server to download
status, data = server.select('INBOX')
print(status)
print(data)

# Ask for the sequence numbers of every message in the selected folder
status, data = server.search(None, 'ALL')
print(status)
print(data)

# data[0] is one space-separated string of message numbers
indeces = data[0].split()
print('Count of mails: %s'% len(indeces))

# resp, mails = server.fetch (indeces[len(indeces)-1],'(RFC822)')

for n in indeces:
    resp, mails = server.fetch(n, '(RFC822)')
    # fetch() yields (envelope, raw message) tuples plus bare closing markers.
    # Only the second tuple element is the mail itself; parsing the envelope
    # as well would produce a bogus extra "message".
    for item in mails:
        if not isinstance(item, tuple):
            continue
        mail = email.message_from_bytes(item[1])

        for part in mail.walk():
            print(part.get_content_maintype())
            print(part.get('Content-Disposition'))
            print(part.get_filename())

# Parse mail strings


# Quit
#server.logout()

# Python Convert Markdown to HTML 格式转换

In [None]:
import markdown

# convert from markdown string
#html = markdown.markdown('# Title', extensions=['markdown.extensions.extra'])

# Convert from .md file
# markdownFromFile() writes its result to an output stream and returns None,
# so printing its return value shows nothing useful. Read the file and
# convert the text instead, which returns the HTML as a string.
# Assumes the Markdown file is saved as UTF-8 — confirm.
with open('./dataset/in.md', 'r', encoding='utf-8') as f:
    text = f.read()
html = markdown.markdown(text, extensions=['markdown.extensions.extra'], output_format='html5')
print(html)

# Python 操作文件&文件夹

## os.mkdir对已有文件夹覆盖情况

In [None]:
import os
folder = './dataset/doc/src'

# os.mkdir() raises when the folder already exists or when a parent folder
# is missing; makedirs(exist_ok=True) creates the whole chain and can be
# re-run safely. Existing contents are never overwritten.
os.makedirs(folder, exist_ok=True)

# Name the encoding so the non-ASCII text is written identically everywhere.
with open(os.path.join(folder, 'test.txt'), 'w', encoding='utf-8') as f:
    f.write('你好')

## shutil打包文件夹

In [None]:
import shutil
# Create a zip archive of the `base_dir` folder, with paths inside the
# archive taken relative to `root_dir`.
shutil.make_archive(
        base_name= '/Volumes/SD/Downloads/gitissues2018',   # archive path without the format extension
        format   = 'zip',
        root_dir = '/Volumes/SD/Workspace/autobackup/solomonxie',   # directory the archive paths are relative to
        base_dir = 'gitissues')   # folder (under root_dir) that gets archived

## os.walk遍历文件及文件夹

In [4]:
import os
folder = './dataset/'

# os.walk() yields one (directory, sub-directory names, file names) triple
# per directory in the tree.
for current_dir, child_dirs, file_names in os.walk(folder):
    print(current_dir, child_dirs)
    for name in file_names:
        print(name)


./dataset/ ['heibanke-crawler-quiz-lesson4-password-list_files']
issues-new.json
issues-old.json
sample-issue-list.json
py3-encoding-error-output.txt
py3-encoding-error-input.txt
py3-encoding-error-input-2.txt
._heibanke-crawler-quiz-lesson4-password-list_files
heibanke-crawler-quiz-lesson4-password-list.html
._heibanke-crawler-quiz-lesson4-password-list.html
in.md
pic.png
out.html
github.css
out2.html
._pic.jpg
._in.md
baidu-ocr-acurate.json
._baidu-ocr-acurate.json
pic.jpg
pic2-drawed.jpg
pic2.jpg
./dataset/heibanke-crawler-quiz-lesson4-password-list_files []
bootstrap.min.js
._bootstrap.min.js
jquery-ui.min.js
._jquery-ui.min.js
jquery-ui.min.css
._jquery-ui.min.css
jquery.js
._jquery.js
d3.min.js
._d3.min.js
bootstrap.min.css
._bootstrap.min.css


In [None]:
import os
folder = './dataset/'


def walk_limited(top, max_depth):
    """Yield os.walk() triples for `top`, descending at most `max_depth` levels.

    Level 1 is `top` itself, level 2 its immediate sub-directories, and so on.
    Nothing is yielded when `max_depth` is below 1.
    """
    if max_depth < 1:
        return
    for root, subdir, files in os.walk(top):
        relative = os.path.relpath(root, top)
        level = 1 if relative == os.curdir else relative.count(os.sep) + 2
        yield root, subdir, files
        if level >= max_depth:
            # Emptying the list in place stops os.walk() from going deeper.
            del subdir[:]


# Limit the traversal depth. A plain counter of loop passes would count
# visited directories rather than depth, and would stop after the first
# sub-directory regardless of how many siblings it has.
depth = 2
for root, subdir, files in walk_limited(folder, depth):
    print(root, subdir)
    for filename in files:
        print(filename)


In [9]:
!tree ./dataset

./dataset
├── baidu-ocr-acurate.json
├── github.css
├── heibanke-crawler-quiz-lesson4-password-list.html
├── heibanke-crawler-quiz-lesson4-password-list_files
│   ├── bootstrap.min.css
│   ├── bootstrap.min.js
│   ├── d3.min.js
│   ├── jquery-ui.min.css
│   ├── jquery-ui.min.js
│   └── jquery.js
├── in.md
├── issues-new.json
├── issues-old.json
├── out.html
├── out2.html
├── pic.jpg
├── pic.png
├── pic2-drawed.jpg
├── pic2.jpg
├── py3-encoding-error-input-2.txt
├── py3-encoding-error-input.txt
├── py3-encoding-error-output.txt
└── sample-issue-list.json

1 directory, 22 files


# Python时间操作

## date & time

In [None]:
from datetime import date, datetime
# str( date.today() )
# '%H' is a strftime directive, not a %-format code, so `'%H' % value` raises
# ValueError. A date also carries no time of day, so the hour is taken from
# datetime.now() instead.
hour = datetime.now().strftime('%H')
hour

In [None]:
import time
# time.time() returns the seconds since the epoch as a float; str() renders it as text
str( time.time() )

# Python Git

In [None]:
# Conclusion: IPython often fails to import third-party packages, even when
# they are installed and import fine elsewhere
try:
    import git
except ImportError:
    git = None   # the command-line fallback below does not need the package

# Run the system command line directly instead
import os
import subprocess

cwd = os.getcwd() # get a path to a git repo, such as: here.

# To run git against another directory, pass `-C <path>`.
# The arguments go in a list so a path with spaces needs no quoting. The
# stray `<>` of the old shell string was a redirection, which left git
# without a subcommand.
subprocess.call(['git', '-C', cwd, 'status'])

# Python requests

## Python `requests`检查访问状态

In [None]:
import requests

# Pass a timeout: without one the call can wait indefinitely for the server.
r = requests.get('http://www.baidu.com', timeout=10)

# HTTP status code of the response, e.g. 200 on success
r.status_code

## Python `requests` 自带返回json数据的中文编码

In [None]:
import json
import requests

# Pass a timeout: without one the call can wait indefinitely for the server.
r = requests.get('https://api.github.com/repos/solomonxie/starter-practice/issues', timeout=10)

# Compare the type of a decoded string from requests' own JSON decoder with
# the type from json.loads() on the raw body.
print(type(r.json()[0]['title']))
print(type(json.loads(r.content)[0]['title']))

# Both give the same result, so the built-in r.json() is the more convenient choice

# Python3 Encoding Error 乱码处理

In [None]:
pin = './dataset/py3-encoding-error-input.txt'
pout = './dataset/py3-encoding-error-output.txt'

# Decode the first input file explicitly as GBK.
with open(pin, 'r', encoding='gbk') as f:
    content = f.read()
    print(content)

# Write with an explicit encoding too: the platform default may be unable to
# represent the decoded characters and would raise UnicodeEncodeError.
with open(pout, 'w', encoding='utf-8') as f:
    f.write(content)


# The second input file is read as UTF-8.
pin = './dataset/py3-encoding-error-input-2.txt'
with open(pin, 'r', encoding='utf-8') as f:
    content = f.read()
    print(content)

In [None]:
pin = './dataset/py3-encoding-error-input-2.txt'
pout = './dataset/py3-encoding-error-output-2.txt'

def smart_reading(path, charsets=('utf-8', 'gbk', 'UTF-16LE')):
    """
    Read a text file by trying several encodings in turn.

    Args:
        path: Location of the file to read.
        charsets: Encoding names to try, in order. Order matters: a
            permissive encoding can decode bytes without raising and still
            produce the wrong text, so list the strictest candidates first.

    Returns:
        The text decoded with the first encoding that does not raise
        UnicodeDecodeError, or the string 'n/a' when every encoding fails.

    Other errors, such as a missing file, are not caught and propagate.
    """
    for enc in charsets:
        try:
            with open(path, 'r', encoding=enc) as f:
                return f.read()
        except UnicodeDecodeError:
            # Report the failed attempt and fall through to the next encoding.
            print('Failed loading with encoding [%s] for %s.'%(enc, path))

    return 'n/a'

smart_reading(pin)