/
unicode_basics.py
51 lines (40 loc) · 1.26 KB
/
unicode_basics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#-*- coding: utf-8-*-
#!!!we talk only about Python2 in these examples!!!
# two types that deal with text:
#---->str
#---->unicode
#for example:
str_type = 'some text'
print type(str_type)
unicode_type = u'some text'
print type(unicode_type)
#print them
print str_type
print unicode_type
#are these equal?
'some text' == u'some text'
###############################################################
#decode bytes
print 'decoded bytes from ascii:', 'some text'.decode('ascii')
print 'decoded bytes from utf-8:', 'some text'.decode('utf-8')
print 'decoded bytes from utf-8:', u'some text: ÅÅÅ'.encode('utf-8')
#this works
print 'some str stuff'
#this works too
print u'some unicode stuff: ÅÅÅÅ'
###############################################################
#writing to a file requires bytes
with open ('test', 'w') as f:
f.write('ÄÄÄÄ')
f.write(u'ÖÖÖ'.encode('utf-8'))
#text read from the file is in bytes -> str
with open('test', 'r') as f:
res = f.readline()
print 'type:', type(res)
unicode_res = res.decode('utf-8')
print unicode_res.encode('utf-8'), type(unicode_res)
#writing to a file without encoding it
#error, because print tries to encode into 'ascii'
#with open('test', 'a+') as f:
#f.write(u'ÅÅÅÅ')
print u'ÅÅÅÅ'.encode('utf-8')