# 集合set
- set是一个无序且不重复的元素集合
- 因为集合本身是无序的，不可以为集合创建索引或执行切片(slice)操作

In [1]:
# Creating a set: set() with no arguments builds an empty set.
s = set() 
print(s)

set()


In [5]:
# Passing a string to set() keeps each distinct character exactly once.
a = set('boys like football')
print(a)

{'o', 'y', 'k', 'i', 's', 'a', ' ', 'f', 'e', 't', 'l', 'b'}


In [3]:
# A set literal; the repeated 'k2' is stored only once.
d = {'k1', 'k2', 'k2'}
print(d)

{'k2', 'k1'}


In [4]:
# Building a set from a list drops the duplicate 'o'.
b = set(['y', 'b', 'o', 'o'])
print(b)

{'y', 'o', 'b'}


## 集合基本操作

In [14]:
# add() inserts a single element into the set in place.
d.add('add1')
d

{'add1', 'k1', 'k2'}

In [15]:
# update() adds every element of another iterable (here the set a) in place.
d.update(a)
print(d)

{'o', 'add1', 'y', 'k', 'i', 's', 'k2', 'k1', 'l', 'b', 'a', ' ', 'f', 'e', 't'}


In [12]:
# discard() removes an element if it is present and silently does nothing
# otherwise, so discarding the missing '22' raises no error.
d.discard('22')
d.discard('k1')
print(d)

{'o', 'add1', 'y', 'k', 'i', 's', 'k2', 'l', 'b', 'a', ' ', 'f', 'e', 't'}


In [13]:
# Unlike discard(), remove() raises KeyError when the element is missing.
d.remove('22')

KeyError: '22'

## 集合间运算

In [52]:
# Operands for the set-algebra demo (the repeated 6 in s2 is stored once).
s1 = {1, 2, 3, 4, 5}
s2 = {4, 5, 6, 6, 7}
s3 = {2, 3, 4}

# | is union, & is intersection, - is difference (order matters for -).
print('s1|s2:', s1 | s2)
print('s1&s2:', s1 & s2)
print('s1-s2:', s1 - s2)
print('s2-s1:', s2 - s1)

s1|s2: {1, 2, 3, 4, 5, 6, 7}
s1&s2: {4, 5}
s1-s2: {1, 2, 3}
s2-s1: {6, 7}


In [54]:
# What is the rule here?
# Hint: on sets, > and < test for proper superset / proper subset,
# they do not compare sizes or element values.
s1 = {1, 2, 3, 4, 5}
s2 = {4, 5, 6, 6, 7}
s3 = {2, 3, 4}

print(s2 > s1)
print(s2 < s1)
print(s3 > s1)
print(s3 < s1)

False
False
False
True


## 类型转换
> set、list、tuple、str 等内置类型之间基本都可以相互转换（注意：str() 得到的是集合的文本表示，而不是把元素拼接起来）

In [55]:
# Convert the set s1 into a list, a tuple and a string.
li = list(s1)
tu = tuple(s1)
st = str(s1)  # the textual representation of the set, not its joined elements

# Show each converted value next to its type.
for converted in (li, tu, st):
    print(converted, type(converted))

[1, 2, 3, 4, 5] <class 'list'>
(1, 2, 3, 4, 5) <class 'tuple'>
{1, 2, 3, 4, 5} <class 'str'>


## Query speed comparison: set vs. list

In [1]:
# Build one million consecutive integers as a list, then the same values as a
# set, so that membership tests on both containers can be timed.
# list(range(...)) materialises the range directly; a comprehension that only
# copies each item is unnecessary.
ilist = list(range(1000000))
iset = set(ilist)
print(len(ilist), len(iset))

1000000 1000000


In [2]:
%%time
# Time 2000 membership tests against the list. The first 1000 probes are
# negative numbers that are not in ilist, so each one scans the whole list.
r1 = []
for i in range(-1000,1000):
    r1.append(i in ilist)
print(r1[:10],r1[-10:])

[False, False, False, False, False, False, False, False, False, False] [True, True, True, True, True, True, True, True, True, True]
Wall time: 10.1 s


In [3]:
%%time
# Same 2000 membership tests, this time against the set; compare the
# reported wall time with the list version.
r2 = []
for i in range(-1000,1000):
    r2.append(i in iset)
print(r2[:10],r2[-10:])

[False, False, False, False, False, False, False, False, False, False] [True, True, True, True, True, True, True, True, True, True]
Wall time: 998 µs


# 字典Dict
- 字典和列表一样使用非常广泛
- 字典管理的是由key和value组成的成对的数据
- 和列表不同的是，字典用key来索引

In [19]:
# A dict literal: each entry maps a key (a name) to a value (a number).
tel = {'jack': 4098, 'sape': 4139}
tel

{'jack': 4098, 'sape': 4139}

In [21]:
# Values are looked up by key using square brackets.
tel['jack'], tel['sape']

(4098, 4139)

In [22]:
# Looking up a key that does not exist raises KeyError.
tel['jac']

KeyError: 'jac'

## 创建方法

In [32]:
# Creation method 1: dict() from a list of (key, value) tuples.
dict([('sape', 4139), ('guido', 4127), ('jack', 4098)])

{'sape': 4139, 'guido': 4127, 'jack': 4098}

In [33]:
# Creation method 2: a dict comprehension mapping each number to its square.
{x: x**2 for x in (2, 4, 6)}

{2: 4, 4: 16, 6: 36}

In [34]:
# Creation method 3: dict() with keyword arguments; each keyword becomes a
# string key.
dict(sape=4139, guido=4127, jack=4098)

{'sape': 4139, 'guido': 4127, 'jack': 4098}

## key条件
key必须是不可变（可哈希，hashable）类型
- string
- 数字
- 元组（只包含string、数值和元组）

In [25]:
# Keys of three different hashable types: an int, a str and a tuple.
key_d = {1: 'number', '2': 'string', (2, '56'): 'tuple'}
key_d

{1: 'number', '2': 'string', (2, '56'): 'tuple'}

In [26]:
# Intentional error: a list is mutable and therefore unhashable, so using it
# as a key raises TypeError.
key_d = {1:'number','2':'string',[2,'56']:'tuple'}
key_d

TypeError: unhashable type: 'list'

## 字典里的‘=’
- 赋值：key已存在时，`d[key] = value` 会覆盖原有的value
- 新建：key不存在时，同样的写法会新增一个键值对
> 使用时必须注意：key写错时不会报错，而是悄悄新建一个键值对

In [27]:
# Start from a fresh two-entry dict for the assignment examples.
tel = {'jack': 4098, 'sape': 4139}

In [28]:
# 'jack' is already a key, so assignment overwrites its value.
tel['jack'] = 4500
tel

{'jack': 4500, 'sape': 4139}

In [29]:
# 'poo' is not a key yet, so the same syntax inserts a new key-value pair.
tel['poo'] = 4213
tel

{'jack': 4500, 'sape': 4139, 'poo': 4213}

## keys和values

In [35]:
# keys() returns a dict_keys view of the dict's keys.
tel.keys()

dict_keys(['jack', 'sape', 'poo'])

In [36]:
# values() returns a dict_values view of the dict's values.
tel.values()

dict_values([4500, 4139, 4213])

## 遍历

In [40]:
# Iterating over a dict yields its keys; each value is then fetched by key.
for key in tel:
    print(key, tel[key])

jack 4500
sape 4139
poo 4213


In [45]:
# items() yields (key, value) pairs, so no separate lookup is needed.
for k, v in tel.items():
    print(k, v)

jack 4500
sape 4139
poo 4213


## 实际场景创建字典

In [48]:
# Sample records as (key, value) pairs; keys 'a' and 'b' occur twice.
data = [
    ('a', 2),
    ('b', 2),
    ('c', 7),
    ('a', 3),
    ('b', 6),
]

In [49]:
# dict() keeps only the last value seen for a repeated key.
d = dict(data)
d

{'a': 3, 'b': 6, 'c': 7}

In [50]:
# Manual equivalent of dict(data): a later pair overwrites an earlier one
# that has the same key.
d = {}
for key, value in data:
    d[key] = value
d

{'a': 3, 'b': 6, 'c': 7}

In [51]:
# Sum the values per key: get() supplies 0 the first time a key is seen.
d = {}
for k, v in data:
    d[k] = d.get(k, 0) + v
d

{'a': 5, 'b': 8, 'c': 7}

In [52]:
# Group the values per key: setdefault() creates the empty list the first
# time a key is seen and returns the existing list afterwards.
d = {}
for k, v in data:
    d.setdefault(k, []).append(v)
d

{'a': [2, 3], 'b': [2, 6], 'c': [7]}