In [1]:
usuarios_data_science = [15, 23, 43, 56]
usuarios_machine_learning = [13, 23, 56, 42] # existem usuários em comum nas duas listas

In [3]:
# Concatenation builds a brand-new list, so neither course list is modified.
assistiram = usuarios_data_science + usuarios_machine_learning
assistiram  # a list keeps duplicates, so users present in both courses appear twice

[15, 23, 43, 56, 13, 23, 56, 42]

In [4]:
len(assistiram)

8

In [5]:
set(assistiram) # não possuem elementos repetidos

{13, 15, 23, 42, 43, 56}

In [23]:
set([1,2,3,1]) # set(...) or a {a, b, ...} literal builds a SET (careful: an empty {} is a dict, not a set). Note: sets are mutable

{1, 2, 3}

In [10]:
{4,1,2,3,1}

{1, 2, 3, 4}

In [12]:
usuarios_data_science = {15, 23, 43, 56}
usuarios_machine_learning = {13, 23, 56, 42}

In [13]:
# Sets do not support positional indexing. The TypeError is the point of this
# cell, so it is caught and shown instead of aborting a "Restart & Run All".
try:
    usuarios_machine_learning[3]
except TypeError as erro:
    print("{}: {}".format(type(erro).__name__, erro))

TypeError: 'set' object is not subscriptable

In [15]:
for usuario in set(assistiram):
    print(usuario)

42
43
13
15
23
56


In [16]:
usuarios_data_science | usuarios_machine_learning

{13, 15, 23, 42, 43, 56}

In [17]:
usuarios_data_science & usuarios_machine_learning

{23, 56}

In [18]:
usuarios_data_science - usuarios_machine_learning

{15, 43}

In [19]:
fez_ds_mas_nao_fez_ml = usuarios_data_science - usuarios_machine_learning
15 in fez_ds_mas_nao_fez_ml

True

In [20]:
23 in fez_ds_mas_nao_fez_ml

False

In [21]:
usuarios_data_science ^ usuarios_machine_learning # XOR

{13, 15, 42, 43}

In [22]:
usuarios = {1,5,76,34,52,13,17}
len(usuarios)

7

In [24]:
usuarios.add(13)
len(usuarios)

7

In [25]:
usuarios.add(765)
len(usuarios)

8

In [26]:
# Se quisermos tornar um conjunto imutável:
usuarios = frozenset(usuarios)
usuarios

frozenset({1, 5, 13, 17, 34, 52, 76, 765})

In [27]:
type(usuarios)

frozenset

In [28]:
# A frozenset exposes no mutating methods, so .add() fails with AttributeError.
# The error is the demonstration: catch and display it so later cells still run.
try:
    usuarios.add(134)
except AttributeError as erro:
    print("{}: {}".format(type(erro).__name__, erro))

AttributeError: 'frozenset' object has no attribute 'add'

In [31]:
meu_texto = "a b c d e a a a a a a a a"
meu_texto.split()

['a', 'b', 'c', 'd', 'e', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']

In [32]:
set(meu_texto.split())

{'a', 'b', 'c', 'd', 'e'}

# Dicionários

In [34]:
# Maps each word to the number of times it appeared.
aparicoes = {
    "Matheus": 1,
    "cachorro": 2,
    "nome": 2,
    "vindo": 1,
}

In [35]:
type(aparicoes)

dict

In [36]:
aparicoes["Matheus"]

1

In [37]:
aparicoes["cachorro"]

2

In [38]:
# Subscripting a dict with a missing key raises KeyError. The error is the
# demonstration, so it is caught and displayed rather than halting the notebook.
try:
    aparicoes["xpto"]
except KeyError as erro:
    print("{}: {}".format(type(erro).__name__, erro))

KeyError: 'xpto'

In [39]:
aparicoes.get("xpto", 0)

0

In [40]:
aparicoes.get("cachorro", 0)

2

In [41]:
aparicoes = dict(Guilherme = 2, cachorro = 1)
aparicoes

{'Guilherme': 2, 'cachorro': 1}

In [46]:
aparicoes["Carlos"] = 1
aparicoes

{'Guilherme': 2, 'cachorro': 1, 'Carlos': 1}

In [47]:
aparicoes["Carlos"] = 2
aparicoes

{'Guilherme': 2, 'cachorro': 1, 'Carlos': 2}

In [48]:
del aparicoes["Carlos"]
aparicoes

{'Guilherme': 2, 'cachorro': 1}

In [49]:
"cachorro" in aparicoes

True

In [50]:
"Carlos" in aparicoes

False

In [53]:
for elemento in aparicoes:
    print(elemento, ":", aparicoes[elemento])

Guilherme : 2
cachorro : 1


In [54]:
for elemento in aparicoes.values():
    print(elemento)

2
1


In [55]:
for elemento in aparicoes.items():
    print(elemento)

('Guilherme', 2)
('cachorro', 1)


In [57]:
["palavra {}".format(chave) for chave in aparicoes.keys()]

['palavra Guilherme', 'palavra cachorro']

In [63]:
meu_texto = "Bem vindo meu nome é Matheus e meu sobrenome é Cortez"
# Lower-case first so that words differing only by case share one counter.
meu_texto = meu_texto.lower()

# Manual word count with a plain dict: .get() supplies 0 for words not seen yet.
aparicoes = {}

for palavra in meu_texto.split():
    aparicoes[palavra] = aparicoes.get(palavra, 0) + 1

aparicoes

{'bem': 1,
 'vindo': 1,
 'meu': 2,
 'nome': 1,
 'é': 2,
 'matheus': 1,
 'e': 1,
 'sobrenome': 1,
 'cortez': 1}

In [67]:
from collections import defaultdict

aparicoes = defaultdict(int) # defaultdict chama uma função que retorna 0

for palavra in meu_texto.split():
    ate_agora = aparicoes[palavra]
    aparicoes[palavra] = ate_agora + 1
    
aparicoes

defaultdict(int,
            {'bem': 1,
             'vindo': 1,
             'meu': 2,
             'nome': 1,
             'é': 2,
             'matheus': 1,
             'e': 1,
             'sobrenome': 1,
             'cortez': 1})

In [68]:
dicionario = defaultdict(int)
dicionario['guilherme']

0

In [69]:
dicionario['guilherme'] = 15
dicionario['guilherme']

15

In [70]:
aparicoes = defaultdict(int)

for palavra in meu_texto.split():
    aparicoes[palavra] += 1
    
aparicoes

defaultdict(int,
            {'bem': 1,
             'vindo': 1,
             'meu': 2,
             'nome': 1,
             'é': 2,
             'matheus': 1,
             'e': 1,
             'sobrenome': 1,
             'cortez': 1})

In [76]:
class Conta:
    """Minimal account type whose constructor announces every instantiation."""

    def __init__(self):
        """Print a message so it is visible when a new instance is built."""
        mensagem = "Criando uma conta"
        print(mensagem)

In [77]:
contas = defaultdict(Conta)
contas[15]

Criando uma conta


<__main__.Conta at 0x7f23ec83b7f0>

In [78]:
contas[17]

Criando uma conta


<__main__.Conta at 0x7f23ecc95b80>

In [79]:
contas[15]

<__main__.Conta at 0x7f23ec83b7f0>

In [81]:
from collections import Counter

aparicoes = Counter(meu_texto.split())

aparicoes

Counter({'bem': 1,
         'vindo': 1,
         'meu': 2,
         'nome': 1,
         'é': 2,
         'matheus': 1,
         'e': 1,
         'sobrenome': 1,
         'cortez': 1})

# Testando o uso de diversas coleções

In [104]:
texto1 = """
Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum."""
texto2 = """
Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.

The standard chunk of Lorem Ipsum used since the 1500s is reproduced below for those interested. Sections 1.10.32 and 1.10.33 from "de Finibus Bonorum et Malorum" by Cicero are also reproduced in their exact original form, accompanied by English versions from the 1914 translation by H. Rackham."""

texto3 = """
It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
"""

texto4 = """
There are many variations of passages of Lorem Ipsum available, but the majority have suffered alteration in some form, by injected humour, or randomised words which don't look even slightly believable. If you are going to use a passage of Lorem Ipsum, you need to be sure there isn't anything embarrassing hidden in the middle of text. All the Lorem Ipsum generators on the Internet tend to repeat predefined chunks as necessary, making this the first true generator on the Internet. It uses a dictionary of over 200 Latin words, combined with a handful of model sentence structures, to generate Lorem Ipsum which looks reasonable. The generated Lorem Ipsum is therefore always free from repetition, injected humour, or non-characteristic words etc.
"""

In [105]:
def analisa_frequencia_de_letras(texto, quantidade=10, apenas_letras=False):
    """Print the most frequent characters of ``texto`` and their share of the total.

    The text is lower-cased before counting, so "A" and "a" are the same
    character. Each printed line has the form ``<char> => <percent>%`` with
    two decimal places, ordered from most to least frequent.

    Args:
        texto: String to analyse.
        quantidade: How many of the most frequent characters to print.
            Defaults to 10; ``None`` prints all of them.
        apenas_letras: When True, only alphabetic characters (``str.isalpha``)
            are counted, and percentages are relative to the number of
            letters. The default (False) counts every character, including
            whitespace and punctuation.

    Returns:
        None. The report is written to standard output.
    """
    caracteres = texto.lower()
    if apenas_letras:
        caracteres = [caractere for caractere in caracteres if caractere.isalpha()]

    aparicoes = Counter(caracteres)
    total_de_caracteres = sum(aparicoes.values())

    # Ranking by raw count gives the same order as ranking by proportion,
    # because every count is divided by the same total. With no characters to
    # count the loop body never runs, so the division is never reached.
    for caractere, frequencia in aparicoes.most_common(quantidade):
        proporcao = frequencia / total_de_caracteres
        print("{} => {:.2f}%".format(caractere, proporcao * 100))

In [106]:
analisa_frequencia_de_letras(texto1)

  => 15.65%
e => 10.26%
t => 7.48%
s => 6.78%
i => 6.61%
n => 6.61%
a => 5.04%
o => 4.35%
r => 4.17%
l => 3.83%


In [108]:
analisa_frequencia_de_letras(texto3)

  => 16.75%
e => 10.24%
t => 6.99%
i => 6.34%
s => 6.18%
a => 6.18%
o => 6.18%
n => 5.04%
r => 4.39%
l => 4.23%
