## Remove duplicates from a list

In [1]:
# Sample input: 'hello' and 'world' each appear twice, every other word once.
test_list = [
    "hello", "world", "this", "is", "my",
    "hello", "world", "list!",
]

### Remove duplicates without preserving order using set()

In [2]:
%%timeit
# Time deduplication via set(): duplicates are dropped, but the resulting
# order is not tied to the order of test_list.
jumbled_list = list(set(test_list))

2.14 µs ± 738 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [3]:
# A set keeps one copy of each item; converting it back to a list gives the
# unique items in whatever order the set iterates them.
unique_items = set(test_list)
jumbled_list = list(unique_items)
print(jumbled_list)

['hello', 'list!', 'this', 'is', 'world', 'my']


### Preserve order using dict.fromkeys() (from [Raymond Hettinger's answer](https://stackoverflow.com/a/39835527))

In [4]:
%%timeit
# Time deduplication via dict.fromkeys(): each distinct item becomes one key,
# and listing the dict returns its keys.
preserved_order_list = list(dict.fromkeys(test_list))

3 µs ± 444 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [5]:
# dict keys are unique and (since Python 3.7) iterate in insertion order, so
# the first occurrence of each item determines its position in the result.
first_seen = dict.fromkeys(test_list)
preserved_order_list = list(first_seen)
print(preserved_order_list)

['hello', 'world', 'this', 'is', 'my', 'list!']


## Set lookup is much faster than list lookup

In [6]:
# Build the same 10**6 integers (0 .. 10**6 - 1) as both a list and a set so
# the membership tests below probe identical contents.
# list(range(...)) replaces the identity comprehension `[num for num in range(...)]`.
million_list = list(range(10**6))
million_set = set(million_list)

In [7]:
import random

In [8]:
%%timeit -r10
# The timed statement includes the random.randint() call itself.
# Probes are drawn from 0..2*10**6 while million_list holds 0..10**6-1,
# so roughly half of the lookups are misses.
random.randint(0, 2*10**6) in million_list

18.6 ms ± 6.55 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [9]:
%%timeit -r10
# The timed statement includes the random.randint() call itself.
# Probes are drawn from 0..2*10**6 while million_set holds 0..10**6-1,
# so roughly half of the lookups are misses.
# NOTE(review): the randint() call may account for much of this timing -- time it alone to confirm.
random.randint(0, 2*10**6) in million_set

4.24 µs ± 1.56 µs per loop (mean ± std. dev. of 10 runs, 100000 loops each)


## itertools.repeat() is faster than range() for looping N times

In [10]:
from itertools import repeat

In [11]:
%%timeit -r10
# Empty loop body: the measurement is essentially the cost of iterating
# 10**6 times over range().
for _ in range(10**6): pass

42.5 ms ± 7.88 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)


In [12]:
%%timeit -r10
# Same empty loop, driven by itertools.repeat(None, 10**6), which yields
# None 10**6 times; the loop variable is never used.
for _ in repeat(None, 10**6): pass

18.2 ms ± 6.43 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


## Use map() to cast all elements to a type

In [13]:
str_list = ['1', '2', '3']
# map() returns a lazy iterator that applies int to each element on demand;
# list() consumes it to produce the converted values.
as_ints = map(int, str_list)
int_list = list(as_ints)
print(int_list)

[1, 2, 3]


## collections.Counter() for counting how many times each item appears in a list

In [14]:
from collections import Counter

# Draw 10**3 random integers in 0..9 (randint includes both endpoints).
rand_nums_under_ten = [random.randint(0, 9) for _ in repeat(None, 10**3)]
# Counter tallies how many times each distinct value occurs.
rand_under_ten_counter = Counter(rand_nums_under_ten)

# most_common() yields (value, count) pairs from most to least frequent;
# emit a tab-separated table with one row per pair.
table_rows = [f"{num}\t{count}" for num, count in rand_under_ten_counter.most_common()]
print("\n".join(["Num\tCount", *table_rows]))

Num	Count
4	105
5	105
2	105
6	101
9	101
0	99
1	99
8	98
7	97
3	90
