## Remove duplicates from a list

In [1]:
# Sample data: 'hello' and 'world' each occur twice so de-duplication has work to do.
test_list = [
    'hello',
    'world',
    'this',
    'is',
    'my',
    'hello',
    'world',
    'list!',
]

### Remove duplicates without preserving order using set()

In [2]:
%%timeit
# Time de-duplication by round-tripping through a set. A set keeps one copy of
# each element but does not keep the original element order.
jumbled_list = list(set(test_list))

717 ns ± 24.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [3]:
# Unpack the set straight into a list (same result as list(set(test_list)));
# duplicates are dropped and the resulting order is arbitrary.
jumbled_list = [*set(test_list)]
print(jumbled_list)

['hello', 'is', 'world', 'list!', 'this', 'my']


### Preserve order using dict.fromkeys() (from [Raymond Hettinger's answer](https://stackoverflow.com/a/39835527))

In [4]:
%%timeit
# Time de-duplication via dict.fromkeys(): each element becomes a dict key, so
# repeats collapse into one entry while keys stay in first-seen order.
preserved_order_list = list(dict.fromkeys(test_list))

925 ns ± 45 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [5]:
# dict.fromkeys() keeps one key per distinct element in first-seen order;
# unpacking the dict yields those keys (same result as list(dict.fromkeys(...))).
preserved_order_list = [*dict.fromkeys(test_list)]
print(preserved_order_list)

['hello', 'world', 'this', 'is', 'my', 'list!']


## Set lookup is much faster than list lookup

In [6]:
# One million consecutive integers (0 .. 10**6 - 1), stored both as a list and
# as a set so the two membership tests below run against identical contents.
# list(range(...)) materialises the range directly; an element-by-element
# comprehension that just copies each value adds nothing.
million_list = list(range(10**6))
million_set = set(million_list)

In [7]:
import random

In [8]:
%%timeit -r10
# Membership test against the list. The probe is drawn from 0..2*10**6 while
# million_list holds 0..10**6-1, so roughly half of the probes are misses.
# NOTE(review): random.randint() is part of the timed statement, so its cost is
# included in the reported time.
random.randint(0, 2*10**6) in million_list

10.7 ms ± 378 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [9]:
%%timeit -r10
# Same probe as the list benchmark, but tested against the set built from the
# same values.
# NOTE(review): random.randint() is part of the timed statement; for a lookup
# this fast its overhead may account for much of the reported time — confirm by
# timing random.randint(0, 2*10**6) on its own.
random.randint(0, 2*10**6) in million_set

1.8 µs ± 105 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)


## itertools.repeat() is faster than range() for running N loops

In [10]:
from itertools import repeat

In [11]:
%%timeit -r10
# Baseline: the loop body is empty, so only the cost of iterating over
# range(10**6) is measured.
for _ in range(10**6): pass

31.2 ms ± 1.44 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)


In [12]:
%%timeit -r10
# Same empty loop, driven by repeat(None, 10**6), which yields None 10**6 times;
# the loop variable is never used, so only the iteration count matters.
for _ in repeat(None, 10**6): pass

12.6 ms ± 689 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


## Use map() to cast all elements to a type

In [13]:
# Numeric values held as strings.
str_list = ["1", "2", "3"]
# map() applies int to every element; list() collects the converted values.
int_list = list(map(int, str_list))
print(int_list)

[1, 2, 3]


## collections.Counter() for counting how many times each item appears in a list

In [14]:
from collections import Counter

# Draw the sample from a dedicated, seeded generator so the tally is identical
# on every run and the global `random` state is left untouched.
# NOTE: any output recorded before this seed was introduced will differ until
# the cell is re-executed.
seeded_rng = random.Random(42)
rand_nums_under_ten = [seeded_rng.randint(0, 9) for _ in repeat(None, 10**3)]

# Counter tallies how often each value occurs; most_common() yields
# (value, count) pairs ordered from the highest count to the lowest.
rand_under_ten_counter = Counter(rand_nums_under_ten)
print("Num\tCount")
for num, count in rand_under_ten_counter.most_common():
    print(f"{num}\t{count}")

Num	Count
2	115
7	114
4	103
8	100
1	99
5	98
0	96
6	96
9	95
3	84


## collections.deque() for very quick left appends

In [15]:
from collections import deque

In [16]:
%%timeit
# Build a 10**4-element list by always inserting at index 0 (the left end).
# NOTE(review): `test_list` is also the name of the sample list defined in the
# first cell — presumably unrelated; a distinct name would avoid confusion.
test_list = []
for _ in repeat(None, 10**4):
    test_list.insert(0, 1)

32.2 ms ± 5.99 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [17]:
%%timeit
# Same workload as the list.insert(0, ...) benchmark, using deque.appendleft()
# to add each of the 10**4 items at the left end.
test_deque = deque()
for _ in repeat(None, 10**4):
    test_deque.appendleft(1)

884 µs ± 39.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### lists and deques are comparable for right appends

In [18]:
%%timeit
# Build a 10**4-element list by appending at the right end.
# NOTE(review): `test_list` is also the name of the sample list defined in the
# first cell — presumably unrelated; a distinct name would avoid confusion.
test_list = []
for _ in repeat(None, 10**4):
    test_list.append(1)

969 µs ± 166 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [19]:
%%timeit
# Same right-end workload as the list.append() benchmark, using deque.append()
# for each of the 10**4 items.
test_deque = deque()
for _ in repeat(None, 10**4):
    test_deque.append(1)

1.18 ms ± 252 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
