# Optimizing Code: Common Books
Here's the code your coworker wrote to find the common book ids in `books_published_last_two_years.txt` and `all_coding_books.txt` to obtain a list of recent coding books.

In [1]:
import time
import pandas as pd
import numpy as np

In [2]:
with open('books_published_last_two_years.txt') as f:
    recent_books = f.read().split('\n')
    
with open('all_coding_books.txt') as f:
    coding_books = f.read().split('\n')
    
print(f"size of books_published_last_two_years.txt: {len(recent_books)}")
print(f"size of all_coding_books.txt: {len(coding_books)}")

size of books_published_last_two_years.txt: 24159
size of all_coding_books.txt: 32250


In [3]:
start = time.time()
recent_coding_books = []

for book in recent_books:
    if book in coding_books:
        recent_coding_books.append(book)

print(len(recent_coding_books))
print('Duration: {} seconds'.format(time.time() - start))

96
Duration: 13.189358234405518 seconds


In [4]:
start = time.time()
recent_coding_books = []

for book in coding_books:
    if book in recent_books:
        recent_coding_books.append(book)

print(len(recent_coding_books))
print('Duration: {} seconds'.format(time.time() - start))

96
Duration: 15.141213655471802 seconds


### Tip #1: Use vector operations over loops when possible

Use numpy's `intersect1d` method to get the intersection of the `recent_books` and `coding_books` arrays.

In [7]:
start = time.time()
recent_coding_books =  np.intersect1d(recent_books, coding_books) # TODO: compute intersection of lists
print(len(recent_coding_books))
print('Duration : {} seconds'.format(time.time() - start))

96
Duration : 0.035904645919799805 seconds


In [8]:
start = time.time()
recent_coding_books =  np.intersect1d(coding_books, recent_books) # TODO: compute intersection of lists
print(len(recent_coding_books))
print('Duration : {} seconds'.format(time.time() - start))

96
Duration : 0.0358738899230957 seconds


### Tip #2: Know your data structures and which methods are faster
Use the set's `intersection` method to get the common elements in `recent_books` and `coding_books`.

In [9]:
start = time.time()
recent_coding_books =  list(set.intersection(set(recent_books), set(coding_books)))
print(len(recent_coding_books))
print('Duration : {} seconds'.format(time.time() - start))

96
Duration : 0.009975671768188477 seconds


In [13]:
start = time.time()
recent_coding_books =  set(recent_books).intersection(coding_books)
print(len(recent_coding_books))
print('Duration : {} seconds'.format(time.time() - start))

96
Duration : 0.005983591079711914 seconds


In [11]:
start = time.time()
recent_coding_books =  [set.intersection(set(recent_books), set(coding_books))]
print(len(recent_coding_books))
print('Duration : {} seconds'.format(time.time() - start))

1
Duration : 0.005983114242553711 seconds


In [12]:
start = time.time()
recent_coding_books =  [set.intersection(set(coding_books), set(recent_books))]
print(len(recent_coding_books))
print('Duration : {} seconds'.format(time.time() - start))

1
Duration : 0.007979631423950195 seconds
