In [1]:
import csv

with open("laptops.csv") as file:
    reader = csv.reader(file)
    rows = list(reader)
    header = rows[0]
    rows = rows[1:]
    
print(header)
print("\n")
print(rows[:4])

['Id', 'Company', 'Product', 'TypeName', 'Inches', 'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight', 'Price']


[['6571244', 'Apple', 'MacBook Pro', 'Ultrabook', '13.3', 'IPS Panel Retina Display 2560x1600', 'Intel Core i5 2.3GHz', '8GB', '128GB SSD', 'Intel Iris Plus Graphics 640', 'macOS', '1.37kg', '1339'], ['7287764', 'Apple', 'Macbook Air', 'Ultrabook', '13.3', '1440x900', 'Intel Core i5 1.8GHz', '8GB', '128GB Flash Storage', 'Intel HD Graphics 6000', 'macOS', '1.34kg', '898'], ['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', '575'], ['9722156', 'Apple', 'MacBook Pro', 'Ultrabook', '15.4', 'IPS Panel Retina Display 2880x1800', 'Intel Core i7 2.7GHz', '16GB', '512GB SSD', 'AMD Radeon Pro 455', 'macOS', '1.83kg', '2537']]


## Inventory Class

Below we will create an inventory class to organize the data processing into a consistent framework.

We will add functionality to our inventory class throughout this notebook

In [2]:
class Inventory():
    def __init__(self, csv_filename):
        self.csv_filename = csv_filename

        with open(csv_filename) as file:
            reader = csv.reader(file)
            rows = list(reader)
            self.header = rows[0]
            self.rows = rows[1:]
            for row in self.rows:
                row[-1] = int(row[-1])



In [3]:
inventory = Inventory("laptops.csv")

print(inventory.header)
print("\n")
print(len(inventory.rows))

['Id', 'Company', 'Product', 'TypeName', 'Inches', 'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight', 'Price']


1303


## Add Laptop Id Lookup Function

Below we'll add a function as part of the inventory class to allow lookups by ID.

In [4]:
class Inventory():
    def __init__(self, csv_filename):
        self.csv_filename = csv_filename

        with open(csv_filename) as file:
            reader = csv.reader(file)
            rows = list(reader)
            self.header = rows[0]
            self.rows = rows[1:]
            for row in self.rows:
                row[-1] = int(row[-1])

    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None
            




In [5]:
inventory = Inventory("laptops.csv")

print(inventory.get_laptop_from_id("3362737"))
print("\n")
print(inventory.get_laptop_from_id("3362736"))

['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', 575]


None


## Modifying get_laptop_from_id to increase speed

To speed up performance, we will proceprocess the data into a dictionary where the keys are the IDs and the values the rows.

This would greatly increase performance if our data set was millions of rows as currently using the for loop has time complexity of o(N).

In [27]:
class Inventory():
    def __init__(self, csv_filename):
        self.csv_filename = csv_filename
        self.id_to_row = {}
        
        with open(csv_filename) as file:
            reader = csv.reader(file)
            rows = list(reader)
            self.header = rows[0]
            self.rows = rows[1:]
            for row in self.rows:              
                row[-1] = int(row[-1])
            self.id_to_row = {}
            for row in self.rows:
                self.id_to_row[row[0]] = row 

    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None
            
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        return None

In [28]:
inventory = Inventory("laptops.csv")

print(inventory.get_laptop_from_id_fast("3362737"))
print("\n")
print(inventory.get_laptop_from_id_fast("3362736"))

['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', 575]


None


## Measuring Performance

Below we'll measure the performance of the two different laptop lookup functions to see if our hypothesis is correct that the lookup with the dictionary should have a time complexity of o(1) whch is significantly faster than o(N), while accounting for increased memory usage of storing the data in the dictionary

In [29]:
import time
import random 

ids = [str(random.randint(1000000, 9999999) for _ in range(10000))]

In [36]:
inventory = Inventory("laptops.csv")

total_time_no_dict = 0
for id in ids:
    start = time.time()
    inventory.get_laptop_from_id(id)
    end = time.time()
    total_time_no_dict += (end - start)
    
total_time_dict = 0
for id in ids:
    start = time.time()
    inventory.get_laptop_from_id_fast(id)
    end = time.time()
    total_time_dict += (end - start)

print(total_time_no_dict)
print("\n")
print(total_time_dict)

0.00015020370483398438


2.6226043701171875e-06


## Performance Review

As we can see, for the two results:
* Without the dictionary took .15\*\* -3
* With the dictionary .26\*\* -5

Our performance is 5.7X times as fast with the dictionary (.00015 / .000026)

## Adding Promotion Function

In our hypothetial example - Sometimes, our store offers a promotion where given a gift card, a customer can use the gift to buy up to two laptops. To avoid having to keep track of what was already spent, the gift card has a single time usage. This means that, even if there is leftover money, it cannot be used anymore.

Whenver a gift card is issued, we want to make sure that there is at least one way to spend it in full


In [37]:
class Inventory():
    def __init__(self, csv_filename):
        self.csv_filename = csv_filename
        self.id_to_row = {}
        
        with open(csv_filename) as file:
            reader = csv.reader(file)
            rows = list(reader)
            self.header = rows[0]
            self.rows = rows[1:]
            for row in self.rows:              
                row[-1] = int(row[-1])
            self.id_to_row = {}
            for row in self.rows:
                self.id_to_row[row[0]] = row 

    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None
            
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        return None
    
    def check_promotion_dollars(self, dollars):
        for row in self.rows:
            if row[-1] == dollars:
                return True
        for n in self.rows:
            for j in self.rows:
                if n[-1] + j[-1] == dollars:
                    return True
        return False


In [38]:
inventory = Inventory("laptops.csv")

print(inventory.check_promotion_dollars(1000))
print("\n")
print(inventory.check_promotion_dollars(442))

True


False


## Modifying check_promotion_dollars to increase speed

To speed up performance, we will proceprocess all prices into a set, where the dollar amount submitted can be quickly checked to see if it exists in the set, versus double-looping to check for individual and pairs of laptops that meet the price.

This would greatly increase performance if our data set was millions of rows as currently using the for loop has time complexity of o(N\*\*2) versus O(1) of checking a set.

In [42]:
class Inventory():
    def __init__(self, csv_filename):
        self.csv_filename = csv_filename
        self.id_to_row = {}
        self.prices = set()
        
        with open(csv_filename) as file:
            reader = csv.reader(file)
            rows = list(reader)
            self.header = rows[0]
            self.rows = rows[1:]
            for row in self.rows:              
                row[-1] = int(row[-1])
            self.id_to_row = {}
            for row in self.rows:
                self.id_to_row[row[0]] = row 
            for row in self.rows:
                self.prices.add(row[-1])

    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None
            
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        return None
    
    def check_promotion_dollars(self, dollars):
        for row in self.rows:
            if row[-1] == dollars:
                return True
        for n in self.rows:
            for j in self.rows:
                if n[-1] + j[-1] == dollars:
                    return True
        return False

    def check_promotion_dollars_fast(self, dollars):
        if dollars in self.prices:
            return True
        for price in self.prices:
            if dollars - price in self.prices:
                return True
        return False


In [43]:
inventory = Inventory("laptops.csv")

print(inventory.check_promotion_dollars(1000))
print("\n")
print(inventory.check_promotion_dollars(442))

True


False


In [44]:
prices = [random.randint(100, 5000) for _ in range(100)]

inventory = Inventory("laptops.csv")

total_time_no_set = 0
total_time_set = 0

for price in prices:
    start = time.time()
    inventory.check_promotion_dollars(price)
    end = time.time()
    total_time_no_set += (end - start)
    
for price in prices:
    start = time.time()
    inventory.check_promotion_dollars_fast(price)
    end = time.time()
    total_time_set += (end - start)
    
print(total_time_no_set)
print("\n")
print(total_time_set)

1.415714979171753


0.0006127357482910156


## Performance Review

As we can see, for the two results:
* Without the set took 1.41 seconds
* With the set .6\*\* -3

Our performance is 235X times as fast with the dictionary (1.41 / .0006)

## Add Binary Search Algorithm For Price

We want to write a method that efficiently answers the query: Given a budget of D dollars, find all laptops whose price it at most D.



In [59]:
def row_price(row):
    return row[-1]

class Inventory():
    def __init__(self, csv_filename):
        self.csv_filename = csv_filename
        self.id_to_row = {}
        self.prices = set()
    
        with open(csv_filename) as file:
            reader = csv.reader(file)
            rows = list(reader)
            self.header = rows[0]
            self.rows = rows[1:]
            for row in self.rows:              
                row[-1] = int(row[-1])
            self.id_to_row = {}
            for row in self.rows:
                self.id_to_row[row[0]] = row 
            for row in self.rows:
                self.prices.add(row[-1])
            for row in self.rows:
                self.prices.add(int(row[-1]))
            self.rows_by_price = sorted(self.rows, key=row_price)

    def get_laptop_from_id(self, laptop_id):
        for row in self.rows:
            if row[0] == laptop_id:
                return row
        return None
            
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        return None
    
    def check_promotion_dollars(self, dollars):
        for row in self.rows:
            if row[-1] == dollars:
                return True
        for n in self.rows:
            for j in self.rows:
                if n[-1] + j[-1] == dollars:
                    return True
        return False

    def check_promotion_dollars_fast(self, dollars):
        if dollars in self.prices:
            return True
        for price in self.prices:
            if dollars - price in self.prices:
                return True
        return False

    def find_first_laptop_more_expensive(self, target_price):
        range_start = 0                                   
        range_end = len(self.rows_by_price) - 1                       
        while range_start < range_end:
            range_middle = (range_end + range_start) // 2  
            price = self.rows_by_price[range_middle][-1]
            if price > target_price:
                range_end = range_middle
            else:
                range_start = range_middle + 1
        if self.rows_by_price[range_start][-1] <= target_price:
            return -1                                      
        return range_start


In [60]:
inventory = Inventory("laptops.csv")

print(inventory.find_first_laptop_more_expensive(1000))
print("\n")
print(inventory.find_first_laptop_more_expensive(10000))

683


-1
