<a href="https://colab.research.google.com/github/shah-zeb-naveed/data-science-notes/blob/main/interview_systems.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%writefile lru_cache.py
import sys
import requests
from collections import OrderedDict

class ImageCache:
    """Size-bounded LRU cache of downloaded image bytes, keyed by URL.

    Entries live in an ``OrderedDict`` ordered from least to most recently
    used. ``current_size`` tracks the summed byte length of all cached
    images and is kept at or below ``max_size``.
    """

    def __init__(self, max_size_bytes, timeout=10):
        """Create an empty cache.

        Args:
            max_size_bytes: Upper bound on the total bytes held in the cache.
            timeout: Seconds to wait on the server before a download is
                abandoned. Without a timeout a stalled server would block
                ``get`` indefinitely.
        """
        self.max_size = max_size_bytes
        self.timeout = timeout
        self.cache = OrderedDict()
        self.current_size = 0

    def get(self, url):
        """Look up ``url``, downloading and caching it on a miss.

        Args:
            url: Address of the image to fetch.

        Returns:
            A status string: a cache hit, a successful download that was
            cached, a download that was too large to cache, or a download
            error message.
        """
        if url in self.cache:
            # Move to end to mark as most recently used.
            self.cache.move_to_end(url)
            return f"Cache Hit: {url}"

        try:
            response = requests.get(url, timeout=self.timeout)
            response.raise_for_status()
            image_bytes = response.content
        except requests.exceptions.RequestException as e:
            return f"Error downloading image: {e}"

        image_size = len(image_bytes)

        # An image larger than the whole cache can never fit; skip caching.
        if image_size > self.max_size:
            return f"Too big. Downloaded (Not Cached): {url}"

        # Evict from the least recently used end until the new image fits.
        while self.cache and self.current_size + image_size > self.max_size:
            _, old_bytes = self.cache.popitem(last=False)
            self.current_size -= len(old_bytes)

        self.cache[url] = image_bytes
        self.current_size += image_size
        return f"Downloaded and Cached: {url}"

def main():
    """Run the cache over URLs supplied on the command line.

    Expected invocation::

        python lru_cache.py <max_size_bytes> <num_urls> <url> [<url> ...]

    Prints one status line per URL. Exits with status 1 after printing a
    usage message when either leading numeric argument is missing or is not
    an integer.
    """
    usage = "Usage: python lru_cache.py <max_size_bytes> <num_urls> <url> [<url> ...]"

    # argv[0] is the script name, so the two numeric arguments need len >= 3.
    if len(sys.argv) < 3:
        print(usage)
        sys.exit(1)

    # Alternative: read from file
    # input_file = sys.argv[1]
    # with open(input_file, 'r') as f:
    #     lines = f.read().splitlines()

    # max_size = int(lines[0])
    # num_urls = int(lines[1])
    # urls = lines[2:]

    # read from command line
    try:
        max_size = int(sys.argv[1])
        num_urls = int(sys.argv[2])
    except ValueError:
        print(usage)
        sys.exit(1)

    # Honour the declared count; a short argument list simply yields fewer URLs.
    urls = sys.argv[3:3 + max(num_urls, 0)]

    cache = ImageCache(max_size)

    for url in urls:
        result = cache.get(url)
        print(result)

if __name__ == "__main__":
    main()

Overwriting lru_cache.py


In [None]:
!python lru_cache.py 10000 3 https://placehold.co/600x400.png https://placehold.co/600x400.png https://placehold.co/600x400.png

Downloaded and Cached: https://placehold.co/600x400.png
Cache Hit: https://placehold.co/600x400.png
Cache Hit: https://placehold.co/600x400.png


In [None]:
# Iterating a dict yields its keys, so min() compares the keys 2 and 1 and
# evaluates to the smallest key (1), not to a value.
min({2:'A',1:'B'})

1

In [None]:
%%writefile advanced_lru_cache.py
import os
import sys
import requests
#import threading
import time
import pickle
from collections import OrderedDict, defaultdict, deque
from urllib.parse import urlparse
from io import BytesIO
from PIL import Image

class CacheItem:
    """Bookkeeping record for one cached download.

    Stores the payload together with the metadata the eviction policies and
    the expiry sweep read: size, access statistics, priority and expiry time.
    """

    # Default time-to-live in seconds (5 minutes).
    DEFAULT_TTL = 300

    def __init__(self, url, content, size, timestamp, priority=0, ttl=DEFAULT_TTL):
        """Initialise a freshly inserted item.

        Args:
            url: Key the item is cached under.
            content: Downloaded payload.
            size: Size of the payload as accounted by the cache.
            timestamp: Insertion time; also used as the first access time.
            priority: Rank used by priority-based eviction (lowest goes first).
            ttl: Lifetime in seconds, added to ``timestamp`` to get the
                expiry time. Defaults to ``DEFAULT_TTL``.
        """
        self.url = url
        self.content = content
        self.size = size
        self.last_access = timestamp
        self.access_count = 1  # the insertion counts as the first access
        self.insert_time = timestamp
        self.priority = priority
        self.ttl_expiry = timestamp + ttl

class ImageCache:
    """Byte-bounded image cache with selectable eviction, TTL expiry and a
    per-minute download limit.

    Entries are ``CacheItem`` objects stored in an ``OrderedDict`` keyed by
    URL. ``size`` tracks the summed ``item.size`` of all entries. Supported
    ``eviction_policy`` values are ``"LRU"``, ``"LFU"``, ``"FIFO"`` and
    ``"PRIORITY"``; any other value evicts from the front of the dict.

    NOTE: no locking is performed by this class.
    """

    def __init__(self, max_size_bytes, eviction_policy="LRU", max_item_size=10_000_000,
                 bandwidth_limit=5, snapshot_file="cache_snapshot.pkl", request_timeout=10):
        """Create the cache and restore a snapshot if one exists.

        Args:
            max_size_bytes: Upper bound on the total size of cached items.
            eviction_policy: Name of the policy used to pick eviction victims.
            max_item_size: Largest single download that is accepted.
            bandwidth_limit: Maximum number of downloads per minute.
            snapshot_file: Path the cache is pickled to / restored from.
            request_timeout: Seconds to wait on the server before a download
                is abandoned.
        """
        self.max_size = max_size_bytes
        self.max_item_size = max_item_size

        self.size = 0
        self.eviction_policy = eviction_policy

        self.bandwidth_limit = bandwidth_limit  # max downloads per minute
        self.snapshot_file = snapshot_file
        self.request_timeout = request_timeout

        self.cache = OrderedDict()
        self.download_timestamps = deque()
        self.load_snapshot()

    def save_snapshot(self):
        """Pickle the current cache contents to ``snapshot_file``."""
        with open(self.snapshot_file, 'wb') as f:
            pickle.dump(self.cache, f)

    def load_snapshot(self):
        """Replace the cache with the pickled snapshot, if the file exists."""
        if os.path.exists(self.snapshot_file):
            # SECURITY: unpickling executes code embedded in the file. Only
            # load snapshot files written by this program from a trusted path.
            with open(self.snapshot_file, 'rb') as f:
                self.cache = pickle.load(f)
            self.size = sum(item.size for item in self.cache.values())

    def _evict(self, needed=0):
        """Evict entries until ``needed`` more bytes fit within ``max_size``.

        Exactly one entry is removed per pass and its size is always
        subtracted, so ``size`` stays consistent with the dict contents.
        Stops early if the cache runs empty.

        Args:
            needed: Bytes about to be inserted. The default of 0 just trims
                the cache back to ``max_size``.
        """
        while self.cache and self.size + needed > self.max_size:
            if self.eviction_policy == "LFU":
                url = min(self.cache, key=lambda k: self.cache[k].access_count)
            elif self.eviction_policy == "PRIORITY":
                url = min(self.cache, key=lambda k: self.cache[k].priority)
            else:
                # LRU, FIFO and unknown policies all take the front entry:
                # hits are moved to the end only under LRU, so the front is
                # the least recently used (LRU) or the oldest insert (FIFO).
                url = next(iter(self.cache))

            item = self.cache.pop(url)
            self.size -= item.size

    def _is_bandwidth_limited(self):
        """Return True when the per-minute download quota is used up."""
        now = time.time()
        # remove timestamps older than 1 minute
        while self.download_timestamps and now - self.download_timestamps[0] > 60:
            self.download_timestamps.popleft()
        return len(self.download_timestamps) >= self.bandwidth_limit

    def _download(self, url):
        """Download ``url`` subject to the rate limit and item size limit.

        Returns:
            The response body as ``bytes`` on success, otherwise a ``str``
            message describing why nothing was downloaded. Callers tell the
            two apart by type.
        """
        if self._is_bandwidth_limited():
            return "Bandwidth limit exceeded. Try again later."

        try:
            response = requests.get(url, timeout=self.request_timeout)
            response.raise_for_status()
            content = response.content
            if len(content) > self.max_item_size:
                return f"Image too large to cache: {url}"
            self.download_timestamps.append(time.time())
            return content
        except requests.exceptions.RequestException as e:
            return f"Error downloading image: {e}"

    def get(self, url):
        """Fetch ``url`` through the cache and return a status message.

        Expired entries are purged first. A hit updates the entry's access
        statistics; a miss downloads the image, evicts until it fits, and
        stores it.

        Returns:
            A status string: hit, downloaded, or the reason the image was
            not downloaded or cached.
        """
        now = time.time()

        # Clean expired items (TTL expiry is independent of size eviction).
        expired = [key for key, item in self.cache.items() if now > item.ttl_expiry]
        for key in expired:
            self.size -= self.cache.pop(key).size

        if url in self.cache:
            item = self.cache[url]
            item.last_access = now
            item.access_count += 1
            # Only move to end if we're using LRU policy
            if self.eviction_policy == "LRU":
                self.cache.move_to_end(url)
            return f"CACHE HIT: {url}"

        result = self._download(url)
        if isinstance(result, str):
            return result  # error or bandwidth message

        image_size = len(result)
        if image_size > self.max_size:
            return f"Image exceeds total cache size: {url}"

        # Make room for the new image; terminates even when the cache is
        # currently under its limit but lacks space for this item.
        self._evict(image_size)

        self.cache[url] = CacheItem(url, result, image_size, now)
        self.size += image_size
        return f"DOWNLOADED: {url}"

    def prefetch(self, urls):
        """Warm the cache by fetching each URL in turn; results are discarded."""
        for url in urls:
            self.get(url)

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python advanced_lru_cache.py <input_file>")
        sys.exit(1)

    input_file = sys.argv[1]

    # Input format: cache size on the first line, URL count on the second,
    # then the URLs. URLs are read as whitespace-separated tokens so that
    # one-per-line and several-on-one-line files both work; otherwise a line
    # holding several URLs would be requested as a single bogus URL.
    with open(input_file, 'r') as f:
        max_cache_size = int(f.readline().strip())
        n = int(f.readline().strip())
        urls = f.read().split()[:max(n, 0)]

    cache = ImageCache(max_cache_size, eviction_policy="LFU")
    for url in urls:
        print(cache.get(url))

    # Example of prefetching (next likely images)
    cache.prefetch(["https://placehold.co/100x100.png"])

    # Save snapshot at the end
    cache.save_snapshot()

Overwriting advanced_lru_cache.py


In [None]:
%%writefile input.txt
10000
3
https://placehold.co/600x400.png
https://placehold.co/600x400.png
https://placehold.co/150x150.png

Overwriting input.txt


In [None]:
!python advanced_lru_cache.py input.txt

Error downloading image: 404 Client Error: Not Found for url: https://placehold.co/600x400.png%20https://placehold.co/600x400.png%20https://placehold.co/150x150.png
Error downloading image: Invalid URL '': No scheme supplied. Perhaps you meant https://?
Error downloading image: Invalid URL '': No scheme supplied. Perhaps you meant https://?
Exception ignored in: <module 'threading' from '/usr/lib/python3.11/threading.py'>
Traceback (most recent call last):
  File "/usr/lib/python3.11/threading.py", line 1590, in _shutdown
    lock.acquire()
KeyboardInterrupt: 


In [None]:
%%writefile word_counter.py

import sys
from collections import defaultdict

def word_count(file_path):
    """Print each distinct word in a text file alongside its frequency.

    Words are the whitespace-separated tokens of every line. Output is one
    ``<word> <count>`` line per distinct word, in sorted word order.

    Args:
        file_path: Path of the text file to scan.
    """
    tally = defaultdict(int)
    with open(file_path, 'r') as handle:
        # Flatten the file into a single stream of tokens.
        tokens = (token for row in handle for token in row.split())
        for token in tokens:
            tally[token] += 1

    for word, total in sorted(tally.items()):
        print(f"{word} {total}")

if __name__ == "__main__":
    # Exactly one positional argument is required: the file to scan.
    if len(sys.argv) >= 2:
        word_count(sys.argv[1])
    else:
        print("Usage: python word_counter.py <input_file>")
        sys.exit(1)

Writing word_counter.py


In [None]:
%%writefile input.txt
apple banana apple
orange apple banana

Writing input.txt


In [None]:
!python word_counter.py input.txt

apple 3
banana 2
orange 1


In [None]:
# prompt: create a short snippet that writes into csv file and then reads it back

import csv

def write_and_read_csv(filename, data):
    """Write rows to a CSV file, then read the file back and return its rows.

    Args:
        filename: Path of the CSV file to create or overwrite.
        data: Iterable of rows, each an iterable of field values.

    Returns:
        The rows as parsed back from disk: a list of lists of strings
        (non-string fields come back as their string form).
    """
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)

    with open(filename, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile))

def csv_reader(csvfile):
    """Read a CSV file and return all of its rows.

    Args:
        csvfile: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of field strings.
    """
    # Open the path that was passed in rather than a module-level variable.
    with open(csvfile, 'r', newline='') as handle:
        reader = csv.reader(handle)
        read_data = list(reader)

    return read_data


# Demo: push a small table through example.csv and print what is on disk.
filename = "example.csv"

# Header row first, then one row per person; every field is a string.
data = [["Name", "Age", "City"]]
data += [
    ["Alice", "25", "New York"],
    ["Bob", "30", "Los Angeles"],
    ["Charlie", "28", "Chicago"],
]

# Write the table, keeping whatever the helper returns in read_data.
read_data = write_and_read_csv(filename, data)
print(csv_reader(filename))

[['Name', 'Age', 'City'], ['Alice', '25', 'New York'], ['Bob', '30', 'Los Angeles'], ['Charlie', '28', 'Chicago']]
