<a href="https://colab.research.google.com/github/jsphelps12/CS452-AISQL/blob/main/embed/VectorDB_Lab_CS452_(starter).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Download datasets from kaggle

import json
import os

if not os.path.exists("lex-fridman-text-embedding-3-large-128.zip"):
  kaggle_json = {"username": "michaeltreynolds","key": "149701be742f30a8a0526762c61beea0"}
  kaggle_dir = os.path.join(os.path.expanduser("~"), ".kaggle")
  os.makedirs(kaggle_dir, exist_ok=True)
  kaggle_config_path = os.path.join(kaggle_dir, "kaggle.json")
  with open(kaggle_config_path, 'w') as f:
    json.dump(kaggle_json, f)

  !kaggle datasets download -d michaeltreynolds/lex-fridman-text-embedding-3-large-128


In [2]:
# Unzip kaggle data

!unzip lex-fridman-text-embedding-3-large-128.zip
!unzip lex-fridman-text-embedding-3-large-128/*.zip


Archive:  lex-fridman-text-embedding-3-large-128.zip
replace documents/documents/batch_request_0lw3vrQqdWbdBRurTGNMHU76.jsonl? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
unzip:  cannot find or open lex-fridman-text-embedding-3-large-128/*.zip, lex-fridman-text-embedding-3-large-128/*.zip.zip or lex-fridman-text-embedding-3-large-128/*.zip.ZIP.

No zipfiles found.


In [3]:
# Use specific libraries
!pip install datasets==2.20.0 psycopg2==2.9.9 pgcopy==1.6.0
import psycopg2



In [4]:
# Get your own trial account at timescaledb and paste your own connection string

#TODO
CONNECTION = "postgres://tsdbadmin:c5rwkl6n2bodpp3a@akjq0ow9n3.fxozb0sk8r.tsdb.cloud.timescale.com:35812/tsdb?sslmode=require"

In [5]:
# Use this if you want to start over on your postgres table!

DROP_TABLE = "DROP TABLE IF EXISTS podcast, segment"
with psycopg2.connect(CONNECTION) as conn:
    cursor = conn.cursor()
    cursor.execute(DROP_TABLE)
    conn.commit() # Commit the changes


In [6]:
# Useful function that takes a pd.DataFrame and copies it directly into a table.

import pandas as pd
import io
import psycopg2

from typing import List

def fast_pg_insert(df: pd.DataFrame, connection: str, table_name: str, columns: List[str]) -> None:
    """
        Inserts data from a pandas DataFrame into a PostgreSQL table using the COPY command for fast insertion.

        Parameters:
        df (pd.DataFrame): The DataFrame containing the data to be inserted.
        connection (str): The connection string to the PostgreSQL database.
        table_name (str): The name of the target table in the PostgreSQL database.
        columns (List[str]): A list of column names in the target table that correspond to the DataFrame columns.

        Returns:
        None
    """
    conn = psycopg2.connect(connection)
    _buffer = io.StringIO()
    df.to_csv(_buffer, sep=";", index=False, header=False)
    _buffer.seek(0)
    with conn.cursor() as c:
        c.copy_from(
            file=_buffer,
            table=table_name,
            sep=";",
            columns=columns,
            null=''
        )
    conn.commit()
    conn.close()

Database Schema
We will create a database with two tables: podcast and segment:

**podcast**

- PK: id
 - The unique podcast id found in the huggingface data (i,e., TRdL6ZzWBS0  is the ID for Jed Buchwald: Isaac Newton and the Philosophy of Science | Lex Fridman Podcast #214)
- title
 - The title of podcast (ie., Jed Buchwald: Isaac Newton and the Philosophy of Science | Lex Fridman Podcast #214)

**segment**

- PK: id
 - the unique identifier for the podcast segment. This was created by concatenating the podcast idx and the segment index together (ie., "0;1") is the 0th podcast and the 1st segment
This is present in the as the "custom_id" field in the `embedding.jsonl` and batch_request.jsonl files
- start_time
 - The start timestamp of the segment
- end_time
 - The end timestamp of the segment
- content
 - The raw text transcription of the podcast
- embedding
 - the 128 dimensional vector representation of the text
- FK: podcast_id
 - foreign key to podcast.id

In [7]:
# Sample document:
# {
#   "custom_id": "89:115",
#   "url": "/v1/embeddings",
#   "method": "POST",
#   "body": {
#     "input": " have been possible without these approaches?",
#     "model": "text-embedding-3-large",
#     "dimensions": 128,
#     "metadata": {
#       "title": "Podcast: Boris Sofman: Waymo, Cozmo, Self-Driving Cars, and the Future of Robotics | Lex Fridman Podcast #241",
#       "podcast_id": "U_AREIyd0Fc",
#       "start_time": 484.52,
#       "stop_time": 487.08
#     }
#   }
# }

# Sample embedding:
# {
#   "id": "batch_req_QZBmHS7FBiVABxcsGiDx2THJ",
#   "custom_id": "89:115",
#   "response": {
#     "status_code": 200,
#     "request_id": "7a55eba082c70aca9e7872d2b694f095",
#     "body": {
#       "object": "list",
#       "data": [
#         {
#           "object": "embedding",
#           "index": 0,
#           "embedding": [
#             0.0035960325,
#             126 more lines....
#             -0.093248844
#           ]
#         }
#       ],
#       "model": "text-embedding-3-large",
#       "usage": {
#         "prompt_tokens": 7,
#         "total_tokens": 7
#       }
#     }
#   },
#   "error": null
# }

In [8]:
# Create table statements that you'll write
#TODO


# may need to run this to enable vector data type if you didn't select AI in service
# CREATE_EXTENSION = "CREATE EXTENSION vector"

# TODO: Add create table statement
CREATE_PODCAST_TABLE = """
CREATE TABLE podcast (
    id VARCHAR(255) PRIMARY KEY,
    title VARCHAR(255)
);
"""

# TODO: Add create table statement
CREATE_SEGMENT_TABLE = """
CREATE TABLE segment (
    id VARCHAR(255) PRIMARY KEY,
    start_time FLOAT,
    end_time FLOAT,
    content TEXT,
    embedding VECTOR(128),
    podcast_id VARCHAR(255),
    CONSTRAINT fk_podcast
        FOREIGN KEY(podcast_id)
        REFERENCES podcast(id)
);
"""

conn = psycopg2.connect(CONNECTION)
# TODO: Create tables with psycopg2 (example: https://www.geeksforgeeks.org/executing-sql-query-with-psycopg2-in-python/)

cursor = conn.cursor()
cursor.execute(CREATE_PODCAST_TABLE)
cursor.execute(CREATE_SEGMENT_TABLE)

conn.commit()
conn.close()

In [9]:
## Extract needed data out of JSONL files. This may be the hard part!

# TODO: What data do we need?
# TODO: What data is in the documents jsonl files?
# TODO: What data is in the embedding jsonl files?
# TODO: Get some pandas data frames for our two tables so we can copy the data in!



In [10]:
import json
import glob

podcast_data = []
segment_data = []

# Process document files for podcast and segment data (content, start_time, end_time)
document_files = glob.glob("documents/documents/*.jsonl")
for file_path in document_files:
    with open(file_path, 'r') as f:
        for line in f:
            data = json.loads(line)
            metadata = data.get("body", {}).get("metadata", {})
            podcast_id = metadata.get("podcast_id")
            title = metadata.get("title")
            custom_id = data.get("custom_id")
            start_time = metadata.get("start_time")
            stop_time = metadata.get("stop_time")
            content = data.get("body", {}).get("input")

            if podcast_id and title:
                # Collect unique podcasts
                podcast_data.append({"id": podcast_id, "title": title})

            if custom_id and start_time is not None and stop_time is not None and content is not None and podcast_id:
                 # Collect segment data (excluding embedding for now)
                segment_data.append({
                    "id": custom_id,
                    "start_time": start_time,
                    "end_time": stop_time,
                    "content": content,
                    "podcast_id": podcast_id
                })

# Process embedding files for embedding data
embedding_files = glob.glob("embedding/embedding/*.jsonl")
embedding_map = {} # To store embeddings by custom_id

for file_path in embedding_files:
    with open(file_path, 'r') as f:
        for line in f:
            data = json.loads(line)
            custom_id = data.get("custom_id")
            embedding = data.get("response", {}).get("body", {}).get("data", [{}])[0].get("embedding")

            if custom_id and embedding:
                embedding_map[custom_id] = embedding

# Add embeddings to segment data
for segment in segment_data:
    segment["embedding"] = embedding_map.get(segment["id"])

# Remove segments for which no embedding was found
segment_data = [segment for segment in segment_data if segment["embedding"] is not None]

print(f"Extracted {len(podcast_data)} potential podcasts and {len(segment_data)} segments with embeddings.")

Extracted 832839 potential podcasts and 832839 segments with embeddings.


In [11]:
print(podcast_data[0])
print(segment_data[0])

{'id': 'tueAcSiiqYA', 'title': 'Podcast: Jordan Ellenberg: Mathematics of High-Dimensional Shapes and Geometries | Lex Fridman Podcast #190'}
{'id': '311:634', 'start_time': 2195.76, 'end_time': 2197.52, 'content': ' So all of those are intrinsic things, right?', 'podcast_id': 'tueAcSiiqYA', 'embedding': [-0.0012927473, -0.0044831056, -0.11136264, 0.10946503, -0.0587175, -0.13077249, -0.06300068, -0.10962769, 0.02233759, -0.17165245, -0.017092051, 0.1895442, 0.077693604, -0.0104233045, 0.07606708, 0.10463968, -0.08994675, 0.0010589346, -0.09992276, -0.015858604, 0.04166611, -0.077314086, -0.16167644, -0.008925548, -0.08219366, -0.050178252, 0.040581763, -0.09200701, -0.14410998, -0.03467206, 0.05489517, -0.02193096, 0.055952407, 0.031202143, -0.0739526, -0.0931998, 0.018108629, -0.021944514, 0.11678439, 0.050042707, -0.0067941244, -0.06961521, 0.027623791, 0.07341043, -0.12263987, 0.09894685, -0.09629019, 0.022703558, 0.061591025, 0.0005209958, 0.025292441, 0.033533495, 0.112989165, 0.

In [12]:
import pandas as pd

podcast_df = pd.DataFrame(podcast_data)
podcast_df = podcast_df.drop_duplicates(subset=['id'])
# display(podcast_df.head())

In [13]:
segment_df = pd.DataFrame(segment_data)
# display(segment_df.head())

In [14]:
#### Optional #####
# In addition to the embedding and document files you might like to load
# the full podcast raw data via the hugging face datasets library

# from datasets import load_dataset
# ds = load_dataset("Whispering-GPT/lex-fridman-podcast")


In [15]:
# TODO Copy all the "podcast" data into the podcast postgres table!

fast_pg_insert(podcast_df, CONNECTION, "podcast", ["id", "title"])

In [16]:
# TODO Copy all the "segment" data into the segment postgres table!
# HINT 1: use the recommender.utils.fast_pg_insert function to insert data into the database
# otherwise inserting the 800k documents will take a very, very long time
# HINT 2: if you don't want to use all your memory and crash
# colab, you'll need to either send the data up in chunks
# or write your own function for copying it up. Alternative to chunking maybe start
# with writing it to a CSV and then copy it up?

import io
import tempfile
import os

conn = psycopg2.connect(CONNECTION)

# Write DataFrame to a temporary CSV file with manual formatting for embedding
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=".csv") as tmp_file:
    for index, row in segment_df.iterrows():
        # Ensure the embedding is treated as a list of floats before joining
        embedding_list = row['embedding']
        if isinstance(embedding_list, str):
             # Attempt to parse the string representation if it's already a string
             try:
                 embedding_list = json.loads(embedding_list)
             except json.JSONDecodeError:
                 # Handle cases where the string format is unexpected
                 print(f"Warning: Could not parse embedding string for segment {row['id']}: {embedding_list}")
                 embedding_list = [] # Or handle as appropriate

        # Manually format the embedding as a string representation of a list
        embedding_str = '[' + ','.join(map(str, embedding_list)) + ']'
        # Create a list of values for the row, including the formatted embedding
        row_values = [str(row['id']), str(row['start_time']), str(row['end_time']), str(row['content']).replace(';', '\\;'), embedding_str, str(row['podcast_id'])]
        # Join the values with the separator and write to the file
        tmp_file.write(';'.join(row_values) + '\n')
    tmp_file_path = tmp_file.name

# Copy data from the temporary CSV file into the database
try:
    with conn.cursor() as c:
        with open(tmp_file_path, 'r') as f:
            c.copy_from(
                file=f,
                table="segment",
                sep=";",
                columns=["id", "start_time", "end_time", "content", "embedding", "podcast_id"],
                null=''
            )
    conn.commit()
except Exception as e:
    conn.rollback()
    print(f"Error during copy: {e}")
finally:
    conn.close()
    # Clean up the temporary file
    os.remove(tmp_file_path)

In [17]:
## This script is used to query the database
import os
import psycopg2


# Write your queries
# Q1) What are the five most similar segments to segment "267:476"
# Input: "that if we were to meet alien life at some point"
# For each result return the podcast name, the segment id, segment raw text,  the start time, stop time, and embedding distance

conn = psycopg2.connect(CONNECTION)
cur = conn.cursor()
cur.execute("""
SELECT
    p.title,
    s.id,
    s.content,
    s.start_time,
    s.end_time,
    s.embedding <-> (SELECT embedding FROM segment WHERE id = '267:476') AS distance
FROM
    segment s
JOIN
    podcast p ON s.podcast_id = p.id
ORDER BY
    distance
LIMIT 5;
""")
for row in cur.fetchall():
  print(row)

conn.commit()
conn.close()

('Podcast: David Silver: AlphaGo, AlphaZero, and Deep Reinforcement Learning | Lex Fridman Podcast #86', '267:476', ' that if we were to meet alien life at some point', 1336.32, 1339.4399999999998, 0.0)
('Podcast: Ryan Graves: UFOs, Fighter Jets, and Aliens | Lex Fridman Podcast #308', '113:2792', ' encounters, human beings, if we were to meet another alien', 6725.62, 6729.86, 0.6483450674336982)
('Podcast: Richard Dawkins: Evolution, Intelligence, Simulation, and Memes | Lex Fridman Podcast #87', '268:1019', ' Suppose we did meet an alien from outer space', 2900.04, 2903.0800000000004, 0.655810731375558)
('Podcast: Jeffrey Shainline: Neuromorphic Computing and Optoelectronic Intelligence | Lex Fridman Podcast #225', '305:3600', ' but if we think of alien civilizations out there', 9479.960000000001, 9484.04, 0.6595433341200092)
('Podcast: Michio Kaku: Future of Humans, Aliens, Space Travel & Physics | Lex Fridman Podcast #45', '18:464', ' So I think when we meet alien life from outer s

In [23]:
# Q2) What are the five most dissimilar segments to segment "267:476"
# Input: "that if we were to meet alien life at some point"
# For each result return the podcast name, the segment id, segment raw text, the start time, stop time, and embedding distance

conn = psycopg2.connect(CONNECTION)
cur = conn.cursor()
cur.execute("""
SELECT
    p.title,
    s.id,
    s.content,
    s.start_time,
    s.end_time,
    s.embedding <-> (SELECT embedding FROM segment WHERE id = '267:476') AS distance
FROM
    segment s
JOIN
    podcast p ON s.podcast_id = p.id
ORDER BY
    distance DESC
LIMIT 5;
""")
print("Q2 Results:")
for row in cur.fetchall():
  print(row)

conn.commit()
conn.close()

Q2 Results:
('Podcast: Jason Calacanis: Startups, Angel Investing, Capitalism, and Friendship | Lex Fridman Podcast #161', '119:218', ' a 73 Mustang Grande in gold?', 519.96, 523.8000000000001, 1.6157687685840119)
('Podcast: Rana el Kaliouby: Emotion AI, Social Robots, and Self-Driving Cars | Lex Fridman Podcast #322', '133:2006', ' for 94 car models.', 5818.62, 5820.82, 1.5863358321539258)
('Podcast: Travis Stevens: Judo, Olympics, and Mental Toughness | Lex Fridman Podcast #223', '283:1488', ' when I called down to get the sauna.', 3709.34, 3711.1000000000004, 1.572552805197421)
('Podcast: Jeremy Howard: fast.ai Deep Learning Courses and Research | Lex Fridman Podcast #35', '241:1436', ' which has all the courses pre-installed.', 4068.9, 4071.1400000000003, 1.5663321232557983)
('Podcast: Joscha Bach: Nature of Reality, Dreams, and Consciousness | Lex Fridman Podcast #212', '307:3933', ' and very few are first class and some are budget.', 10648.64, 10650.960000000001, 1.56163412898204

In [24]:
# Q3) What are the five most similar segments to segment '48:511'

# Input: "Is it is there something especially interesting and profound to you in terms of our current deep learning neural network, artificial neural network approaches and the whatever we do understand about the biological neural network."
# For each result return the podcast name, the segment id, segment raw text,  the start time, stop time, and embedding distance

conn = psycopg2.connect(CONNECTION)
cur = conn.cursor()
cur.execute("""
SELECT
    p.title,
    s.id,
    s.content,
    s.start_time,
    s.end_time,
    s.embedding <-> (SELECT embedding FROM segment WHERE id = '48:511') AS distance
FROM
    segment s
JOIN
    podcast p ON s.podcast_id = p.id
ORDER BY
    distance
LIMIT 5;
""")
print("\nQ3 Results:")
for row in cur.fetchall():
  print(row)

conn.commit()
conn.close()


Q3 Results:
('Podcast: Matt Botvinick: Neuroscience, Psychology, and AI at DeepMind | Lex Fridman Podcast #106', '48:511', ' Is it is there something especially interesting and profound to you in terms of our current deep learning neural network, artificial neural network approaches and the whatever we do understand about the biological neural network.', 1832.84, 1846.84, 0.0)
('Podcast: Andrew Huberman: Neuroscience of Optimal Performance | Lex Fridman Podcast #139', '155:648', ' Is there something interesting to you or fundamental to you about the circuitry of the brain', 3798.48, 3805.84, 0.652299685331962)
('Podcast: Cal Newport: Deep Work, Focus, Productivity, Email, and Social Media | Lex Fridman Podcast #166', '61:3707', ' of what we might discover about neural networks?', 8498.02, 8500.1, 0.7121050124628524)
('Podcast: Matt Botvinick: Neuroscience, Psychology, and AI at DeepMind | Lex Fridman Podcast #106', '48:512', " And our brain is there. There's some there's quite a few d

In [25]:
# Q4) What are the five most similar segments to segment '51:56'

# Input: "But what about like the fundamental physics of dark energy? Is there any understanding of what the heck it is?"
# For each result return the podcast name, the segment id, segment raw text,  the start time, stop time, and embedding distance

conn = psycopg2.connect(CONNECTION)
cur = conn.cursor()
cur.execute("""
SELECT
    p.title,
    s.id,
    s.content,
    s.start_time,
    s.end_time,
    s.embedding <-> (SELECT embedding FROM segment WHERE id = '51:56') AS distance
FROM
    segment s
JOIN
    podcast p ON s.podcast_id = p.id
ORDER BY
    distance
LIMIT 5;
""")
print("\nQ4 Results:")
for row in cur.fetchall():
  print(row)

conn.commit()
conn.close()


Q4 Results:
('Podcast: Alex Filippenko: Supernovae, Dark Energy, Aliens & the Expanding Universe | Lex Fridman Podcast #137', '51:56', ' But what about like the fundamental physics of dark energy? Is there any understanding of what the heck it is?', 366.5, 375.0, 0.0)
('Podcast: George Hotz: Hacking the Simulation & Learning to Drive with Neural Nets | Lex Fridman Podcast #132', '308:144', " I mean, we don't understand dark energy, right?", 500.44, 502.6, 0.6681965222094363)
('Podcast: Lex Fridman: Ask Me Anything - AMA January 2021 | Lex Fridman Podcast', '243:273', " Like, what's up with this dark matter and dark energy stuff?", 946.22, 950.12, 0.7355511357796344)
('Podcast: Katherine de Kleer: Planets, Moons, Asteroids & Life in Our Solar System | Lex Fridman Podcast #184', '196:685', ' being like, what the hell is dark matter and dark energy?', 2591.72, 2595.9599999999996, 0.7631141596843518)
('Podcast: Alex Filippenko: Supernovae, Dark Energy, Aliens & the Expanding Universe | Le

In [27]:
# Q5) For each of the following podcast segments, find the five most similar podcast episodes. Hint: You can do this by averaging over the embedding vectors within a podcast episode.

#     a) Segment "267:476"

#     b) Segment '48:511'

#     c) Segment '51:56'

# For each result return the Podcast title and the embedding distance

conn = psycopg2.connect(CONNECTION)
cur = conn.cursor()

# Helper function to find most similar podcasts based on average embedding of a segment
def find_similar_podcasts_by_segment(segment_id):
    cur.execute("SELECT embedding FROM segment WHERE id = %s", (segment_id,))
    segment_embedding = cur.fetchone()[0]

    cur.execute("""
        SELECT
            p.title,
            (SELECT AVG(embedding) FROM segment WHERE podcast_id = p.id) <-> %s AS distance
        FROM
            podcast p
        ORDER BY
            distance
        LIMIT 5;
    """, (segment_embedding,))
    return cur.fetchall()

print("\nQ5 Results:")
print("a) Most similar podcasts to segment '267:476':")
for row in find_similar_podcasts_by_segment('267:476'):
    print(row)

print("\nb) Most similar podcasts to segment '48:511':")
for row in find_similar_podcasts_by_segment('48:511'):
    print(row)

print("\nc) Most similar podcasts to segment '51:56':")
for row in find_similar_podcasts_by_segment('51:56'):
    print(row)


conn.commit()
conn.close()


Q5 Results:
a) Most similar podcasts to segment '267:476':
('Podcast: Sara Walker: The Origin of Life on Earth and Alien Worlds | Lex Fridman Podcast #198', 0.7828978136062058)
('Podcast: Martin Rees: Black Holes, Alien Life, Dark Matter, and the Big Bang | Lex Fridman Podcast #305', 0.7879499391348677)
('Podcast: Max Tegmark: Life 3.0 | Lex Fridman Podcast #1', 0.7886898936177833)
('Podcast: Sean Carroll: The Nature of the Universe, Life, and Intelligence | Lex Fridman Podcast #26', 0.7890653326909047)
('Podcast: Nick Bostrom: Simulation and Superintelligence | Lex Fridman Podcast #83', 0.7911210354871258)

b) Most similar podcasts to segment '48:511':
('Podcast: Matt Botvinick: Neuroscience, Psychology, and AI at DeepMind | Lex Fridman Podcast #106', 0.7481194602869561)
('Podcast: Christof Koch: Consciousness | Lex Fridman Podcast #2', 0.7537802160985114)
('Podcast: Dileep George: Brain-Inspired AI | Lex Fridman Podcast #115', 0.7605153285431108)
('Podcast: Tomaso Poggio: Brains, Mi

In [28]:
# Q6) For podcast episode id = VeH7qKZr0WI, find the five most similar podcast episodes. Hint: you can do a similar averaging procedure as Q5

# Input Episode: "Balaji Srinivasan: How to Fix Government, Twitter, Science, and the FDA | Lex Fridman Podcast #331"
# For each result return the Podcast title and the embedding distance

conn = psycopg2.connect(CONNECTION)
cur = conn.cursor()

# Helper function to get average embedding for a podcast
def get_average_embedding(podcast_id):
    cur.execute("SELECT AVG(embedding) FROM segment WHERE podcast_id = %s", (podcast_id,))
    return cur.fetchone()[0]

# Get the average embedding for the input podcast
input_podcast_id = 'VeH7qKZr0WI'
input_podcast_embedding = get_average_embedding(input_podcast_id)

cur.execute("""
    SELECT
        p.title,
        (SELECT AVG(embedding) FROM segment WHERE podcast_id = p.id) <-> %s AS distance
    FROM
        podcast p
    ORDER BY
        distance
    LIMIT 5;
""", (input_podcast_embedding,))

print("\nQ6 Results:")
for row in cur.fetchall():
    print(row)


conn.commit()
conn.close()


Q6 Results:
('Podcast: Balaji Srinivasan: How to Fix Government, Twitter, Science, and the FDA | Lex Fridman Podcast #331', 0.0)
('Podcast: Tyler Cowen: Economic Growth & the Fight Against Conformity & Mediocrity | Lex Fridman Podcast #174', 0.11950104556214838)
('Podcast: Eric Weinstein: Difficult Conversations, Freedom of Speech, and Physics | Lex Fridman Podcast #163', 0.1257139025632404)
('Podcast: Michael Malice and Yaron Brook: Ayn Rand, Human Nature, and Anarchy | Lex Fridman Podcast #178', 0.12842690324343972)
('Podcast: Steve Keen: Marxism, Capitalism, and Economics | Lex Fridman Podcast #303', 0.12916269225753493)


# Deliverables
You will turn in a ZIP or PDF file containing all your code and a PDF file with the queries and results for questions 1-7.