In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# CHROMA PLOT

In [None]:
# Load the Chroma benchmark results and tag every row with its source database.
# NOTE(review): 'Chroma' is capitalized while the FAISS and Redis frames in this
# notebook use lowercase labels ('faiss', 'redis') — confirm before combining frames.
chroma_df = pd.read_csv("chroma_performance.csv").assign(database='Chroma')

In [None]:
# Two-panel Chroma summary: processing time (left, log y-axis) and memory usage
# (right), one dashed line per embedder plotted against chunk size.
sns.set_style("whitegrid")  # enable grid lines

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
# A wide gap between the panels leaves room for the left panel's legend, which
# is anchored outside its axes.
fig.subplots_adjust(wspace=.7)

# A single pass per embedder draws on both panels, so the frame is filtered
# once per embedder instead of once per panel.
for embedder in chroma_df["embedder"].unique():
    subset = chroma_df[chroma_df["embedder"] == embedder]
    ax1.plot(subset["chunk_size"], subset["time_ms"], marker="o", linestyle="dashed", label=f"{embedder}")
    ax2.plot(subset["chunk_size"], subset["memory_kb"], marker="s", linestyle="dashed", label=f"{embedder}")

# speed subplot
ax1.set_yscale("log")
ax1.set_xlabel("Chunk Size")
ax1.set_ylabel("Processing Time (ms) (log scaling)", color="tab:blue")
ax1.legend(loc="upper left", bbox_to_anchor=(1.05, 1))
ax1.set_title("Processing Time by Embedder")

# memory subplot
ax2.set_xlabel("Chunk Size")
ax2.set_ylabel("Memory Usage (KB)", color="tab:orange")
ax2.legend(loc="lower left", bbox_to_anchor=(1.05, 0))
ax2.set_title("Memory Usage by Embedder")

# Both legends sit outside their axes; bbox_inches="tight" grows the saved
# canvas to include them so the right-hand legend is not cut off in the PNG.
fig.savefig('chroma_plot.png', bbox_inches="tight")
plt.show()


# FAISS PLOT

In [None]:
# Load the FAISS benchmark results and tag every row with its source database.
faiss_df = pd.read_csv("faiss_performance.csv").assign(database='faiss')

In [None]:
# Mean processing time and memory for each (embedder, chunk_size, overlap) group.
agg_faiss_df = (
    faiss_df
    .groupby(['embedder', 'chunk_size', 'overlap'])
    .agg({'time_ms': 'mean', 'memory_kb': 'mean'})
    .reset_index()
    # Label 1 is the second aggregated row: an extra row with differing chunk
    # size and overlap. NOTE(review): this is a hard-coded position — re-check
    # it whenever faiss_performance.csv changes.
    .drop(1)
    # drop=True discards the old labels instead of re-inserting them as a
    # stray "index" column in the resulting frame.
    .reset_index(drop=True)
)
agg_faiss_df

In [None]:
# Two-panel FAISS summary: mean processing time (left, log y-axis) and mean
# memory usage (right), one dashed line per embedder plotted against chunk size.
sns.set_style("whitegrid")  # enable grid lines

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
# A wide gap between the panels leaves room for the left panel's legend, which
# is anchored outside its axes.
fig.subplots_adjust(wspace=.7)

# A single pass per embedder draws on both panels, so the frame is filtered
# once per embedder instead of once per panel.
for embedder in agg_faiss_df["embedder"].unique():
    subset = agg_faiss_df[agg_faiss_df["embedder"] == embedder]
    ax1.plot(subset["chunk_size"], subset["time_ms"], marker="o", linestyle="dashed", label=f"{embedder}")
    ax2.plot(subset["chunk_size"], subset["memory_kb"], marker="s", linestyle="dashed", label=f"{embedder}")

# speed subplot
ax1.set_yscale("log")
ax1.set_xlabel("Chunk Size")
ax1.set_ylabel("Processing Time (ms) (log scaling)", color="tab:blue")
ax1.legend(loc="upper left", bbox_to_anchor=(1.05, 1))
ax1.set_title("Processing Time by Embedder")

# memory subplot
ax2.set_xlabel("Chunk Size")
ax2.set_ylabel("Memory Usage (KB)", color="tab:orange")
ax2.legend(loc="lower left", bbox_to_anchor=(1.05, 0))
ax2.set_title("Memory Usage by Embedder")

# Both legends sit outside their axes; bbox_inches="tight" grows the saved
# canvas to include them so the right-hand legend is not cut off in the PNG.
fig.savefig('faiss_plot.png', bbox_inches="tight")
plt.show()


# REDIS PLOT

In [None]:
# Load the Redis benchmark results and tag every row with its source database.
redis_df = pd.read_csv("redis_performance.csv").assign(database='redis')

In [None]:
# Display redis_df as this cell's output so individual rows can be inspected.
redis_df

In [None]:
# Remove the rows left over from a git merge conflict (index labels 0, 10, 20).
# errors="ignore" keeps this cell re-runnable: redis_df is reassigned here, so
# on a second execution those labels are already gone and a plain drop would
# raise KeyError.
redis_df = redis_df.drop(index=[0, 10, 20], errors="ignore")
redis_df

In [None]:
# Unify the embedder label: values recorded exactly as 'Mini' become 'MiniLM'.
redis_df['embedder'] = redis_df['embedder'].replace({'Mini': 'MiniLM'})

# Mean processing time and memory for each (embedder, chunk_size, overlap) group;
# as_index=False keeps the group keys as ordinary columns.
agg_redis_df = (
    redis_df
    .groupby(['embedder', 'chunk_size', 'overlap'], as_index=False)
    [['time_ms', 'memory_kb']]
    .mean()
)
agg_redis_df

In [None]:
# Two-panel Redis summary: mean processing time (left, log y-axis) and mean
# memory usage (right), one dashed line per embedder plotted against chunk size.
sns.set_style("whitegrid")  # enable grid lines

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
# A wide gap between the panels leaves room for the left panel's legend, which
# is anchored outside its axes.
fig.subplots_adjust(wspace=.7)

# A single pass per embedder draws on both panels, so the frame is filtered
# once per embedder instead of once per panel.
for embedder in agg_redis_df["embedder"].unique():
    subset = agg_redis_df[agg_redis_df["embedder"] == embedder]
    ax1.plot(subset["chunk_size"], subset["time_ms"], marker="o", linestyle="dashed", label=f"{embedder}")
    ax2.plot(subset["chunk_size"], subset["memory_kb"], marker="s", linestyle="dashed", label=f"{embedder}")

# speed subplot
ax1.set_yscale("log")
ax1.set_xlabel("Chunk Size")
ax1.set_ylabel("Processing Time (ms) (log scaling)", color="tab:blue")
ax1.legend(loc="upper left", bbox_to_anchor=(1.05, 1))
ax1.set_title("Processing Time by Embedder")

# memory subplot
ax2.set_xlabel("Chunk Size")
ax2.set_ylabel("Memory Usage (KB)", color="tab:orange")
ax2.legend(loc="lower left", bbox_to_anchor=(1.05, 0))
ax2.set_title("Memory Usage by Embedder")

# Both legends sit outside their axes; bbox_inches="tight" grows the saved
# canvas to include them so the right-hand legend is not cut off in the PNG.
fig.savefig('redis_plot.png', bbox_inches="tight")
plt.show()
