# 🔋 Battery RAG System with Memory

A Retrieval-Augmented Generation system for battery calculations and analysis with conversation memory.

## Features:
- 📊 Load and process battery data from CSV
- 🧠 Vector embeddings with ChromaDB
- 💬 Conversational memory system
- 🔍 Semantic search for relevant battery data  
- ⚡ Real-time battery calculations

In [22]:
# Core imports
import pandas as pd
import chromadb
from sentence_transformers import SentenceTransformer
from datetime import datetime

In [23]:
# LangChain imports
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [None]:
# Load battery data
print("📊 Loading battery data...")
df = pd.read_csv('battery_data_10000_rows.csv')
df = df.apply(pd.to_numeric, errors='ignore').dropna().reset_index(drop=True)
print(f"✅ Loaded {len(df)} battery records")

📊 Loading battery data...


  df = df.apply(pd.to_numeric, errors='ignore').dropna().reset_index(drop=True)


✅ Processed 10000 battery records


In [27]:
# Convert rows to structured documents
def format_battery_document(row):
    """Format a battery row into a structured document"""
    return f"""Battery ID: {row.iloc[0]}
Type: {row.iloc[1] if len(row) > 1 else 'Unknown'}
Voltage: {row.iloc[2] if len(row) > 2 else 'Unknown'} V
Capacity: {row.iloc[3] if len(row) > 3 else 'Unknown'} Ah
Energy Density: {row.iloc[4] if len(row) > 4 else 'Unknown'} Wh/kg"""

structured_docs = [format_battery_document(row) for _, row in df.iterrows()]
print(f"✅ Created {len(structured_docs)} structured documents")

✅ Created 10000 structured documents


In [None]:
# Create embeddings
print("🧠 Creating embeddings...")
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embedding_model.encode(structured_docs, show_progress_bar=True)
print(f"✅ Created embeddings with shape: {embeddings.shape}")

🧠 Creating embeddings...


Batches: 100%|██████████| 313/313 [00:47<00:00,  6.53it/s]



✅ Stored 10000 documents in vector database


In [28]:
# Setup ChromaDB vector store
print("🗄️ Setting up ChromaDB...")
chroma_client = chromadb.Client()

# Clean up existing collection
try:
    chroma_client.delete_collection(name="battery_data")
except:
    pass

collection = chroma_client.create_collection(name="battery_data")
print("✅ ChromaDB collection created")

🗄️ Setting up ChromaDB...
✅ ChromaDB collection created


In [29]:
# Store documents in batches
print("💾 Storing documents in ChromaDB...")
batch_size = 1000

for i in range(0, len(structured_docs), batch_size):
    end_idx = min(i + batch_size, len(structured_docs))
    collection.add(
        documents=structured_docs[i:end_idx],
        embeddings=embeddings[i:end_idx].tolist(),
        ids=[str(j) for j in range(i, end_idx)]
    )
    print(f"   Stored batch {i//batch_size + 1}: {end_idx - i} documents")

print(f"✅ Total documents stored: {collection.count()}")

💾 Storing documents in ChromaDB...
   Stored batch 1: 1000 documents
   Stored batch 2: 1000 documents
   Stored batch 3: 1000 documents
   Stored batch 4: 1000 documents
   Stored batch 5: 1000 documents
   Stored batch 6: 1000 documents
   Stored batch 7: 1000 documents
   Stored batch 8: 1000 documents
   Stored batch 9: 1000 documents
   Stored batch 10: 1000 documents
✅ Total documents stored: 10000


In [None]:
# Simple memory class
class BatteryRAGMemory:
    def __init__(self):
        self.conversation_memory = ConversationBufferMemory(
            memory_key="chat_history", return_messages=True
        )
        self.query_history = []
    
    def add_interaction(self, query, response, retrieved_docs):
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.query_history.append({
            "timestamp": timestamp, "query": query, 
            "response": response, "docs_count": len(retrieved_docs)
        })
        self.conversation_memory.save_context(
            {"input": query}, {"output": response}
        )
    
    def get_context(self):
        if self.query_history:
            recent = self.query_history[-2:]  # Last 2 interactions
            context = "Previous queries:\n"
            for item in recent:
                context += f"Q: {item['query']}\nA: {item['response'][:80]}...\n"
            return context
        return ""

memory = BatteryRAGMemory()
print("🧠 Memory system ready")

🧠 Memory system initialized


  self.conversation_memory = ConversationBufferMemory(


In [24]:
# Create retriever function
def retrieve_documents(query, top_k=3):
    """Retrieve relevant documents based on query"""
    query_embedding = embedding_model.encode([query])
    results = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=top_k
    )
    return results['documents'][0]

print("🔍 Retriever function ready")

🔍 Retriever function ready


In [30]:
# Parse battery documents
def parse_battery_docs(retrieved_docs):
    """Parse retrieved documents into battery objects"""
    batteries = []
    for doc in retrieved_docs:
        lines = doc.strip().split('\n')
        if len(lines) >= 5:
            try:
                battery = {
                    'id': lines[0].split(': ')[1],
                    'type': lines[1].split(': ')[1], 
                    'voltage': float(lines[2].split(': ')[1].replace(' V', '')),
                    'capacity': float(lines[3].split(': ')[1].replace(' Ah', '')),
                    'energy_density': float(lines[4].split(': ')[1].replace(' Wh/kg', ''))
                }
                batteries.append(battery)
            except:
                continue
    return batteries

print("⚙️ Document parser ready")

⚙️ Document parser ready


In [31]:
# Battery pack calculation function
def calculate_battery_pack(query, batteries):
    """Calculate battery pack specifications"""
    if not batteries:
        return "No battery data found for calculation."
    
    selected = batteries[0]
    
    # Extract configuration (2S3P format)
    import re
    series_match = re.search(r'(\d+)S', query)
    parallel_match = re.search(r'(\d+)P', query)
    
    series = int(series_match.group(1)) if series_match else 1
    parallel = int(parallel_match.group(1)) if parallel_match else 1
    
    pack_voltage = selected['voltage'] * series
    pack_capacity = selected['capacity'] * parallel
    pack_energy = pack_voltage * pack_capacity
    
    return f"""🔋 Battery Pack Calculation:
Selected: {selected['id']} ({selected['type']})
Configuration: {series}S{parallel}P
Pack Voltage: {pack_voltage:.2f} V
Pack Capacity: {pack_capacity:.2f} Ah  
Pack Energy: {pack_energy:.2f} Wh
Total Cells: {series * parallel}"""

print("🧮 Calculator function ready")

🧮 Calculator function ready


In [32]:
# LangChain Prompt Template
prompt_template = PromptTemplate(
    input_variables=["context", "query", "battery_data"],
    template="""You are a battery engineering expert assistant.

Previous Context:
{context}

Current Query: {query}

Available Battery Data:
{battery_data}

Instructions:
- Answer based on provided data and context
- Show step-by-step calculations
- Use technical battery terminology
- For configurations: 2S = series, 3P = parallel

Response:"""
)

print("📝 Prompt template created")

📝 Prompt template created


In [33]:
# LangChain Chain Components
def format_docs(docs):
    """Format documents for the chain"""
    return "\n---\n".join(docs)

def generate_response(query, docs):
    """Generate response using battery calculation logic"""
    batteries = parse_battery_docs(docs)
    
    if 'calculate' in query.lower() or 'configuration' in query.lower():
        return calculate_battery_pack(query, batteries)
    elif 'compare' in query.lower():
        return compare_batteries(batteries)
    else:
        return f"Found {len(batteries)} batteries: " + ", ".join([b['id'] for b in batteries[:3]])

def compare_batteries(batteries):
    """Compare multiple batteries"""
    if len(batteries) < 2:
        return "Need at least 2 batteries to compare."
    
    comparison = "🔋 Battery Comparison:\n"
    for i, bat in enumerate(batteries[:3], 1):
        comparison += f"{i}. {bat['id']}: {bat['voltage']}V, {bat['capacity']}Ah, {bat['energy_density']}Wh/kg\n"
    return comparison

print("🔗 Chain components ready")

🔗 Chain components ready


In [34]:
# Create LangChain RAG Chain
from langchain_core.runnables import RunnableParallel

# Build the chain
rag_chain = (
    RunnableParallel({
        "context": lambda x: memory.get_context(),
        "query": RunnablePassthrough(),
        "battery_data": lambda x: format_docs(retrieve_documents(x))
    })
    | RunnableLambda(lambda x: {
        "response": generate_response(x["query"], retrieve_documents(x["query"])),
        "query": x["query"],
        "context": x["context"]
    })
)

print("⛓️ LangChain RAG chain created")

⛓️ LangChain RAG chain created


In [36]:
# Test the LangChain RAG system
print("🧪 Testing LangChain RAG Chain\n")

test_queries = [
    "Calculate energy for 2S3P configuration",
    "Compare top 3 batteries by energy density", 
    "What batteries have high voltage?"
]

for i, query in enumerate(test_queries, 1):
    print(f"{'='*40}")
    print(f"Test {i}: {query}")
    print(f"{'='*40}")
    
    # Use LangChain
    result = rag_chain.invoke(query)
    print(result["response"])
    
    # Save to memory
    docs = retrieve_documents(query)
    memory.add_interaction(query, result["response"], docs)
    print(f"\n💾 Memory: {len(memory.query_history)} interactions\n")

🧪 Testing LangChain RAG Chain

Test 1: Calculate energy for 2S3P configuration
🔋 Battery Pack Calculation:
Selected: BAT-05323 (Prismatic)
Configuration: 2S3P
Pack Voltage: 6.60 V
Pack Capacity: 86.16 Ah  
Pack Energy: 568.66 Wh
Total Cells: 6

💾 Memory: 5 interactions

Test 2: Compare top 3 batteries by energy density
🔋 Battery Comparison:
1. BAT-00130: 3.78V, 2.51Ah, 25.8Wh/kg
2. BAT-00171: 3.63V, 3.31Ah, 30.0Wh/kg
3. BAT-04483: 3.7V, 2.59Ah, 28.8Wh/kg


💾 Memory: 6 interactions

Test 3: What batteries have high voltage?
Found 3 batteries: BAT-00112, BAT-00100, BAT-00190

💾 Memory: 7 interactions



## 🎉 Streamlined Battery RAG System Complete!

### 📊 New Structure (Small Cells):
1. **Core Imports** - Basic dependencies
2. **LangChain Imports** - RAG chain components  
3. **Data Loading** - CSV processing
4. **Document Formatting** - Structured text creation
5. **Embeddings** - Vector creation
6. **ChromaDB Setup** - Vector store initialization
7. **Batch Storage** - Efficient data storage
8. **Memory System** - Conversation tracking
9. **Retriever** - Document search function
10. **Parser** - Battery data extraction
11. **Calculator** - Pack calculations
12. **Prompt Template** - LangChain prompting
13. **Chain Components** - RAG logic
14. **LangChain Chain** - Complete RAG pipeline
15. **Testing** - Chain validation
16. **Interactive Chat** - User interface

### 🔗 LangChain Integration:
- **RunnableParallel** for context + retrieval
- **RunnableLambda** for custom processing
- **PromptTemplate** for structured prompting
- **Memory integration** with conversation tracking
- **Modular components** for easy modification

In [35]:
# Interactive chat function
def battery_chat():
    """Simple interactive chat using LangChain"""
    print("🔋 Battery Expert (LangChain) - type 'quit' to exit\n")
    
    while True:
        query = input("❓ Question: ").strip()
        
        if query.lower() in ['quit', 'exit']:
            print("👋 Goodbye!")
            break
            
        if query.lower() == 'history':
            for i, item in enumerate(memory.query_history, 1):
                print(f"{i}. {item['query']}")
            continue
            
        if not query:
            continue
            
        # Use the chain
        result = rag_chain.invoke(query)
        print(f"\n🤖 {result['response']}\n")
        
        # Save to memory  
        docs = retrieve_documents(query)
        memory.add_interaction(query, result['response'], docs)

# Test the chain with a simple example
print("🧪 Testing the chain:")
result = rag_chain.invoke("Calculate 2S3P battery configuration")
print("✅ Chain working! Result:", result["response"][:100] + "...")

print("\n🚀 Run battery_chat() for interactive mode")
print("🔗 Use rag_chain.invoke('your query') for direct access")

🧪 Testing the chain:
✅ Chain working! Result: 🔋 Battery Pack Calculation:
Selected: BAT-02123 (Prismatic)
Configuration: 2S3P
Pack Voltage: 6.60 V...

🚀 Run battery_chat() for interactive mode
🔗 Use rag_chain.invoke('your query') for direct access


In [37]:
# 🎉 System Status Check
print("="*50)
print("🔋 BATTERY RAG SYSTEM STATUS")
print("="*50)
print(f"✅ Data loaded: {len(df)} battery records")
print(f"✅ Embeddings created: {embeddings.shape}")
print(f"✅ Vector store: {collection.count()} documents")
print(f"✅ Memory system: {len(memory.query_history)} interactions")
print(f"✅ LangChain RAG chain: Ready")
print("="*50)
print("🚀 READY FOR USE!")
print("📝 Usage: rag_chain.invoke('your query')")
print("💬 Interactive: battery_chat()")
print("="*50)

🔋 BATTERY RAG SYSTEM STATUS
✅ Data loaded: 10000 battery records
✅ Embeddings created: (10000, 384)
✅ Vector store: 10000 documents
✅ Memory system: 7 interactions
✅ LangChain RAG chain: Ready
🚀 READY FOR USE!
📝 Usage: rag_chain.invoke('your query')
💬 Interactive: battery_chat()


In [39]:
result = rag_chain.invoke("Calculate 2S3P battery configuration")
print(result["response"])

🔋 Battery Pack Calculation:
Selected: BAT-02123 (Prismatic)
Configuration: 2S3P
Pack Voltage: 6.60 V
Pack Capacity: 62.34 Ah  
Pack Energy: 411.44 Wh
Total Cells: 6
