In [1]:
import ipywidgets as widgets
from IPython.display import display
import pandas as pd
import os

# Upload control for one file at a time, filtered to .csv and .json.
uploader = widgets.FileUpload(accept='.csv,.json', multiple=False)
display(uploader)


FileUpload(value=(), accept='.csv,.json', description='Upload')

In [2]:
def process_uploaded_file(uploader):
    if len(uploader.value) == 0:
        print("No file uploaded yet.")
        return None
    
    # Handle tuple format from ipywidgets
    upload_content = uploader.value[0] if isinstance(uploader.value, tuple) else next(iter(uploader.value.values()))
    
    # Extract filename and content based on the actual structure
    if 'metadata' in upload_content:
        filename = upload_content['metadata']['name']
        content = upload_content['content']
    else:
        # Alternative structure
        filename = upload_content.name
        content = upload_content.content

    # Save to temporary directory
    temp_filepath = f'temp/{filename}'
    os.makedirs('temp', exist_ok=True)
    with open(temp_filepath, 'wb') as f:
        f.write(content)
    
    # Load with pandas
    if filename.endswith('.csv'):
        df = pd.read_csv(temp_filepath)
    elif filename.endswith('.json'):
        df = pd.read_json(temp_filepath)
    else:
        raise ValueError('Unsupported file format')
    return df



### File Upload and Processing

1. Use the widget above to upload a `.csv` or `.json` file.
2. Then run:

```python
df = process_uploaded_file(uploader)
```

- If no file has been uploaded yet, the function prints a message and returns `None`.
- Once you have `df` (a pandas DataFrame), continue with your RAG pipeline: chunking, embedding, and vector DB steps.


In [3]:
# Index the uploaded DataFrame into Qdrant.
# Every import this cell needs is at the top, so a missing module fails
# before the existing collection is cleared.
import time

from main import build_index_from_df, clear_and_rebuild_collection, qdrant
import config

# Seconds to pause between indexing and reading back the point count.
INDEX_SETTLE_SECONDS = 2

# First, process the uploaded file to create df
if 'uploader' in globals() and len(uploader.value) > 0:
    try:
        df = process_uploaded_file(uploader)
    except (ValueError, OSError) as exc:
        # process_uploaded_file raises ValueError for unsupported formats;
        # pandas parse errors are ValueError subclasses, and OSError covers
        # failures writing or reading the temp file.
        df = None
        load_error = exc
    else:
        load_error = None

    if df is None:
        print("❌ Failed to process uploaded file.")
        if load_error is not None:
            print(f"   Reason: {load_error}")
    elif df.empty:
        # Clearing is destructive: do not wipe the collection for a file
        # that would index nothing.
        print("⚠️ Uploaded file contains no rows; existing collection left untouched.")
    else:
        # Clear old data first
        print("🗑️ Clearing previous data...")
        clear_and_rebuild_collection()

        # Index new data
        print(f"📊 Processing {len(df)} rows from uploaded file...")
        build_index_from_df(df)
        print("✅ Indexed uploaded DataFrame into Qdrant collection.")

        # Wait for Qdrant to finish indexing
        print("⏳ Waiting for Qdrant to complete indexing...")
        time.sleep(INDEX_SETTLE_SECONDS)

        # Verify the upload
        collection_info = qdrant.get_collection(config.COLLECTION_NAME)
        print(f"📈 Total documents in Qdrant: {collection_info.points_count}")
else:
    print("⚠️ No file uploaded yet. Use the upload widget above first.")


✅ RAG system initialized. Ready to index uploaded data.
🗑️ Clearing previous data...
🗑️ Clearing existing collection...
🔄 Recreating collection...
✅ Collection cleared. Ready for new data to be indexed.
📊 Processing 150 rows from uploaded file...
📝 Indexing 150 documents...
Sample doc: Date: 2022-10-21, Year: 2022, Locality: Delhi, Estimated Value: 146108.92, Sale Price: 1771709.8, Pr...
  Processing batch 1/3 (50 docs)...
  Processing batch 2/3 (50 docs)...


  qdrant.upload_collection(
  qdrant.upload_collection(


  Processing batch 3/3 (50 docs)...
✅ Successfully indexed 150 documents!
✅ Indexed uploaded DataFrame into Qdrant collection.
⏳ Waiting for Qdrant to complete indexing...
📈 Total documents in Qdrant: 150
