Python Environment Setup (Google Colab)

# CELL 1: Check GPU and Mount Drive

In [2]:
# 01_Environment_Setup.ipynb

# ==========================================
# CELL 1: Check GPU and Mount Drive
# ==========================================

import torch
import os
from google.colab import drive

# Report which accelerator (if any) the current runtime exposes
print("🔍 Checking GPU availability...")
if not torch.cuda.is_available():
    print("❌ No GPU available. Please enable GPU in Runtime > Change runtime type")
else:
    gpu_name = torch.cuda.get_device_name()
    print(f"✅ GPU Available: {gpu_name}")
    print(f"📊 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# Attach Google Drive so the project files persist across sessions
print("\n📂 Mounting Google Drive...")
drive.mount('/content/drive')

# Lay out the project tree on Drive; exist_ok=True keeps this cell re-runnable
project_dir = '/content/drive/MyDrive/RAG_Research'
os.makedirs(project_dir, exist_ok=True)
for subdir in (
    'notebooks',
    'data/raw',
    'data/processed',
    'data/embeddings',
    'models',
    'results',
    'src',
    'configs',
):
    os.makedirs(f'{project_dir}/{subdir}', exist_ok=True)

print("✅ Project structure created!")


🔍 Checking GPU availability...
❌ No GPU available. Please enable GPU in Runtime > Change runtime type

📂 Mounting Google Drive...
Mounted at /content/drive
✅ Project structure created!


# CELL 2: Install Dependencies

In [6]:
# Install core packages
!pip install -q transformers>=4.30.0
!pip install -q sentence-transformers>=2.2.0
!pip install -q datasets>=2.12.0
!pip install -q accelerate>=0.20.0
!pip install -q chromadb>=0.4.0
!pip install -q faiss-cpu>=1.7.4  # Changed from faiss-gpu
!pip install -q rouge-score>=0.1.2
!pip install -q bert-score>=0.3.13
!pip install -q wandb>=0.15.0
!pip install -q pandas numpy scikit-learn tqdm

print("✅ All dependencies installed!")

✅ All dependencies installed!


# CELL 3: Create Utility Functions

In [4]:
import importlib
import sys

# Directory on Drive that holds the reusable project modules.
src_dir = '/content/drive/MyDrive/RAG_Research/src'
# Only add the path once so re-running this cell does not pile up duplicates.
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Save this as a file for reuse.
# The module source lives in a plain triple-single-quoted string, so it must
# not contain backslashes or a run of three single quotes.
utils_code = '''
import os
import json
import pickle
import torch
from datetime import datetime
import pandas as pd
from typing import List, Dict, Any

class ColabUtils:
    """Utility functions optimized for Google Colab.

    All methods are static; file paths passed to save/load are interpreted
    relative to PROJECT_DIR on the mounted Google Drive.
    """

    # Root of the project on the mounted Drive.
    PROJECT_DIR = "/content/drive/MyDrive/RAG_Research"

    @staticmethod
    def save_to_drive(data, filepath):
        """Save data under PROJECT_DIR, choosing the format from the extension.

        Args:
            data: Object to store. Passed to json.dump for .json, pickle.dump
                for .pkl, and must provide to_csv() for .csv.
            filepath: Path relative to PROJECT_DIR; must end in .json, .pkl
                or .csv.

        Returns:
            True if the file was written, False on any error (including an
            unsupported extension). Errors are printed, not raised.
        """
        try:
            drive_path = f"{ColabUtils.PROJECT_DIR}/{filepath}"
            os.makedirs(os.path.dirname(drive_path), exist_ok=True)

            if filepath.endswith('.json'):
                with open(drive_path, 'w', encoding='utf-8') as f:
                    json.dump(data, f, indent=2)
            elif filepath.endswith('.pkl'):
                with open(drive_path, 'wb') as f:
                    pickle.dump(data, f)
            elif filepath.endswith('.csv'):
                data.to_csv(drive_path, index=False)
            else:
                # Without this branch nothing is written, yet success
                # would still be reported below.
                raise ValueError(
                    "unsupported file extension (expected .json, .pkl or .csv)"
                )

            print(f"✅ Saved to: {drive_path}")
            return True
        except Exception as e:
            print(f"❌ Error saving {filepath}: {e}")
            return False

    @staticmethod
    def load_from_drive(filepath):
        """Load data stored under PROJECT_DIR, choosing the parser from the extension.

        Args:
            filepath: Path relative to PROJECT_DIR; must end in .json, .pkl
                or .csv.

        Returns:
            The parsed JSON value, the unpickled object, or a pandas
            DataFrame; None on any error (including an unsupported
            extension). Errors are printed, not raised.
        """
        try:
            drive_path = f"{ColabUtils.PROJECT_DIR}/{filepath}"

            if filepath.endswith('.json'):
                with open(drive_path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            elif filepath.endswith('.pkl'):
                # SECURITY: unpickling can execute arbitrary code; only load
                # pickle files that this project wrote itself.
                with open(drive_path, 'rb') as f:
                    return pickle.load(f)
            elif filepath.endswith('.csv'):
                return pd.read_csv(drive_path)
            else:
                # Report the problem instead of silently returning None.
                raise ValueError(
                    "unsupported file extension (expected .json, .pkl or .csv)"
                )

        except Exception as e:
            print(f"❌ Error loading {filepath}: {e}")
            return None

    @staticmethod
    def clear_gpu_memory():
        """Release cached GPU memory via torch.cuda.empty_cache(); no-op without CUDA."""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            print("🧹 GPU memory cleared")

    @staticmethod
    def check_disk_space():
        """Print and return the free space on /content in GiB (1024**3 bytes)."""
        statvfs = os.statvfs('/content')
        # f_bavail counts the blocks available to unprivileged processes.
        free_space = statvfs.f_frsize * statvfs.f_bavail / (1024**3)
        print(f"💾 Available disk space: {free_space:.2f} GB")
        return free_space

    @staticmethod
    def get_runtime_info():
        """Print a GPU / RAM / disk summary and return the figures.

        Returns:
            Dict with gpu_total, gpu_used, gpu_free, ram_total, ram_used and
            ram_free, all in units of 1e9 bytes. GPU values are 0 when CUDA
            is unavailable.
        """
        # Imported lazily so the module itself can be imported without psutil.
        import psutil

        # GPU info
        if torch.cuda.is_available():
            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
            gpu_used = torch.cuda.memory_allocated() / 1e9
            gpu_free = gpu_memory - gpu_used
        else:
            gpu_memory = gpu_used = gpu_free = 0

        # RAM info
        ram = psutil.virtual_memory()
        ram_total = ram.total / 1e9
        ram_used = ram.used / 1e9
        ram_free = ram.available / 1e9

        print(f"🖥️  Runtime Info:")
        print(f"   GPU Memory: {gpu_used:.1f}/{gpu_memory:.1f} GB")
        print(f"   RAM: {ram_used:.1f}/{ram_total:.1f} GB")
        # check_disk_space() prints its own line before this one is emitted.
        print(f"   Disk: {ColabUtils.check_disk_space():.1f} GB free")

        return {
            'gpu_total': gpu_memory,
            'gpu_used': gpu_used,
            'gpu_free': gpu_free,
            'ram_total': ram_total,
            'ram_used': ram_used,
            'ram_free': ram_free
        }
'''

# Save utils to file. The source contains emoji and Python reads module
# source as UTF-8, so the encoding is set explicitly instead of relying on
# the locale default.
with open(f'{src_dir}/colab_utils.py', 'w', encoding='utf-8') as f:
    f.write(utils_code)

# Import the utils. The file was (re)written after the interpreter started,
# so drop the import system's cached directory listings, and reload the module
# if an earlier run of this cell already imported an older version.
importlib.invalidate_caches()
if 'colab_utils' in sys.modules:
    importlib.reload(sys.modules['colab_utils'])
from colab_utils import ColabUtils
utils = ColabUtils()

print("✅ Utility functions created and loaded!")


✅ Utility functions created and loaded!


# CELL 4: Test Environment

In [8]:
from datetime import datetime

# Test the setup
utils.get_runtime_info()

# Test saving/loading: round-trip a small JSON payload through Drive
test_data = {"test": "data", "timestamp": str(datetime.now())}
saved = utils.save_to_drive(test_data, "results/test_save.json")
loaded_data = utils.load_from_drive("results/test_save.json")

# save_to_drive / load_from_drive report failures by returning False / None
# instead of raising, so the result has to be checked explicitly; otherwise
# the success line below would be printed even when the round trip failed.
if not saved or loaded_data != test_data:
    raise RuntimeError(
        f"Save/Load test failed: expected {test_data}, got {loaded_data}"
    )
print(f"✅ Save/Load test: {loaded_data}")

print("\n🎉 Environment setup complete! You can now proceed to data preparation.")

🖥️  Runtime Info:
   GPU Memory: 0.0/0.0 GB
   RAM: 1.3/13.6 GB
💾 Available disk space: 65.80 GB
   Disk: 65.8 GB free
✅ Saved to: /content/drive/MyDrive/RAG_Research/results/test_save.json
✅ Save/Load test: {'test': 'data', 'timestamp': '2025-06-23 11:59:49.708881'}

🎉 Environment setup complete! You can now proceed to data preparation.
