In [None]:
%%bash
# Lay out the AgentsMentor project tree under the Kaggle working directory
PROJECT_ROOT=/kaggle/working/agentsmentor
for subdir in \
    backend/agents \
    backend/mcp_tools \
    backend/tools \
    backend/memory \
    samples \
    artifacts/charts \
    artifacts/notebooks
do
    # -p creates missing parents and is a no-op when the folder already exists
    mkdir -p "${PROJECT_ROOT}/${subdir}"
done

# Print the resulting tree (first 200 lines at most)
echo "Created folders:"
ls -R "${PROJECT_ROOT}" | sed -n '1,200p'


In [None]:
%%writefile /kaggle/working/agentsmentor/backend/app.py
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import uuid

from agents.router_agent import route_query
from agents.dataset_agent import analyze_dataset
from agents.debug_agent import debug_code
from agents.model_agent import suggest_model, generate_notebook

# Streamlit App Settings
st.set_page_config(page_title="AgentsMentor - Kaggle Concierge", layout="wide")

# Uploaded datasets go to samples/, generated files to artifacts/; both folders
# are created up front so later writes cannot fail on a missing directory.
BASE = Path("/kaggle/working/agentsmentor")
DATA_DIR = BASE / "samples"
ARTIFACTS_DIR = BASE / "artifacts"
ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)
DATA_DIR.mkdir(parents=True, exist_ok=True)

# The title previously contained mojibake (UTF-8 bytes decoded as cp1252);
# the intended emoji and en dash are restored here.
st.title("🧠 AgentsMentor – Kaggle Problem-Solving Concierge (MVP)")

# Session initialization: both keys start as None until a dataset is uploaded
if "dataset_id" not in st.session_state:
    st.session_state["dataset_id"] = None
if "dataset_path" not in st.session_state:
    st.session_state["dataset_path"] = None

# Sidebar selector deciding which of the page sections below is rendered
menu = st.sidebar.selectbox(
    "Choose Action",
    ["Upload Dataset", "Analyze Dataset", "Debug Code", "Generate Notebook"]
)

# Upload Dataset
if menu == "Upload Dataset":
    # Header emoji restored from mojibake (UTF-8 bytes decoded as cp1252)
    st.header("📤 Upload a CSV Dataset")
    uploaded_file = st.file_uploader("Choose CSV file", type=["csv"])
    if uploaded_file:
        # A fresh UUID prefix keeps uploads with the same filename from colliding
        dataset_id = str(uuid.uuid4())
        # The filename is client-supplied: keep only its final path component so
        # a name containing separators cannot point outside DATA_DIR or at a
        # non-existent sub-directory (which would make the write fail).
        safe_name = Path(uploaded_file.name).name or "dataset.csv"
        path = DATA_DIR / f"{dataset_id}_{safe_name}"
        with open(path, "wb") as f:
            f.write(uploaded_file.getvalue())
        # Remember the dataset so the other pages can find it via session state
        st.session_state["dataset_id"] = dataset_id
        st.session_state["dataset_path"] = str(path)
        st.success(f"Dataset uploaded successfully! ID: {dataset_id}")
        st.write("Saved at:", path)

# Analyze Dataset
if menu == "Analyze Dataset":
    st.header("ðŸ“Š Dataset Insights")
    if not st.session_state["datase]()_


In [None]:
%%writefile /kaggle/working/agentsmentor/backend/agents/router_agent.py
# backend/agents/router_agent.py
def route_query(text: str):
    """
    Very small keyword-based intent router for the MVP.

    Args:
        text: Free-form user message. ``None``, empty and whitespace-only
            input are accepted and routed to "other".

    Returns:
        dict: {"intent": "dataset|debug|model|notebook|other", "summary": "one-line"}

    Matching is a case-insensitive substring search. Precedence, first hit wins:
      1. debug keywords
      2. explicit generation request ("generate" + "notebook"/"script")
      3. dataset keywords
      4. model keywords
      5. any remaining mention of "notebook"
      6. question heuristics, then the generic fallback
    """
    if not text or not text.strip():
        return {"intent": "other", "summary": "empty"}

    t = text.lower().strip()

    def first_hit(keys):
        # First keyword (in list order) that occurs in the message, else None
        return next((k for k in keys if k in t), None)

    # debug-related keywords (code / error)
    debug_keys = ["error", "traceback", "exception", "stacktrace", "syntax error", "nameerror", "typeerror", "shape mismatch", "missing import", "bug", "fix this"]
    hit = first_hit(debug_keys)
    if hit is not None:
        return {"intent": "debug", "summary": f"debug: matched keyword '{hit}'"}

    # An explicit "generate ... notebook/script" request must be checked before
    # the dataset/model keyword lists: such requests nearly always mention a
    # dataset or model too, which previously made this intent unreachable.
    if "generate" in t and ("notebook" in t or "script" in t):
        return {"intent": "notebook", "summary": "generate notebook/script"}

    # dataset-related keywords
    dataset_keys = ["dataset", "column", "columns", "missing", "null", "dtype", "skew", "histogram", "eda", "explore", "feature", "target"]
    hit = first_hit(dataset_keys)
    if hit is not None:
        return {"intent": "dataset", "summary": f"dataset: matched keyword '{hit}'"}

    # model / baseline / ml keywords
    model_keys = ["baseline", "model", "randomforest", "xgboost", "train", "validation", "score", "accuracy", "roc", "auc"]
    hit = first_hit(model_keys)
    if hit is not None:
        return {"intent": "model", "summary": f"model: matched keyword '{hit}'"}

    # Any other mention of a notebook (no dataset/model keyword present)
    if "notebook" in t:
        return {"intent": "notebook", "summary": "generate notebook/script"}

    # fallback: use simple heuristics for questions
    if t.endswith("?"):
        # prefer dataset if mentions column/row
        if any(w in t for w in ["column", "row", "how many", "what is", "which"]):
            return {"intent": "dataset", "summary": "question - assume dataset"}
        return {"intent": "other", "summary": "question fallback"}
    return {"intent": "other", "summary": "fallback"}


In [None]:
%%writefile /kaggle/working/agentsmentor/backend/agents/dataset_agent.py
# backend/agents/dataset_agent.py
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

def analyze_dataset(path: Path):
    """
    Loads CSV at `path` and returns basic schema, missing counts, sample rows,
    and generates a histogram for the first numeric column (if any).

    Args:
        path: Location of the CSV file (anything accepted by ``Path``).

    Returns a dict:
      {
        "schema": [ {"column":..., "dtype":..., "non_null_count":...}, ...],
        "missing": {col: missing_count, ...},
        "sample_rows": [...],          # first 5 rows as records
        "chart": "<absolute path to png>" or None
      }

    Raises:
        FileNotFoundError: if `path` does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Dataset not found: {path}")

    df = pd.read_csv(path)

    # Build schema: one entry per column, in file order
    schema = [
        {
            "column": c,
            "dtype": str(df[c].dtype),
            "non_null_count": int(df[c].count()),
        }
        for c in df.columns
    ]

    # Missing values
    missing = {c: int(df[c].isna().sum()) for c in df.columns}

    # Sample rows (small)
    sample_rows = df.head(5).to_dict(orient="records")

    # Create chart artifact for first numeric column
    numeric_cols = df.select_dtypes(include="number").columns.tolist()
    chart_path = None
    if numeric_cols:
        col = numeric_cols[0]
        artifacts_dir = Path("/kaggle/working/agentsmentor/artifacts/charts")
        artifacts_dir.mkdir(parents=True, exist_ok=True)
        # Column names are arbitrary CSV headers; path separators inside them
        # would otherwise be interpreted as sub-directories and break savefig.
        safe_col = str(col).replace("/", "_").replace("\\", "_")
        out_path = artifacts_dir / f"{path.stem}_{safe_col}_hist.png"

        # Explicit figure/axes instead of the global pyplot state, and the
        # figure is always closed so a failed save does not leak it.
        fig, ax = plt.subplots(figsize=(6, 3))
        try:
            df[col].dropna().hist(bins=20, ax=ax)
            ax.set_title(f"Histogram: {col}")
            fig.tight_layout()
            fig.savefig(out_path)
        finally:
            plt.close(fig)
        chart_path = str(out_path)

    return {
        "schema": schema,
        "missing": missing,
        "sample_rows": sample_rows,
        "chart": chart_path
    }


In [None]:
%%writefile /kaggle/working/agentsmentor/backend/agents/debug_agent.py
# backend/agents/debug_agent.py
import ast
from typing import List, Dict

def _find_possible_missing_imports(code: str) -> List[Dict]:
    """
    Very small heuristic checks for common sklearn/linalg/etc mentions
    that may indicate missing imports. Returns list of suggestions.
    """
    suggestions = []
    lowered = code.lower()
    # common sklearn classname checks
    mapping = {
        "randomforestclassifier": "from sklearn.ensemble import RandomForestClassifier",
        "xgboost": "import xgboost as xgb",
        "pandas": "import pandas as pd",
        "numpy": "import numpy as np",
        "train_test_split": "from sklearn.model_selection import train_test_split",
    }
    for key, imp in mapping.items():
        if key in lowered and imp.split()[1] not in code:
            suggestions.append({"issue": f"Possible missing import related to '{key}'", "suggestion": imp})
    return suggestions

def debug_code(code: str):
    """
    Run the MVP's static checks over a Python snippet.

    Two checks are performed: a syntax check via ``ast.parse`` and the
    missing-import heuristic from ``_find_possible_missing_imports``.

    Returns:
      {
        "diagnostics": [...],   # one dict per finding
        "fix": "suggested code snippet or notes",
        "confidence": float     # fixed at 0.7, static checks only
      }
    """
    found = []

    # Step 1: does the snippet parse at all?
    try:
        ast.parse(code)
    except SyntaxError as err:
        found.append({"type": "SyntaxError", "message": str(err), "lineno": getattr(err, "lineno", None)})
    except Exception as err:
        found.append({"type": "ParseError", "message": str(err)})

    # Step 2: heuristic import hints, recorded as diagnostics too
    hints = _find_possible_missing_imports(code)
    found.extend(
        {"type": "MissingImport", "message": h["issue"], "suggestion": h["suggestion"]}
        for h in hints
    )

    # Step 3: render the findings as a commented report
    if not found:
        report = ["# No static issues detected. Consider runtime checks."]
    else:
        report = ["# Diagnostics found:"]
        for item in found:
            entry = f"# - {item['type']}: {item['message']}"
            if item.get("suggestion"):
                entry += f" -> try: {item['suggestion']}"
            report.append(entry)
        if hints:
            # The import statements themselves follow, ready to paste
            report.append("\n# Suggested imports:")
            report.extend(h["suggestion"] for h in hints)

    return {"diagnostics": found, "fix": "\n".join(report), "confidence": 0.7}


In [None]:
%%writefile /kaggle/working/agentsmentor/backend/agents/model_agent.py
# backend/agents/model_agent.py
from pathlib import Path

def suggest_model(dataset_id: str, dataset_path: Path):
    """
    Simple heuristic model suggestion for MVP.

    Args:
        dataset_id: Identifier of the dataset (currently not used by the heuristic).
        dataset_path: Location of the dataset CSV; must exist.

    Returns:
        dict with keys "recommended_model", "reason", "preprocessing" and
        "sample_code_cells" (a list of Python source lines).

    Raises:
        FileNotFoundError: if `dataset_path` does not exist.
    """
    dataset_path = Path(dataset_path)
    if not dataset_path.exists():
        raise FileNotFoundError(f"Dataset not found: {dataset_path}")

    # Embed the dataset's real location as a Python literal: the sample code
    # then works from any working directory and with any filename (a
    # hard-coded relative 'samples/<name>' path did neither).
    data_literal = repr(str(dataset_path.resolve()))

    # Heuristic: tabular -> RandomForest baseline
    suggestion = {
        "recommended_model": "RandomForestClassifier",
        "reason": "Good baseline for tabular problems (robust and easy).",
        "preprocessing": ["fill missing numeric with median", "one-hot encode categorical"],
        "sample_code_cells": [
            "import pandas as pd",
            "from sklearn.model_selection import train_test_split",
            "from sklearn.ensemble import RandomForestClassifier",
            "df = pd.read_csv({})".format(data_literal),
            "# TODO: add preprocessing here",
            "X = df.drop(columns=['target'], errors='ignore')",
            "y = df.get('target')",
            "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)",
            "model = RandomForestClassifier(n_estimators=100, random_state=42)",
            "model.fit(X_train, y_train)",
            "print('Done')"
        ]
    }
    return suggestion

def generate_notebook(dataset_id: str, dataset_path: Path, output_dir: Path = None):
    """
    Generates a simple .py "notebook-like" baseline script and saves it.

    Args:
        dataset_id: Used as the filename prefix: ``<dataset_id>_baseline.py``.
        dataset_path: Location of the dataset CSV the script should load.
        output_dir: Folder to write the script (and, when the script is run,
            its prediction sample) into. Defaults to the project's
            artifacts/notebooks/ folder.

    Returns:
        The saved script path as a string.
    """
    dataset_path = Path(dataset_path)
    if output_dir is None:
        artifacts_dir = Path("/kaggle/working/agentsmentor/artifacts/notebooks")
    else:
        artifacts_dir = Path(output_dir)
    artifacts_dir.mkdir(parents=True, exist_ok=True)

    fname = f"{dataset_id}_baseline.py"
    out_path = artifacts_dir / fname

    # Paths are embedded as Python literals via repr(): the script then loads
    # the dataset from its real location regardless of the working directory,
    # and filenames containing quotes cannot break the generated source.
    data_literal = repr(str(dataset_path.resolve()))
    preds_literal = repr(str(artifacts_dir / "prediction_sample.csv"))

    content_lines = [
        "# AgentsMentor generated baseline script (MVP)",
        "import pandas as pd",
        "from sklearn.model_selection import train_test_split",
        "from sklearn.ensemble import RandomForestClassifier",
        "",
        f"df = pd.read_csv({data_literal})",
        "print('Loaded:', df.shape)",
        "",
        "# Simple preprocessing (example)",
        "df = df.copy()",
        "for c in df.select_dtypes(include='number').columns:",
        "    df[c] = df[c].fillna(df[c].median())",
        "",
        "# Prepare X/y (replace 'target' with your target column name)",
        "if 'target' in df.columns:",
        "    X = df.drop(columns=['target'])",
        "    y = df['target']",
        "else:",
        "    X = df.drop(columns=[df.columns[-1]])",
        "    y = df[df.columns[-1]]",
        "",
        "# One-hot encode text/categorical features: the forest needs numeric input",
        "X = pd.get_dummies(X)",
        "",
        "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)",
        "model = RandomForestClassifier(n_estimators=100, random_state=42)",
        "model.fit(X_train, y_train)",
        "print('Training done. Sample score:', model.score(X_val, y_val))",
        "",
        "# Save example submission.csv (modify as needed)",
        "preds = model.predict(X_val)",
        f"pd.DataFrame({{'pred': preds}}).to_csv({preds_literal}, index=False)",
        ""
    ]
    out_path.write_text("\n".join(content_lines), encoding="utf-8")
    return str(out_path)


In [None]:
%%bash
# List the agents package directory to check which module files exist on disk
ls -l /kaggle/working/agentsmentor/backend/agents


In [None]:
%%bash
# Install the app's runtime dependencies through the same `python` that runs the
# import check below, so installation and verification cannot end up targeting
# two different environments (a bare `pip` on PATH may belong to another one).
python -m pip install --quiet streamlit pandas scikit-learn matplotlib
# Smoke test: fails loudly if any of the packages is not importable
python -c "import streamlit, pandas, sklearn, matplotlib; print('Packages OK')"


In [None]:
%%bash
# Write a three-row Titanic-style sample CSV into the samples folder.
# The delimiter is quoted ('CSV'), so the heredoc body is taken literally.
cat > /kaggle/working/agentsmentor/samples/sample_titanic.csv <<'CSV'
PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S
CSV

# Show the samples folder so the new file can be confirmed
ls -l /kaggle/working/agentsmentor/samples


In [None]:
%%bash
# Run Streamlit app for AgentsMentor on port 6006 (Kaggle shows a Public URL)
# NOTE(review): the command is started in the foreground (no `&`), so this cell
# presumably stays busy until the server is stopped - confirm before relying on Run All.
streamlit run /kaggle/working/agentsmentor/backend/app.py --server.port 6006 --server.headless true


In [None]:
# Example buggy code: X_train and y_train are not defined in this cell, so
# executing it as-is raises NameError at the fit() call.
# NOTE(review): presumably meant as sample input for the app's "Debug Code" page
# rather than as a cell to execute - confirm.
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(X_train, y_train)  # but X_train not defined


In [1]:
%%bash
# Diagnostics: check that the agents directory exists on disk ...
ls -l /kaggle/working/agentsmentor/backend/agents
# ... and print the module search path of the `python` found on PATH
python -c "import sys; print(sys.path)"


['', '/kaggle/lib/kagglegym', '/kaggle/lib', '/usr/lib/python311.zip', '/usr/lib/python3.11', '/usr/lib/python3.11/lib-dynload', '/usr/local/lib/python3.11/dist-packages', '/usr/lib/python3/dist-packages']


ls: cannot access '/kaggle/working/agentsmentor/backend/agents': No such file or directory
