Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
404 changes: 404 additions & 0 deletions changai/changai/api/v2/assets/changai_alias_map.json

Large diffs are not rendered by default.

10,266 changes: 10,264 additions & 2 deletions changai/changai/api/v2/assets/non_erp_combined.csv

Large diffs are not rendered by default.

Binary file not shown.
59,836 changes: 59,836 additions & 0 deletions changai/changai/api/v2/assets/non_erp_combined.processed.json

Large diffs are not rendered by default.

13 changes: 10 additions & 3 deletions changai/changai/api/v2/auto_gen_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _get_file_doc_by_name(file_name: str, folder: str = RAG_FOLDER) -> Optional[
return None
return frappe.get_doc("File", file_id)


@frappe.whitelist(allow_guest=False)
def _read_filedoctype(file_name: str, folder: str = RAG_FOLDER):
doc = _get_file_doc_by_name(file_name, folder)
if not doc:
Expand Down Expand Up @@ -300,6 +300,9 @@ def sync_master_data_smart() -> Dict[str, Any]:
final_data = rebuilt_rows

meta["last_sync"] = str(now_datetime())
settings = frappe.get_single("ChangAI Settings")
settings.last_masterdata_sync = meta["last_sync"]
settings.save(ignore_permissions=True)
payload_out = {"_meta": meta, "data": final_data}
file_doc = write_filedoctype(file_name, payload_out, folder=RAG_FOLDER)

Expand Down Expand Up @@ -740,7 +743,7 @@ def sync_tables_and_schema_smart() -> Dict[str, Any]:
meta, tables_blocks = _normalize_schema_payload(payload)

by_table = _build_table_map(tables_blocks)
last_sync_raw = meta.get("last_doctype_sync")
last_sync_raw = meta.get("last_sync")
changed_doctypes = _get_changed_doctypes(last_sync_raw)
app_names=["erpnext","frappe"]
erpnext_modules = get_mod(app_names)
Expand Down Expand Up @@ -771,7 +774,11 @@ def sync_tables_and_schema_smart() -> Dict[str, Any]:
if _strip_tab(table) in valid_doctypes
}
_clean_schema_fields(by_table)
meta["last_doctype_sync"] = str(now_datetime())
meta["last_sync"] = str(now_datetime())
settings = frappe.get_single("ChangAI Settings")
settings.last_schema_sync = meta["last_sync"]
settings.save(ignore_permissions=True)

try:
_write_schema_outputs(meta, by_table, current_tables)
except Exception as e:
Expand Down
Binary file modified changai/changai/api/v2/fvs_stores/erpnext/emb_dir/field_docs.pkl
Binary file not shown.
Binary file modified changai/changai/api/v2/fvs_stores/erpnext/schema_fvs/index.faiss
Binary file not shown.
Binary file modified changai/changai/api/v2/fvs_stores/erpnext/schema_fvs/index.pkl
Binary file not shown.
Binary file modified changai/changai/api/v2/fvs_stores/erpnext/table_fvs/index.faiss
Binary file not shown.
Binary file modified changai/changai/api/v2/fvs_stores/erpnext/table_fvs/index.pkl
Binary file not shown.
130 changes: 105 additions & 25 deletions changai/changai/api/v2/schema_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import json
from typing import Any, Dict, List, Tuple, Union, Optional, Set
import yaml
from frappe.utils import getdate
from pathlib import Path

def _safe_join(base: Path, rel: str) -> Path:
Expand All @@ -21,6 +22,7 @@ def _safe_join(base: Path, rel: str) -> Path:

_ALLOWED_EXT = {".json", ".yaml",".j2", ".yml", ".txt", ".md"}
_ASSETS_DIR = Path(frappe.get_app_path("changai", "changai", "api", "v2", "assets")).resolve()
_PROMPTS_DIR = Path(frappe.get_app_path("changai", "changai", "prompts")).resolve()
RAG_FOLDER = "Home/RAG Sources"
JSON_EXT = ".json"
YAML_EXT = ".yaml"
Expand Down Expand Up @@ -98,45 +100,122 @@ def _load_mapping_data() -> dict:
@frappe.whitelist()
def validate_sql_schema(sql: str, dialect: str = "mysql") -> dict:
try:
mapping_data = _load_mapping_data() # fresh load every time
mapping_data = {
table: columns
for table, columns in mapping_data.items()
if table and table.strip() and columns # skip empty table names AND empty column dicts
}
schema = MappingSchema(mapping_data, dialect=dialect)
mapping_data, schema = get_mapping_schema(dialect)

ast = sqlglot.parse_one(sql, read=dialect)
used_tables = {table.name for table in ast.find_all(exp.Table)}
small_mapping = {
table: mapping_data[table]
for table in used_tables
if table in mapping_data
}

for table in ast.find_all(exp.Table):
if table.name and table.name not in mapping_data:
return {"ok": False, "error": f"Table '{table.name}' does not exist in schema"}
return {
"ok": False,
"error": f"Table '{table.name}' does not exist in schema"
}

qualified = optimizer.qualify.qualify(
ast,
schema=small_mapping,
dialect=dialect,
identify=False,
)

qualified = optimizer.qualify.qualify(ast, schema=schema, dialect=dialect,identify=False,)
return {"ok": True, "qualified_sql": qualified.sql()}
return {
"ok": True,
"qualified_sql": qualified.sql()
}

except sqlglot.errors.OptimizeError as e:
return {"ok": False, "error": str(e)}
except sqlglot.errors.ParseError as e:
return {"ok": False, "error": str(e)}

from frappe.utils import add_to_date, today, date_diff, days_diff
MASTER_DOCTYPES = [
"Customer",
"Supplier",
"Item",
"Warehouse",
"Company",
"Account"
]

@frappe.whitelist(allow_guest=False)
def checkmaster_updates():
file_name = "master_data.yaml"
payload = _read_filedoctype(file_name, RAG_FOLDER)
if not payload:
return {"update": False, "data": False}
if not payload.get("data") or not payload.get("_meta"):
return {"data": False}
meta = payload.get("_meta") or {}
lastdate = meta["last_sync"]
docs = frappe.get_all("DocType",filters={"modified":[">",lastdate]})
if len(docs)>0:
return {"update": False,"data":True,"days": days_diff(today(),lastdate)}
return {"update": True,"data":True}
def check_file_updates(file_name=None):
    """Report whether a synced RAG file is still current.

    Reads the "ChangAI Settings" single document and picks the last-sync
    timestamp that belongs to ``file_name``:

    * ``"master_data.yaml"`` -> ``settings.last_masterdata_sync``
    * ``"schema.yaml"``      -> ``settings.last_schema_sync``

    Any other ``file_name`` (including the default ``None``) is rejected
    through ``frappe.throw``.

    Returns a dict with the keys:

    * ``update_status`` -- ``True`` when nothing was modified after the last
      sync, ``False`` when something was modified or no sync was recorded.
    * ``data`` -- always ``True`` in this function.
    * ``days`` -- ``days_diff(today(), getdate(last_sync))``, or ``0`` when no
      sync was recorded.
    * ``last_sync`` -- the stored timestamp, or ``None`` when no sync was
      recorded.
    """
    # NOTE(review): "data" is True on every return path, so a caller testing
    # `not res.get("data")` can never take that branch -- confirm intended.
    settings = frappe.get_single("ChangAI Settings")

    wants_master = file_name == "master_data.yaml"
    wants_schema = file_name == "schema.yaml"

    if wants_master:
        last_sync = settings.last_masterdata_sync
    elif wants_schema:
        last_sync = settings.last_schema_sync
    else:
        frappe.throw("Invalid file_name")

    # No sync recorded yet: report "not up to date" with a zero age.
    if not last_sync:
        return {
            "update_status": False,
            "data": True,
            "days": 0,
            "last_sync": None
        }

    def _modified_after_sync(doctype):
        # A fresh filter dict per query, exactly as the inline calls built it.
        return frappe.db.exists(doctype, {"modified": [">", last_sync]})

    if wants_schema:
        # Schema is stale when any DocType record changed after the sync.
        changed = _modified_after_sync("DocType")
    else:
        # Master data is stale when any record of a master doctype changed;
        # any() stops at the first hit, like the original loop's break.
        changed = any(_modified_after_sync(doctype) for doctype in MASTER_DOCTYPES)

    age_in_days = days_diff(today(), getdate(last_sync))

    return {
        "update_status": not bool(changed),
        "data": True,
        "days": age_in_days,
        "last_sync": last_sync
    }


@frappe.whitelist()
def reload_mapping_schema_cache():
    """Discard the cached mapping data and schema, then rebuild them.

    Both module-level cache slots are reset to ``None`` and
    ``get_mapping_schema()`` is called once with its default arguments so the
    cache is repopulated before this function returns.

    Returns:
        ``{"ok": True}``.
    """
    # NOTE(review): these are module globals, so presumably only the process
    # serving this call is refreshed -- confirm for multi-worker deployments.
    global _MAPPING_DATA, _MAPPING_SCHEMA
    _MAPPING_DATA = _MAPPING_SCHEMA = None
    get_mapping_schema()
    return {"ok": True}


# Module-level cache: the filtered table -> columns mapping and the sqlglot
# MappingSchema built from it. Both are reset to None to force a rebuild.
_MAPPING_DATA = None
_MAPPING_SCHEMA = None
# Dialect the cached _MAPPING_SCHEMA was built for; lets a call with a
# different dialect rebuild instead of reusing a mismatched schema.
_MAPPING_SCHEMA_DIALECT = None


def get_mapping_schema(dialect="mysql"):
    """Return ``(mapping_data, schema)``, building and caching them on demand.

    ``mapping_data`` is the result of ``_load_mapping_data()`` with entries
    dropped whose table name is empty/blank or whose column mapping is empty.
    ``schema`` is a ``MappingSchema`` built from that mapping for ``dialect``.

    The schema is rebuilt when it is missing, when the mapping data had to be
    reloaded, or when ``dialect`` differs from the dialect the cached schema
    was built for. Previously the first caller's dialect was silently reused
    for every later call.

    Args:
        dialect: SQL dialect name passed to ``MappingSchema``.

    Returns:
        Tuple ``(mapping_data, schema)``.
    """
    global _MAPPING_DATA, _MAPPING_SCHEMA, _MAPPING_SCHEMA_DIALECT

    if _MAPPING_DATA is None:
        raw_mapping = _load_mapping_data()
        _MAPPING_DATA = {
            table: columns
            for table, columns in raw_mapping.items()
            if table and table.strip() and columns
        }
        # A schema built from the previous mapping is stale now.
        _MAPPING_SCHEMA = None

    if _MAPPING_SCHEMA is None or _MAPPING_SCHEMA_DIALECT != dialect:
        _MAPPING_SCHEMA = MappingSchema(_MAPPING_DATA, dialect=dialect)
        _MAPPING_SCHEMA_DIALECT = dialect

    return _MAPPING_DATA, _MAPPING_SCHEMA

@frappe.whitelist()
def convert_yaml_schema_to_sqlglot_meta() -> dict:
Expand Down Expand Up @@ -171,6 +250,7 @@ def convert_yaml_schema_to_sqlglot_meta() -> dict:
json.dumps(meta, indent=2),
encoding="utf-8"
)
reload_mapping_schema_cache()

return {
"ok": True,
Expand All @@ -185,6 +265,6 @@ def convert_yaml_schema_to_sqlglot_meta() -> dict:
from frappe import _
@frappe.whitelist(allow_guest=False)
def test():
res=checkmaster_updates()
if not res.get("update"):
res=check_file_updates("master_data.yaml")
if not res.get("update_status"):
frappe.throw(_("Please update master data for entity recognition to work. Click on Update Master Data button in Training tab in ChangAI Settings.<br>Check Quick Start Guide Here 👇"))
49 changes: 38 additions & 11 deletions changai/changai/api/v2/text2sql_pipeline_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from langchain_huggingface import HuggingFaceEmbeddings
from google import genai
from google.genai import types
from changai.changai.api.v2.schema_utils import validate_sql_schema,_load_mapping_data,checkmaster_updates
from changai.changai.api.v2.schema_utils import validate_sql_schema,_load_mapping_data,check_file_updates
from google.oauth2 import service_account

from werkzeug.wrappers import Response
Expand Down Expand Up @@ -1442,7 +1442,9 @@ def remote_entity_embedder(q: str) -> Union[list, str]:
# _VS_MASTER = FAISS.load_local(master_vs_path, emb, allow_dangerous_deserialization=True)
# return _VS_MASTER


settingsUrl = frappe.utils.get_url(
"/app/changai-settings/ChangAI%20Settings"
)
@frappe.whitelist(allow_guest=False)
def get_master_vs():
global _VS_MASTER
Expand All @@ -1455,10 +1457,19 @@ def get_master_vs():
master_vs_path = frappe.get_site_path(
"private", "changai", "fvs_stores", "erpnext", "masterdata_fvs"
)

if not os.path.exists(master_vs_path):
frappe.throw(_(f"FAISS MASTER store not found at {0}.Please click on Update Master Data button in Training tab in ChangAI Settings"
f"Check Quick Start Guide Here 👇:\n {1}").format(master_vs_path, CHANGAI_GUIDE_LINK))
frappe.throw(_(
"FAISS MASTER store not found at {0}.<br><br>"
"Please open "
"<a href='{1}' target='_blank' rel='noopener noreferrer'>ChangAI Settings</a> "
"and click on the <b>Update Master Data</b> button in the Training tab.<br><br>"
"Check Quick Start Guide Here 👇<br>"
"<a href='{2}' target='_blank' rel='noopener noreferrer' style='color:#1e90ff;'>Click here</a>"
).format(
master_vs_path,
settingsUrl,
CHANGAI_GUIDE_LINK
))

_VS_MASTER = FAISS.load_local(
master_vs_path,
Expand Down Expand Up @@ -1553,13 +1564,29 @@ def detect_specific_entities(state: SQLState) -> SQLState:
return {**state, "entity_cards": [], "entity_raw": None}

try:
res=checkmaster_updates()
res = check_file_updates("master_data.yaml")

if not res.get("data"):
frappe.throw(_("Master Data do not exist. Bcs of that result may not come accurate. For better accuracy please update by clicking on <b>Update Master Data</b> button in Training tab in ChangAI Settings.<br>Check Quick Start Guide Here 👇:\n"
"<a href='{1}' target='_blank' rel='noopener noreferrer' style='color: #1e90ff;'>Click here</a>").format(res.get("days"), CHANGAI_GUIDE_LINK))
if not res.get("update") and res.get("days")>0:
frappe.throw(_("Your master data is {0} days old. Bcs of that result may not come accurate. For better accuracy please update by clicking on <b>Update Master Data</b> button in Training tab in ChangAI Settings.<br>Check Quick Start Guide Here 👇:\n"
"<a href='{1}' target='_blank' rel='noopener noreferrer' style='color: #1e90ff;'>Click here</a>").format(res.get("days"), CHANGAI_GUIDE_LINK))
frappe.throw(_(
"Master Data does not exist. Because of this, results may not be accurate. "
"For better accuracy, please open "
"<a href='{0}' target='_blank' rel='noopener noreferrer'>ChangAI Settings</a> "
"and click on the <b>Update Master Data</b> button in the Training tab.<br><br>"
"Check Quick Start Guide Here 👇:<br>"
"<a href='{1}' target='_blank' rel='noopener noreferrer' style='color: #1e90ff;'>Click here</a>"
).format(settingsUrl, CHANGAI_GUIDE_LINK))

if not res.get("update_status") and res.get("days", 0) > 0:
frappe.throw(_(
"Your master data is {0} days old. "
"Because of this, results may not be accurate. "
"For better accuracy, please open "
"<a href='{1}' target='_blank' rel='noopener noreferrer'>ChangAI Settings</a> "
"and click on the <b>Update Master Data</b> button in the Training tab.<br><br>"
"Check Quick Start Guide Here 👇:<br>"
"<a href='{2}' target='_blank' rel='noopener noreferrer' style='color: #1e90ff;'>Click here</a>"
).format(res.get("days"), settingsUrl, CHANGAI_GUIDE_LINK))

out = call_entity_retriever(q)
return {
**state,
Expand Down
Loading
Loading