Skip to content

Commit

Permalink
more progress on the new SMILES parser:
Browse files Browse the repository at this point in the history
 * smiles3 picks up atom from CL, not chem

 * smiles3 shadows chem:parse-smiles-string

 * add orientation slot to smiles-atom

 * make the hydrogen count itself optional, not the H (well,
   <hydrogen-count> is still an optional choice, but not the H itself
   in <hydrogen-count>, which was matching empty input

 * use backtracking forms in charge -- not sure if this is necssary,
   but a more principled approach to figure out where we don't need to
   backtrack is called for in general

 * add <orientation> and <bracket-modifier>

 * use new bracket modifiers and do some validation on them for
   charge, hydrogen count and orientation

 * add make-load-forms -- revisit this!
  • Loading branch information
slyrus committed Feb 2, 2011
1 parent 41c2707 commit 1d91b69
Showing 1 changed file with 75 additions and 31 deletions.
106 changes: 75 additions & 31 deletions smiles3.lisp
Expand Up @@ -4,6 +4,8 @@

(cl:defpackage #:smiles3
(:use #:cl #:parser-combinators)
(:shadowing-import-from #:cl #:atom)
(:shadow #:parse-smiles-string)
(:import-from #:chemicl
#:molecule
#:get-element
Expand All @@ -23,7 +25,8 @@
((aromatic :initarg :aromatic :accessor aromatic :initform nil)
(explicit-hydrogen-count :initarg :explicit-hydrogen-count
:accessor explicit-hydrogen-count
:initform 0)))
:initform 0)
(orientation :initarg :orientation :accessor orientation :initform nil)))

(defun make-smiles-atom (element-identifier &rest args)
(apply #'make-instance
Expand Down Expand Up @@ -59,27 +62,28 @@
'("b" "c" "n" "o" "s" "p")))))

;;;
;;; Bracketed atoms e.g. [Na]
;;; Bracketed atoms
;;; e.g. [H] [Na] [13C] [O-]
;;; (not yet working: [C@] [C@@]
(defun <isotope> ()
(nat?))

(defun <hydrogen-count> ()
(opt?
(named-seq*
#\H
(<- num (nat?))
(or num 1))))
(named-seq?
#\H
(<- num (opt? (nat?)))
(or num 1)))

(defun <charge> ()
(choice1 (named-seq*
(choice1 (named-seq?
(char? #\+)
(<- charge (opt? (choices1 (named-seq* (char? #\+) 2)
(nat*))))
(<- charge (opt? (choices1 (named-seq? (char? #\+) 2)
(nat?))))
(or charge 1))
(named-seq*
(named-seq?
(char? #\-)
(<- charge (opt? (choices1 (named-seq* (char? #\-) 2)
(nat*))))
(<- charge (opt? (choices1 (named-seq? (char? #\-) 2)
(nat?))))
(- (or charge 1)))))

(defun <bracket-aliphatic-atom-symbol> ()
Expand All @@ -95,6 +99,18 @@
#'<aromatic-atom-matcher>
'("c" "n" "o" "p" "s"))))

(defun <orientation> ()
(choices1 (chook? :clockwise "@@")
(chook? :counterclockwise "@")))

(defun <bracket-modifier> ()
(choices1 (named-seq? (<- hydrogens (<hydrogen-count>))
(list :hydrogen-count hydrogens))
(named-seq? (<- charge (<charge>))
(list :charge charge))
(named-seq? (<- orientation (<orientation>))
(list :orientation orientation))))

(defun <bracket-atom> ()
(named-seq? #\[
(<- isotope-number (opt? (nat?)))
Expand All @@ -105,22 +121,35 @@
(seq-list?
(<bracket-aromatic-atom-symbol>)
(result t))))
(<- hydrogen-count (<hydrogen-count>))
(<- charge (opt? (<charge>)))
(<- mods (validate?
(many? (<bracket-modifier>))
(lambda (x)
(if (<= (apply #'max
(map 'list
#'(lambda (f) (funcall f x :key #'car))
(map 'list
#'(lambda (y) (alexandria:curry #'count y))
'(:hydrogen-count :charge :orientation))))
1)
t
(error "Bracket modifier appeared more than once!")))))
#\]
(destructuring-bind (element aromaticity)
atom-aromaticity
(let* ((isotope
(when isotope-number
(chem::get-isotope element isotope-number)))
(atom (apply #'make-smiles-atom element
(append
(when isotope `(:isotope ,isotope))
(when charge `(:charge ,charge))
(when aromaticity `(:aromatic ,aromaticity))))))
(when hydrogen-count
(print (list 'hydrogens hydrogen-count)))
atom))))
(destructuring-bind (&key charge hydrogen-count orientation)
(apply #'append mods)
(let* ((isotope
(when isotope-number
(chem::get-isotope element isotope-number)))
(atom (apply #'make-smiles-atom element
(append
(when isotope `(:isotope ,isotope))
(when charge `(:charge ,charge))
(when aromaticity `(:aromatic ,aromaticity))
(when hydrogen-count
`(:explicit-hydrogen-count ,hydrogen-count))
(when orientation `(:orientation ,orientation))))))
atom)))))

;;;
;;; Atoms
Expand Down Expand Up @@ -254,6 +283,12 @@
(format stream "Error parsing SMILES string ~S"
(smiles-string condition)))))

(defun smiles-reader-error (stream control &rest args)
(error 'reader-error
:stream stream
:format-control control
:format-arguments args))

(defun parse-smiles-string (str)
(let ((parsed (parse-string* (<chain>) str :complete t)))
(if parsed
Expand Down Expand Up @@ -304,11 +339,6 @@
;;;
;;; Reader macro support for molecule literals
;;; e.g.: {CC(O)C}
(defun smiles-reader-error (stream control &rest args)
(error 'reader-error
:stream stream
:format-control control
:format-arguments args))

(eval-when (:compile-toplevel :load-toplevel :execute)
(named-readtables:defreadtable smiles-reader
Expand All @@ -322,3 +352,17 @@
(declare (ignore char))
(smiles-reader-error stream "unmatched curly brace")))))


;;; We need these to allow the reader macro to work in compiled files (literal atoms)
(defmethod make-load-form ((self chem:element) &optional environment)
(make-load-form-saving-slots self :environment environment))

(defmethod make-load-form ((self chem:isotope) &optional environment)
(make-load-form-saving-slots self :environment environment))

(defmethod make-load-form ((self chem:atom) &optional environment)
(make-load-form-saving-slots self :environment environment))

(defmethod make-load-form ((self chem:molecule) &optional environment)
(make-load-form-saving-slots self :environment environment))

0 comments on commit 1d91b69

Please sign in to comment.