Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

added python crm wrapper

  • Loading branch information...
commit 2e3411b93ea1ee7464c22fc76a22d753ea2b121c 1 parent 579f980
Sam Deane authored

Showing 1 changed file with 142 additions and 0 deletions. Show diff stats Hide diff stats

  1. +142 0 python/crm.py
142 python/crm.py
... ... @@ -0,0 +1,142 @@
  1 +#!/usr/bin/python
  2 +
  3 +"""
  4 +$Id: crm.py 158 2005-06-07 17:45:47Z sam $
  5 +
  6 +Python wrapper classes for the CRM114 Discriminator (http://crm114.sourceforge.net/).
  7 +
  8 +Requires the crm command to be installed and in your command path.
  9 +
  10 +The latest version of this file can be obtained from the Elegant Chaos subversion server (user=guest, pass=guest) at:
  11 + $URL: http://source.elegantchaos.com/projects/com/elegantchaos/libraries/python/crm.py $
  12 +
  13 +This module provides a very simplified interface to crm114. It does not attempt to expose all of crm114's power, instead it
  14 +tries to hide almost all of the gory details.
  15 +
  16 +To use the module, create an instance of the Classifier class, giving it a path (where to store the data files), and a list
  17 +of category strings (these are the "labels" to classify the text with).
  18 +
  19 +e.g:
  20 + c = Classifier("/path/to/my/data", ["good", "bad"])
  21 +
  22 +To teach the classifier object about some text, call the learn method passing in a category (one of the ones that you provided originally),
  23 +and the text.
  24 +
  25 +e.g:
  26 + c.learn("good", "some good text")
  27 + c.learn("bad", "some bad text")
  28 +
  29 +To find out what the classifier thinks about some text, call the classify method passing in the text. The result of this
  30 +method is a pair - the first item being the category best matching the text, and the second item being the probability of the match.
  31 +
  32 +e.g:
  33 + (classification, probability) = c.classify("some text")
  34 +
  35 +TODO: use proper path separator variable in the regular expression instead of assuming that it's a slash
  36 +
  37 +"""
  38 +
  39 +__version__ = "1.0.0a1"
  40 +
  41 +__license__ = """
  42 +Copyright (C) 2005 Sam Deane.
  43 +
  44 +This program is free software; you can redistribute it and/or
  45 +modify it under the terms of the GNU General Public License
  46 +as published by the Free Software Foundation; either version 2
  47 +of the License, or (at your option) any later version.
  48 +
  49 +This program is distributed in the hope that it will be useful,
  50 +but WITHOUT ANY WARRANTY; without even the implied warranty of
  51 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  52 +GNU General Public License for more details.
  53 +
  54 +You should have received a copy of the GNU General Public License
  55 +along with this program; if not, write to the Free Software
  56 +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  57 +"""
  58 +
  59 +import os;
  60 +import string;
  61 +
# constants

# name of the crm114 executable; it is invoked through the shell, so it must be in the command path
kCrmPath = "crm"

# flags handed to crm's learn/classify statements, and the extension used for each category's data file
kClassificationType = "<osb unique microgroom>"
kClassificationExtension = ".css"

# crm program templates appended to kCrmPath to form a shell command line.
# kLearnCommand takes (classification type, data file path).
kLearnCommand = " '-{ learn %s ( %s ) }'"

# kClassifyCommand takes (classification type, space separated data files, regexp-escaped data
# directory, data file extension) and makes crm print "<category>\t<probability>".
# Written as a raw string so the backslashes meant for crm's regexp are not treated as (invalid)
# Python escape sequences; the resulting value is unchanged.
kClassifyCommand = r" '-{ isolate (:stats:); classify %s ( %s ) (:stats:); match [:stats:] (:: :best: :prob:) /Best match to file .. \(%s\/([[:graph:]]+)\%s\) prob: ([0-9.]+)/; output /:*:best:\t:*:prob:/ }'"
  71 +
  72 +
# wrapper for crm114
class Classifier:
    """Simplified front end to the external crm114 command.

    Each category is backed by one data file named <category> + kClassificationExtension
    inside the data directory. All work is done by piping text into a crm process.

    NOTE: the data directory and category names are interpolated into a shell command
    line without quoting, so they must not contain spaces, quotes or other shell
    metacharacters.
    """

    def __init__(self, path, categories=None):
        """Remember the data directory and categories, then create the data files.

        path       -- directory in which the category data files are stored
        categories -- list of category names; defaults to a new empty list
        """
        # a None default avoids sharing one list object between all instances
        if categories is None:
            categories = []
        self.categories = categories
        self.path = path
        self.makeFiles()

    # teach the classifier what category some new text is in
    def learn(self, category, text):
        """Feed text to crm's learn statement for the given category's data file."""
        command = kCrmPath + (kLearnCommand % (kClassificationType, self._getFilePath(category)))
        self._runCrm(command, text, False)

    # ask the classifier what category best matches some text
    def classify(self, text):
        """Return a (category, probability) pair for text.

        Returns ("", 0.0) when crm produces no result line.
        """
        # need to escape path separator for the regexp matching
        escapedPath = self.path.replace("/", "\\/")
        command = kCrmPath + (kClassifyCommand % (kClassificationType, self.getFileListString(), escapedPath, kClassificationExtension))
        output = self._runCrm(command, text, True)
        # only the first line of output carries the result
        return self._parseResult(output.split("\n", 1)[0])

    # ensure that data files exist, by calling learn with an empty string
    def makeFiles(self):
        """Create the data directory (including missing parents) and one data file per category."""
        # make directory if necessary; makedirs copes with nested paths such as "test/data"
        if not os.path.exists(self.path):
            os.makedirs(self.path)

        # make category files
        for category in self.categories:
            self.learn(category, "")

    # return a list of classification files
    def getFileList(self):
        """Return the data file path of every category, in category order."""
        return [self._getFilePath(category) for category in self.categories]

    # return a list of classification files as a string
    def getFileListString(self):
        """Return the data file paths joined by single spaces."""
        return " ".join(self.getFileList())

    # perform some self tests
    def test(self):
        """Learn two sample texts and print how one of them is classified."""
        print(self.getFileList())
        self.learn("good", "this is a test")
        self.learn("bad", "this is very bad")
        print("class was: %s, prob was:%f" % (self.classify("this is a test")))

    # build the data file path for a category
    def _getFilePath(self, category):
        """Return the path of the data file that backs category."""
        return os.path.join(self.path, category + kClassificationExtension)

    # run a crm command line, sending it some text
    def _runCrm(self, command, text, captureOutput):
        """Run command through the shell with text on its standard input.

        Returns the command's standard output when captureOutput is true; otherwise the
        output is left attached to this process's stdout and "" is returned.
        """
        import subprocess  # local import: only this helper needs it

        if captureOutput:
            stdout = subprocess.PIPE
        else:
            stdout = None
        process = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE,
                                   stdout=stdout, universal_newlines=True)
        # communicate() writes the input, closes the pipe and waits for crm to exit
        output = process.communicate(text)[0]
        return output or ""

    # turn one line of crm output into a result pair
    @staticmethod
    def _parseResult(line):
        """Parse a "<category> <probability>" line into (category, probability).

        Returns ("", 0.0) when the line has fewer than two fields, which is what an
        empty crm result looks like. Raises ValueError if the probability field is
        not a number.
        """
        fields = line.split()
        if len(fields) < 2:
            return ("", 0.0)
        return (fields[0], float(fields[1]))
  138 +
if __name__ == "__main__":
    # when run as a script, exercise the classifier's built-in self test
    Classifier("test/data", ["good", "bad"]).test()

0 comments on commit 2e3411b

Please sign in to comment.
Something went wrong with that request. Please try again.