Skip to content

Commit

Permalink
noun clustering near complete
Browse files Browse the repository at this point in the history
  • Loading branch information
jprovidence committed Jan 17, 2012
1 parent ae7557c commit 56ba051
Show file tree
Hide file tree
Showing 5 changed files with 818 additions and 59 deletions.
37 changes: 32 additions & 5 deletions src/Feed.hs
Expand Up @@ -5,11 +5,14 @@ module Feed (
, find
, transact
, wrap
, allFeeds
, updateFeed
, overflow
, numEntries
, lastUpdate
, entries
, entryFromId
, Feed(Feed, _id, _url)
) where


Expand All @@ -24,6 +27,7 @@ import Database.HDBC
import Database.HDBC.PostgreSQL
import Parse
import Entry
import Viterbi



Expand Down Expand Up @@ -90,12 +94,13 @@ assimilateFeed :: String -> IO ()
assimilateFeed url =
urlEntries url >>= \es ->
case es of
Nothing -> putStrLn "_ticket: No entries found"
Nothing -> putStrLn ">> No entries found."
Just xs -> do
con <- connectPostgreSQL connStr
saveMstr con xs url
mapM_ (save con url) xs
disconnect con
putStrLn ">> Feed assimilated."


----------------------------------------------------------------------------------------------------
Expand All @@ -106,7 +111,7 @@ cyclicalUpdate :: IO ()
cyclicalUpdate = do
fds <- allFeeds
case fds of
Nothing -> putStrLn "> No feeds detected. Ensure database is initialized and functional"
Nothing -> putStrLn ">> No feeds detected. Ensure database is initialized and functional"
Just xs -> mapM_ (updateFeed . _url) xs >> threadDelay 21600000000 >> cyclicalUpdate


Expand Down Expand Up @@ -167,10 +172,10 @@ saveMstr con es url = do

save :: Connection -> String -> Entry -> IO (Integer)
save con url e = do
let a = "'" ++ (description e) ++ "'"
let a = "'" ++ (L.filter noQuot $ description e) ++ "'"
b = "'" ++ (unsafePerformIO (feedFromUrl url >>= return . show . _id . fromJust)) ++ "'"
c = "'" ++ (date e) ++ "'"
d = "'" ++ (title e) ++ "'"
d = "'" ++ (L.filter noQuot $ title e) ++ "'"
f = "'" ++ (link e) ++ "'"
z = concat $ L.intersperse ", " [a, b, c, d, f]
sql <- return $ "insert into entries (content, feed_id, date, title, link) values (" ++ z ++ ");"
Expand Down Expand Up @@ -276,7 +281,7 @@ lastUpdate i = wrap (lastUpdate' i)

----------------------------------------------------------------------------------------------------

-- -- given a feed id, return all associated entries field
-- given a feed id, return all associated entries field

entries :: Int -> IO [Entry]
entries i = wrap (entries' i)
Expand All @@ -290,3 +295,25 @@ entries i = wrap (entries' i)


----------------------------------------------------------------------------------------------------

-- find an entry by its id

entryFromId :: Int -> IO Entry
entryFromId id = wrap (efID id) >>= return . flip (!!) 0

where efID :: Int -> Connection -> IO [Entry]
efID id con = do
let sel = "select * from entries where id=" ++ (show id) ++ ";"
f x = fromJust . fromSql . (flip (!!) x)
rows <- quickQuery' con sel []
return $ map (\e -> Entry {description=(f 1 e), date=(f 3 e), title=(f 4 e), link=(f 5 e)}) rows


----------------------------------------------------------------------------------------------------

-- remove single quotes

noQuot :: Char -> Bool
noQuot c = not (c == '\'' || c == '`')


58 changes: 58 additions & 0 deletions src/Main.hs
Expand Up @@ -38,6 +38,10 @@ processUsrCommands = do
"-fi" -> communicate
"-c" -> putStrLn ""
"-t" -> runAllTests
"-tag" -> runTagger
"-addFeed" -> addFeeds
"-precluster" -> precluster
"-nvis" -> runNounVisualization
"-testTextSim" -> testTextSim
"-testViterbi" -> testViterbi
"-testNouns" -> testNouns
Expand All @@ -56,11 +60,65 @@ printHelp = do
putStrLn ">> -fi : Run this instance as a function server"
putStrLn ">> -c : Run this instance as a corpus manager"
putStrLn ">> -t : Run all test functions"
putStrLn ">> -tag : Tag a document"
putStrLn ">> -addFeed : Add feeds to the index manually"
putStrLn ">> -precluster : Perform some pre-cluster processing"
putStrLn ">> -nvis : Run noun visualizations"
putStrLn ">> -testViterbi : Run tests of the viterbi algorithm"
putStrLn ">> -testTextSim : Run tests of text similarity functions"
putStrLn ">> -testNouns : Run tests of the noun clustering functions"



runTagger :: IO ()
runTagger = do
putStrLn ">> Training Viterbi, please wait..."
vit <- trainVit
putStrLn ">> Training Complete."
doTag vit

doTag :: Vit -> IO ()
doTag vit = do
putStrLn ">> Enter the id of the document you would like to tag."
input <- getLine
putStrLn (">> Will tag document with id " ++ input ++ ".")
entry <- entryFromId (read input)
res <- tag vit $ B.pack $ description entry
putStrLn ">> Tagged Document: \n\n"
putStrLn (show res)
putStrLn "\n\n>> Tag Another? y/n"
input <- getLine
case (input == "y") || (input == "Y") of
True -> doTag vit
False -> return ()


addFeeds :: IO ()
addFeeds = do
putStrLn ">> Please enter the feed URL"
input <- getLine
putStrLn (">> Adding feed data at " ++ input ++ ", please wait...")
assimilateFeed input
putStrLn ">> Would you like to add another? y/n"
input <- getLine
case input of
"y" -> addFeeds
"Y" -> addFeeds
_ -> return ()


precluster :: IO ()
precluster = do
putStrLn ">> -c : Entire database to graph"
input <- getLine
case input of
"-c" -> convertDatabase
_ -> return ()

runNounVisualization :: IO ()
runNounVisualization = runCluster "user"


runAllTests :: IO ()
runAllTests = do
testTextSim
Expand Down

0 comments on commit 56ba051

Please sign in to comment.