Permalink
Browse files

initial commit of pre-submission file set

  • Loading branch information...
0 parents commit 2dec586f43c7e79e31800e113d187d74c732998b Russell Poldrack committed Feb 14, 2012
Showing with 77,325 additions and 0 deletions.
  1. +51 −0 CCA/CCA_solution_6mm_nonneg.pkl
  2. +29 −0 CCA/cors.txt
  3. +483 −0 CCA/topic_list_6mm_nonneg.txt
  4. +130 −0 CCA/uvecs.txt
  5. +29 −0 CCA/vvecs.txt
  6. +1 −0 clustering/disorder_abbrevs.txt
  7. +9,181 −0 clustering/disorder_voxel_data.txt
  8. +6 −0 code/1.1_merge_peakfiles.sh
  9. +29 −0 code/1.2_extract_nidag_text.py
  10. +16 −0 code/10_mk_8fold_cogatlas_mallet_data.py
  11. +16 −0 code/11_mk_8fold_disorders_mallet_data.py
  12. +27 −0 code/12_mk_8fold_cogatlas_topic_models.py
  13. +500 −0 code/13.1_run_all_nfold_scripts.sh
  14. +250 −0 code/13.2_run_all_disorder_nfold_scripts.sh
  15. +27 −0 code/13_mk_8fold_disorders_topic_models.py
  16. +38 −0 code/14_get_best_topic_likelihood.py
  17. +27 −0 code/15.1_get_disorders_dimensionality.py
  18. +40 −0 code/15.2_run_all_disorders_models.sh
  19. +55 −0 code/15.3_get_best_disorder_dimensionality.py
  20. +32 −0 code/15_run_topic_models.py
  21. +64 −0 code/16_mk_cogatlas_loadingdata.py
  22. +64 −0 code/17_mk_disorders_loadingdata.py
  23. +29 −0 code/18.1.1_mk_disorder29_maps.sh
  24. +60 −0 code/18.1.1_run_all_disorder_chisq.sh
  25. +200 −0 code/18.1_mk_all_chisq_maps.sh
  26. +14 −0 code/18.2_mk_6mm_chisq_maps.sh
  27. +8 −0 code/18_mk_all_chisq_maps.py
  28. +17 −0 code/19_mk_slice_images_cogatlas.py
  29. +43 −0 code/1_db_foci_to_image.py
  30. +17 −0 code/20_mk_slice_images_disorders.py
  31. +57 −0 code/22_mk_latexreport_cogatlas.py
  32. +57 −0 code/23_mk_latexreport_disorders.py
  33. +208 −0 code/24_run_CCA_nonneg.py
  34. +55 −0 code/2_get_cogat_concepts.py
  35. +93 −0 code/3_create_disorder_list.py
  36. +68 −0 code/4_get_disorders_NIF.py
  37. +30 −0 code/5_mk_cogatlas_docs.py
  38. +36 −0 code/6_mk_disorders_docs.py
  39. +27 −0 code/7_mk_pickled_data.py
  40. +46,464 −0 code/8.1_mk_cogatlas_8fold_data.sh
  41. +47 −0 code/8_mk_8fold_cogatlas.py
  42. +18,616 −0 code/9.1_mk_8fold_disorders_data.sh
  43. +46 −0 code/9_mk_8fold_disorders.py
  44. +21 −0 code/run_mallet_cogatlas_130.sh
  45. +17 −0 code/run_mallet_stub.sh

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1,29 @@
+9.247339644746876974e-01
+7.727696397052865596e-01
+8.676441054002390629e-01
+8.226652240816667572e-01
+6.766705317222657667e-01
+7.116647273438641008e-01
+4.886742092165417550e-01
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+7.470234444264010065e-01
+3.711856082669939272e-01
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+7.747436571981390907e-01
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00

Large diffs are not rendered by default.

Oops, something went wrong.

Large diffs are not rendered by default.

Oops, something went wrong.

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1 @@
+MD_PDSZ_DASZ_PSYSZ_PARPSY_PAROCD_DASZ_TICBPD_SZOBE_COCSZ_SZTYAMN_ALZAPHDA_GAMDEP_MDDDA_CDADDSZALC_ALXSZ_PSYAUT_ASPADD_CDDA_ADGAM_DAAUT_SLIDLX_SLIANX_PANPHO_EATAUT_ASPBPD_DA

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1,6 @@
+# merge all images
+fslmerge -t /scratch/01329/poldrack/textmining/paper/data_preparation/all_peakimages.nii.gz /scratch/01329/poldrack/textmining/paper/data_preparation/peakimages/*.nii.gz
+# compute mask, excluding voxels that are not active on at
+# least 1% of papers
+fslmaths /scratch/01329/poldrack/textmining/paper/data_preparation/all_peakimages.nii.gz -Tmean -thr 0.01 -bin /scratch/01329/poldrack/textmining/paper/data_preparation/all_peakimages_mask
+
@@ -0,0 +1,29 @@
+# extract text from nidag database to perform topic modeling
+
+import MySQLdb
+
+# connect to nidag database
+conn = MySQLdb.connect (host = "localhost",
+ user = "articles_foci",
+ passwd = "",
+ db = "articles_foci")
+cursor = conn.cursor()
+
+query='select * from articles where active=1;'
+active_articles_query=cursor.execute(query)
+active_articles_result=cursor.fetchall()
+print 'found %d articles'%len(active_articles_result)
+for article in active_articles_result:
+ query='select text from article_texts where article_id=%d'%article[0]
+ fulltext_query=cursor.execute(query)
+ fulltext_result=cursor.fetchall()
+ f=open('/data1/poldracklab/textmining/fulltext/nidag_%05d.txt'%article[0],'w')
+ f.write(fulltext_result[0][0])
+ f.close()
+
+
+
+
+
+cursor.close ()
+conn.close ()
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+"""
+make data for 8-fold topic modeling test
+"""
+import numpy as N
+
+mallet_bin='/scratch/01329/poldrack/textmining/mallet-2.0.6/bin/mallet'
+basedir='/scratch/01329/poldrack/textmining/paper/cogatlas_8fold/'
+
+for fold in range(1,9):
+ traindir=basedir+'fold%d_train'%fold
+ testdir=basedir+'fold%d_test'%fold
+ cmd="%s import-dir --input %s --output %s/fold%d_train_data.mallet --keep-sequence --token-regex '[\p{L}\p{N}_]+|[\p{P}]+'"%(mallet_bin,traindir,basedir,fold)
+ print cmd
+ cmd="%s import-dir --input %s --output %s/fold%d_test_data.mallet --keep-sequence --token-regex '[\p{L}\p{N}_]+|[\p{P}]+'"%(mallet_bin,testdir,basedir,fold)
+ print cmd
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+"""
+make data for 8-fold topic modeling test
+"""
+import numpy as N
+
+mallet_bin='/scratch/01329/poldrack/textmining/mallet-2.0.6/bin/mallet'
+basedir='/scratch/01329/poldrack/textmining/paper/disorders_8fold/'
+
+for fold in range(1,9):
+ traindir=basedir+'fold%d_train'%fold
+ testdir=basedir+'fold%d_test'%fold
+ cmd="%s import-dir --input %s --output %s/fold%d_train_data.mallet --keep-sequence --token-regex '[\p{L}\p{N}_]+|[\p{P}]+'"%(mallet_bin,traindir,basedir,fold)
+ print cmd
+ cmd="%s import-dir --input %s --output %s/fold%d_test_data.mallet --keep-sequence --token-regex '[\p{L}\p{N}_]+|[\p{P}]+'"%(mallet_bin,testdir,basedir,fold)
+ print cmd
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+"""
+make scripts to run all topic models
+"""
+import numpy as N
+import os
+
+mallet_bin='/scratch/01329/poldrack/textmining/mallet-2.0.6/bin/mallet'
+datadir='/scratch/01329/poldrack/textmining/paper/cogatlas_8fold/'
+outputdir='/scratch/01329/poldrack/textmining/paper/topic_modeling/cogatlas/8fold/'
+
+ntopics=N.arange(10,260,10)
+
+for t in ntopics:
+ for fold in range(1,11):
+ outfile=open('nfold_scripts/run_mallet_cogatlas_fold%d_%d.sh'%(fold,t),'w')
+ a=50.0/t
+ topicdir='%sfold%d_%d'%(outputdir,fold,t)
+ try:
+ os.mkdir(topicdir)
+ except:
+ pass
+ cmd=mallet_bin+' train-topics --input %s/fold%d_train_data.mallet --num-topics %d --num-top-words 31 --output-topic-keys %s/topic_keys.txt --output-doc-topics %s/doc_topics.txt --topic-word-weights-file %s/word_weights.txt --word-topic-counts-file %s/word_topic_counts.txt --num-iterations 5000 --output-model %s/saved_model.mallet --evaluator-filename %s/evaluator.mallet --alpha %f --beta 0.1'%(datadir,fold,t,topicdir,topicdir,topicdir,topicdir,topicdir,topicdir,a)
+ outfile.write(cmd+'\n')
+ cmd=mallet_bin+' evaluate-topics --evaluator %s/evaluator.mallet --input %s/fold%d_test_data.mallet --output-prob %s/prob.txt --output-doc-probs %s/docprob.txt'%(topicdir,datadir,fold,topicdir,topicdir)
+ outfile.write(cmd+'\n')
+ outfile.close()
Oops, something went wrong.

0 comments on commit 2dec586

Please sign in to comment.