GitHub - quadrismegistus/RpyD2: A python class modeled on Rpy2's DataFrame, providing convenient access to common statistical procedures in R.

Branches Tags

Name		Name	Last commit message	Last commit date
Latest commit History 38 Commits
._rpyd2.py		._rpyd2.py
README		README
rpyd2.py		rpyd2.py
rpyd2.pyc		rpyd2.pyc

Repository files navigation

Help on class RpyD2 in module rpyd2:

class RpyD2
 |  Methods defined here:
 |  
 |  __init__(self, input, **kwargs)
 |      input is your data, which can be in the following forms:
 |        1. LD (List of Dictionaries)
 |            [ {'hair':'blonde','eyes':'blue'}, {'hair':'black','eyes':'green'}, ... ]
 |        2. DL (Dictionary of Lists)
 |            { 'hair':['blonde','black'], 'eyes':['blue','green'] }
 |        3. Rpy2 DataFrame
 |        4. Another RpyD2
 |      
 |      
 |      Keyword arguments will override the following default options:
 |       self.cols=None                  # specify which columns to build from
 |       self.rownamecol=None            # specify a column name from which row names should be used
 |       self.allcols=False              # if False, columns limited to those shared among all rows;
 |                                         if True, all columns are chosen;
 |                                         if a positive integer N, columns limited to the 'top' N columns,
 |                                            where columns are compared numerically by:
 |       self.trimbyVariance=True             # if trimbyVariance==True, sum of absolute values of Z-scores across column
 |                                              otherwise, sum of scores across column
 |      
 |       self.rank=True                  # if rank==True, append 'r'+ranknum to the top N columns
 |       self.zero=0.0                   # if allcols is True or an integer, what should empty cells be populated with?
 |       self.z=False                    # if True, Z-score all quantitative columns
 |       self.factor=True                # if True, treat strings as factors
 |       self.onlyQuant=False            # if True, only build quantitative columns
 |       self.onlyCat=False              # if True, only build categorical (string) columns
 |      
 |       self.toprint=True               # if True, print R objects using R's summary() before returning them
 |  
 |  __repr__(self)
 |  
 |  __str__(self)
 |  
 |  addCol(self, name, vals)
 |  
 |  aov(self, formula, tukey=False, plot=False, fn=None, w=1100, h=800)
 |  
 |  boxplot(self, fn=None, x=None, y=None, main=None, xlab=None, ylab=None, ggplot=False, w=1100, h=800)
 |  
 |  ca(self, fn, cols=[])
 |  
 |  chisq(self, cols=[])
 |  
 |  cloud(self, fn=None, x='x', y='y', z='z', title=False, w=800, h=800)
 |  
 |  col(self, colname)
 |      Return column 'colname', where colname can be either a string name or an integer position (starting at 0).
 |  
 |  cor(self, returnType='rpyd2')
 |  
 |  cordist(self)
 |  
 |  corrgram(self, fn=None, w=1600, h=1600)
 |      API to corrgram package:
 |  
 |  csv(self, fn='csv.txt', sep='\t')
 |  
 |  dist(self, z=False)
 |  
 |  distro(self, fn=None)
 |  
 |  glm(self, ykey='y', family='gaussian', anovaTest='Chisq')
 |      API to R's glm:
 |              http://web.njit.edu/all_topics/Prog_Lang_Docs/html/library/base/html/glm.html
 |      
 |      Family can be:
 |              [ref: http://web.njit.edu/all_topics/Prog_Lang_Docs/html/library/base/html/family.html]
 |  
 |  group(self, x=None, ys=[], yname='y', ytype='y_type')
 |  
 |  hclust(self, cor=False, z=True, plot=True, fn=None, w=1100, h=900)
 |  
 |  kclust(self, k=4, z=True, plot=True, fn=None, w=1100, h=800)
 |      Currently set to return self.pam(k) for robust k-means clustering.
 |  
 |  kmeans(self, k=4)
 |      API to R's kmeans clustering function: http://stat.ethz.ch/R-manual/R-patched/library/stats/html/kmeans.html
 |  
 |  lm(self, formula, toprint=True)
 |  
 |  loess(self, formula, toprint=True)
 |  
 |  mclust(self, z=True, fn='mclust.png', w=1100, h=900)
 |  
 |  mean_stdev(self, cols=[], rows=[])
 |  
 |  pam(self, k=4, z=True)
 |      API to R's pam function: 
 |              http://stat.ethz.ch/R-manual/R-patched/library/cluster/html/pam.html
 |      A more robust version of k-means clustering, 'around medoids.'
 |  
 |  pca(self, fn='pca.png', col=None, w=1200, h=1200)
 |  
 |  plot(self, fn=None, x=None, y=None, col=None, group=None, w=1100, h=800, size=2, smooth=False, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False, bar=False, xlab_size=14, ylab_size=14)
 |  
 |  plot3d(self, fn=None, x='x', y='y', z='z', title=False, w=800, h=800)
 |  
 |  plots(self, x=None, y=None, n=1)
 |  
 |  points_3d(self, fn=None, x='x', y='y', z='z', title=False, w=800, h=800)
 |  
 |  polyfit(self, x, y, deg=3, addCol=True, addDer=True)
 |  
 |  polyfits(self, x, y, degs, addCol=True, fn=None, onlyBest=False)
 |  
 |  polyplot(self, terms)
 |  
 |  predict(self, y='', z=True, fn='predict.png', w=1100, h=800)
 |      API to pamr.train and pamr.predict:
 |      http://www-stat.stanford.edu/~tibs/PAM/Rdist/pamr.train.html
 |      http://rgm2.lab.nig.ac.jp/RGM2/func.php?rd_id=pamr:pamr.predict
 |  
 |  pvclust(self, z=True, fn='pvclust.png', w=1100, h=900)
 |      API to R package pvclust: http://cran.r-project.org/web/packages/pvclust/index.html
 |  
 |  q(self, z=False)
 |      Return a version of self of only quantitative columns
 |  
 |  rankcols(self, byVariance=False, returnSums=False)
 |  
 |  removeCol(self, name)
 |  
 |  row(self, rowname)
 |      Return row 'rowname', where rowname can be either a string name or an integer position (starting at 0).
 |  
 |  rows_where(self, qdict)
 |  
 |  save(self, fn=None)
 |  
 |  sub(self, cols=[], rows=[])
 |      Return an RpyD2 from self, with only those rows and/or columns as specified.
 |  
 |  sub_where(self, rows={})
 |  
 |  summary(self, obj=None)
 |  
 |  t(self)
 |  
 |  toDL(self, cols=None, rows=None, rownamecol=False)
 |      Return a dictionary of lists representation of self:
 |      {'col0':[row0val,row1val,...],
 |      'col1':[row0val,row1val,...],
 |      ...}
 |      
 |      If rows is a non-empty list, return only these rows.
 |      If cols is a non-empty list, return only these cols.
 |      If both are non-empty, return only these rows and only these cols.
 |  
 |  toVectors(self, xcol='x', ycol='y')
 |  
 |  treepredict(self, y='', fn='treepredict.png', w=1100, h=800)
 |  
 |  vioplot(self, fn=None, x=None, y=None, w=1100, h=800)
 |      API to the 'vioplot' R package: http://cran.r-project.org/web/packages/vioplot/index.html
 |  
 |  xtabs(self, cols=[])