Skip to content
Browse files

Affyio: updated C library module

  • Loading branch information...
1 parent f282dd2 commit 0c76931b1ffb6cc9aee789c7fee7899f66c527d4 @pjotrp committed Jan 25, 2010
View
6 src/clibs/affyio/DESCRIPTION 100755 → 100644
@@ -1,11 +1,11 @@
Package: affyio
-Version: 1.8.0
+Version: 1.15.1
Title: Tools for parsing Affymetrix data files
Author: Benjamin Milo Bolstad <bmb@bmbolstad.com>
Maintainer: Benjamin Milo Bolstad <bmb@bmbolstad.com>
Depends: R (>= 2.6.0), methods
Description: Routines for parsing Affymetrix data files based upon file format information. Primary focus is on accessing the CEL and CDF file formats.
-License: LGPL version 2 or newer
+License: LGPL (>= 2)
biocViews: Microarray, DataImport, Infrastructure
LazyLoad: yes
-Packaged: Wed Apr 30 01:48:07 2008; biocbuild
+Packaged: 2009-11-10 20:54:53 UTC; biocbuild
View
0 src/clibs/affyio/HISTORY 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/LICENSE 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/NAMESPACE 100755 → 100644
File mode changed.
View
4 src/clibs/affyio/R/check.cdf.type.R 100755 → 100644
@@ -1,7 +1,7 @@
###
### File: check.cdf.type.R
###
-### AIM: return a string giving the file format. Either text, xda or unknown
+### Aim: return a string giving the file format. Either text, xda or unknown
### in the case that file format is not known.
###
@@ -15,6 +15,4 @@ check.cdf.type <- function(filename){
} else {
return("unknown")
}
-
-
}
View
20 src/clibs/affyio/R/read.cdffile.list.R 100755 → 100644
@@ -1,14 +1,11 @@
-####
-####
-#### file: read.cdffile.list.R
-####
-#### aim: reads full CDF file into R list structure.
-####
-#### History
-#### Dec 1, 2005 - Initial version
-
-
-
+###
+### File: read.cdffile.list.R
+###
+### Aim: reads full CDF file into R list structure.
+###
+### History
+### Dec 1, 2005 - Initial version
+###
read.cdffile.list <- function (filename, cdf.path = getwd()){
@@ -24,4 +21,3 @@ read.cdffile.list <- function (filename, cdf.path = getwd()){
stop(paste("File format for",filename,"not recognized."))
}
}
-
View
6 src/clibs/affyio/R/read.celfile.R 100755 → 100644
@@ -1,9 +1,9 @@
###
-### file: read.celfile.R
+### File: read.celfile.R
###
-### aim: read entire contents of a single given specified CEL file into
+### Aim: read entire contents of a single given specified CEL file into
### an R data structure.
-
+###
read.celfile <- function(filename,intensity.means.only=FALSE){
View
29 src/clibs/affyio/R/read.celfile.header.R 100755 → 100644
@@ -1,4 +1,9 @@
-
+###
+### File: read.celfile.header.R
+###
+### Aim: read header contents of a specified CEL file into
+### an R data structure.
+###
read.celfile.header <- function(filename,info=c("basic","full"),verbose=FALSE){
@@ -11,18 +16,22 @@ read.celfile.header <- function(filename,info=c("basic","full"),verbose=FALSE){
cat("Reading", filename, "to get header information.\n")
headdetails <- .Call("ReadHeader", filename, PACKAGE="affyio")
names(headdetails) <- c("cdfName","CEL dimensions")
-
- return(headdetails)
} else {
if (verbose)
cat("Reading", filename, "to get full header information.\n")
### "full" returns more detailed information from the header. Exact details differ depending on the file format.
- headdetails <- .Call("ReadHeaderDetailed", filename, PACKAGE="affyio")
- names(headdetails) <- c("cdfName","CEL dimensions","GridCornerUL","GridCornerUR","GridCornerLR","GridCornerLL","DatHeader","Algorithm","AlgorithmParameters")
-
- return(headdetails)
+ headdetails <- try(.Call("ReadHeaderDetailed", filename, PACKAGE="affyio"))
+ if (is(headdetails, "try-error"))
+ stop("Failed to get full header information for ", filename)
+ names(headdetails) <- c("cdfName","CEL dimensions","GridCornerUL","GridCornerUR","GridCornerLR","GridCornerLL","DatHeader","Algorithm","AlgorithmParameters","ScanDate")
+
+ if (nchar(headdetails$ScanDate) == 0){
+ # try to extract it from the DatHeader
+ DatHeaderSplit <- strsplit(headdetails$DatHeader," ")
+ Which.Date <- grep("[0-9]*/[0-9]*/[0-9]*",DatHeaderSplit[[1]])
+ Which.Time <- grep("[0-9]*:[0-9]*:[0-9]*",DatHeaderSplit[[1]])
+ headdetails$ScanDate <- paste(DatHeaderSplit[[1]][Which.Date],DatHeaderSplit[[1]][Which.Time])
+ }
}
-
-
+ return(headdetails)
}
-
View
0 src/clibs/affyio/R/read.probematrices.R 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/aclocal.m4 100755 → 100644
File mode changed.
View
4 src/clibs/affyio/man/check.cdf.type.Rd 100755 → 100644
@@ -10,12 +10,12 @@
\item{filename}{fullpath to a cdf file}
}
\value{Returns a string which is currently one of:
- \itemize{
+
\item{text}{the cdf file is of the text format}
\item{xda}{the cdf file is of the binary format used in GCOS}
\item{unknown}{the parser can not handle this format or does not
recognize this file as a CDF file}
- }
+
}
\author{B. M. Bolstad <bmb@bmbolstad.com>}
View
0 src/clibs/affyio/man/read.cdffile.list.Rd 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/man/read.celfile.Rd 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/man/read.celfile.header.Rd 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/man/read.celfile.probeintensity.matrices.Rd 100755 → 100644
File mode changed.
View
2 src/clibs/affyio/src/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.6)
PROJECT(Biolib_affyio)
SET (M_NAME affyio)
-SET (M_VERSION 1.8)
+# SET (M_VERSION 1.8)
IF(NOT BUILD_LIBS)
SET (MAP_ROOT ../../../..)
View
0 src/clibs/affyio/src/Makevars.in 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/src/Makevars.win 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/src/fread_functions.c 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/src/fread_functions.h 100755 → 100644
File mode changed.
View
86 src/clibs/affyio/src/read_abatch.c 100755 → 100644
@@ -19,7 +19,7 @@
**
** aim: read in from 1st to nth chips of CEL data
**
- ** Copyright (C) 2003-2007 B. M. Bolstad
+ ** Copyright (C) 2003-2008 B. M. Bolstad
**
** Created on Jun 13, 2003
**
@@ -144,6 +144,11 @@
** Oct 28, 2007 - add pthread based multi-threaded read_probematrix this is based on a submission by Paul Gordon (U Calgary)
** Feb 18, 2008 - R_read_cel_file now can be told to read only the mean intensities (rather than also the SD and npixels)
** Mar 6, 2008 - Add additional CEL file corruption checking.
+ ** Oct 16, 2008 - Fix issue with stack exhaustion
+ ** Oct 28, 2008 - Increase stack space allocated (prevents a crash)
+ ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
+ ** Jun 3, 2009 - CEL corruption not detected in read.probematrix
+ ** Nov 10, 2009 - Pthread on Solaris fix
**
*************************************************************/
@@ -171,6 +176,9 @@
#if USE_PTHREADS
#include <pthread.h>
+#include <limits.h>
+#include <unistd.h>
+
pthread_mutex_t mutex_R;
int n_probesets;
int *n_probes = NULL;
@@ -198,6 +206,41 @@ struct thread_data{
#define BUF_SIZE 1024
+/******************************************************************
+ **
+ ** A "C" level object designed to hold information for a
+ ** single CEL file
+ **
+ ** These should be created using the function
+ **
+ ** read_cel_file()
+ **
+ **
+ **
+ *****************************************************************/
+
+typedef struct{
+ detailed_header_info header;
+
+ /** these are for storing the intensities, the sds and the number of pixels **/
+ double *intensities;
+ double *stddev;
+ double *npixels;
+
+ /** these are for storing information in the masks and outliers section **/
+
+ int nmasks;
+ int noutliers;
+
+ short *masks_x, *masks_y;
+ short *outliers_x, *outliers_y;
+
+} CEL;
+
+
+
+
+
/****************************************************************
@@ -1235,6 +1278,7 @@ static void get_detailed_header_info(const char *filename, detailed_header_info
fclose(currentFile);
+ header_info->ScanDate = Calloc(2, char);
}
@@ -2023,7 +2067,8 @@ static void gz_get_detailed_header_info(const char *filename, detailed_header_in
strcpy(header_info->AlgorithmParameters,get_token(cur_tokenset,1));
gzclose(currentFile);
-
+
+ header_info->ScanDate = Calloc(2, char);
}
@@ -2685,7 +2730,8 @@ static void binary_get_detailed_header_info(const char *filename, detailed_heade
error("Cel file %s does not seem to be have cdf information",filename);
}
}
-
+
+ header_info->ScanDate = Calloc(2, char);
delete_tokens(my_tokenset);
delete_binary_header(my_header);
@@ -3435,6 +3481,8 @@ static void gzbinary_get_detailed_header_info(const char *filename, detailed_hea
}
+ header_info->ScanDate = Calloc(2, char);
+
delete_tokens(my_tokenset);
delete_binary_header(my_header);
Free(header_copy);
@@ -4107,10 +4155,10 @@ SEXP ReadHeaderDetailed(SEXP filename){
const char *cur_file_name;
detailed_header_info header_info;
- PROTECT(HEADER = allocVector(VECSXP,9)); /* return as a list */
+ PROTECT(HEADER = allocVector(VECSXP,10)); /* return as a list */
- cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ cur_file_name = CHAR(STRING_ELT(filename,0));
if (isTextCelFile(cur_file_name)){
@@ -4189,7 +4237,12 @@ SEXP ReadHeaderDetailed(SEXP filename){
SET_STRING_ELT(tmp_sexp,0,mkChar(header_info.AlgorithmParameters));
SET_VECTOR_ELT(HEADER,8,tmp_sexp);
UNPROTECT(1);
-
+
+ PROTECT(tmp_sexp = allocVector(STRSXP,1));
+ SET_STRING_ELT(tmp_sexp,0,mkChar(header_info.ScanDate));
+ SET_VECTOR_ELT(HEADER,9,tmp_sexp);
+ UNPROTECT(1);
+
Free(header_info.Algorithm);
Free(header_info.AlgorithmParameters);
Free(header_info.DatHeader);
@@ -4229,16 +4282,24 @@ void readfile(SEXP filenames, double *CurintensityMatrix, double *pmMatrix, doub
error("Compress option not supported on your platform\n");
#endif
} else if (isBinaryCelFile(cur_file_name)){
- read_binarycel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1);
+ if(read_binarycel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1) !=0){
+ error("The CEL file %s was corrupted. Data not read.\n",cur_file_name);
+ }
storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag);
} else if (isgzBinaryCelFile(cur_file_name)){
- gzread_binarycel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1);
+ if(gzread_binarycel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1) !=0){
+ error("The CEL file %s was corrupted. Data not read.\n",cur_file_name);
+ }
storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag);
} else if (isGenericCelFile(cur_file_name)){
- read_genericcel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1);
+ if(read_genericcel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1) !=0){
+ error("The CEL file %s was corrupted. Data not read.\n",cur_file_name);
+ }
storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag);
} else if (isgzGenericCelFile(cur_file_name)){
- gzread_genericcel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1);
+ if(gzread_genericcel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1)!=0){
+ error("The CEL file %s was corrupted. Data not read.\n",cur_file_name);
+ }
storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag);
} else {
#if defined HAVE_ZLIB
@@ -4384,6 +4445,8 @@ SEXP read_probeintensities(SEXP filenames, SEXP rm_mask, SEXP rm_outliers, SEXP
pthread_attr_t attr;
struct thread_data *args;
void *status;
+ size_t stacksize = PTHREAD_STACK_MIN + 0x40000;
+
#endif
if (strcmp(CHAR(STRING_ELT(which,0)),"pm") == 0){
@@ -4446,6 +4509,7 @@ SEXP read_probeintensities(SEXP filenames, SEXP rm_mask, SEXP rm_outliers, SEXP
/* Initialize and set thread detached attribute */
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+ pthread_attr_setstacksize (&attr, stacksize);
/* this code works out how many threads to use and allocates ranges of files to each thread */
/* The aim is to try to be as fair as possible in dividing up the matrix */
@@ -5255,7 +5319,7 @@ SEXP R_read_cel_file(SEXP filename, SEXP intensities_mean_only){
int read_intensities_only;
- const char *cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ const char *cur_file_name = CHAR(STRING_ELT(filename,0));
read_intensities_only = INTEGER_POINTER(intensities_mean_only)[0];
View
35 src/clibs/affyio/src/read_abatch.h 100755 → 100644
@@ -22,41 +22,8 @@ typedef struct{
char *DatHeader;
char *Algorithm;
char *AlgorithmParameters;
+ char *ScanDate;
} detailed_header_info;
-/******************************************************************
- **
- ** A "C" level object designed to hold information for a
- ** single CEL file
- **
- ** These should be created using the function
- **
- ** read_cel_file()
- **
- **
- **
- *****************************************************************/
-
-typedef struct{
- detailed_header_info header;
-
- /** these are for storing the intensities, the sds and the number of pixels **/
- double *intensities;
- double *stddev;
- double *npixels;
-
- /** these are for storing information in the masks and outliers section **/
-
- int nmasks;
- int noutliers;
-
- short *masks_x, *masks_y;
- short *outliers_x, *outliers_y;
-
-} CEL;
-
-extern CEL *read_cel_file(const char *filename, int read_intensities_only);
-
-
#endif
View
5 src/clibs/affyio/src/read_bpmap.c 100755 → 100644
@@ -15,7 +15,8 @@
** June 12, 2006 - fix naming vector length issue.
** June 12, 2007 - much wailing and grinding of teeth, but finally a fix for reading version number right.
** Aug 25, 2007 - Move file reading functions to centralized location
- ** Mar 14, 2008 - Fix reading of version number for big endian platforms
+ ** Mar 14, 2008 - Fix reading of version number for big endian platforms
+ ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
**
*******************************************************************/
@@ -839,7 +840,7 @@ SEXP ReadBPMAPFileIntoRList(SEXP filename){
const char *cur_file_name;
- cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ cur_file_name = CHAR(STRING_ELT(filename,0));
View
186 src/clibs/affyio/src/read_cdf_xda.c 100755 → 100644
@@ -24,7 +24,8 @@
** when there are 0 qcunits or 0 units
** Aug 25, 2007 - Move file reading functions to centralized location
** Oct 27, 2007 - When building a cdfenv set NON identified values to NA (mostly affects MM for PM only arrays)
- **
+ ** Nov 12, 2008 - Fix crash
+ ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
**
****************************************************************/
@@ -36,18 +37,177 @@
#include "stdio.h"
#include "fread_functions.h"
#include <ctype.h>
-#include <read_cdf.h>
-
-#ifdef BIOLIB
- #include <biolib_R_map.h>
-#endif
/* #define READ_CDF_DEBUG */
/* #define READ_CDF_DEBUG_SNP */
#define READ_CDF_NOSNP
+/************************************************************************
+ **
+ ** Structures for holding the CDF file information. Basically
+ ** header/general information that appears at the start of the CDF file
+ **
+ ************************************************************************/
+
+typedef struct {
+ int magicnumber;
+ int version_number;
+ unsigned short rows,cols;
+ int n_units,n_qc_units;
+ int len_ref_seq;
+ int i;
+ char *ref_seq;
+} cdf_xda_header;
+
+
+/****************************************************************************
+ **
+ ** The following two structures store QC units and QC unit probe information
+ **
+ ** QC information, repeated for each QC unit:
+ ** Type - unsigned short
+ ** Number of probes - integer
+ **
+ ** Probe information, repeated for each probe in the QC unit:
+ ** X coordinate - unsigned short
+ ** Y coordinate - unsigned short
+ ** Probe length - unsigned char
+ ** Perfect match flag - unsigned char
+ ** Background probe flag - unsigned char
+ **
+ ****************************************************************************/
+
+
+typedef struct{
+ unsigned short x;
+ unsigned short y;
+ unsigned char probelength;
+ unsigned char pmflag;
+ unsigned char bgprobeflag;
+
+} cdf_qc_probe;
+
+typedef struct{
+ unsigned short type;
+ unsigned int n_probes;
+
+ cdf_qc_probe *qc_probes;
+
+} cdf_qc_unit;
+
+
+/****************************************************************************
+ **
+ ** The following three structures store information for units (sometimes called
+ ** probesets), blocks (of which there are one or more within a unit) and cells
+ ** (sometimes called probes, of which there are one or more within each block)
+ **
+ **
+ ** Unit information, repeated for each unit:
+ **
+ ** UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
+ ** Direction - unsigned char
+ ** Number of atoms - integer
+ ** Number of blocks - integer (always 1 for expression units)
+ ** Number of cells - integer
+ ** Unit number (probe set number) - integer
+ ** Number of cells per atom - unsigned char
+ **
+ **
+ **
+ ** Block information, repeated for each block in the unit:
+ **
+ ** Number of atoms - integer
+ ** Number of cells - integer
+ ** Number of cells per atom - unsigned char
+ ** Direction - unsigned char
+ ** The position of the first atom - integer
+ ** <unused integer value> - integer
+ ** The block name - char[64]
+ **
+ **
+ **
+ ** Cell information, repeated for each cell in the block:
+ **
+ ** Atom number - integer
+ ** X coordinate - unsigned short
+ ** Y coordinate - unsigned short
+ ** Index position (relative to sequence for resequencing units, for expression and mapping units this value is just the atom number) - integer
+ ** Base of probe at substitution position - char
+ ** Base of target at interrogation position - char
+ **
+ **
+ ****************************************************************************/
+
+
+typedef struct{
+ int atomnumber;
+ unsigned short x;
+ unsigned short y;
+ int indexpos;
+ char pbase;
+ char tbase;
+} cdf_unit_cell;
+
+
+typedef struct{
+ int natoms;
+ int ncells;
+ unsigned char ncellperatom;
+ unsigned char direction;
+ int firstatom;
+ int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
+ char blockname[64];
+
+ cdf_unit_cell *unit_cells;
+
+} cdf_unit_block;
+
+
+typedef struct{
+ unsigned short unittype;
+ unsigned char direction;
+ int natoms;
+ int nblocks;
+ int ncells;
+ int unitnumber;
+ unsigned char ncellperatom;
+
+ cdf_unit_block *unit_block;
+
+} cdf_unit;
+
+
+/****************************************************************************
+ **
+ ** A data structure for holding CDF information read from a xda format cdf file
+ **
+ ** note that this structure reads in everything including things that might not
+ ** be of any subsequent use.
+ **
+ ****************************************************************************/
+
+
+
+typedef struct {
+
+ cdf_xda_header header; /* Header information */
+ char **probesetnames; /* Names of probesets */
+
+ int *qc_start; /* These are used for random access */
+ int *units_start;
+
+ cdf_qc_unit *qc_units;
+ cdf_unit *units;
+
+
+} cdf_xda;
+
+
+
+
@@ -164,7 +324,7 @@ int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream){
**
*************************************************************************/
-void dealloc_cdf_xda(cdf_xda *my_cdf){
+static void dealloc_cdf_xda(cdf_xda *my_cdf){
int i;
@@ -207,7 +367,7 @@ void dealloc_cdf_xda(cdf_xda *my_cdf){
**
*************************************************************/
-int read_cdf_xda(const char *filename,cdf_xda *my_cdf){
+static int read_cdf_xda(const char *filename,cdf_xda *my_cdf){
FILE *infile;
@@ -386,7 +546,7 @@ int read_cdf_xda(const char *filename,cdf_xda *my_cdf){
*************************************************************/
-int check_cdf_xda(const char *filename){
+static int check_cdf_xda(const char *filename){
FILE *infile;
@@ -487,7 +647,7 @@ SEXP CheckCDFXDA(SEXP filename){
int good;
const char *cur_file_name;
- cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ cur_file_name = CHAR(STRING_ELT(filename,0));
good = check_cdf_xda(cur_file_name);
@@ -530,7 +690,7 @@ SEXP ReadCDFFile(SEXP filename){
/* int nrows, ncols; */
- cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ cur_file_name = CHAR(STRING_ELT(filename,0));
if (!read_cdf_xda(cur_file_name,&my_cdf)){
error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
@@ -584,7 +744,7 @@ SEXP ReadCDFFile(SEXP filename){
curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));
- for (k=0; k < cur_cells*2; k++){
+ for (k=0; k < cur_atoms*2; k++){
curlocs[k] = R_NaN;
}
@@ -875,7 +1035,7 @@ SEXP ReadCDFFileIntoRList(SEXP filename,SEXP fullstructure){
cdf_xda my_cdf;
const char *cur_file_name;
- cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ cur_file_name = CHAR(STRING_ELT(filename,0));
/* Read in the xda style CDF file into memory */
if (!read_cdf_xda(cur_file_name,&my_cdf)){
View
190 src/clibs/affyio/src/read_cdffile2.c 100755 → 100644
@@ -28,7 +28,8 @@
** Dec 1, 2005 - Some comment cleaning. Added isTextCDFFile,CheckCDFtext
** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers
** May 31, 2006 - fix some compiler warnings
- **
+ ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
+ **
**
*******************************************************************/
@@ -38,14 +39,183 @@
#include "stdlib.h"
#include "stdio.h"
-#define BUFFER_SIZE 1024
-
-#include <read_cdf.h>
-
#ifdef BIOLIB
#include <biolib_R_map.h>
#endif
+#define BUFFER_SIZE 1024
+
+
+/*****************************************************************
+ **
+ **
+ ** A structure for holding information in the
+ ** "CDF" and "Chip" sections (basically header information)
+ **
+ ******************************************************************/
+
+
+
+typedef struct {
+
+ char *version;
+ char *name;
+ int rows,cols;
+ int numberofunits;
+ int maxunit;
+ int NumQCUnits;
+ char *chipreference;
+} cdf_text_header;
+
+
+/*****************************************************************
+ **
+ **
+ ** A structure for holding QC probe information
+ ** Note the "CYCLES" item is ignored and never parsed
+ **
+ ******************************************************************/
+
+
+typedef struct {
+ int x;
+ int y;
+ char *probe;
+ int plen;
+ int atom;
+ int index;
+ int match;
+ int bg;
+} cdf_text_qc_probe;
+
+
+
+
+
+
+
+/*******************************************************************
+ **
+ ** A structure for holding QC units information. These are
+ ** areas of the chip that contain probes that may or may not be useful
+ ** for QC and other purposes.
+ **
+ **
+ *******************************************************************/
+
+
+
+typedef struct{
+ int type;
+ unsigned int n_probes;
+ int qccontains[8]; /* either 0 or 1 for each of the eight possible fields. a 1 means that field is present.*/
+ cdf_text_qc_probe *qc_probes;
+
+} cdf_text_qc_unit;
+
+
+/*******************************************************************
+ **
+ ** A structure for holding probe information for unit_blocks_probes
+ **
+ ** probes are stored within blocks
+ **
+ *******************************************************************/
+
+typedef struct{
+ int x;
+ int y;
+ char *probe;
+ char *feat;
+ char *qual;
+ int expos;
+ int pos;
+ char *cbase;
+ char *pbase;
+ char *tbase;
+ int atom;
+ int index;
+ int codonid;
+ int codon;
+ int regiontype;
+ char* region;
+} cdf_text_unit_block_probe;
+
+
+
+
+/*******************************************************************
+ **
+ ** A structure holding Unit_blocks
+ **
+ ** blocks are stored within units.
+ ** blocks contain many probes
+ **
+ *******************************************************************/
+
+typedef struct{
+ char *name;
+ int blocknumber;
+ int num_atoms;
+ int num_cells;
+ int start_position;
+ int stop_position;
+ int direction;
+ cdf_text_unit_block_probe *probes;
+
+} cdf_text_unit_block;
+
+
+
+
+
+
+/*******************************************************************
+ **
+ ** A structure for holding "Units", also known as probesets
+ **
+ ** Each unit contains one or more blocks. Each block contains one or
+ ** more probes
+ **
+ *******************************************************************/
+
+
+typedef struct{
+ char *name;
+ int direction;
+ int num_atoms;
+ int num_cells;
+ int unit_number;
+ int unit_type;
+ int numberblocks;
+ int MutationType;
+ cdf_text_unit_block *blocks;
+} cdf_text_unit;
+
+
+
+/*******************************************************************
+ **
+ ** A structure for holding a text CDF file
+ **
+ ** text cdf files consist of
+ ** basic header information
+ ** qcunits
+ ** - qc probes
+ ** units (aka probesets)
+ ** - blocks
+ ** - probes
+ **
+ **
+ *******************************************************************/
+
+typedef struct{
+ cdf_text_header header;
+ cdf_text_qc_unit *qc_units;
+ cdf_text_unit *units;
+} cdf_text;
+
+
/**************************************************************
**
** The following code is for tokenizing strings
@@ -699,7 +869,7 @@ static void read_cdf_Units(FILE *infile, cdf_text *mycdf, char* linebuffer){
*******************************************************************/
-int read_cdf_text(const char *filename, cdf_text *mycdf){
+static int read_cdf_text(const char *filename, cdf_text *mycdf){
FILE *infile;
@@ -751,7 +921,7 @@ int read_cdf_text(const char *filename, cdf_text *mycdf){
-void dealloc_cdf_text(cdf_text *my_cdf){
+static void dealloc_cdf_text(cdf_text *my_cdf){
int i,j,k;
@@ -805,7 +975,7 @@ void dealloc_cdf_text(cdf_text *my_cdf){
**
******************************************************************/
-int isTextCDFFile(const char *filename){
+static int isTextCDFFile(const char *filename){
FILE *infile;
@@ -909,7 +1079,7 @@ SEXP ReadtextCDFFileIntoRList(SEXP filename){
cdf_text my_cdf;
const char *cur_file_name;
- cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ cur_file_name = CHAR(STRING_ELT(filename,0));
if(!read_cdf_text(cur_file_name, &my_cdf)){
error("Problem reading text cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
@@ -1392,7 +1562,7 @@ SEXP CheckCDFtext(SEXP filename){
int good;
const char *cur_file_name;
- cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ cur_file_name = CHAR(STRING_ELT(filename,0));
good = isTextCDFFile(cur_file_name);
View
26 src/clibs/affyio/src/read_celfile_generic.c 100755 → 100644
@@ -22,14 +22,14 @@
** Oct 11, 2007 - fix missing DatHeader problem
** Feb 11, 2008 - add #include for inttypes.h in situations that stdint.h might not exist
** Feb 13, 2008 - fix problems with generic_get_detailed_header_info(), gzgeneric_get_detailed_header_info()
+ ** May 18, 2009 - Add Ability to extract scan date from CEL file header
**
*************************************************************/
#include <R.h>
#include <Rdefines.h>
#include <Rmath.h>
#include <Rinternals.h>
-#define HAVE_STDINT_H 1
#ifdef HAVE_STDINT_H
#include <stdint.h>
#elif HAVE_INTTYPES_H
@@ -238,6 +238,18 @@ void generic_get_detailed_header_info(const char *filename, detailed_header_info
header_info->DatHeader = Calloc(2, char);
}
+ triplet = find_nvt(&data_header,"affymetrix-scan-date");
+
+ if (triplet != NULL){
+ cur_mime_type = determine_MIMETYPE(*triplet);
+
+ wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size);
+ header_info->ScanDate = Calloc(size + 1, char);
+ wcstombs(header_info->ScanDate, wchartemp, size);
+ Free(wchartemp);
+ } else {
+ header_info->ScanDate = Calloc(2, char);
+ }
triplet = find_nvt(&data_header,"affymetrix-algorithm-name");
@@ -1237,6 +1249,18 @@ void gzgeneric_get_detailed_header_info(const char *filename, detailed_header_in
header_info->DatHeader = Calloc(2, char);
}
+ triplet = find_nvt(&data_header,"affymetrix-scan-date");
+
+ if (triplet != NULL){
+ cur_mime_type = determine_MIMETYPE(*triplet);
+
+ wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size);
+ header_info->ScanDate = Calloc(size + 1, char);
+ wcstombs(header_info->ScanDate, wchartemp, size);
+ Free(wchartemp);
+ } else {
+ header_info->ScanDate = Calloc(2, char);
+ }
triplet = find_nvt(&data_header,"affymetrix-algorithm-name");
View
1 src/clibs/affyio/src/read_celfile_generic.h 100755 → 100644
@@ -5,6 +5,7 @@
#include <biolib_R_map.h>
#endif
+
#include "read_abatch.h"
int isGenericCelFile(const char *filename);
View
0 src/clibs/affyio/src/read_clf.c 100755 → 100644
File mode changed.
View
24 src/clibs/affyio/src/read_generic.c 100755 → 100644
@@ -23,6 +23,8 @@
** Jan 28, 2008 - fix read_generic_data_group/gzread_generic_data_group. Change bitwise OR (|) to logical OR (||)
** Feb 11, 2008 - add #include for inttypes.h in situations that stdint.h might not exist
** Feb 13, 2008 - add decode_MIME_value_toASCII which takes any MIME and attempts to convert to a string
+ ** Jul 29, 2008 - fix preprocessor directive error for WORDS_BIGENDIAN systems
+ ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues
**
*************************************************************/
@@ -31,8 +33,6 @@
#include <Rmath.h>
#include <Rinternals.h>
-#define HAVE_STDINT_H
-
#ifdef HAVE_STDINT_H
#include <stdint.h>
#elif HAVE_INTTYPES_H
@@ -233,7 +233,7 @@ static wchar_t *decode_TEXT(ASTRING value){
contents = (uint16_t *)temp.value;
for (i=0; i < len; i++){
-#ifndef WORD_BIGENDIAN
+#ifndef WORDS_BIGENDIAN
contents[i]=(((contents[i]>>8)&0xff) | ((contents[i]&0xff)<<8));
#endif
return_value[i] = contents[i];
@@ -250,7 +250,7 @@ static int8_t decode_INT8_t(ASTRING value){
memcpy(&contents,value.value, sizeof(int8_t));
- //#ifndef WORD_BIGENDIAN
+ //#ifndef WORDS_BIGENDIAN
// contents=(((contents[i]>>8)&0xff) | ((contents[i]&0xff)<<8));//
//#endif
@@ -266,7 +266,7 @@ static uint8_t decode_UINT8_t(ASTRING value){
memcpy(&contents,value.value, sizeof(uint8_t));
- //#ifndef WORD_BIGENDIAN
+ //#ifndef WORDS_BIGENDIAN
//contents=(((contents[i]>>8)&0xff) | ((contents[i]&0xff)<<8));
//#endif
return contents;
@@ -280,7 +280,7 @@ static int16_t decode_INT16_t(ASTRING value){
memcpy(&contents,value.value, sizeof(int16_t));
-#ifndef WORD_BIGENDIAN
+#ifndef WORDS_BIGENDIAN
contents=(((contents>>8)&0xff) | ((contents&0xff)<<8));
#endif
@@ -296,7 +296,7 @@ static uint16_t decode_UINT16_t(ASTRING value){
memcpy(&contents,value.value, sizeof(uint16_t));
-#ifndef WORD_BIGENDIAN
+#ifndef WORDS_BIGENDIAN
contents=(((contents>>8)&0xff) | ((contents&0xff)<<8));
#endif
return contents;
@@ -312,7 +312,7 @@ static int32_t decode_INT32_t(ASTRING value){
memcpy(&contents,value.value, sizeof(int32_t));
-#ifndef WORD_BIGENDIAN
+#ifndef WORDS_BIGENDIAN
contents=(((contents>>24)&0xff) | ((contents&0xff)<<24) |
((contents>>8)&0xff00) | ((contents&0xff00)<<8));
#endif
@@ -328,7 +328,7 @@ static int32_t decode_UINT32_t(ASTRING value){
memcpy(&contents,value.value, sizeof(uint32_t));
-#ifndef WORD_BIGENDIAN
+#ifndef WORDS_BIGENDIAN
contents=(((contents>>24)&0xff) | ((contents&0xff)<<24) |
((contents>>8)&0xff00) | ((contents&0xff00)<<8));
#endif
@@ -346,7 +346,7 @@ static float decode_float32(ASTRING value){
memcpy(&contents,value.value, sizeof(uint32_t));
-#ifndef WORD_BIGENDIAN
+#ifndef WORDS_BIGENDIAN
contents=(((contents>>24)&0xff) | ((contents&0xff)<<24) |
((contents>>8)&0xff00) | ((contents&0xff00)<<8));
#endif
@@ -1346,7 +1346,7 @@ SEXP Read_Generic(SEXP filename){
generic_data_set my_data_set;
- const char *cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ const char *cur_file_name = CHAR(STRING_ELT(filename,0));
/* Pass through all the header information */
@@ -1402,7 +1402,7 @@ SEXP gzRead_Generic(SEXP filename){
generic_data_set my_data_set;
- const char *cur_file_name = CHAR(VECTOR_ELT(filename,0));
+ const char *cur_file_name = CHAR(STRING_ELT(filename,0));
/* Pass through all the header information */
View
0 src/clibs/affyio/src/read_generic.h 100755 → 100644
File mode changed.
View
0 src/clibs/affyio/src/read_pgf.c 100755 → 100644
File mode changed.

0 comments on commit 0c76931

Please sign in to comment.
Something went wrong with that request. Please try again.