Navigation Menu

Skip to content

Commit

Permalink
vertical
Browse files Browse the repository at this point in the history
  • Loading branch information
lindenb committed Oct 1, 2011
1 parent e1bd321 commit b9e7fca
Show file tree
Hide file tree
Showing 2 changed files with 225 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/Makefile
Expand Up @@ -10,12 +10,15 @@ all: ../bin/mean ../bin/json2xml \
../bin/mergexml \
../bin/manhattan \
../bin/groupbygene \
../bin/verticalize \
../bin/colgrep

hershey:
curl -s -o hershey.zip "http://paulbourke.net/dataformats/hershey/hershey.zip"
unzip hershey.zip hershey
rm hershey.zip
../bin/verticalize:verticalize.cpp
$(CPP) -o $@ -Wall $< -lz
../bin/colgrep:colgrep.cpp
$(CPP) -o $@ -Wall $< -lz
../bin/manhattan:manhattan.cpp
Expand Down
222 changes: 222 additions & 0 deletions src/verticalize.cpp
@@ -0,0 +1,222 @@
/**
* Author:
* Pierre Lindenbaum PhD
* Contact:
* plindenbaum@yahoo.fr
* Date:
* Oct 2011
* WWW:
* http://plindenbaum.blogspot.com
* Motivation:
* verticalize a table
* Compilation:
* g++ -o verticalize -Wall -O3 verticalize.cpp -lz
*/
#include <cstdlib>
#include <vector>
#include <map>
#include <set>
#include <cerrno>
#include <string>
#include <cstring>
#include <stdexcept>
#include <climits>
#include <cmath>
#include <cfloat>
#include <cstdio>
#include <iostream>
#include <zlib.h>
#include <sstream>
#include <algorithm>
#include <cassert>
#include <stdint.h>

using namespace std;


/**
 * Prints every row of a delimited table "vertically": one output line per
 * column, each showing the 1-based column index, the column name (when the
 * first input line is a header) and the value found in that row.
 */
class Verticalize
{
public:
    /** Column separator; used to split input lines and to separate output fields. */
    char delim;
    /** When true, run() consumes the first input line as the list of column names. */
    bool first_line_is_header;

    /** Defaults: tab delimiter, first line is a header. */
    Verticalize() : delim('\t'), first_line_is_header(true)
    {
    }

    ~Verticalize()
    {
    }

    /**
     * Reads characters from `in` up to (and excluding) the next '\n'.
     *
     * @param in   zlib stream opened for reading.
     * @param line receives the line content; always cleared first.
     * @return true if a line was read (possibly an empty one terminated by
     *         '\n'), false if the end of the input was reached before any
     *         character could be read.
     */
    bool readline(gzFile in, std::string& line)
    {
        line.clear();
        int c;
        while ((c = gzgetc(in)) != EOF)
        {
            if (c == '\n') return true;
            line += static_cast<char>(c);
        }
        // gzeof() only becomes true once a read has already failed, so it
        // cannot be used to detect the end beforehand. Hitting the end with
        // nothing accumulated means there is no further line; a non-empty
        // buffer is a last line that simply lacks its trailing newline.
        return !line.empty();
    }

    /**
     * Splits `line` on `delim`.
     *
     * @param line   text to split.
     * @param tokens receives the fields; cleared first. Empty fields are kept,
     *               so an empty line yields exactly one empty token.
     */
    void split(const std::string& line, std::vector<std::string>& tokens)
    {
        tokens.clear();
        std::string::size_type start = 0;
        for (;;)
        {
            const std::string::size_type end = line.find(delim, start);
            if (end == std::string::npos)
            {
                tokens.push_back(line.substr(start));
                break;
            }
            tokens.push_back(line.substr(start, end - start));
            start = end + 1;
        }
    }

    /**
     * Reads the whole stream and writes the vertical view to std::cout.
     *
     * Each record is framed by ">>>" and "<<<" lines carrying the input line
     * number (the header line, when present, counts as line 1). With a
     * header, names are right-padded with spaces to the longest header name;
     * a column missing from the row is shown with the value "???" and a value
     * beyond the last header column is shown with the name "???".
     *
     * @param in zlib stream opened for reading; it is not closed here.
     */
    void run(gzFile in)
    {
        size_t nLine = 0UL;
        std::vector<std::string> header;
        std::vector<std::string> tokens;
        std::string line;
        size_t len_word = 0UL;

        if (first_line_is_header)
        {
            if (!readline(in, line))
            {
                std::cerr << "Error cannot read first line.\n";
                return;
            }
            ++nLine;
            split(line, header);
            for (size_t i = 0; i < header.size(); ++i)
            {
                len_word = std::max(len_word, header[i].size());
            }
        }

        while (readline(in, line))
        {
            ++nLine;
            std::cout << ">>>" << delim << nLine << '\n';
            split(line, tokens);
            if (first_line_is_header)
            {
                // Columns that have a name in the header.
                for (size_t i = 0; i < header.size(); ++i)
                {
                    std::cout << "$" << (i + 1) << delim << header[i]
                              << std::string(len_word - header[i].size(), ' ')
                              << delim;
                    if (i < tokens.size())
                    {
                        std::cout << tokens[i];
                    }
                    else
                    {
                        std::cout << "???";
                    }
                    std::cout << '\n';
                }
                // Extra values without a header name: "???" (3 chars) stands
                // in for the name, padded like any other name.
                const size_t pad = (len_word > 3 ? len_word - 3 : 0);
                for (size_t i = header.size(); i < tokens.size(); ++i)
                {
                    std::cout << "$" << (i + 1) << delim << "???"
                              << std::string(pad, ' ')
                              << delim << tokens[i] << '\n';
                }
            }
            else
            {
                for (size_t i = 0; i < tokens.size(); ++i)
                {
                    std::cout << "$" << (i + 1) << delim << tokens[i] << '\n';
                }
            }
            std::cout << "<<<" << delim << nLine << "\n\n";
        }
        // Single flush instead of one per output line.
        std::cout.flush();
    }
};


/**
 * Command-line entry point.
 *
 * Options:
 *   -h                 print usage and exit
 *   -n                 the first line is NOT a header
 *   -d, --delim CHAR   single-character column delimiter (default: tab)
 *
 * Remaining arguments are input files (plain or gzipped, opened through
 * zlib); with no file argument the table is read from stdin.
 *
 * @return EXIT_SUCCESS when every input could be opened, EXIT_FAILURE otherwise.
 */
int main(int argc,char** argv)
{
    Verticalize app;
    // Named argi rather than optind so it cannot be confused with the
    // getopt global of the same name.
    int argi = 1;
    while (argi < argc)
    {
        const char* arg = argv[argi];
        if (std::strcmp(arg, "-h") == 0)
        {
            std::cerr << argv[0] << ": Pierre Lindenbaum PHD. 2011.\n";
            std::cerr << "Compilation: " << __DATE__ << " at " << __TIME__ << ".\n";
            std::cerr << "Options:\n";
            std::cerr << " -d or --delim (char) delimiter default:tab\n";
            std::cerr << " -n first line is NOT the header.\n";
            std::cerr << "(stdin|file|file.gz)\n";
            std::exit(EXIT_FAILURE);
        }
        else if (std::strcmp(arg, "-n") == 0)
        {
            app.first_line_is_header = false;
        }
        else if (std::strcmp(arg, "-d") == 0 || std::strcmp(arg, "--delim") == 0)
        {
            // Report a missing value explicitly instead of letting the option
            // fall through to the "unknown option" branch.
            if (argi + 1 >= argc)
            {
                std::cerr << "Option " << arg << " requires a delimiter argument.\n";
                std::exit(EXIT_FAILURE);
            }
            const char* p = argv[++argi];
            if (std::strlen(p) != 1)
            {
                std::cerr << "Bad delimiter \"" << p << "\"\n";
                std::exit(EXIT_FAILURE);
            }
            app.delim = p[0];
        }
        else if (arg[0] == '-')
        {
            std::cerr << "unknown option '" << arg << "'\n";
            std::exit(EXIT_FAILURE);
        }
        else
        {
            // First non-option argument: the rest are input files.
            break;
        }
        ++argi;
    }

    if (argi == argc)
    {
        gzFile in = gzdopen(fileno(stdin), "r");
        if (in == NULL)
        {
            std::cerr << "Cannot open stdin" << std::endl;
            return EXIT_FAILURE;
        }
        app.run(in);
        // Release the zlib state; this also closes the stdin descriptor,
        // which is no longer needed at this point.
        gzclose(in);
    }
    else
    {
        while (argi < argc)
        {
            const char* filename = argv[argi++];
            gzFile in = gzopen(filename, "r");
            if (in == NULL)
            {
                std::cerr << "Cannot open " << filename << " "
                          << std::strerror(errno) << std::endl;
                return EXIT_FAILURE;
            }
            app.run(in);
            gzclose(in);
        }
    }
    return EXIT_SUCCESS;
}

0 comments on commit b9e7fca

Please sign in to comment.