/
ClusterReader.cpp
86 lines (73 loc) · 2.24 KB
/
ClusterReader.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
//-----------------------------------------------
// Copyright 2011 Wellcome Trust Sanger Institute
// Written by Jared Simpson (js18@sanger.ac.uk)
// Released under the GPL
//-----------------------------------------------
//
// ClusterReader - Read in a file of read clusters.
// This class conforms to the interface needed
// for the SequenceProcessFramework concurrency lib
//
#include "ClusterReader.h"
#include "Util.h"
//
// Construct a reader over the named file.
// Both consumption counters start at zero and the underlying
// input reader is obtained from createReader(filename).
ClusterReader::ClusterReader(const std::string& filename)
    : m_numConsumedLast(0),
      m_numConsumedTotal(0)
{
    m_pReader = createReader(filename);
}
// Destructor: deletes the reader object that the constructor
// obtained from createReader() and stored in m_pReader.
ClusterReader::~ClusterReader()
{
delete m_pReader;
}
// Read the next complete cluster from the file into out.
// out is emptied first. The first record read states how many
// elements the cluster has; that many records in total are then
// read and appended to out.
// Returns false if the first record could not be read.
// Prints an error and exits the program if the cluster is cut short
// or if a following record carries a different cluster ID.
bool ClusterReader::generate(ClusterVector& out)
{
    out.clear();

    // The leading record tells us the size of the cluster
    ClusterRecord current;
    if(!readCluster(current))
        return false;
    out.push_back(current);

    // Number of records still to come after the leading one.
    // Captured now because current is overwritten by every later read.
    const int numFollowing = current.numElements - 1;

    int numRead = 0;
    while(numRead < numFollowing)
    {
        if(!readCluster(current))
        {
            std::cerr << "Error: expected " << numFollowing + 1 << " elements in the cluster but only read " << numRead+1 << "\n";
            exit(1);
        }

        // Every member must belong to the same cluster as the leading record
        if(current.clusterID != out.front().clusterID)
        {
            std::cerr << "Error: cluster names do not match! " << current.clusterID << " != " << out.front().clusterID << "\n";
            exit(1);
        }

        out.push_back(current);
        ++numRead;
    }

    // One cluster counts as one consumed item, however many records it holds
    m_numConsumedLast = 1;
    m_numConsumedTotal += 1;
    return true;
}
//
// Read a single line from the input and parse it into record.
// The line is parsed as four whitespace-separated fields, in order:
// cluster ID, number of elements in the cluster, read ID, sequence.
// Returns false if no line could be read or the line is empty.
// Prints an error and exits the program if the line cannot be parsed
// into four valid fields.
bool ClusterReader::readCluster(ClusterRecord& record)
{
    std::string line;

    // The stream is tested directly in the condition: since C++11 the
    // conversion from a stream to bool is explicit, so the result of
    // getline cannot be used to copy-initialise a bool variable.
    if(!std::getline(*m_pReader, line) || line.empty())
        return false;

    std::stringstream parser(line);
    parser >> record.clusterID;
    parser >> record.numElements;
    parser >> record.readID;
    parser >> record.sequence;

    // A failed string extraction can leave its target unmodified, so when the
    // caller reuses the same record a truncated line would otherwise pass the
    // emptiness checks with values left over from the previous line.
    // Checking the parser's fail state rejects such lines.
    if(parser.fail() || record.clusterID.empty() || record.numElements == 0 || record.readID.empty() || record.sequence.empty())
    {
        std::cerr << "Could not parse cluster record from line: " << line << "\n";
        exit(1);
    }
    return true;
}