/
cditraits.h
145 lines (119 loc) · 3.19 KB
/
cditraits.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#ifndef CDITRAITS_H_
#define CDITRAITS_H_
#include <iostream>
#include <limits>
#include <boost/array.hpp>
#include <boost/scoped_ptr.hpp>
#include <vector>
#include <iterator>
#include "primitives.h"
namespace traits {
template<size_t D, typename T>
struct site : public primitives::point<D,T> {
const std::string& attribute(size_t i) const {
return _attributes(i);
}
std::string& attribute(size_t i) {
return _attributes(i);
}
size_t attributes() const {
return _attributes.dimensions();
}
const int& year() const {
return _year;
}
int& year() {
return _year;
}
private:
primitives::point<2,std::string> _attributes;
int _year;
};
template<size_t D, typename T>
class CDITraits {
public:
//Defines the order on the points
//typedef primitives::point<D,T> point_type;
typedef site<D,T> point_type;
typedef typename point_type::lexicographic_comparator comparator_type;
typedef primitives::box<D,T> box_type;
CDITraits(size_t min_size, int min_year, int max_year)
: _min_size(min_size), _min_year(min_year), _max_year(max_year)
{ }
void count_years (
typename std::vector<point_type>::const_iterator begin,
typename std::vector<point_type>::const_iterator end,
std::vector<size_t>& yearcounts) const
{
while (begin != end) {
int year = (*begin).year();
assert(year<=_max_year && year >= _min_year);
++yearcounts[year-_min_year];
++begin;
}
}
/// \return Does each year have enough sites.
bool check_years (
typename std::vector<point_type>::const_iterator begin,
typename std::vector<point_type>::const_iterator end
) const
{
std::vector<size_t> counts(_max_year-_min_year+1,0);
count_years(begin,end,counts);
for (int i = 0; i < counts.size(); ++i) {
if (counts[i] < min_size()) {
return false;
}
}
/*
std::cout<< "-----------------------------------------------\n";
for (int i = 0; i < counts.size(); ++i) {
std::cout << _min_year+i << " " << counts[i] << "\n";
}
std::cout<<"\n";
*/
return true;
}
///O(nlog n) median finding algorithm, can be trivially replaced
///with one of the standard O(N) (approximate, randomized) median
///finding algorithms,
/// \returns true if split ok, false if we should stop splitting
bool compute_median(std::vector<point_type>& v, int d, point_type& median) const
{
//if there is too few points, abort
if (!check_years(v.begin(),v.end()))
return false;
std::sort(v.begin(),v.end(), comparator_type(d));
typename std::vector<point_type>::const_iterator m = v.begin();
std::advance(m,v.size()/2);
//if there is too few points, abort
if (!check_years(v.begin(),m))
return false;
median=*m;
std::advance(m,1);
if (!check_years(m,v.end()))
return false;
return true;
}
static size_t dimensions() { return D; }
size_t min_size() const { return _min_size; }
private:
const size_t _min_size;
const int _min_year;
const int _max_year;
};
template<size_t D, typename T>
std::ostream& operator<<(std::ostream& s, const site<D,T>& p) {
s << p.year() << ",";
s << "";
for (size_t a = 0; a < p.attributes(); ++a) {
if (a != 0) s <<",";
s << p.attribute(a);
}
s << "";
const primitives::point<D,T>& newp = p;
s << "," << newp << "";
return s;
}
}
#endif