/
gowerD.cpp
155 lines (147 loc) · 4.85 KB
/
gowerD.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#include <Rcpp.h>
#include <string>
#include <cmath>
void R_init_VIM(DllInfo* info) {
R_registerRoutines(info, NULL, NULL, NULL, NULL);
R_useDynamicSymbols(info, TRUE);
}
using namespace std;
using namespace Rcpp;
double distW(double x,double y, int type, double weight=1, double weightsum=1,double levOrder=1,
double mixedConstant=0){
double out=0;
if(type==0){ //NUMERIC
out = weight*abs(x-y)/weightsum;
}else if(type==1){ //Categorical
if(x!=y){
out=weight/weightsum;
}
}else if(type==2){ //Ordered
out=abs(x-y)/(levOrder-1)*weight/weightsum;
}else if(type==3){ //Semi-Continous
if(
((x==mixedConstant)&(y!=mixedConstant))|
((x!=mixedConstant)&(y==mixedConstant))
){
out=weight/weightsum;
}else if((x!=mixedConstant)&(y!=mixedConstant)){
out=weight*abs(x-y)/weightsum;
}
}
// Rprintf("xval %f, yval %f, dist %f \n",x,y,out);
return out;
}
double distW1(NumericVector xV,NumericVector yV, NumericVector weight,
NumericVector levOrder,
double ncolMAX,NumericVector ncolVAR,NumericVector mixedConstant){
double out=0;
double weightsum = accumulate( weight.begin(), weight.end(), 0.0 );
// compute the distance contribution of each variable
for (int k=0; k<ncolMAX; k++) {
if(k<ncolVAR(0)){ //NUMERIC
out+=distW(xV(k),yV(k), 0, weight(k), weightsum);
}else if(k<(ncolVAR(0)+ncolVAR(1))){ //Categorical
out+=distW(xV(k),yV(k), 1, weight(k), weightsum);
}else if(k<(ncolVAR(0)+ncolVAR(1)+ncolVAR(2))){ //Ordered
out+=distW(xV(k),yV(k), 2, weight(k), weightsum,levOrder(k-(ncolVAR(0)+ncolVAR(1))));
}else if(k<(ncolVAR(0)+ncolVAR(1)+ncolVAR(2)+ncolVAR(3))){ //Semi-Continous
out+=distW(xV(k),yV(k), 3, weight(k), weightsum,1,mixedConstant(k-(ncolVAR(0)+ncolVAR(1)+ncolVAR(2))));
}
}
return out;
}
// [[Rcpp::export]]
RcppExport SEXP gowerd(SEXP dataX, SEXP dataY,SEXP weights,SEXP ncolNUMFAC,
SEXP levOrders,SEXP mixedConstants) {
BEGIN_RCPP
NumericMatrix xMat(dataX); // creates Rcpp matrix from SEXP
NumericMatrix yMat(dataY); // creates Rcpp matrix from SEXP
NumericVector ncolVAR(ncolNUMFAC); // creates Rcpp matrix from SEXP
NumericVector weight(weights); // creates Rcpp matrix from SEXP
NumericVector levOrder(levOrders); // creates Rcpp matrix from SEXP
NumericVector mixedConstant(mixedConstants);
int nx = xMat.nrow();
int ny = yMat.nrow();
NumericMatrix delta(nx,ny);
double ncolMAX = ncolVAR(0)+ncolVAR(1)+ncolVAR(2)+ncolVAR(3);
for (int i=0; i<nx; i++) {
for (int j=0; j<ny; j++) {
delta(i,j)=distW1(xMat(i,_),yMat(j,_),weight,levOrder,
ncolMAX,ncolVAR,mixedConstant);
}
}
return List::create(
Named( "delta" ) = delta
);
END_RCPP
}
// Find the index of nR minimal values per column of a matrix xR
// [[Rcpp::export]]
RcppExport SEXP whichminN(SEXP xR, SEXP nR, int returnValue) {
BEGIN_RCPP
NumericVector x(xR); // creates Rcpp matrix from SEXP
int n = as<int>(nR);
NumericVector out(n);
NumericVector::iterator it = min_element(x.begin(), x.end()); // STL algo // iterator type
out[0]=it - x.begin()+1;
if(returnValue==1){
NumericVector outMin(n);
outMin[0]=x[it - x.begin()];
x[it - x.begin()]=R_PosInf;
for (int i=2; i<=n; i++) {
it = min_element(x.begin(), x.end());
out[i-1]=it - x.begin()+1;
outMin[i-1]=x[it - x.begin()];
x[it - x.begin()]=R_PosInf;
}
return List::create(
Named( "which" ) = out,
Named( "mins" ) = outMin
) ;
}else{
x[it - x.begin()]=R_PosInf;
for (int i=2; i<=n; i++) {
it = min_element(x.begin(), x.end());
out[i-1]=it - x.begin()+1;
x[it - x.begin()]=R_PosInf;
}
return List::create(
Named( "which" ) = out
) ;
}
END_RCPP
}
// [[Rcpp::export]]
RcppExport SEXP gowerDind(SEXP dataX, SEXP dataY,SEXP weights,SEXP ncolNUMFAC,SEXP levOrders,
SEXP mixedConstants,SEXP nR,SEXP returnMinR){
BEGIN_RCPP
List dist = gowerd( dataX, dataY, weights, ncolNUMFAC, levOrders, mixedConstants);
NumericMatrix delta = as<NumericMatrix>(dist["delta"]);
int nc=delta.cols();
int n = as<int>(nR);
int returnMin = as<int>(returnMinR);
NumericMatrix inds(n,nc);
if(returnMin==0){
for (int i=0; i<nc; i++) {
NumericVector zz1 = delta( Rcpp::_, i);
List resultList(whichminN(zz1,nR,returnMin));
inds(_,i)=as<NumericVector>(resultList["which"]);
}
return List::create(
Named( "ind" ) = inds
);
}else{
NumericMatrix mins(n,nc);
for (int i=0; i<nc; i++) {
NumericVector zz1 = delta(_, i);
List resultList(whichminN(zz1,nR,returnMin));
inds(_,i)=as<NumericVector>(resultList["which"]);
mins(_,i)=as<NumericVector>(resultList["mins"]);
}
return List::create(
Named( "ind" ) = inds,
Named( "min" ) = mins
);
}
END_RCPP
}