Skip to content

Commit

Permalink
Creating generator class
Browse files Browse the repository at this point in the history
  • Loading branch information
Manerone committed Sep 13, 2019
1 parent f486179 commit d55654e
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 112 deletions.
5 changes: 3 additions & 2 deletions CMakeLists.txt
Expand Up @@ -33,16 +33,17 @@ set(SOURCES ${CMAKE_SOURCE_DIR}/src/helpers/column.cpp
${CMAKE_SOURCE_DIR}/src/indexes/kd_tree/kd_node.cpp
${CMAKE_SOURCE_DIR}/src/indexes/standard_cracking.cpp
${CMAKE_SOURCE_DIR}/src/data/data_reader.cpp
${CMAKE_SOURCE_DIR}/src/data/generator.cpp
)

add_library(MDAI_lib ${SOURCES})

target_link_libraries(MDAI_lib sqlite3)

add_executable(main ${CMAKE_SOURCE_DIR}/src/main.cpp)
target_link_libraries(main MDAI_lib)

add_executable(generator ${CMAKE_SOURCE_DIR}/src/data/generator.cpp)
add_executable(generator ${CMAKE_SOURCE_DIR}/src/generator.cpp)
target_link_libraries(generator MDAI_lib)

add_subdirectory("test")

Expand Down
33 changes: 33 additions & 0 deletions include/data/generator.hpp
@@ -0,0 +1,33 @@
#ifndef GENERATOR_H
#define GENERATOR_H

#include <string>
#include <cstdint>

/**
 * Bundles the parameters of one data/workload generation run.
 *
 * The constructor only stores its arguments; generate() does the work.
 */
class Generator {
public:
    /**
     * @param n_of_rows_            number of rows (data points) to produce
     * @param dimensions_           number of dimensions per data point
     * @param workload_             workload selector; the command-line help lists
     *                              0=normal, 1=clustered, 2=uniform, 3=GMRQB, 4=power
     * @param selectivity_          query selectivity
     * @param number_of_queries_    number of queries to produce
     * @param query_type_           query type selector
     * @param power_dataset_file_   path stored in POWER_DATASET_FILE
     * @param feature_vectors_file_ path stored in FEATUREVECTORS_FILE
     * @param genes_file_           path stored in GENES_FILE
     */
    Generator(int64_t n_of_rows_,
              int64_t dimensions_,
              int64_t workload_,
              float selectivity_,
              int64_t number_of_queries_,
              int64_t query_type_,
              const std::string &power_dataset_file_,
              const std::string &feature_vectors_file_,
              const std::string &genes_file_);

    /**
     * Writes the generated table to `table_path` and the generated queries to
     * `workload_path`.
     *
     * @return a success flag (the parts of the implementation visible in this
     *         change only ever return true)
     */
    bool generate(const std::string &table_path, const std::string &workload_path);

private:
    // Declaration order matters: the constructor's initializer list follows it.
    int64_t n_of_rows;
    int64_t dimensions;
    int64_t workload;
    float selectivity;
    int64_t number_of_queries;
    int64_t query_type;

    // Input dataset paths. The in-class values below are placeholders only:
    // the sole constructor overwrites all three with its arguments.
    std::string POWER_DATASET_FILE = "DEBS2012-ChallengeData.txt";
    std::string FEATUREVECTORS_FILE = "chr22_feature.vectors";
    std::string GENES_FILE = "genes.txt";
};
#endif // GENERATOR_H
155 changes: 45 additions & 110 deletions src/data/generator.cpp
@@ -1,3 +1,5 @@
#include "generator.hpp"

#include <cmath>
#include <cstring>

Expand All @@ -10,110 +12,43 @@
#include <unordered_map>
#include <algorithm>

// For the command line parsing
#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>

#define FEATUREVECTORS_FILE "data/datasets/chr22_feature.vectors"
#define GENES_FILE "data/datasets/genes.txt"
#define POWER_DATASET_FILE "data/datasets/DEBS2012-ChallengeData.txt"

#define DATA_FILE "data"
#define QUERY_FILE "queries"

// Prints the command-line help to standard output: one blank line, a
// "Usage:" header, then one line per option. Every line is terminated with
// std::endl, so the stream is flushed after each one.
void usage(){
    static const char* const help_lines[] = {
        "",
        "Usage:",
        "-r <number_of_rows>",
        "-d <number_of_dimensions>",
        "-w <workload_choice>",
        " '-(0=normal, 1=clustered, 2=uniform, 3=GMRQB, 4=power)",
        " '- GMRQB has fixed selectivity and 19 dimensions.",
        " '- power has fixed 4 dimensions.",
        "-s <selectivity>",
        "-q <number_of_queries>",
        "-t <query_type>",
    };

    for (const char* text : help_lines) {
        std::cout << text << std::endl;
    }
}

int main(int argc, char* argv[]) {
int64_t n_of_rows = -1;
int64_t dimensions = -1;
int64_t workload = -1;
float selectivity = -1;
int64_t number_of_queries = -1;
int64_t query_type = -1;

int c;
while ((c = getopt (argc, argv, "r:d:s:w:t:q:")) != -1){
switch (c)
{
case 'w':
workload = atoi(optarg);
break;
case 'r':
n_of_rows = atoi(optarg);
break;
case 'd':
dimensions = atoi(optarg);
break;
case 's':
selectivity = atof(optarg);
break;
case 'q':
number_of_queries = atoi(optarg);
break;
case 't':
query_type = atoi(optarg);
break;
default:
usage();
exit(-1);
}
}

if(n_of_rows == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-r <n_of_rows> required" << std::endl;
usage();
exit(-1);
}
if(dimensions == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-d <dimensions> required" << std::endl;
usage();
exit(-1);
}
if(workload == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-w <workload> required" << std::endl;
usage();
exit(-1);
}
if(selectivity == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-s <selectivity> required" << std::endl;
usage();
exit(-1);
}
if(number_of_queries == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-q <number_of_queries> required" << std::endl;
usage();
exit(-1);
}
if(query_type == -1){
query_type = 7;
exit(-1);
}

if(workload == 3)
std::cout << "INFO: " << n_of_rows << " vectors, " << 19 << " dimensions." << std::endl;
else if(workload == 4)
std::cout << "INFO: " << n_of_rows << " vectors, " << 4 << " dimensions." << std::endl;
else
std::cout << "INFO: " << n_of_rows << " vectors, " << dimensions << " dimensions." << std::endl;

//void usage(){
// std::cout << std::endl;
// std::cout << "Usage:" <<std::endl;
// std::cout << "-r <number_of_rows>" << std::endl;
// std::cout << "-d <number_of_dimensions>" << std::endl;
// std::cout << "-w <workload_choice>" << std::endl;
// std::cout << " '-(0=normal, 1=clustered, 2=uniform, 3=GMRQB, 4=power)" << std::endl;
// std::cout << " '- GMRQB has fixed selectivity and 19 dimensions." << std::endl;
// std::cout << " '- power has fixed 4 dimensions." << std::endl;
// std::cout << "-s <selectivity>" << std::endl;
// std::cout << "-q <number_of_queries>" << std::endl;
// std::cout << "-t <query_type>" << std::endl;
//}

Generator::Generator(
int64_t n_of_rows_,
int64_t dimensions_,
int64_t workload_,
float selectivity_,
int64_t number_of_queries_,
int64_t query_type_,
const std::string &power_dataset_file_,
const std::string &feature_vectors_file_,
const std::string &genes_file_
)
: n_of_rows(n_of_rows_),
dimensions(dimensions_),
workload(workload_),
selectivity(selectivity_),
number_of_queries(number_of_queries_),
query_type(query_type_),
POWER_DATASET_FILE(power_dataset_file_),
FEATUREVECTORS_FILE(feature_vectors_file_),
GENES_FILE(genes_file_)
{}

bool Generator::generate(const std::string &table_path, const std::string &workload_path){
std::vector< std::vector<float> > data_points(n_of_rows, std::vector<float>(dimensions));

// Get data or generate it
Expand All @@ -125,7 +60,7 @@ int main(int argc, char* argv[]) {
std::string line;
std::string token;

std::ofstream myfile(DATA_FILE);
std::ofstream myfile(table_path);

while (std::getline(feature_vectors, line) && i < n_of_rows) {
std::vector<float> data_point(dimensions);
Expand Down Expand Up @@ -155,7 +90,7 @@ int main(int argc, char* argv[]) {
std::string line;
std::string token;

std::ofstream myfile(DATA_FILE);
std::ofstream myfile(table_path);

while (std::getline(tuples, line) && i < n_of_rows) {
std::vector<float> data_point(dimensions);
Expand Down Expand Up @@ -188,7 +123,7 @@ int main(int argc, char* argv[]) {
mmd[1] = std::normal_distribution<double>(0.4, 0.2);
mmd[2] = std::normal_distribution<double>(0.6, 0.2);

std::ofstream myfile(DATA_FILE);
std::ofstream myfile(table_path);

for (size_t i = 0; i < n_of_rows; ++i) {
std::vector<float> data_point(dimensions);
Expand Down Expand Up @@ -223,7 +158,7 @@ int main(int argc, char* argv[]) {
std::string line;
std::string token;

std::ofstream myfile(QUERY_FILE);
std::ofstream myfile(workload_path);

std::vector<float> cols;
while (std::getline(genes, line) && i < number_of_queries) {
Expand Down Expand Up @@ -490,7 +425,7 @@ int main(int argc, char* argv[]) {
}
myfile.close();
} else {
std::ofstream myfile(QUERY_FILE);
std::ofstream myfile(workload_path);
for (size_t i = 0; i < number_of_queries; ++i) {
// myfile << "SELECT * FROM synthetic WHERE";
int first = rand() % n_of_rows;
Expand Down Expand Up @@ -527,5 +462,5 @@ int main(int argc, char* argv[]) {
myfile.close();
}

return 0;
return true;
}
124 changes: 124 additions & 0 deletions src/generator.cpp
@@ -0,0 +1,124 @@
#include <iostream>

// For the command line parsing
#include <cerrno>
#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>

#include "generator.hpp"

// Default locations of the input datasets, relative to the working directory.
// These are typed constants rather than object-like macros because Generator
// has data members with exactly the same names: a macro would textually
// replace those member names in any code that comes after it.
constexpr char FEATUREVECTORS_FILE[] = "data/datasets/chr22_feature.vectors";
constexpr char GENES_FILE[] = "data/datasets/genes.txt";
constexpr char POWER_DATASET_FILE[] = "data/datasets/DEBS2012-ChallengeData.txt";

// Names of the output files passed to Generator::generate() (table, workload).
constexpr char DATA_FILE[] = "data";
constexpr char QUERY_FILE[] = "queries";

// Prints the command-line help to standard output: one blank line, a
// "Usage:" header, then one line per option. Every line is terminated with
// std::endl, so the stream is flushed after each one.
void usage(){
    static const char* const help_lines[] = {
        "",
        "Usage:",
        "-r <number_of_rows>",
        "-d <number_of_dimensions>",
        "-w <workload_choice>",
        " '-(0=normal, 1=clustered, 2=uniform, 3=GMRQB, 4=power)",
        " '- GMRQB has fixed selectivity and 19 dimensions.",
        " '- power has fixed 4 dimensions.",
        "-s <selectivity>",
        "-q <number_of_queries>",
        "-t <query_type>",
    };

    for (const char* text : help_lines) {
        std::cout << text << std::endl;
    }
}

int main(int argc, char* argv[]) {
int64_t n_of_rows = -1;
int64_t dimensions = -1;
int64_t workload = -1;
float selectivity = -1;
int64_t number_of_queries = -1;
int64_t query_type = -1;

int c;
while ((c = getopt (argc, argv, "r:d:s:w:t:q:")) != -1){
switch (c)
{
case 'w':
workload = atoi(optarg);
break;
case 'r':
n_of_rows = atoi(optarg);
break;
case 'd':
dimensions = atoi(optarg);
break;
case 's':
selectivity = atof(optarg);
break;
case 'q':
number_of_queries = atoi(optarg);
break;
case 't':
query_type = atoi(optarg);
break;
default:
usage();
exit(-1);
}
}

if(n_of_rows == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-r <n_of_rows> required" << std::endl;
usage();
exit(-1);
}
if(dimensions == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-d <dimensions> required" << std::endl;
usage();
exit(-1);
}
if(workload == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-w <workload> required" << std::endl;
usage();
exit(-1);
}
if(selectivity == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-s <selectivity> required" << std::endl;
usage();
exit(-1);
}
if(number_of_queries == -1){
std::cout << "Errors:" << std::endl;
std::cout << "-q <number_of_queries> required" << std::endl;
usage();
exit(-1);
}
if(query_type == -1){
query_type = 7;
usage();
exit(-1);
}

if(workload == 3)
std::cout << "INFO: " << n_of_rows << " vectors, " << 19 << " dimensions." << std::endl;
else if(workload == 4)
std::cout << "INFO: " << n_of_rows << " vectors, " << 4 << " dimensions." << std::endl;
else
std::cout << "INFO: " << n_of_rows << " vectors, " << dimensions << " dimensions." << std::endl;

auto generator = Generator(n_of_rows,
dimensions,
workload,
selectivity,
number_of_queries,
query_type,
POWER_DATASET_FILE,
FEATUREVECTORS_FILE,
GENES_FILE
);

generator.generate(DATA_FILE, QUERY_FILE);

return 0;
}

0 comments on commit d55654e

Please sign in to comment.