Skip to content

Commit

Permalink
v. 2.3.0
Browse files Browse the repository at this point in the history
short k-mers (k<10) support
  • Loading branch information
a committed Sep 10, 2015
1 parent 78601be commit 0635dae
Show file tree
Hide file tree
Showing 112 changed files with 13,063 additions and 261 deletions.
Binary file modified API.pdf
Binary file not shown.
405 changes: 376 additions & 29 deletions kmc_api/kmc_file.cpp

Large diffs are not rendered by default.

53 changes: 43 additions & 10 deletions kmc_api/kmc_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
Version: 2.2.0
Date : 2015-04-15
*/
Version: 2.3.0
Date : 2015-08-21
*/

#ifndef _KMC_FILE_H
#define _KMC_FILE_H
Expand All @@ -16,6 +16,19 @@
#include <string>
#include <vector>

// Plain aggregate describing a k-mer database.
// It is passed by reference to CKMCFile::Info(CKMCFileInfo&), which is documented
// as getting the current parameters of the k-mer database. The fields carry the
// same names as the out-parameters of the multi-argument CKMCFile::Info()
// overload, with both_strands in addition.
struct CKMCFileInfo
{
uint32 kmer_length;
uint32 mode;
// NOTE(review): presumably a size in bytes, like sufix_size in CKMCFile -- confirm.
uint32 counter_size;
uint32 lut_prefix_length;
uint32 signature_len;
uint32 min_count;
// 64-bit, matching the return type of CKMCFile::GetMaxCount(); per that
// accessor's comment, k-mers with counters above max_count are ignored.
uint64 max_count;
// NOTE(review): presumably the value reported by CKMCFile::GetBothStrands()
// ("true if kmc was run without -b switch") -- confirm.
bool both_strands;
// NOTE(review): presumably the value reported by CKMCFile::KmerCount() -- confirm.
uint64 total_kmers;
};

class CKMCFile
{
enum open_mode {closed, opened_for_RA, opened_for_listing};
Expand Down Expand Up @@ -44,19 +57,20 @@ class CKMCFile
uint32 lut_prefix_length;
uint32 signature_len;
uint32 min_count;
uint32 max_count;
uint64 max_count;
uint64 total_kmers;
bool both_strands;

uint32 kmc_version;
uint32 sufix_size; // sufix's size in bytes
uint32 sufix_rec_size; // sufix_size + counter_size

uint32 original_min_count;
uint32 original_max_count;
uint64 original_max_count;

static uint64 part_size; // the size of a block readed to sufix_file_buf, in listing mode

bool BinarySearch(int64 index_start, int64 index_stop, const CKmerAPI& kmer, uint32& counter, uint32 pattern_offset);
bool BinarySearch(int64 index_start, int64 index_stop, const CKmerAPI& kmer, uint64& counter, uint32 pattern_offset);

// Open a file, recognize its size and check its marker. Auxiliary function.
bool OpenASingleFile(const std::string &file_name, FILE *&file_handler, uint64 &size, char marker[]);
Expand All @@ -67,8 +81,17 @@ class CKMCFile
// Reload the contents of the array "sufix_file_buf" for listing mode. Auxiliary function.
void Reload_sufix_file_buf();

// Implementation of GetCountersForRead for kmc1 database format
bool GetCountersForRead_kmc1(const std::string& read, std::vector<uint32>& counters);
// Implementation of GetCountersForRead for kmc1 database format for both strands
bool GetCountersForRead_kmc1_both_strands(const std::string& read, std::vector<uint32>& counters);

// Implementation of GetCountersForRead for kmc1 database format without choosing canonical k-mer
bool GetCountersForRead_kmc1(const std::string& read, std::vector<uint32>& counters);

using super_kmers_t = std::vector<std::tuple<uint32, uint32, uint32>>;//start_pos, len, bin_no
void GetSuperKmers(const std::string& transformed_read, super_kmers_t& super_kmers);

// Implementation of GetCountersForRead for kmc2 database format for both strands
bool GetCountersForRead_kmc2_both_strands(const std::string& read, std::vector<uint32>& counters);

// Implementation of GetCountersForRead for kmc2 database format
bool GetCountersForRead_kmc2(const std::string& read, std::vector<uint32>& counters);
Expand All @@ -86,6 +109,8 @@ class CKMCFile
// Return next kmer in CKmerAPI &kmer. Return its counter in float &count. Return true if not EOF
bool ReadNextKmer(CKmerAPI &kmer, float &count);

bool ReadNextKmer(CKmerAPI &kmer, uint64 &count); //for small k-values when counter may be longer than 4bytes

bool ReadNextKmer(CKmerAPI &kmer, uint32 &count);
// Release memory and close files in case they were opened
bool Close();
Expand All @@ -100,7 +125,10 @@ class CKMCFile
bool SetMaxCount(uint32 x);

// Return the value of max_count. Kmers with counters above this threshold are ignored
uint32 GetMaxCount(void);
uint64 GetMaxCount(void);

//Return true if kmc was run without -b switch.
bool GetBothStrands(void);

// Return the total number of kmers between min_count and max_count
uint64 KmerCount(void);
Expand All @@ -119,15 +147,20 @@ class CKMCFile

bool CheckKmer(CKmerAPI &kmer, uint32 &count);

bool CheckKmer(CKmerAPI &kmer, uint64 &count);

// Return true if kmer exists
bool IsKmer(CKmerAPI &kmer);

// Restore the original values of min_count and max_count (as read from *.kmer_pre)
void ResetMinMaxCounts(void);

// Get current parameters from kmer_database
bool Info(uint32 &_kmer_length, uint32 &_mode, uint32 &_counter_size, uint32 &_lut_prefix_length, uint32 &_signature_len, uint32 &_min_count, uint32 &_max_count, uint64 &_total_kmers);
bool Info(uint32 &_kmer_length, uint32 &_mode, uint32 &_counter_size, uint32 &_lut_prefix_length, uint32 &_signature_len, uint32 &_min_count, uint64 &_max_count, uint64 &_total_kmers);

// Get current parameters from kmer_database
bool Info(CKMCFileInfo& info);

// Get counters for all k-mers in read
bool GetCountersForRead(const std::string& read, std::vector<uint32>& counters);
bool GetCountersForRead(const std::string& read, std::vector<float>& counters);
Expand Down
4 changes: 2 additions & 2 deletions kmc_api/kmer_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
Authors: Sebastian Deorowicz and Agnieszka Debudaj-Grabysz
Version: 2.2.0
Date : 2015-04-15
Version: 2.3.0
Date : 2015-08-21
*/


Expand Down
40 changes: 38 additions & 2 deletions kmc_api/kmer_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
Authors: Sebastian Deorowicz and Agnieszka Debudaj-Grabysz
Version: 2.2.0
Date : 2015-04-15
Version: 2.3.0
Date : 2015-08-21
*/

#ifndef _KMER_API_H
Expand All @@ -15,6 +15,7 @@ Date : 2015-04-15
#include "kmer_defs.h"
#include <string>
#include <iostream>
#include <vector>
#include "mmer.h"
class CKMCFile;

Expand Down Expand Up @@ -65,6 +66,20 @@ class CKmerAPI
}
kmer_data[no_of_rows - 1] += (uint64)val << (62 - (((kmer_length - 1 + byte_alignment) & 31) * 2));
}

//----------------------------------------------------------------------------------
// Shift the stored k-mer right by 2 bits across all rows and put the two
// lowest bits of val into the slot that opens up at the front of row 0
// (byte_alignment * 2 bits below the top bit). The bits pushed past the end
// of the k-mer in the last row are cleared.
inline void SHR_insert2bits(uchar val)
{
	const auto last_row = no_of_rows - 1;

	// Work from the last row towards the first: each row takes over the two
	// lowest bits of its (still unshifted) predecessor as its two highest bits.
	for (uint32 row = last_row; row > 0; --row)
		kmer_data[row] = (kmer_data[row] >> 2) + (kmer_data[row - 1] << 62);
	kmer_data[0] >>= 2;

	// Zero the low bits of the last row that lie below its occupied 2-bit
	// slots; this drops the symbol that has just been shifted out.
	const auto slots_in_last_row = kmer_length + byte_alignment - last_row * 32;
	const auto unused_low_bits = (32 - slots_in_last_row) * 2;
	kmer_data[last_row] &= ~((1ull << unused_low_bits) - 1);

	// Only the two lowest bits of val survive the shift to bit 62.
	const uint64 incoming = static_cast<uint64>(val) << 62;
	kmer_data[0] += incoming >> (byte_alignment * 2);
}

// ----------------------------------------------------------------------------------
inline void from_binary(const char* kmer)
{
Expand All @@ -73,6 +88,14 @@ class CKmerAPI
insert2bits(i, kmer[i]);
}

// ----------------------------------------------------------------------------------
inline void from_binary_rev(const char* kmer)
{
clear();
for (uint32 i = 0; i < kmer_length; ++i)
insert2bits(i, 3 - kmer[kmer_length - i - 1]);
}

// ----------------------------------------------------------------------------------
template<typename RandomAccessIterator>
inline void to_string_impl(RandomAccessIterator iter)
Expand Down Expand Up @@ -424,6 +447,19 @@ class CKmerAPI
str[kmer_length] = '\0';
};


inline void to_long(std::vector<uint64>& kmer)
{
kmer.resize(no_of_rows);
uint32 offset = 62 - ((kmer_length - 1 + byte_alignment) & 31) * 2;
for (int32 i = no_of_rows - 1; i >= 1; --i)
{
kmer[i] = kmer_data[i] >> offset;
kmer[i] += kmer_data[i - 1] << (64 - offset);
}
kmer[0] = kmer_data[0] >> offset;
}

//-----------------------------------------------------------------------
// Convert kmer into string (an alphabet ACGT)
// OUT : str - string kmer
Expand Down
8 changes: 4 additions & 4 deletions kmc_api/kmer_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
Authors: Sebastian Deorowicz and Agnieszka Debudaj-Grabysz
Version: 2.2.0
Date : 2015-04-15
Version: 2.3.0
Date : 2015-08-21
*/


#ifndef _KMER_DEFS_H
#define _KMER_DEFS_H

#define KMC_VER "2.2.0"
#define KMC_DATE "2015-04-15"
#define KMC_VER "2.3.0"
#define KMC_DATE "2015-08-21"

#define MIN(x,y) ((x) < (y) ? (x) : (y))

Expand Down
4 changes: 2 additions & 2 deletions kmc_api/mmer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
Version: 2.2.0
Date : 2015-04-15
Version: 2.3.0
Date : 2015-08-21
*/

#include "../kmc_api/mmer.h"
Expand Down
4 changes: 2 additions & 2 deletions kmc_api/mmer.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
Version: 2.2.0
Date : 2015-04-15
Version: 2.3.0
Date : 2015-08-21
*/

#ifndef _MMER_H
Expand Down
8 changes: 4 additions & 4 deletions kmc_dump/kmc_dump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
Version: 2.2.0
Date : 2015-04-15
Version: 2.3.0
Date : 2015-08-21
*/

#include "stdafx.h"
Expand Down Expand Up @@ -85,7 +85,7 @@ int _tmain(int argc, char* argv[])
uint32 _lut_prefix_length;
uint32 _signature_len;
uint32 _min_count;
uint32 _max_count;
uint64 _max_count;
uint64 _total_kmers;

kmer_data_base.Info(_kmer_length, _mode, _counter_size, _lut_prefix_length, _signature_len, _min_count, _max_count, _total_kmers);
Expand Down Expand Up @@ -118,7 +118,7 @@ int _tmain(int argc, char* argv[])
}
else
{
uint32 counter;
uint64 counter;
while (kmer_data_base.ReadNextKmer(kmer_object, counter))
{
kmer_object.to_string(str);
Expand Down
4 changes: 2 additions & 2 deletions kmc_dump/nc_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
Version: 2.2.0
Date : 2015-04-15
Version: 2.3.0
Date : 2015-08-21
*/

#include "stdafx.h"
Expand Down
4 changes: 2 additions & 2 deletions kmc_dump/nc_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
Version: 2.2.0
Date : 2015-04-15
Version: 2.3.0
Date : 2015-08-21
*/

#include <string>
Expand Down
6 changes: 3 additions & 3 deletions kmc_dump_sample/kmc_dump_sample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
Version: 2.2.0
Date : 2015-04-15
Version: 2.3.0
Date : 2015-08-21
*/

#include "stdafx.h"
Expand Down Expand Up @@ -83,7 +83,7 @@ int _tmain(int argc, char* argv[])
uint32 _lut_prefix_length;
uint32 _signature_len;
uint32 _min_count;
uint32 _max_count;
uint64 _max_count;
uint64 _total_kmers;

kmer_data_base.Info(_kmer_length, _mode, _counter_size, _lut_prefix_length, _signature_len, _min_count, _max_count, _total_kmers);
Expand Down
Binary file added kmc_tools.pdf
Binary file not shown.
23 changes: 23 additions & 0 deletions kmc_tools/asmlib_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
This file is a part of KMC software distributed under GNU GPL 3 licence.
The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
Authors: Marek Kokot
Version: 2.3.0
Date : 2015-08-21
*/


#ifndef _ASMLIB_WRAPPER_H
#define _ASMLIB_WRAPPER_H

#include "defs.h"

// Single switch point for the optional asmlib dependency: code uses A_memcpy and
// SetMemcpyCacheLimit, and this header decides where they come from.
#ifdef DISABLE_ASMLIB
// Fallback build: A_memcpy is the C library memcpy. <string.h> is included here
// so that ::memcpy is declared regardless of what defs.h happens to include.
#include <string.h>
#define A_memcpy memcpy
// Expands to nothing; the argument is not evaluated.
#define SetMemcpyCacheLimit(X)
#else
// Regular build: both names are expected to be declared by asmlib's own header.
#include "libs/asmlib.h"
#endif

#endif
Loading

0 comments on commit 0635dae

Please sign in to comment.