
towards a new capped impl; NamespaceDetails readability; cleaning

1 parent 14296b4 · commit b904b0dd1d6999a6f88cd710ccbf8b604ae9bb1d · @dwight committed Sep 9, 2010
Showing with 265 additions and 92 deletions.
  1. +0 −2 db/capped-inl.h
  2. +144 −1 db/capped.cpp
  3. +26 −22 db/capped.h
  4. +20 −24 db/diskloc.h
  5. +3 −4 db/extsort.cpp
  6. +9 −4 db/namespace.cpp
  7. +14 −2 db/namespace.h
  8. +12 −7 db/pdfile.cpp
  9. +2 −2 db/pdfile.h
  10. +2 −2 db/rec.h
  11. +1 −1 db/tests.cpp
  12. +14 −6 util/mmap.cpp
  13. +11 −7 util/mmap.h
  14. +7 −8 util/mmap_win.cpp
2 db/capped-inl.h
@@ -1,2 +0,0 @@
-// @file capped_impl.h
-
145 db/capped.cpp
@@ -3,10 +3,153 @@
#include "pch.h"
#include "capped.h"
+#include "namespace.h"
+#include "nonce.h"
+#include "../util/mongoutils/str.h"
+#include "../util/mmap.h"
+
+using namespace mongoutils;
namespace mongo {
- CappedCollection::~CappedCollection() {
+ /* the file is chunked up into pieces ("regions") of this size. You cannot store a record larger than that. */
+ const unsigned RegionSize = 256 * 1024 * 1024;
+
+#pragma pack(1)
+ /** each capped collection has its own file */
+ struct FileHeader {
+ char ns[512];
+ char reserved[512];
+ unsigned ver1;
+ unsigned ver2;
+ unsigned long long fileSize;
+ unsigned regionSize;
+ unsigned nRegions;
+ char reserved3[8192 - 1024 - 3*8];
+ };
+ struct RecordHeader {
+ unsigned long long ord;
+ unsigned recordSize;
+ // then: data[recordSize]
+ // then: unsigned recordSizeRepeats;
+ };
+ struct Region {
+ unsigned size; // size of this region. the last region in the file may be smaller than the rest
+ unsigned zero; // future
+ char reserved[8192 - 8];
+ union {
+ char data[ RegionSize - 8192 ];
+ RecordHeader firstRecord;
+ };
+ };
+ struct File {
+ FileHeader fileHeader;
+ Region regions[1];
+ };
+#pragma pack()
+
+ class CappedCollection2 : boost::noncopyable {
+ public:
+ MMF file;
+ MMF::Pointer ptr;
+ Region* regionHeader(unsigned i) {
+ size_t ofs = 8192 + i * RegionSize;
+ return (Region *) ptr.at(ofs, 8);
+ }
+ RecordHeader* firstRecordHeaderForRegion(unsigned i) {
+ size_t ofs = 8192 + i * RegionSize + 8192;
+ return (RecordHeader *) ptr.at(ofs, 8);
+ }
+ };
+
+ void findAFileNumber(boost::filesystem::path p, unsigned& fileNumber, unsigned toTry) {
+ if( toTry == 0 ) return; // 0 not allowed; it's our null sentinel
+ p /= (str::stream() << hex << toTry);
+ if( MMF::exists(p) )
+ return;
+ fileNumber = toTry;
+ }
+
+ string filename(string path, string db, unsigned fileNumber) {
+ boost::filesystem::path p(path);
+ stringstream ss;
+ ss << db << ".c_" << hex << fileNumber;
+ p /= ss.str();
+ return p.string();
+ }
+
+ void cappedcollection::open(string path, string db, NamespaceDetails *nsd) {
+ if( nsd->capped2.cc2_ptr ) // already open
+ return;
+ assertInWriteLock();
+ uassert(13466, str::stream() << "bad filenumber for capped collection in db " << db << " - try repair?", nsd->capped2.fileNumber);
+ auto_ptr<CappedCollection2> cc( new CappedCollection2 );
+ cc->ptr = cc->file.map( filename(path, db, nsd->capped2.fileNumber).c_str() );
+ nsd->capped2.cc2_ptr = (unsigned long long) cc.release();
+ }
+
+ void cappedcollection::create(string path, string ns, NamespaceDetails *nsd, unsigned long long sz) {
+ assertInWriteLock();
+ boost::filesystem::path p(path);
+ NamespaceString nsstr(ns);
+ {
+ stringstream ss;
+ ss << nsstr.db << ".c_";
+ p /= ss.str();
+ }
+
+ massert(13467, str::stream() << "bad cappedcollection::create filenumber - try repair? " << ns, nsd->capped2.fileNumber == 0);
+
+ unsigned n = 0;
+ for( int i = 0; i < 3 && n == 0; i++ ) {
+ findAFileNumber(p, n, ((unsigned) rand()) % 0x100);
+ }
+ for( int i = 0; i < 3 && n == 0; i++ ) {
+ findAFileNumber(p, n, ((unsigned) rand()) % 0x1000);
+ }
+ for( int i = 0; i < 10 && n == 0; i++ ) {
+ findAFileNumber(p, n, (unsigned) security.getNonce());
+ }
+ if( n == 0 ) {
+ uasserted(10000, str::stream() << "couldn't find a file number to assign to new capped collection " << ns);
+ }
+
+ {
+ unsigned long long realSize;
+ if( sz >= 1024 * 1024 )
+ realSize = sz & ~0x1fffLL; // round down to an 8KB boundary
+ else {
+ realSize = (sz + 0x1fff) & (~0x1fffLL);
+ assert(realSize >= sz);
+ realSize += 16 * 1024;
+ }
+
+ auto_ptr<CappedCollection2> cc( new CappedCollection2 );
+ cc->ptr = cc->file.create( filename(path, nsstr.db, n), realSize, true);
+ {
+ // init file header
+ FileHeader *h = (FileHeader *) cc->ptr.at(0, 8192);
+ if( ns.size() < sizeof(h->ns) )
+ strcpy(h->ns, ns.c_str());
+ h->ver1 = 1;
+ h->ver2 = 1;
+ h->fileSize = realSize;
+ h->regionSize = RegionSize;
+ h->nRegions = (unsigned) ((realSize - 8192) / RegionSize);
+ assert( h->nRegions >= 1 );
+
+ // init region headers
+ for( unsigned r = 0; r < h->nRegions-1; r++ ) {
+ Region *region = cc->regionHeader(r);
+ region->size = RegionSize;
+ }
+ unsigned long long left = realSize - 8192 - (h->nRegions - 1) * RegionSize;
+ assert( left > 0 && left <= RegionSize );
+ cc->regionHeader(h->nRegions-1)->size = (unsigned) left;
+ }
+ nsd->capped2.cc2_ptr = (unsigned long long) cc.release();
+ nsd->capped2.fileNumber = n;
+ }
}
}
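
The structs above pin down the on-disk geometry: a 1-byte-packed file header that fills exactly one 8KB block, followed by fixed-size regions whose own headers take another 8KB each. A standalone sketch of that arithmetic, not part of the commit (static_assert is used here purely for illustration):

    #include <cstddef>

    namespace layout_sketch {
        const unsigned RegionSize = 256 * 1024 * 1024;

    #pragma pack(1)
        struct FileHeader {
            char ns[512];
            char reserved[512];
            unsigned ver1;
            unsigned ver2;
            unsigned long long fileSize;
            unsigned regionSize;
            unsigned nRegions;
            char reserved3[8192 - 1024 - 3*8];
        };
    #pragma pack()

        // 1024 bytes of name/reserve + 24 bytes of fields + 7144 bytes of pad:
        // the file header occupies exactly one 8KB block.
        static_assert(sizeof(FileHeader) == 8192, "header is one 8KB block");

        // Region i starts right after the file header; its first record starts
        // another 8192 bytes in, past the region header -- the same offsets
        // regionHeader() and firstRecordHeaderForRegion() compute above.
        inline std::size_t regionOffset(unsigned i)      { return 8192 + (std::size_t)i * RegionSize; }
        inline std::size_t firstRecordOffset(unsigned i) { return regionOffset(i) + 8192; }
    }
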
48 db/capped.h
@@ -7,30 +7,34 @@
namespace mongo {
- class CappedCollection : boost::noncopyable {
- public:
- /* open */
- CappedCollection(string z, const string &dbpath, const string& ns);
-
- /* create */
- CappedCollection(const string& dbpath, const string& ns, unsigned long long size);
-
- /* closes file */
- ~CappedCollection();
-
- struct InsertJob : boost::noncopyable {
- void * data;
- DiskLoc loc;
- InsertJob(CappedCollection *cc);
- ~InsertJob();
- private:
- CappedCollection *_cc;
+ class NamespaceDetails;
+
+ namespace cappedcollection {
+ // must be in write lock for these:
+ void create(string path, string ns, NamespaceDetails *nsd, unsigned long long approxSize);
+ void open (string path, string db, NamespaceDetails *nsd);
+
+ class Insert : boost::noncopyable {
+ public:
+ void* start(unsigned recLen);
+ DiskLoc finish();
+ //private:
+ // CappedCollection *_cc;
};
- void truncateFrom(DiskLoc x);
-
- // drop?
- };
+ }
+
+ /* operations
+ closefile???
+ open
+ create
+ insert
+ insert getting a ptr and then finishing
+ truncateFrom
+ drop
+ empty
+ cursors
+ */
class ForwardCappedCursor2 : public BasicCursor, public AdvanceStrategy {
public:
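
The new cappedcollection namespace replaces the CappedCollection class with free functions plus a two-phase Insert helper. Insert::start()/finish() are only declared in this commit, so the calling convention below is a hypothetical sketch, not implemented behavior; exampleInsert and its parameters are illustrative names, and the usual mongo headers (pch.h, capped.h) are assumed:

    #include <cstring>

    // hypothetical caller -- per the comment above, the write lock must already be held
    DiskLoc exampleInsert(string path, string db,
                          NamespaceDetails *nsd, const char *doc, unsigned docLen) {
        cappedcollection::open(path, db, nsd);   // maps <db>.c_<fileNumber> if not already open
        cappedcollection::Insert job;
        void *p = job.start(docLen);             // presumably reserves docLen bytes in the current region
        memcpy(p, doc, docLen);                  // copy the record body into place
        return job.finish();                     // presumably stamps the record header, returns the new DiskLoc
    }
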
44 db/diskloc.h
@@ -38,91 +38,87 @@ namespace mongo {
(such as adding a virtual function)
*/
class DiskLoc {
- int fileNo; // this will be volume, file #, etc. but is a logical value could be anything depending on storage engine
+ int _a; // this will be volume, file #, etc. but is a logical value could be anything depending on storage engine
int ofs;
public:
enum SentinelValues {
- MaxFiles=16000, // thus a limit of about 32TB of data per db
- NullOfs = -1
+ NullOfs = -1,
+ MaxFiles=16000 // thus a limit of about 32TB of data per db
};
- DiskLoc(int a, int b) : fileNo(a), ofs(b) { }
+ DiskLoc(int a, int b) : _a(a), ofs(b) { }
DiskLoc() { Null(); }
DiskLoc(const DiskLoc& l) {
- fileNo=l.fileNo;
+ _a=l._a;
ofs=l.ofs;
}
bool questionable() const {
return ofs < -1 ||
- fileNo < -1 ||
- fileNo > 524288;
+ _a < -1 ||
+ _a > 524288;
}
- bool isNull() const { return fileNo == -1; }
+ bool isNull() const { return _a == -1; }
void Null() {
- fileNo = NullOfs;
- ofs = 0;
+ _a = -1;
+ ofs = 0; /* note NullOfs is different; todo: clean up. Its uses elsewhere in the code are valid, but they sit outside the DiskLoc context, which is confusing as-is. */
}
void assertOk() { assert(!isNull()); }
void setInvalid() {
- fileNo = -2;
+ _a = -2;
ofs = 0;
}
bool isValid() const {
- return fileNo != -2;
+ return _a != -2;
}
string toString() const {
if ( isNull() )
return "null";
stringstream ss;
- ss << hex << fileNo << ':' << ofs;
+ ss << hex << _a << ':' << ofs;
return ss.str();
}
BSONObj toBSONObj() const {
- return BSON( "file" << fileNo << "offset" << ofs );
+ return BSON( "file" << _a << "offset" << ofs );
}
- int a() const { return fileNo; }
+ int a() const { return _a; }
int& GETOFS() { return ofs; }
int getOfs() const { return ofs; }
void set(int a, int b) {
- fileNo=a;
+ _a=a;
ofs=b;
}
- void setOfs(int _fileNo, int _ofs) {
- fileNo = _fileNo;
- ofs = _ofs;
- }
void inc(int amt) {
assert( !isNull() );
ofs += amt;
}
bool sameFile(DiskLoc b) {
- return fileNo == b.fileNo;
+ return _a == b._a;
}
bool operator==(const DiskLoc& b) const {
- return fileNo==b.fileNo && ofs == b.ofs;
+ return _a == b._a && ofs == b.ofs;
}
bool operator!=(const DiskLoc& b) const {
return !(*this==b);
}
const DiskLoc& operator=(const DiskLoc& b) {
- fileNo=b.fileNo;
+ _a=b._a;
ofs = b.ofs;
//assert(ofs!=0);
return *this;
}
int compare(const DiskLoc& b) const {
- int x = fileNo - b.fileNo;
+ int x = _a - b._a;
if ( x )
return x;
return ofs - b.ofs;
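
The fileNo -> _a rename is mechanical, but the sentinel scheme flagged in the Null() comment is easy to misread: _a == -1 means null, _a == -2 means invalid, and DiskLoc::NullOfs (-1) is a separate sentinel callers use for the ofs field. A small illustrative check, not part of the commit:

    #include <cassert>

    // illustrative only -- assumes DiskLoc as declared above
    void diskLocSentinels() {
        DiskLoc x;                      // default construction calls Null(): _a = -1, ofs = 0
        assert( x.isNull() && x.isValid() );

        x.setInvalid();                 // _a = -2: invalid, but distinct from null
        assert( !x.isNull() && !x.isValid() );

        DiskLoc a(3, 0x1000), b(3, 0x2000);
        assert( a.sameFile(b) );        // same _a
        assert( a.compare(b) < 0 );     // ordered by (_a, ofs)
    }
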
7 db/extsort.cpp
@@ -218,14 +218,13 @@ namespace mongo {
// -----------------------------------
BSONObjExternalSorter::FileIterator::FileIterator( string file ){
- long length;
+ unsigned long long length;
_buf = (char*)_file.map( file.c_str() , length , MemoryMappedFile::SEQUENTIAL );
massert( 10308 , "mmap failed" , _buf );
- assert( (unsigned long long)length == (unsigned long long)file_size( file ) );
+ assert( length == file_size( file ) );
_end = _buf + length;
}
- BSONObjExternalSorter::FileIterator::~FileIterator(){
- }
+ BSONObjExternalSorter::FileIterator::~FileIterator() {}
bool BSONObjExternalSorter::FileIterator::more(){
return _buf < _end;
13 db/namespace.cpp
@@ -67,6 +67,9 @@ namespace mongo {
reservedA = 0;
extraOffset = 0;
backgroundIndexBuildInProgress = 0;
+ reservedB = 0;
+ capped2.cc2_ptr = 0;
+ capped2.fileNumber = 0;
memset(reserved, 0, sizeof(reserved));
}
@@ -104,6 +107,7 @@ namespace mongo {
log() << "backgroundIndexBuildInProgress was " << backgroundIndexBuildInProgress << " for " << k << ", indicating an abnormal db shutdown" << endl;
backgroundIndexBuildInProgress = 0;
}
+ capped2.cc2_ptr = 0;
}
static void namespaceOnLoadCallback(const Namespace& k, NamespaceDetails& v) {
@@ -125,7 +129,7 @@ namespace mongo {
i.dbDropped();
}
*/
- int len = -1;
+ unsigned long long len = 0;
boost::filesystem::path nsPath = path();
string pathString = nsPath.string();
MMF::Pointer p;
@@ -143,10 +147,10 @@ namespace mongo {
// use lenForNewNsFiles, we are making a new database
massert( 10343 , "bad lenForNewNsFiles", lenForNewNsFiles >= 1024*1024 );
maybeMkdir();
- long l = lenForNewNsFiles;
+ unsigned long long l = lenForNewNsFiles;
p = f.map(pathString.c_str(), l);
if( !p.isNull() ) {
- len = (int) l;
+ len = l;
assert( len == lenForNewNsFiles );
}
}
@@ -156,7 +160,8 @@ namespace mongo {
dbexit( EXIT_FS );
}
- ht = new HashTable<Namespace,NamespaceDetails,MMF::Pointer>(p, len, "namespace index");
+ assert( len <= 0x7fffffff );
+ ht = new HashTable<Namespace,NamespaceDetails,MMF::Pointer>(p, (int) len, "namespace index");
if( checkNsFilesOnLoad )
ht->iterAll(namespaceOnLoadCallback);
}
16 db/namespace.h
@@ -113,7 +113,7 @@ namespace mongo {
public:
enum { NIndexesMax = 64, NIndexesExtra = 30, NIndexesBase = 10 };
- /* data fields, as present on disk : */
+ /*-------- data fields, as present on disk : */
DiskLoc firstExtent;
DiskLoc lastExtent;
/* NOTE: capped collections v1 override the meaning of deletedList.
@@ -124,28 +124,40 @@ namespace mongo {
yet computed.
*/
DiskLoc deletedList[Buckets];
+ // ofs 168 (8 byte aligned)
long long datasize;
long long nrecords;
int lastExtentSize;
int nIndexes;
private:
+ // ofs 192
IndexDetails _indexes[NIndexesBase];
public:
+ // ofs 352 (16 byte aligned)
int capped;
int max; // max # of objects for a capped table. TODO: should this be 64 bit?
double paddingFactor; // 1.0 = no padding.
+ // ofs 368 (16)
int flags;
DiskLoc capExtent;
DiskLoc capFirstNewRecord;
unsigned short dataFileVersion; // NamespaceDetails version. So we can do backward compatibility in the future. See filever.h
unsigned short indexFileVersion;
unsigned long long multiKeyIndexBits;
private:
+ // ofs 400 (16)
unsigned long long reservedA;
long long extraOffset; // where the $extra info is located (bytes relative to this)
public:
int backgroundIndexBuildInProgress; // 1 if in prog
- char reserved[76];
+ unsigned reservedB;
+ // ofs 424 (8)
+ struct Capped2 {
+ unsigned long long cc2_ptr; // see capped.cpp
+ unsigned fileNumber;
+ } capped2;
+ char reserved[60];
+ /*-------- end data 496 bytes */
explicit NamespaceDetails( const DiskLoc &loc, bool _capped );
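
With reservedB and Capped2 carved out of the old reserved[76], the on-disk record should still end at the 496-byte mark noted above: NamespaceDetails sits inside a #pragma pack(1) block, so Capped2 contributes 12 unpadded bytes. A hedged compile-time check, illustrative only (not claiming the commit adds one):

    #include <boost/static_assert.hpp>
    // assumes db/namespace.h is included:
    // 416 bytes through extraOffset, then 4 + 4 (backgroundIndexBuildInProgress,
    // reservedB) + 12 (Capped2: 8-byte cc2_ptr + 4-byte fileNumber, packed)
    // + 60 (reserved) = 496 bytes, matching the "end data 496 bytes" note.
    BOOST_STATIC_ASSERT( sizeof(mongo::NamespaceDetails) == 496 );
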
19 db/pdfile.cpp
@@ -376,8 +376,13 @@ namespace mongo {
}
return;
}
-
- _p = mmf.map(filename, size);
+
+ {
+ unsigned long long sz = size;
+ _p = mmf.map(filename, sz);
+ assert( sz <= 0x7fffffff );
+ size = (int) sz;
+ }
header = (DataFileHeader *) _p.at(0, DataFileHeader::HeaderSize);
if( sizeof(char *) == 4 )
uassert( 10084 , "can't map file memory - mongo requires 64 bit build for larger datasets", header);
@@ -429,9 +434,9 @@ namespace mongo {
return cc().database()->addAFile( 0, true )->createExtent(ns, approxSize, newCapped, loops+1);
}
int offset = header->unused.getOfs();
- header->unused.setOfs( fileNo, offset + ExtentSize );
+ header->unused.set( fileNo, offset + ExtentSize );
header->unusedLength -= ExtentSize;
- loc.setOfs(fileNo, offset);
+ loc.set(fileNo, offset);
Extent *e = _getExtent(loc);
DiskLoc emptyLoc = e->init(ns, ExtentSize, fileNo, offset);
@@ -536,7 +541,7 @@ namespace mongo {
/* assumes already zeroed -- insufficient for block 'reuse' perhaps */
DiskLoc Extent::init(const char *nsname, int _length, int _fileNo, int _offset) {
magic = 0x41424344;
- myLoc.setOfs(_fileNo, _offset);
+ myLoc.set(_fileNo, _offset);
xnext.Null();
xprev.Null();
nsDiagnostic = nsname;
@@ -850,13 +855,13 @@ namespace mongo {
if ( todelete->nextOfs == DiskLoc::NullOfs )
e->firstRecord.Null();
else
- e->firstRecord.setOfs(dl.a(), todelete->nextOfs);
+ e->firstRecord.set(dl.a(), todelete->nextOfs);
}
if ( e->lastRecord == dl ) {
if ( todelete->prevOfs == DiskLoc::NullOfs )
e->lastRecord.Null();
else
- e->lastRecord.setOfs(dl.a(), todelete->prevOfs);
+ e->lastRecord.set(dl.a(), todelete->prevOfs);
}
}
4 db/pdfile.h
@@ -310,7 +310,7 @@ namespace mongo {
fileLength = filelength;
version = VERSION;
versionMinor = VERSION_MINOR;
- unused.setOfs( fileno, HeaderSize );
+ unused.set( fileno, HeaderSize );
assert( (data-(char*)this) == HeaderSize );
unusedLength = fileLength - HeaderSize - 16;
//memcpy(data+unusedLength, " \nthe end\n", 16);
@@ -396,7 +396,7 @@ namespace mongo {
return BSONObj(rec());
}
inline DeletedRecord* DiskLoc::drec() const {
- assert( fileNo != -1 );
+ assert( _a != -1 );
return (DeletedRecord*) rec();
}
inline Extent* DiskLoc::ext() const {
4 db/rec.h
@@ -123,12 +123,12 @@ extern StoreToUse *btreeStore;
const int BucketSize = 8192;
inline BtreeBucket* DiskLoc::btree() const {
- assert( fileNo != -1 );
+ assert( _a != -1 );
return (BtreeBucket*) btreeStore->get(*this, BucketSize);
}
inline BtreeBucket* DiskLoc::btreemod() const {
- assert( fileNo != -1 );
+ assert( _a != -1 );
BtreeBucket *b = (BtreeBucket*) btreeStore->get(*this, BucketSize);
btreeStore->modified(*this);
return b;
2 db/tests.cpp
@@ -32,7 +32,7 @@ namespace mongo {
MemoryMappedFile f;
- long len = 64*1024*1024;
+ unsigned long long len = 64*1024*1024;
char *p = (char *) f.map("/tmp/test.dat", len);
char *start = p;
char *end = p + 64*1024*1024-2;
20 util/mmap.cpp
@@ -22,20 +22,28 @@
namespace mongo {
- /*static*/ void MemoryMappedFile::updateLength( const char *filename, long &length ) {
+ /* Create. Must not exist.
+ @param zero fill file with zeros when true
+ */
+ void* MemoryMappedFile::create(string filename, unsigned long long len, bool zero) {
+ uassert( 13468, string("can't create file - already exists: ") + filename, !exists(filename) );
+ void *p = map(filename.c_str(), len);
+ if( p )
+ memset(p, 0, len);
+ return p;
+ }
+
+ /*static*/ void MemoryMappedFile::updateLength( const char *filename, unsigned long long &length ) {
if ( !boost::filesystem::exists( filename ) )
return;
// make sure we map full length if preexisting file.
boost::uintmax_t l = boost::filesystem::file_size( filename );
- assert( l <= 0x7fffffff );
- length = (long) l;
+ length = l;
}
void* MemoryMappedFile::map(const char *filename) {
boost::uintmax_t l = boost::filesystem::file_size( filename );
- assert( l <= 0x7fffffff );
- long i = (long)l;
- return map( filename , i );
+ return map( filename , l );
}
void printMemInfo( const char * where ){
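
create() is the new counterpart to map(): map() takes its size from an existing file (or the passed length), while create() refuses to clobber an existing file and optionally zero-fills the new one. A hedged usage sketch with made-up paths and sizes:

    // illustrative caller only
    void* mmfCreateExample() {
        MemoryMappedFile f;
        unsigned long long sz = 1024ULL * 1024 * 1024;   // 1 GB, hypothetical

        // mapping an existing file instead would report its size back through len:
        //   unsigned long long len = 0;  void *q = f.map("/data/db/example.c_1", len);

        return f.create("/data/db/example.c_1", sz, /*zero*/ true);  // uasserts if the file already exists
    }
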
18 util/mmap.h
@@ -48,7 +48,7 @@ namespace mongo {
public:
virtual ~MongoFile() {}
- virtual long length() = 0;
+ virtual unsigned long long length() const = 0;
enum Options {
SEQUENTIAL = 1 // hint - e.g. FILE_FLAG_SEQUENTIAL_SCAN on windows
@@ -156,7 +156,12 @@ namespace mongo {
/* Creates the file with the given length if it does not exist; otherwise maps
the existing file and reports its size back through the passed length.
*/
- void* map(const char *filename, long &length, int options = 0 );
+ void* map(const char *filename, unsigned long long &length, int options = 0 );
+
+ /* Create. Must not exist.
+ @param zero fill file with zeros when true
+ */
+ void* create(string filename, unsigned long long len, bool zero);
void flush(bool sync);
virtual Flushable * prepareFlush();
@@ -165,19 +170,18 @@ namespace mongo {
return view;
}*/
- long length() {
- return len;
- }
+ long shortLength() const { return (long) len; }
+ unsigned long long length() const { return len; }
string filename() const { return _filename; }
private:
- static void updateLength( const char *filename, long &length );
+ static void updateLength( const char *filename, unsigned long long &length );
HANDLE fd;
HANDLE maphandle;
void *view;
- long len;
+ unsigned long long len;
string _filename;
#ifdef _WIN32
15 util/mmap_win.cpp
@@ -44,9 +44,9 @@ namespace mongo {
fd = 0;
}
- unsigned mapped = 0;
+ unsigned long long mapped = 0;
- void* MemoryMappedFile::map(const char *filenameIn, long &length, int options) {
+ void* MemoryMappedFile::map(const char *filenameIn, unsigned long long &length, int options) {
_filename = filenameIn;
/* big hack here: Babble uses db names with colons. doesn't seem to work on windows. temporary perhaps. */
char filename[256];
@@ -75,23 +75,22 @@ namespace mongo {
GENERIC_WRITE | GENERIC_READ, FILE_SHARE_READ,
NULL, OPEN_ALWAYS, createOptions , NULL);
if ( fd == INVALID_HANDLE_VALUE ) {
- out() << "Create/OpenFile failed " << filename << ' ' << GetLastError() << endl;
+ log() << "Create/OpenFile failed " << filename << ' ' << GetLastError() << endl;
return 0;
}
mapped += length;
- maphandle = CreateFileMapping(fd, NULL, PAGE_READWRITE, 0, length, NULL);
+ maphandle = CreateFileMapping(fd, NULL, PAGE_READWRITE, length >> 32, (unsigned) length, NULL);
if ( maphandle == NULL ) {
- out() << "CreateFileMapping failed " << filename << ' ' << GetLastError() << endl;
+ log() << "CreateFileMapping failed " << filename << ' ' << GetLastError() << endl;
return 0;
}
view = MapViewOfFile(maphandle, FILE_MAP_ALL_ACCESS, 0, 0, 0);
if ( view == 0 ) {
- out() << "MapViewOfFile failed " << filename << " " << errnoWithDescription() << " ";
- out() << GetLastError();
- out() << endl;
+ log() << "MapViewOfFile failed " << filename << " " << errnoWithDescription() << " " <<
+ GetLastError() << endl;
}
len = length;
return view;
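
The CreateFileMapping change above is what lets Windows mappings exceed 4 GB: the 64-bit length has to be split across the call's two DWORD size parameters. The shift/truncate pair used there is equivalent to:

    #include <windows.h>

    // illustrative: splitting a 64-bit size into CreateFileMapping's high/low DWORDs,
    // equivalent to the (length >> 32) / (unsigned)length pair in the diff
    unsigned long long length = 5ULL * 1024 * 1024 * 1024;   // e.g. a 5 GB mapping
    DWORD sizeHigh = (DWORD)(length >> 32);                  // upper 32 bits
    DWORD sizeLow  = (DWORD)(length & 0xffffffffULL);        // lower 32 bits
    // CreateFileMapping(fd, NULL, PAGE_READWRITE, sizeHigh, sizeLow, NULL);
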
