Permalink
Browse files

Tag types system is now in place and integrated

    - Added type file creation in generate_testdb.pl
    - Added logging functions from bbfs
    - Added in some missing types.h includes (query.c, tagdb.c)
    - newdb now takes an argument for the types file
    - Cleaned up naming in _dbstruct_from_file
    - Changed tagfs_getattr to identify more exotic queries
    - Tag tables now store the values associated with a file, but no additional work is necessary to do the insert
    - Brought tests up to date (especially ttdb)
    - And fixed a few minor bugs here and there
  • Loading branch information...
1 parent 3a1c0d5 commit fa28f9e92e6d5b1f112ca7d043e3605a5340e48d Mark Watts committed Apr 8, 2012
Showing with 725 additions and 229 deletions.
  1. +9 −9 Makefile
  2. +40 −0 NOTES
  3. +21 −2 generate_testdb.pl
  4. +176 −0 log.c
  5. +16 −0 log.h
  6. +2 −0 params.h
  7. +58 −19 query.c
  8. +1 −1 query.h
  9. +21 −2 set_ops.c
  10. +44 −20 tagdb.c
  11. +6 −1 tagdb.h
  12. +51 −27 tagdb_priv.c
  13. +68 −29 tagfs.c
  14. +28 −23 test_query.c
  15. +33 −25 test_tagdb.c
  16. +103 −65 test_tagfs.c
  17. +41 −0 types.c
  18. +7 −2 types.h
  19. +0 −4 util.c
View
@@ -1,9 +1,9 @@
-tagfs: tagfs.c tagdb.c util.c code_table.c tokenizer.c stream.c set_ops.c query.c tagdb_priv.c
- gcc -o tagfs `pkg-config --libs --cflags glib-2.0 fuse` tagfs.c tagdb.c util.c code_table.c tokenizer.c stream.c set_ops.c query.c tagdb_priv.c
-ttfs: test_tagfs.c tagdb.c util.c tokenizer.c code_table.c stream.c tagdb_priv.c set_ops.c query.c
- gcc -g -o ttfs `pkg-config --libs --cflags glib-2.0` test_tagfs.c tagdb.c util.c tokenizer.c code_table.c stream.c tagdb_priv.c set_ops.c query.c
-ttdb: test_tagdb.c tagdb.c code_table.c util.c tokenizer.c set_ops.c stream.c
- gcc -g -o ttdb `pkg-config --libs --cflags glib-2.0` test_tagdb.c tagdb.c code_table.c util.c tokenizer.c set_ops.c stream.c
+tagfs: tagfs.c tagdb.c util.c code_table.c tokenizer.c stream.c set_ops.c query.c tagdb_priv.c log.c types.c
+ gcc -o tagfs `pkg-config --libs --cflags glib-2.0 fuse` tagfs.c tagdb.c util.c code_table.c tokenizer.c stream.c set_ops.c query.c tagdb_priv.c log.c types.c
+ttfs: test_tagfs.c tagdb.c util.c tokenizer.c code_table.c stream.c tagdb_priv.c set_ops.c query.c types.c
+ gcc -g -o ttfs `pkg-config --libs --cflags glib-2.0` test_tagfs.c tagdb.c util.c tokenizer.c code_table.c stream.c tagdb_priv.c set_ops.c query.c types.c
+ttdb: test_tagdb.c tagdb.c code_table.c util.c tokenizer.c set_ops.c stream.c tagdb_priv.c query.c types.c
+ gcc -g -o ttdb `pkg-config --libs --cflags glib-2.0` test_tagdb.c tagdb.c code_table.c util.c tokenizer.c set_ops.c stream.c tagdb_priv.c query.c types.c
tcmd: test_cmd.c tagdb.c util.c
gcc -g -o tcmd `pkg-config --libs --cflags glib-2.0` test_cmd.c tagdb.c util.c
tct: test_code_table.c util.c code_table.c
@@ -12,9 +12,9 @@ ths: util.c test_hash_sets.c set_ops.c
gcc -g -o ths `pkg-config --libs --cflags glib-2.0` test_hash_sets.c util.c set_ops.c
ttk: test_tokenizer.c tokenizer.c stream.c
gcc -g -o ttk `pkg-config --libs --cflags glib-2.0` test_tokenizer.c tokenizer.c stream.c
-tq: query.c test_query.c tagdb.c tokenizer.c stream.c util.c code_table.c tagdb_priv.c set_ops.c
- gcc -g -o tq `pkg-config --libs --cflags glib-2.0` test_query.c query.c tagdb.c tokenizer.c stream.c util.c code_table.c tagdb_priv.c set_ops.c
+tq: query.c test_query.c tagdb.c tokenizer.c stream.c util.c code_table.c tagdb_priv.c set_ops.c types.c
+ gcc -g -o tq `pkg-config --libs --cflags glib-2.0` test_query.c query.c tagdb.c tokenizer.c stream.c util.c code_table.c tagdb_priv.c set_ops.c types.c
testdb:
- ./generate_testdb.pl test.db 10 50 10 copies
+ ./generate_testdb.pl test.db 6 50 10 copies
clean:
rm *.o
View
40 NOTES
@@ -14,3 +14,43 @@ so that they are hidden while you can still cd to those directories without
the dot. It shouldn't create any conflicts since tags with filetype names
should refer to files with that type. That all is ultimately up to the user
though.
+
+Tag Types
+----------
+The tag types are given in the name.types file corresponding to the name.db file for a tagdb.
+A tag's type is created when the tag is created and saved when the database is saved. No code
+other than that dealing with the actual storage and retrieval of the database files
+cares about the inclusion of types. However, including them allows us to do some pretty fancy
+stuff without knowing about the type of a tag beforehand.
+
+In terms of actual code, the tagdb struct gets a new member, tag_types, a pointer to a GHashTable.
+This table has the form
+{ tag_code_1=>value_type_1,
+ tag_code_2=>value_type_2, ...}
+Where
+ tag_code_n is an integer tag_code which must be the same as those for the other tables--
+ that is, it uses tag_codes for the "tag_code" s :p
+and
+ value_type_n is an integer type code corresponding to those in types.h. These are also
+ the ones used for query results. Note that the types include record types which may not
+ be translatable into persistent disk storage easily.
+
+The method that fills this data structure from name.types must skip the comments in this file
+(they start with #) and for every non-comment line,
+ 1) read in the tag name (all chars up to ':') and associate it with a code
+ 2) read in the type (everything after the ':') and convert it to an integer (like atoi)
+ 3) Store the tag name and type into the tag_types structure
+There isn't a real sanity check for this process, but if a tag shows up twice, we just
+give it the type of the last entry.
+
+Once we have the tag_types structure we can dispatch on tag type like:
+ switch (tagdb_get_tag_type(db, tag))
+ {
+ case (tagdb_dict_t):
+ //do dict stuff
+ case (tagdb_str_t):
+ //do string stuff
+ }
+or use a function table since switch statements are EVIL (yeah, right). Also, when reading in
+the database, reading in types should obviously be done before reading in the tags so we know
+how to store the data we get in there.
View
@@ -1,5 +1,6 @@
#!/usr/bin/env perl
-
+use Data::Dumper;
+my %tags = ();
sub random_tags_upto_max
{
my $max = shift @_;
@@ -16,7 +17,9 @@ sub random_tags_upto_max
$no = int(rand($max));
$in = grep {$_ eq $no} @used;
} until ($in == 0);
- push @res, "tag" . sprintf("%03d", $no) . ":" . sprintf("%03d", $no);
+ $tname = "tag" . sprintf("%03d", $no);
+ $tags{$tname} = 2; # 2 == tagdb_int_t
+ push @res, $tname . ":" . sprintf("%03d", $no);
}
join ",", @res;
}
@@ -34,16 +37,32 @@ sub numbered_file_with_tags_upto_max
$max_tags_per_file);
}
+# Write the collected tag types to the file named by the first argument,
+# one "name:type" line per entry of the global %tags hash.
+# Dies if the file cannot be opened or closed, rather than silently
+# printing to an unopened handle.
+sub make_types_file
+{
+    my ($path) = @_;
+    # lexical filehandle: scoped to this sub instead of a global bareword
+    open(my $fh, ">", $path) or die "Cannot open $path for writing: $!";
+    for my $key ( keys %tags )
+    {
+        my $value = $tags{$key};
+        print $fh "$key:$value\n";
+    }
+    # close explicitly so buffered write errors are reported
+    close($fh) or die "Cannot close $path: $!";
+}
my $name = shift;
my $size = shift;
my $max_tags = shift;
my $max_tags_per_file = shift;
my $copies_dir = shift;
my @files = ();
+(my $types_name = $name) =~ s/(\..*)$/.types/;
open(FILE, ">", $name);
+for my $f (glob($copies_dir . "/*"))
+{
+ print "Unlinking $f\n";
+ unlink $f;
+}
for my $i (1 .. $size)
{
push @files, numbered_file_with_tags_upto_max($i, $max_tags,
$max_tags_per_file, $copies_dir);
}
print FILE join " ", @files;
+make_types_file($types_name);
View
176 log.c
@@ -0,0 +1,176 @@
+// Since the point of this filesystem is to learn FUSE and its
+// datastructures, I want to see *everything* that happens related to
+// its data structures. This file contains macros and functions to
+// accomplish this.
+
+#include "params.h"
+
+#include <fuse.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "log.h"
+
+// Open the log file `name` for writing and hand back a line-buffered
+// stream. If the file cannot be opened the error is reported with
+// perror and the process exits with EXIT_FAILURE.
+FILE *log_open(const char *name)
+{
+    FILE *fp = fopen(name, "w");
+
+    // without a logfile there is nothing useful we can do
+    if (!fp) {
+        perror("logfile");
+        exit(EXIT_FAILURE);
+    }
+
+    // flush on every newline so the log stays current
+    setvbuf(fp, NULL, _IOLBF, 0);
+    return fp;
+}
+
+// Write a printf-style formatted message to the log file held in the
+// current FUSE context's private data (TAGFS_DATA->logfile).
+//   format - printf-style format string
+//   ...    - arguments consumed by `format`
+void log_msg(const char *format, ...)
+{
+    va_list ap;
+
+    va_start(ap, format);
+    vfprintf(TAGFS_DATA->logfile, format, ap);
+    // every va_start must be matched by va_end before the function returns
+    va_end(ap);
+}
+
+// struct fuse_file_info keeps information about files (surprise!).
+// This dumps all the information in a struct fuse_file_info. The struct
+// definition, and comments, come from /usr/include/fuse/fuse_common.h
+// Duplicated here for convenience.
+// Log every field of *fi through log_struct/log_msg, one field per line.
+// Each value is cast to the exact type its format specifier expects so
+// the output does not depend on how the platform defines the field types.
+void log_fi (struct fuse_file_info *fi)
+{
+    /** Open flags. Available in open() and release() */
+    // int flags;
+    log_struct(fi, flags, 0x%08x, (unsigned int));
+
+    /** Old file handle, don't use */
+    // unsigned long fh_old;
+    log_struct(fi, fh_old, 0x%08lx, (unsigned long));
+
+    /** In case of a write operation indicates if this was caused by a
+        writepage */
+    // int writepage;
+    log_struct(fi, writepage, %d, (int));
+
+    /** Can be filled in by open, to use direct I/O on this file.
+        Introduced in version 2.4 */
+    // unsigned int direct_io : 1;
+    log_struct(fi, direct_io, %d, (int));
+
+    /** Can be filled in by open, to indicate, that cached file data
+        need not be invalidated. Introduced in version 2.4 */
+    // unsigned int keep_cache : 1;
+    log_struct(fi, keep_cache, %d, (int));
+
+    /** Padding. Do not use*/
+    // unsigned int padding : 29;
+
+    /** File handle. May be filled in by filesystem in open().
+        Available in all other file operations */
+    // uint64_t fh;
+    log_struct(fi, fh, 0x%016llx, (unsigned long long));
+
+    /** Lock owner id. Available in locking operations and flush */
+    // uint64_t lock_owner;
+    log_struct(fi, lock_owner, 0x%016llx, (unsigned long long));
+}
+
+// This dumps the info from a struct stat. The struct is defined in
+// <bits/stat.h>; this is indirectly included from <fcntl.h>
+// Log every field of *si through log_struct/log_msg, one field per line.
+// The widths of the POSIX types in struct stat vary between platforms,
+// so each value is cast to the type its format specifier expects.
+void log_stat(struct stat *si)
+{
+    // dev_t st_dev; /* ID of device containing file */
+    log_struct(si, st_dev, %lld, (long long));
+
+    // ino_t st_ino; /* inode number */
+    log_struct(si, st_ino, %lld, (long long));
+
+    // mode_t st_mode; /* protection */
+    log_struct(si, st_mode, 0%o, (unsigned int));
+
+    // nlink_t st_nlink; /* number of hard links */
+    log_struct(si, st_nlink, %d, (int));
+
+    // uid_t st_uid; /* user ID of owner */
+    log_struct(si, st_uid, %d, (int));
+
+    // gid_t st_gid; /* group ID of owner */
+    log_struct(si, st_gid, %d, (int));
+
+    // dev_t st_rdev; /* device ID (if special file) */
+    log_struct(si, st_rdev, %lld, (long long));
+
+    // off_t st_size; /* total size, in bytes */
+    log_struct(si, st_size, %lld, (long long));
+
+    // blksize_t st_blksize; /* blocksize for filesystem I/O */
+    log_struct(si, st_blksize, %ld, (long));
+
+    // blkcnt_t st_blocks; /* number of blocks allocated */
+    log_struct(si, st_blocks, %lld, (long long));
+
+    // time_t st_atime; /* time of last access */
+    log_struct(si, st_atime, 0x%08lx, (unsigned long));
+
+    // time_t st_mtime; /* time of last modification */
+    log_struct(si, st_mtime, 0x%08lx, (unsigned long));
+
+    // time_t st_ctime; /* time of last status change */
+    log_struct(si, st_ctime, 0x%08lx, (unsigned long));
+
+}
+
+// Log every field of *sv through log_struct/log_msg, one field per line.
+// Each value is cast to the type its format specifier expects, since the
+// fsblkcnt_t/fsfilcnt_t widths are platform dependent.
+void log_statvfs(struct statvfs *sv)
+{
+    // unsigned long f_bsize; /* file system block size */
+    log_struct(sv, f_bsize, %ld, (long));
+
+    // unsigned long f_frsize; /* fragment size */
+    log_struct(sv, f_frsize, %ld, (long));
+
+    // fsblkcnt_t f_blocks; /* size of fs in f_frsize units */
+    log_struct(sv, f_blocks, %lld, (long long));
+
+    // fsblkcnt_t f_bfree; /* # free blocks */
+    log_struct(sv, f_bfree, %lld, (long long));
+
+    // fsblkcnt_t f_bavail; /* # free blocks for non-root */
+    log_struct(sv, f_bavail, %lld, (long long));
+
+    // fsfilcnt_t f_files; /* # inodes */
+    log_struct(sv, f_files, %lld, (long long));
+
+    // fsfilcnt_t f_ffree; /* # free inodes */
+    log_struct(sv, f_ffree, %lld, (long long));
+
+    // fsfilcnt_t f_favail; /* # free inodes for non-root */
+    log_struct(sv, f_favail, %lld, (long long));
+
+    // unsigned long f_fsid; /* file system ID */
+    log_struct(sv, f_fsid, %ld, (long));
+
+    // unsigned long f_flag; /* mount flags */
+    log_struct(sv, f_flag, 0x%08lx, (unsigned long));
+
+    // unsigned long f_namemax; /* maximum filename length */
+    log_struct(sv, f_namemax, %ld, (long));
+
+}
+
+// Log both timestamps of *buf through log_struct/log_msg.
+// time_t is cast to unsigned long to match the %lx conversion.
+void log_utime(struct utimbuf *buf)
+{
+    // time_t actime;
+    log_struct(buf, actime, 0x%08lx, (unsigned long));
+
+    // time_t modtime;
+    log_struct(buf, modtime, 0x%08lx, (unsigned long));
+}
View
16 log.h
@@ -0,0 +1,16 @@
+#ifndef _LOG_H_
+#define _LOG_H_
+#include <stdio.h>
+
+// Forward declarations so the prototypes below refer to file-scope struct
+// types even when the including file has not pulled in their definitions.
+struct fuse_file_info;
+struct stat;
+struct statvfs;
+struct utimbuf;
+
+// macro to log fields in structs.
+//   st       - pointer to the struct (parenthesized so any pointer
+//              expression can be passed)
+//   field    - member name; also printed as the label
+//   format   - printf conversion for the member, written without quotes
+//   typecast - optional cast applied to the member, e.g. (long long);
+//              may be left empty
+#define log_struct(st, field, format, typecast) \
+    log_msg(" " #field " = " #format "\n", typecast (st)->field)
+
+// Open the named log file for writing and return its stream.
+FILE *log_open(const char *name);
+// Dump the fields of the given struct to the log, one per line.
+void log_fi (struct fuse_file_info *fi);
+void log_stat(struct stat *si);
+void log_statvfs(struct statvfs *sv);
+void log_utime(struct utimbuf *buf);
+
+// printf-style logging to the filesystem's log file.
+void log_msg(const char *format, ...);
+#endif
View
@@ -1,13 +1,15 @@
#ifndef PARAMS_H
#define PARAMS_H
#include <fuse.h>
+#include <stdio.h>
#include "tagdb.h"
struct tagfs_state {
char *copiesdir;
char *mountdir;
char *listen;
tagdb *db;
+ FILE *logfile;
};
#define TAGFS_DATA ((struct tagfs_state *) fuse_get_context()->private_data)
Oops, something went wrong.

0 comments on commit fa28f9e

Please sign in to comment.