From 162307cb2f4aaa2703d209b3ce650135c6c86e01 Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Wed, 1 Sep 2021 09:38:33 +0800 Subject: [PATCH 1/2] Support java string and concatenation Implement the `invokedynamic` opcode, which runs bootstrap methods in order to do string concatenation, and modify `ldc` so that it can create a new string. Every string is created by the `create_string` function, which places each string on the heap so that strings can be released correctly. Note that the current implementation of `invokedynamic` only supports `makeConcatWithConstants`, so call sites and lambdas aren't supported. Add a new test script: "Strings.java" --- Makefile | 3 +- class-heap.c | 9 +++ classfile.c | 63 +++++++++++++++++++- classfile.h | 17 ++++++ constant-pool.c | 43 ++++++++++++++ constant-pool.h | 19 ++++++ jvm.c | 142 +++++++++++++++++++++++++++++++++++++++++++-- object-heap.c | 24 ++++++++ object-heap.h | 3 + tests/Strings.java | 23 ++++++++ type.h | 3 +- 11 files changed, 341 insertions(+), 8 deletions(-) create mode 100644 tests/Strings.java diff --git a/Makefile b/Makefile index e847e89..21be076 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,8 @@ TESTS = \ Static \ Invokevirtual \ Inherit \ - Initializer + Initializer \ + Strings check: $(addprefix tests/,$(TESTS:=-result.out)) diff --git a/class-heap.c b/class-heap.c index eda6840..75e0284 100644 --- a/class-heap.c +++ b/class-heap.c @@ -109,6 +109,15 @@ void free_class_heap() } free(class_heap.class_info[i]->clazz->methods); + bootmethods_attr_t *bootstrap = + class_heap.class_info[i]->clazz->bootstrap; + if (bootstrap) { + for (u2 j = 0; j < bootstrap->num_bootstrap_methods; j++) + free(bootstrap->bootstrap_methods[j].bootstrap_arguments); + free(bootstrap->bootstrap_methods); + free(bootstrap); + } + free(class_heap.class_info[i]->clazz); free(class_heap.class_info[i]->name); free(class_heap.class_info[i]); diff --git a/classfile.c b/classfile.c index da6242f..f905c62 100644 --- a/classfile.c +++ 
b/classfile.c @@ -22,7 +22,7 @@ class_info_t *get_class_info(FILE *class_file) } /** - * Get the number of integer parameters that a method takes. + * Get the number of parameters that a method takes. * Use the descriptor string of the method to determine its signature. */ uint16_t get_number_of_parameters(method_t *method) @@ -175,6 +175,15 @@ char *find_class_name_from_index(uint16_t idx, class_file_t *clazz) return (char *) name->info; } +bootmethods_t *find_bootstrap_method(uint16_t idx, class_file_t *clazz) +{ + const_pool_info *info = get_constant(&clazz->constant_pool, idx); + assert(info->tag == CONSTANT_InvokeDynamic && "Expected a InvokeDynanmic"); + return &clazz->bootstrap + ->bootstrap_methods[((CONSTANT_InvokeDynamic_info *) info->info) + ->bootstrap_method_attr_index]; +} + void read_field_attributes(FILE *class_file, field_info *info) { for (u2 i = 0; i < info->attributes_count; i++) { @@ -223,6 +232,54 @@ void read_method_attributes(FILE *class_file, assert(found_code && "Missing method code"); } +bootmethods_attr_t *read_bootstrap_attribute(FILE *class_file, + constant_pool_t *cp) +{ + u2 attributes_count = read_u2(class_file); + for (u2 i = 0; i < attributes_count; i++) { + attribute_info ainfo = { + .attribute_name_index = read_u2(class_file), + .attribute_length = read_u4(class_file), + }; + long attribute_end = ftell(class_file) + ainfo.attribute_length; + const_pool_info *type_constant = + get_constant(cp, ainfo.attribute_name_index); + assert(type_constant->tag == CONSTANT_Utf8 && "Expected a UTF8"); + if (!strcmp((char *) type_constant->info, "BootstrapMethods")) { + bootmethods_attr_t *bootstrap = malloc(sizeof(*bootstrap)); + + bootstrap->num_bootstrap_methods = read_u2(class_file); + bootstrap->bootstrap_methods = malloc( + sizeof(bootmethods_t) * bootstrap->num_bootstrap_methods); + + assert(bootstrap->bootstrap_methods && + "Failed to allocate bootstrap method"); + for (int j = 0; j < bootstrap->num_bootstrap_methods; ++j) { + 
bootstrap->bootstrap_methods[j].bootstrap_method_ref = + read_u2(class_file); + bootstrap->bootstrap_methods[j].num_bootstrap_arguments = + read_u2(class_file); + bootstrap->bootstrap_methods[j].bootstrap_arguments = malloc( + sizeof(u2) * + bootstrap->bootstrap_methods[j].num_bootstrap_arguments); + assert(bootstrap->bootstrap_methods[j].bootstrap_arguments && + "Failed to allocate bootstrap argument"); + for (int k = 0; + k < + bootstrap->bootstrap_methods[j].num_bootstrap_arguments; + ++k) { + bootstrap->bootstrap_methods[j].bootstrap_arguments[k] = + read_u2(class_file); + } + } + return bootstrap; + } + /* Skip the rest of the attribute */ + fseek(class_file, attribute_end, SEEK_SET); + } + return NULL; +} + #define IS_STATIC 0x0008 field_t *get_fields(FILE *class_file, constant_pool_t *cp, class_file_t *clazz) @@ -311,6 +368,10 @@ class_file_t get_class(FILE *class_file) /* Read the list of static methods */ clazz.methods = get_methods(class_file, &clazz.constant_pool); + /* Read the list of attributes */ + clazz.bootstrap = + read_bootstrap_attribute(class_file, &clazz.constant_pool); + clazz.initialized = false; return clazz; diff --git a/classfile.h b/classfile.h index 3683a06..ea3d9ee 100644 --- a/classfile.h +++ b/classfile.h @@ -54,12 +54,26 @@ typedef struct { variable_t *static_var; /* store static fields in the class */ } field_t; +typedef struct { + u2 bootstrap_method_ref; + u2 num_bootstrap_arguments; + u2 *bootstrap_arguments; +} bootmethods_t; + +typedef struct { + u2 attribute_name_index; + u4 attribute_length; + u2 num_bootstrap_methods; + bootmethods_t *bootstrap_methods; +} bootmethods_attr_t; + typedef struct class_file { constant_pool_t constant_pool; class_info_t *info; method_t *methods; field_t *fields; u2 fields_count; + bootmethods_attr_t *bootstrap; bool initialized; struct class_file *next; struct class_file *prev; @@ -89,4 +103,7 @@ char *find_field_info_from_index(uint16_t idx, char **name_info, char **descriptor_info); void 
read_field_attributes(FILE *class_file, field_info *info); +bootmethods_t *find_bootstrap_method(uint16_t idx, class_file_t *clazz); +bootmethods_attr_t *read_bootstrap_attribute(FILE *class_file, + constant_pool_t *cp); field_t *get_fields(FILE *class_file, constant_pool_t *cp, class_file_t *clazz); \ No newline at end of file diff --git a/constant-pool.c b/constant-pool.c index be07931..78dc3d8 100644 --- a/constant-pool.c +++ b/constant-pool.c @@ -48,6 +48,23 @@ CONSTANT_Class_info *get_class_name(constant_pool_t *cp, u2 idx) return (CONSTANT_Class_info *) class->info; } +CONSTANT_MethodHandle_info *get_method_handle(constant_pool_t *cp, u2 idx) +{ + const_pool_info *handle = get_constant(cp, idx); + assert(handle->tag == CONSTANT_MethodHandle && "Expected a MethodHandle"); + return (CONSTANT_MethodHandle_info *) handle->info; +} + +char *get_string_utf(constant_pool_t *cp, u2 idx) +{ + const_pool_info *str = get_constant(cp, idx); + assert(str->tag == CONSTANT_String && "Expected a String"); + const_pool_info *utf8 = + get_constant(cp, ((CONSTANT_String_info *) str->info)->string_index); + assert(utf8->tag == CONSTANT_Utf8 && "Expected a UTF8"); + return (char *) utf8->info; +} + constant_pool_t get_constant_pool(FILE *class_file) { constant_pool_t cp = { @@ -120,6 +137,32 @@ constant_pool_t get_constant_pool(FILE *class_file) break; } + case CONSTANT_String: { + CONSTANT_String_info *value = malloc(sizeof(*value)); + assert(value && "Failed to allocate String constant"); + value->string_index = read_u2(class_file); + constant->info = (u1 *) value; + break; + } + + case CONSTANT_InvokeDynamic: { + CONSTANT_InvokeDynamic_info *value = malloc(sizeof(*value)); + assert(value && "Failed to allocate InvokeDynamic constant"); + value->bootstrap_method_attr_index = read_u2(class_file); + value->name_and_type_index = read_u2(class_file); + constant->info = (u1 *) value; + break; + } + + case CONSTANT_MethodHandle: { + CONSTANT_MethodHandle_info *value = 
malloc(sizeof(*value)); + assert(value && "Failed to allocate MethodHandle constant"); + value->reference_kind = read_u1(class_file); + value->reference_index = read_u2(class_file); + constant->info = (u1 *) value; + break; + } + default: fprintf(stderr, "Unknown constant type %d\n", constant->tag); exit(1); diff --git a/constant-pool.h b/constant-pool.h index cd4f5a5..a622168 100644 --- a/constant-pool.h +++ b/constant-pool.h @@ -13,9 +13,12 @@ typedef enum { CONSTANT_Integer = 3, CONSTANT_Long = 5, CONSTANT_Class = 7, + CONSTANT_String = 8, CONSTANT_FieldRef = 9, CONSTANT_MethodRef = 10, CONSTANT_NameAndType = 12, + CONSTANT_MethodHandle = 15, + CONSTANT_InvokeDynamic = 18, } const_pool_tag_t; typedef struct { @@ -41,6 +44,20 @@ typedef struct { u2 descriptor_index; } CONSTANT_NameAndType_info; +typedef struct { + u2 string_index; +} CONSTANT_String_info; + +typedef struct { + u2 bootstrap_method_attr_index; + u2 name_and_type_index; +} CONSTANT_InvokeDynamic_info; + +typedef struct { + u1 reference_kind; + u2 reference_index; +} CONSTANT_MethodHandle_info; + typedef struct { const_pool_tag_t tag; u1 *info; @@ -58,3 +75,5 @@ const_pool_info *get_constant(constant_pool_t *constant_pool, u2 index); constant_pool_t get_constant_pool(FILE *class_file); CONSTANT_FieldOrMethodRef_info *get_methodref(constant_pool_t *cp, u2 idx); CONSTANT_Class_info *get_class_name(constant_pool_t *cp, u2 idx); +CONSTANT_MethodHandle_info *get_method_handle(constant_pool_t *cp, u2 idx); +char *get_string_utf(constant_pool_t *cp, u2 idx); diff --git a/jvm.c b/jvm.c index 4b6ee93..af0ecd5 100644 --- a/jvm.c +++ b/jvm.c @@ -101,6 +101,7 @@ typedef enum { i_invokevirtual = 0xb6, i_invokespecial = 0xb7, i_invokestatic = 0xb8, + i_invokedynamic = 0xba, i_new = 0xbb, } jvm_opcode_t; @@ -501,11 +502,27 @@ stack_entry_t *execute(method_t *method, */ int16_t param = code_buf[pc + 1]; - /* get the constant */ - uint8_t *info = get_constant(&constant_pool, param)->info; - - /* need to check type */ 
- push_int(op_stack, ((CONSTANT_Integer_info *) info)->bytes); + const_pool_info *info = get_constant(&constant_pool, param); + switch (info->tag) { + case CONSTANT_Integer: { + push_int(op_stack, + ((CONSTANT_Integer_info *) info->info)->bytes); + break; + } + case CONSTANT_String: { + char *src = + (char *) get_constant( + &constant_pool, + ((CONSTANT_String_info *) info->info)->string_index) + ->info; + char *dest = create_string(clazz, src); + push_ref(op_stack, dest); + break; + } + default: + assert(0 && "ldc only support int and string"); + break; + } pc += 2; break; } @@ -1002,6 +1019,12 @@ stack_entry_t *execute(method_t *method, printf("%ld\n", op); break; } + /* string */ + case STACK_ENTRY_REF: { + void *op = pop_ref(op_stack); + printf("%s\n", (char *) op); + break; + } default: printf("print type (%d) is not supported\n", element.type); break; @@ -1305,6 +1328,115 @@ stack_entry_t *execute(method_t *method, break; } + /* Invokes a dynamic method */ + case i_invokedynamic: { + uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2]; + uint16_t index = ((param1 << 8) | param2); + + bootmethods_t *bootstrap_method = + find_bootstrap_method(index, clazz); + CONSTANT_MethodHandle_info *handle = get_method_handle( + &clazz->constant_pool, bootstrap_method->bootstrap_method_ref); + + char *method_name, *method_descriptor; + find_method_info_from_index(handle->reference_index, clazz, + &method_name, &method_descriptor); + + if (strcmp(method_name, "makeConcatWithConstants")) + assert(0 && "Only support makeConcatWithConstants"); + + char *arg = NULL; + arg = get_string_utf(&clazz->constant_pool, + bootstrap_method->bootstrap_arguments[0]); + + /* In the first argument string, there are three types of character + * \1 (Unicode point 0001): an ordinary argument. + * \2 (Unicode point 0002): a constant. + * Any other char value: a single character constant. 
+ * + * \1 will be replaced by value in the stack + * \2 will be replaced by value in other bootstrap arguments + */ + uint16_t num_params = 0; + uint16_t num_constant = 0; + char *iter = arg; + while (*iter != '\0') { + if (*iter == 1 || *iter == 2) { + num_params++; + } + iter++; + } + num_constant = strlen(arg) - num_params; + char **recipe = calloc(sizeof(char *), num_params); + size_t max_len = 0; + + iter = arg; + int curr = 0, + arg_num = bootstrap_method->num_bootstrap_arguments - 1; + while (*iter != '\0') { + if (*iter == 1) { + stack_entry_t element = top(op_stack); + switch (element.type) { + /* integer */ + case STACK_ENTRY_INT: + case STACK_ENTRY_SHORT: + case STACK_ENTRY_BYTE: + case STACK_ENTRY_LONG: { + int64_t value = pop_int(op_stack); + /* 20 is the maximal digits in 64 bits sign integer */ + char str[20]; + /* integer to string */ + snprintf(str, 20, "%ld", value); + char *dest = create_string(clazz, str); + recipe[curr] = dest; + break; + } + /* string */ + case STACK_ENTRY_REF: { + recipe[curr] = (char *) pop_ref(op_stack); + break; + } + default: { + printf("unknown stack top type (%d)\n", element.type); + break; + } + } + max_len += strlen(recipe[curr++]); + } else if (*iter == 2) { + recipe[curr] = get_string_utf( + &clazz->constant_pool, + bootstrap_method->bootstrap_arguments[arg_num--]); + max_len += strlen(recipe[curr++]); + } + iter++; + } + + max_len += num_constant; + char *result = calloc(max_len + 1, sizeof(char)); + + iter = arg; + while (*iter != '\0') { + if (*iter == 1 || *iter == 2) { + strcat(result, recipe[--num_params]); + } else { + strncat(result, iter, 1); + } + iter++; + } + result[max_len] = '\0'; + + char *dest = create_string(clazz, result); + push_ref(op_stack, dest); + free(recipe); + free(result); + + /* two bytes values indicate the class in constant pool and the next + * two bytes are always zero, program counter should plus five. 
+ */ + pc += 5; + break; + } + default: fprintf(stderr, "Unknown instruction %x\n", current); exit(1); diff --git a/object-heap.c b/object-heap.c index 6674556..1441888 100644 --- a/object-heap.c +++ b/object-heap.c @@ -40,6 +40,25 @@ object_t *create_object(class_file_t *clazz) return new_obj; } +char *create_string(class_file_t *clazz, char *src) +{ + size_t len = strlen(src); + char *dest = calloc((len + 1), sizeof(char)); + strncpy(dest, src, len); + + object_t *str_obj = malloc(sizeof(object_t)); + str_obj->value = malloc(sizeof(variable_t)); + str_obj->value->type = VAR_STR_PTR; + str_obj->value->value.ptr_value = dest; + str_obj->class = clazz; + str_obj->parent = NULL; + str_obj->fields_count = 1; + + object_heap.objects[object_heap.length++] = str_obj; + + return dest; +} + variable_t *find_field_addr(object_t *obj, char *name) { field_t *field = obj->class->fields; @@ -57,6 +76,11 @@ void free_object_heap() /* free object and all its parent */ for (object_t *cur = object_heap.objects[i], *next; cur; cur = next) { next = cur->parent; + if (cur->value) { + if (cur->value->type == VAR_STR_PTR) { + free(cur->value->value.ptr_value); + } + } free(cur->value); free(cur); } diff --git a/object-heap.h b/object-heap.h index b58104e..9c1a372 100644 --- a/object-heap.h +++ b/object-heap.h @@ -1,5 +1,7 @@ #pragma once +#include + #include "classfile.h" #include "list.h" @@ -18,4 +20,5 @@ typedef struct { void init_object_heap(); void free_object_heap(); object_t *create_object(class_file_t *clazz); +char *create_string(class_file_t *clazz, char *src); variable_t *find_field_addr(object_t *obj, char *name); \ No newline at end of file diff --git a/tests/Strings.java b/tests/Strings.java new file mode 100644 index 0000000..fcbf167 --- /dev/null +++ b/tests/Strings.java @@ -0,0 +1,23 @@ +public class Strings { + public static String f(String x) { + return x + " abc " + x; + } + public static void main(String args[]) + { + String str1 = "Hello"; + String str2 = " Jvm "; 
+ String str3 = "string"; + String str4 = "x\bx\tx\\x\nx\1x\2x\3x\4x\5x\6x"; + int x = 100; + long y = java.lang.Long.MAX_VALUE; + /* test string argument */ + System.out.println(f("test")); + /* test string concat */ + System.out.println(str1 + " constant " + str2); + System.out.println(str1 + str2 + str3 + str4); + /* test string concat with number */ + System.out.println("1" + 2 + x + "3" + y + 0.8 + str2); + /* test invokedynamic arguments */ + System.out.println("prefix \1" + str1 + "suffix \2"); + } +} \ No newline at end of file diff --git a/type.h b/type.h index 68435aa..438b3f2 100644 --- a/type.h +++ b/type.h @@ -13,7 +13,8 @@ typedef enum { VAR_SHORT = 2, VAR_INT = 3, VAR_LONG = 4, - VAR_PTR = 5, /* reference */ + VAR_PTR = 5, /* reference */ + VAR_STR_PTR = 6 /* string reference */ } variable_type_t; typedef union { From 32eedd6d46beb722b2fb8879a44d50552a9fbf83 Mon Sep 17 00:00:00 2001 From: hankluo6 Date: Wed, 31 Aug 2022 14:28:27 +0800 Subject: [PATCH 2/2] Add aload opcode --- jvm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/jvm.c b/jvm.c index af0ecd5..49d8f5e 100644 --- a/jvm.c +++ b/jvm.c @@ -577,6 +577,15 @@ stack_entry_t *execute(method_t *method, break; } + /* Load object from local variable */ + case i_aload: { + int32_t param = code_buf[pc + 1]; + object_t *obj = locals[param].entry.ptr_value; + + push_ref(op_stack, obj); + pc += 2; + } break; + /* Load object from local variable */ case i_aload_0: case i_aload_1: