diff --git a/extract.cc b/extract.cc new file mode 100644 index 0000000..98240f7 --- /dev/null +++ b/extract.cc @@ -0,0 +1,138 @@ +// Copyright 2011 Shinichiro Hamaji. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY Shinichiro Hamaji ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Shinichiro Hamaji OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +// A command line tool to extract a Mach-O binary from a fat binary. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mach-o/fat.h" + +using namespace std; + +bool be = false; + +static void fixEndian(uint32_t* p) { + if (!be) { + return; + } + + uint32_t v = *p; + *p = (v << 24) | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0xff00) | (v >> 24); +} + +static const char* getArchName(uint32_t a) { + switch (a) { + case CPU_TYPE_X86: + return "x86"; + case CPU_TYPE_X86_64: + return "x86-64"; + case CPU_TYPE_POWERPC: + return "ppc"; + case CPU_TYPE_POWERPC64: + return "ppc64"; + default: + return "???"; + } +} + +int main(int argc, char* argv[]) { + if (argc < 2) { + fprintf(stderr, "Usage: %s fat [arch out]\n", argv[0]); + exit(1); + } + + int fd = open(argv[1], O_RDONLY); + if (fd < 0) { + perror("open"); + exit(1); + } + + off_t len = lseek(fd, 0, SEEK_END); + char* bin = reinterpret_cast( + mmap(NULL, len, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0)); + + fat_header* header = reinterpret_cast(bin); + + if (header->magic == FAT_CIGAM) { + be = true; + } else if (header->magic != FAT_MAGIC) { + fprintf(stderr, "Not fat\n"); + exit(1); + } + + fixEndian(&header->nfat_arch); + + printf("magic=%x nfat_arch=%d\n", + header->magic, header->nfat_arch); + + map archs; + + char* fat_ptr = bin + sizeof(fat_header); + for (uint32_t i = 0; i < header->nfat_arch; i++) { + fat_arch* arch = reinterpret_cast(fat_ptr); + + fixEndian(&arch->cputype); + fixEndian(&arch->cpusubtype); + fixEndian(&arch->offset); + fixEndian(&arch->size); + fixEndian(&arch->align); + + const char* name = getArchName(arch->cputype); + + printf("cputype=%d (%s) cpusubtype=%d offset=%d size=%d align=%d\n", + arch->cputype, name, arch->cpusubtype, + arch->offset, arch->size, arch->align); + + archs.insert(make_pair(name, arch)); + + fat_ptr += sizeof(fat_arch); + } + + for (int i = 2; i + 1 < argc; i += 2) { + const char* arch_name = argv[i]; + map::const_iterator found = archs.find(arch_name); + if (found == 
archs.end()) { + printf("unknown arch: %s\n", arch_name); + continue; + } + + fat_arch* arch = found->second; + FILE* fp = fopen(argv[i+1], "wb"); + fwrite(bin + arch->offset, 1, arch->size, fp); + fclose(fp); + } +} diff --git a/include/_types.h b/include/_types.h new file mode 100644 index 0000000..980c1a2 --- /dev/null +++ b/include/_types.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2004, 2008, 2009 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef __TYPES_H_ +#define __TYPES_H_ + +#include +//#include + +#if __GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 7 +#define __strfmonlike(fmtarg, firstvararg) \ + __attribute__((__format__ (__strfmon__, fmtarg, firstvararg))) +#define __strftimelike(fmtarg) \ + __attribute__((__format__ (__strftime__, fmtarg, 0))) +#else +#define __strfmonlike(fmtarg, firstvararg) +#define __strftimelike(fmtarg) +#endif + +typedef int __darwin_nl_item; +typedef int __darwin_wctrans_t; +#ifdef __LP64__ +typedef uint32_t __darwin_wctype_t; +#else /* !__LP64__ */ +typedef unsigned long __darwin_wctype_t; +#endif /* __LP64__ */ + +#ifdef __WCHAR_MAX__ +#define __DARWIN_WCHAR_MAX __WCHAR_MAX__ +#else /* ! __WCHAR_MAX__ */ +#define __DARWIN_WCHAR_MAX 0x7fffffff +#endif /* __WCHAR_MAX__ */ + +#if __DARWIN_WCHAR_MAX > 0xffffU +#define __DARWIN_WCHAR_MIN (-0x7fffffff - 1) +#else +#define __DARWIN_WCHAR_MIN 0 +#endif +#define __DARWIN_WEOF ((__darwin_wint_t)-1) + +#ifndef _FORTIFY_SOURCE +# if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) < 1050) +# define _FORTIFY_SOURCE 0 +# else +# define _FORTIFY_SOURCE 2 /* on by default */ +# endif +#endif + +#endif /* __TYPES_H_ */ diff --git a/include/mac-ctype.h b/include/mac-ctype.h new file mode 100644 index 0000000..495f0f4 --- /dev/null +++ b/include/mac-ctype.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2000, 2005, 2008 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ctype.h 8.4 (Berkeley) 1/21/94 + */ + +#ifndef MAC_CTYPE_H_ +#define MAC_CTYPE_H_ + +#define _CTYPE_A 0x00000100L /* Alpha */ +#define _CTYPE_C 0x00000200L /* Control */ +#define _CTYPE_D 0x00000400L /* Digit */ +#define _CTYPE_G 0x00000800L /* Graph */ +#define _CTYPE_L 0x00001000L /* Lower */ +#define _CTYPE_P 0x00002000L /* Punct */ +#define _CTYPE_S 0x00004000L /* Space */ +#define _CTYPE_U 0x00008000L /* Upper */ +#define _CTYPE_X 0x00010000L /* X digit */ +#define _CTYPE_B 0x00020000L /* Blank */ +#define _CTYPE_R 0x00040000L /* Print */ +#define _CTYPE_I 0x00080000L /* Ideogram */ +#define _CTYPE_T 0x00100000L /* Special */ +#define _CTYPE_Q 0x00200000L /* Phonogram */ + +/* + * Backward compatibility + */ +#define _A _CTYPE_A /* Alpha */ +#define _C _CTYPE_C /* Control */ +#define _D _CTYPE_D /* Digit */ +#define _G _CTYPE_G /* Graph */ +#define _L _CTYPE_L /* Lower */ +#define _P _CTYPE_P /* Punct */ +#define _S _CTYPE_S /* Space */ +#define _U _CTYPE_U /* Upper */ +#define _X _CTYPE_X /* X digit */ +#define _B _CTYPE_B /* Blank */ +#define _R _CTYPE_R /* Print */ +#define _I _CTYPE_I /* Ideogram */ +#define _T _CTYPE_T /* Special */ +#define _Q _CTYPE_Q /* Phonogram */ + 
+#endif /* !MAC_CTYPE_H_ */ diff --git a/include/mach-o/fat.h b/include/mach-o/fat.h new file mode 100644 index 0000000..d6cfe04 --- /dev/null +++ b/include/mach-o/fat.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _MACH_O_FAT_H_ +#define _MACH_O_FAT_H_ +/* + * This header file describes the structures of the file format for "fat" + * architecture specific file (wrapper design). At the beginning of the file + * there is one fat_header structure followed by a number of fat_arch + * structures. For each architecture in the file, specified by a pair of + * cputype and cpusubtype, a fat_arch structure describes the file offset, file + * size and alignment in the file of the architecture specific member. + * The padded bytes in the file to place each member on its specific alignment + * are defined to be read as zeros and can be left as "holes" if the file system + * can support them as long as they read as zeros. + * + * All structures defined here are always written and read to/from disk + * in big-endian order. 
+ */ + +/* + * is needed here for the cpu_type_t and cpu_subtype_t types + * and contains the constants for the possible values of these types. + */ +#include +#include +//#include + +#define FAT_MAGIC 0xcafebabe +#define FAT_CIGAM 0xbebafeca /* NXSwapLong(FAT_MAGIC) */ + +struct fat_header { + uint32_t magic; /* FAT_MAGIC */ + uint32_t nfat_arch; /* number of structs that follow */ +}; + +struct fat_arch { + cpu_type_t cputype; /* cpu specifier (int) */ + cpu_subtype_t cpusubtype; /* machine specifier (int) */ + uint32_t offset; /* file offset to this object file */ + uint32_t size; /* size of this object file */ + uint32_t align; /* alignment as a power of 2 */ +}; + +#endif /* _MACH_O_FAT_H_ */ diff --git a/include/mach-o/loader.h b/include/mach-o/loader.h new file mode 100644 index 0000000..5c849fe --- /dev/null +++ b/include/mach-o/loader.h @@ -0,0 +1,1343 @@ +/* + * Copyright (c) 1999-2008 Apple Inc. All Rights Reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _MACHO_LOADER_H_ +#define _MACHO_LOADER_H_ + +/* + * This file describes the format of mach object files. 
+ */ +#include + +/* + * is needed here for the cpu_type_t and cpu_subtype_t types + * and contains the constants for the possible values of these types. + */ +#include + +/* + * is needed here for the vm_prot_t type and contains the + * constants that are or'ed together for the possible values of this type. + */ +#include + +/* + * is expected to define the flavors of the thread + * states and the structures of those flavors for each machine. + */ +//#include +//#include + +/* + * The 32-bit mach header appears at the very beginning of the object file for + * 32-bit architectures. + */ +struct mach_header { + uint32_t magic; /* mach magic number identifier */ + cpu_type_t cputype; /* cpu specifier */ + cpu_subtype_t cpusubtype; /* machine specifier */ + uint32_t filetype; /* type of file */ + uint32_t ncmds; /* number of load commands */ + uint32_t sizeofcmds; /* the size of all the load commands */ + uint32_t flags; /* flags */ +}; + +/* Constant for the magic field of the mach_header (32-bit architectures) */ +#define MH_MAGIC 0xfeedface /* the mach magic number */ +#define MH_CIGAM 0xcefaedfe /* NXSwapInt(MH_MAGIC) */ + +/* + * The 64-bit mach header appears at the very beginning of object files for + * 64-bit architectures. + */ +struct mach_header_64 { + uint32_t magic; /* mach magic number identifier */ + cpu_type_t cputype; /* cpu specifier */ + cpu_subtype_t cpusubtype; /* machine specifier */ + uint32_t filetype; /* type of file */ + uint32_t ncmds; /* number of load commands */ + uint32_t sizeofcmds; /* the size of all the load commands */ + uint32_t flags; /* flags */ + uint32_t reserved; /* reserved */ +}; + +/* Constant for the magic field of the mach_header_64 (64-bit architectures) */ +#define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ +#define MH_CIGAM_64 0xcffaedfe /* NXSwapInt(MH_MAGIC_64) */ + +/* + * The layout of the file depends on the filetype. 
For all but the MH_OBJECT + * file type the segments are padded out and aligned on a segment alignment + * boundary for efficient demand paging. The MH_EXECUTE, MH_FVMLIB, MH_DYLIB, + * MH_DYLINKER and MH_BUNDLE file types also have the headers included as part + * of their first segment. + * + * The file type MH_OBJECT is a compact format intended as output of the + * assembler and input (and possibly output) of the link editor (the .o + * format). All sections are in one unnamed segment with no segment padding. + * This format is used as an executable format when the file is so small the + * segment padding greatly increases its size. + * + * The file type MH_PRELOAD is an executable format intended for things that + * are not executed under the kernel (proms, stand alones, kernels, etc). The + * format can be executed under the kernel but may be demand paged and not + * preloaded before execution. + * + * A core file is in MH_CORE format and can be any arbitrary legal + * Mach-O file. 
+ * + * Constants for the filetype field of the mach_header + */ +#define MH_OBJECT 0x1 /* relocatable object file */ +#define MH_EXECUTE 0x2 /* demand paged executable file */ +#define MH_FVMLIB 0x3 /* fixed VM shared library file */ +#define MH_CORE 0x4 /* core file */ +#define MH_PRELOAD 0x5 /* preloaded executable file */ +#define MH_DYLIB 0x6 /* dynamically bound shared library */ +#define MH_DYLINKER 0x7 /* dynamic link editor */ +#define MH_BUNDLE 0x8 /* dynamically bound bundle file */ +#define MH_DYLIB_STUB 0x9 /* shared library stub for static */ + /* linking only, no section contents */ +#define MH_DSYM 0xa /* companion file with only debug */ + /* sections */ +#define MH_KEXT_BUNDLE 0xb /* x86_64 kexts */ + +/* Constants for the flags field of the mach_header */ +#define MH_NOUNDEFS 0x1 /* the object file has no undefined + references */ +#define MH_INCRLINK 0x2 /* the object file is the output of an + incremental link against a base file + and can't be link edited again */ +#define MH_DYLDLINK 0x4 /* the object file is input for the + dynamic linker and can't be staticly + link edited again */ +#define MH_BINDATLOAD 0x8 /* the object file's undefined + references are bound by the dynamic + linker when loaded. */ +#define MH_PREBOUND 0x10 /* the file has its dynamic undefined + references prebound. */ +#define MH_SPLIT_SEGS 0x20 /* the file has its read-only and + read-write segments split */ +#define MH_LAZY_INIT 0x40 /* the shared library init routine is + to be run lazily via catching memory + faults to its writeable segments + (obsolete) */ +#define MH_TWOLEVEL 0x80 /* the image is using two-level name + space bindings */ +#define MH_FORCE_FLAT 0x100 /* the executable is forcing all images + to use flat name space bindings */ +#define MH_NOMULTIDEFS 0x200 /* this umbrella guarantees no multiple + defintions of symbols in its + sub-images so the two-level namespace + hints can always be used. 
*/ +#define MH_NOFIXPREBINDING 0x400 /* do not have dyld notify the + prebinding agent about this + executable */ +#define MH_PREBINDABLE 0x800 /* the binary is not prebound but can + have its prebinding redone. only used + when MH_PREBOUND is not set. */ +#define MH_ALLMODSBOUND 0x1000 /* indicates that this binary binds to + all two-level namespace modules of + its dependent libraries. only used + when MH_PREBINDABLE and MH_TWOLEVEL + are both set. */ +#define MH_SUBSECTIONS_VIA_SYMBOLS 0x2000/* safe to divide up the sections into + sub-sections via symbols for dead + code stripping */ +#define MH_CANONICAL 0x4000 /* the binary has been canonicalized + via the unprebind operation */ +#define MH_WEAK_DEFINES 0x8000 /* the final linked image contains + external weak symbols */ +#define MH_BINDS_TO_WEAK 0x10000 /* the final linked image uses + weak symbols */ + +#define MH_ALLOW_STACK_EXECUTION 0x20000/* When this bit is set, all stacks + in the task will be given stack + execution privilege. Only used in + MH_EXECUTE filetypes. */ +#define MH_DEAD_STRIPPABLE_DYLIB 0x400000 /* Only for use on dylibs. When + linking against a dylib that + has this bit set, the static linker + will automatically not create a + LC_LOAD_DYLIB load command to the + dylib if no symbols are being + referenced from the dylib. */ +#define MH_ROOT_SAFE 0x40000 /* When this bit is set, the binary + declares it is safe for use in + processes with uid zero */ + +#define MH_SETUID_SAFE 0x80000 /* When this bit is set, the binary + declares it is safe for use in + processes when issetugid() is true */ + +#define MH_NO_REEXPORTED_DYLIBS 0x100000 /* When this bit is set on a dylib, + the static linker does not need to + examine dependent dylibs to see + if any are re-exported */ +#define MH_PIE 0x200000 /* When this bit is set, the OS will + load the main executable at a + random address. Only used in + MH_EXECUTE filetypes. */ + +/* + * The load commands directly follow the mach_header. 
The total size of all + * of the commands is given by the sizeofcmds field in the mach_header. All + * load commands must have as their first two fields cmd and cmdsize. The cmd + * field is filled in with a constant for that command type. Each command type + * has a structure specifically for it. The cmdsize field is the size in bytes + * of the particular load command structure plus anything that follows it that + * is a part of the load command (i.e. section structures, strings, etc.). To + * advance to the next load command the cmdsize can be added to the offset or + * pointer of the current load command. The cmdsize for 32-bit architectures + * MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple + * of 8 bytes (these are forever the maximum alignment of any load commands). + * The padded bytes must be zero. All tables in the object file must also + * follow these rules so the file can be memory mapped. Otherwise the pointers + * to these tables will not work well or at all on some machines. With all + * padding zeroed, like objects will compare byte for byte. + */ +struct load_command { + uint32_t cmd; /* type of load command */ + uint32_t cmdsize; /* total size of command in bytes */ +}; + +/* + * After MacOS X 10.1 when a new load command is added that is required to be + * understood by the dynamic linker for the image to execute properly the + * LC_REQ_DYLD bit will be or'ed into the load command constant. If the dynamic + * linker sees such a load command that it does not understand, it will issue an + * "unknown load command required for execution" error and refuse to use the + * image. Other load commands without this bit that are not understood will + * simply be ignored. 
+ */ +#define LC_REQ_DYLD 0x80000000 + +/* Constants for the cmd field of all load commands, the type */ +#define LC_SEGMENT 0x1 /* segment of this file to be mapped */ +#define LC_SYMTAB 0x2 /* link-edit stab symbol table info */ +#define LC_SYMSEG 0x3 /* link-edit gdb symbol table info (obsolete) */ +#define LC_THREAD 0x4 /* thread */ +#define LC_UNIXTHREAD 0x5 /* unix thread (includes a stack) */ +#define LC_LOADFVMLIB 0x6 /* load a specified fixed VM shared library */ +#define LC_IDFVMLIB 0x7 /* fixed VM shared library identification */ +#define LC_IDENT 0x8 /* object identification info (obsolete) */ +#define LC_FVMFILE 0x9 /* fixed VM file inclusion (internal use) */ +#define LC_PREPAGE 0xa /* prepage command (internal use) */ +#define LC_DYSYMTAB 0xb /* dynamic link-edit symbol table info */ +#define LC_LOAD_DYLIB 0xc /* load a dynamically linked shared library */ +#define LC_ID_DYLIB 0xd /* dynamically linked shared lib ident */ +#define LC_LOAD_DYLINKER 0xe /* load a dynamic linker */ +#define LC_ID_DYLINKER 0xf /* dynamic linker identification */ +#define LC_PREBOUND_DYLIB 0x10 /* modules prebound for a dynamically */ + /* linked shared library */ +#define LC_ROUTINES 0x11 /* image routines */ +#define LC_SUB_FRAMEWORK 0x12 /* sub framework */ +#define LC_SUB_UMBRELLA 0x13 /* sub umbrella */ +#define LC_SUB_CLIENT 0x14 /* sub client */ +#define LC_SUB_LIBRARY 0x15 /* sub library */ +#define LC_TWOLEVEL_HINTS 0x16 /* two-level namespace lookup hints */ +#define LC_PREBIND_CKSUM 0x17 /* prebind checksum */ + +/* + * load a dynamically linked shared library that is allowed to be missing + * (all symbols are weak imported). 
+ */ +#define LC_LOAD_WEAK_DYLIB (0x18 | LC_REQ_DYLD) + +#define LC_SEGMENT_64 0x19 /* 64-bit segment of this file to be + mapped */ +#define LC_ROUTINES_64 0x1a /* 64-bit image routines */ +#define LC_UUID 0x1b /* the uuid */ +#define LC_RPATH (0x1c | LC_REQ_DYLD) /* runpath additions */ +#define LC_CODE_SIGNATURE 0x1d /* location of code signature */ +#define LC_SEGMENT_SPLIT_INFO 0x1e /* location of info to split segments */ +#define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) /* load and re-export dylib */ +#define LC_LAZY_LOAD_DYLIB 0x20 /* delay load of dylib until first use */ +#define LC_ENCRYPTION_INFO 0x21 /* encrypted segment information */ +#define LC_DYLD_INFO 0x22 /* compressed dyld information */ +#define LC_DYLD_INFO_ONLY (0x22|LC_REQ_DYLD) /* compressed dyld information only */ + +/* + * A variable length string in a load command is represented by an lc_str + * union. The strings are stored just after the load command structure and + * the offset is from the start of the load command structure. The size + * of the string is reflected in the cmdsize field of the load command. + * Once again any padded bytes to bring the cmdsize field to a multiple + * of 4 bytes must be zero. + */ +union lc_str { + uint32_t offset; /* offset to the string */ +#ifndef __LP64__ + char *ptr; /* pointer to the string */ +#endif +}; + +/* + * The segment load command indicates that a part of this file is to be + * mapped into the task's address space. The size of this segment in memory, + * vmsize, may be equal to or larger than the amount to map from this file, + * filesize. The file is mapped starting at fileoff to the beginning of + * the segment in memory, vmaddr. The rest of the memory of the segment, + * if any, is allocated zero fill on demand. The segment's maximum virtual + * memory protection and initial virtual memory protection are specified + * by the maxprot and initprot fields. 
If the segment has sections then the + * section structures directly follow the segment command and their size is + * reflected in cmdsize. + */ +struct segment_command { /* for 32-bit architectures */ + uint32_t cmd; /* LC_SEGMENT */ + uint32_t cmdsize; /* includes sizeof section structs */ + char segname[16]; /* segment name */ + uint32_t vmaddr; /* memory address of this segment */ + uint32_t vmsize; /* memory size of this segment */ + uint32_t fileoff; /* file offset of this segment */ + uint32_t filesize; /* amount to map from the file */ + vm_prot_t maxprot; /* maximum VM protection */ + vm_prot_t initprot; /* initial VM protection */ + uint32_t nsects; /* number of sections in segment */ + uint32_t flags; /* flags */ +}; + +/* + * The 64-bit segment load command indicates that a part of this file is to be + * mapped into a 64-bit task's address space. If the 64-bit segment has + * sections then section_64 structures directly follow the 64-bit segment + * command and their size is reflected in cmdsize. 
+ */ +struct segment_command_64 { /* for 64-bit architectures */ + uint32_t cmd; /* LC_SEGMENT_64 */ + uint32_t cmdsize; /* includes sizeof section_64 structs */ + char segname[16]; /* segment name */ + uint64_t vmaddr; /* memory address of this segment */ + uint64_t vmsize; /* memory size of this segment */ + uint64_t fileoff; /* file offset of this segment */ + uint64_t filesize; /* amount to map from the file */ + vm_prot_t maxprot; /* maximum VM protection */ + vm_prot_t initprot; /* initial VM protection */ + uint32_t nsects; /* number of sections in segment */ + uint32_t flags; /* flags */ +}; + +/* Constants for the flags field of the segment_command */ +#define SG_HIGHVM 0x1 /* the file contents for this segment is for + the high part of the VM space, the low part + is zero filled (for stacks in core files) */ +#define SG_FVMLIB 0x2 /* this segment is the VM that is allocated by + a fixed VM library, for overlap checking in + the link editor */ +#define SG_NORELOC 0x4 /* this segment has nothing that was relocated + in it and nothing relocated to it, that is + it maybe safely replaced without relocation*/ +#define SG_PROTECTED_VERSION_1 0x8 /* This segment is protected. If the + segment starts at file offset 0, the + first page of the segment is not + protected. All other pages of the + segment are protected. */ + +/* + * A segment is made up of zero or more sections. Non-MH_OBJECT files have + * all of their segments with the proper sections in each, and padded to the + * specified segment alignment when produced by the link editor. The first + * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header + * and load commands of the object file before its first section. The zero + * fill sections are always last in their segment (in all formats). This + * allows the zeroed segment padding to be mapped into memory where zero fill + * sections might be. 
The gigabyte zero fill sections, those with the section + * type S_GB_ZEROFILL, can only be in a segment with sections of this type. + * These segments are then placed after all other segments. + * + * The MH_OBJECT format has all of its sections in one segment for + * compactness. There is no padding to a specified segment boundary and the + * mach_header and load commands are not part of the segment. + * + * Sections with the same section name, sectname, going into the same segment, + * segname, are combined by the link editor. The resulting section is aligned + * to the maximum alignment of the combined sections and is the new section's + * alignment. The combined sections are aligned to their original alignment in + * the combined section. Any padded bytes to get the specified alignment are + * zeroed. + * + * The format of the relocation entries referenced by the reloff and nreloc + * fields of the section structure for mach object files is described in the + * header file . + */ +struct section { /* for 32-bit architectures */ + char sectname[16]; /* name of this section */ + char segname[16]; /* segment this section goes in */ + uint32_t addr; /* memory address of this section */ + uint32_t size; /* size in bytes of this section */ + uint32_t offset; /* file offset of this section */ + uint32_t align; /* section alignment (power of 2) */ + uint32_t reloff; /* file offset of relocation entries */ + uint32_t nreloc; /* number of relocation entries */ + uint32_t flags; /* flags (section type and attributes)*/ + uint32_t reserved1; /* reserved (for offset or index) */ + uint32_t reserved2; /* reserved (for count or sizeof) */ +}; + +struct section_64 { /* for 64-bit architectures */ + char sectname[16]; /* name of this section */ + char segname[16]; /* segment this section goes in */ + uint64_t addr; /* memory address of this section */ + uint64_t size; /* size in bytes of this section */ + uint32_t offset; /* file offset of this section */ + uint32_t align; /* 
section alignment (power of 2) */ + uint32_t reloff; /* file offset of relocation entries */ + uint32_t nreloc; /* number of relocation entries */ + uint32_t flags; /* flags (section type and attributes)*/ + uint32_t reserved1; /* reserved (for offset or index) */ + uint32_t reserved2; /* reserved (for count or sizeof) */ + uint32_t reserved3; /* reserved */ +}; + +/* + * The flags field of a section structure is separated into two parts a section + * type and section attributes. The section types are mutually exclusive (it + * can only have one type) but the section attributes are not (it may have more + * than one attribute). + */ +#define SECTION_TYPE 0x000000ff /* 256 section types */ +#define SECTION_ATTRIBUTES 0xffffff00 /* 24 section attributes */ + +/* Constants for the type of a section */ +#define S_REGULAR 0x0 /* regular section */ +#define S_ZEROFILL 0x1 /* zero fill on demand section */ +#define S_CSTRING_LITERALS 0x2 /* section with only literal C strings*/ +#define S_4BYTE_LITERALS 0x3 /* section with only 4 byte literals */ +#define S_8BYTE_LITERALS 0x4 /* section with only 8 byte literals */ +#define S_LITERAL_POINTERS 0x5 /* section with only pointers to */ + /* literals */ +/* + * For the two types of symbol pointers sections and the symbol stubs section + * they have indirect symbol table entries. For each of the entries in the + * section the indirect symbol table entries, in corresponding order in the + * indirect symbol table, start at the index stored in the reserved1 field + * of the section structure. Since the indirect symbol table entries + * correspond to the entries in the section the number of indirect symbol table + * entries is inferred from the size of the section divided by the size of the + * entries in the section. For symbol pointers sections the size of the entries + * in the section is 4 bytes and for symbol stubs sections the byte size of the + * stubs is stored in the reserved2 field of the section structure. 
+ */ +#define S_NON_LAZY_SYMBOL_POINTERS 0x6 /* section with only non-lazy + symbol pointers */ +#define S_LAZY_SYMBOL_POINTERS 0x7 /* section with only lazy symbol + pointers */ +#define S_SYMBOL_STUBS 0x8 /* section with only symbol + stubs, byte size of stub in + the reserved2 field */ +#define S_MOD_INIT_FUNC_POINTERS 0x9 /* section with only function + pointers for initialization*/ +#define S_MOD_TERM_FUNC_POINTERS 0xa /* section with only function + pointers for termination */ +#define S_COALESCED 0xb /* section contains symbols that + are to be coalesced */ +#define S_GB_ZEROFILL 0xc /* zero fill on demand section + (that can be larger than 4 + gigabytes) */ +#define S_INTERPOSING 0xd /* section with only pairs of + function pointers for + interposing */ +#define S_16BYTE_LITERALS 0xe /* section with only 16 byte + literals */ +#define S_DTRACE_DOF 0xf /* section contains + DTrace Object Format */ +#define S_LAZY_DYLIB_SYMBOL_POINTERS 0x10 /* section with only lazy + symbol pointers to lazy + loaded dylibs */ +/* + * Constants for the section attributes part of the flags field of a section + * structure. + */ +#define SECTION_ATTRIBUTES_USR 0xff000000 /* User settable attributes */ +#define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section contains only true + machine instructions */ +#define S_ATTR_NO_TOC 0x40000000 /* section contains coalesced + symbols that are not to be + in a ranlib table of + contents */ +#define S_ATTR_STRIP_STATIC_SYMS 0x20000000 /* ok to strip static symbols + in this section in files + with the MH_DYLDLINK flag */ +#define S_ATTR_NO_DEAD_STRIP 0x10000000 /* no dead stripping */ +#define S_ATTR_LIVE_SUPPORT 0x08000000 /* blocks are live if they + reference live blocks */ +#define S_ATTR_SELF_MODIFYING_CODE 0x04000000 /* Used with i386 code stubs + written on by dyld */ +/* + * If a segment contains any sections marked with S_ATTR_DEBUG then all + * sections in that segment must have this attribute. 
No section other than + * a section marked with this attribute may reference the contents of this + * section. A section with this attribute may contain no symbols and must have + * a section type S_REGULAR. The static linker will not copy section contents + * from sections with this attribute into its output file. These sections + * generally contain DWARF debugging info. + */ +#define S_ATTR_DEBUG 0x02000000 /* a debug section */ +#define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system settable attributes */ +#define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some + machine instructions */ +#define S_ATTR_EXT_RELOC 0x00000200 /* section has external + relocation entries */ +#define S_ATTR_LOC_RELOC 0x00000100 /* section has local + relocation entries */ + + +/* + * The names of segments and sections in them are mostly meaningless to the + * link-editor. But there are a few things to support traditional UNIX + * executables that require the link-editor and assembler to use some names + * agreed upon by convention. + * + * The initial protection of the "__TEXT" segment has write protection turned + * off (not writeable). + * + * The link-editor will allocate common symbols at the end of the "__common" + * section in the "__DATA" segment. It will create the section and segment + * if needed. 
+ */ + +/* The currently known segment names and the section names in those segments */ + +#define SEG_PAGEZERO "__PAGEZERO" /* the pagezero segment which has no */ + /* protections and catches NULL */ + /* references for MH_EXECUTE files */ + + +#define SEG_TEXT "__TEXT" /* the traditional UNIX text segment */ +#define SECT_TEXT "__text" /* the real text part of the text */ + /* section no headers, and no padding */ +#define SECT_FVMLIB_INIT0 "__fvmlib_init0" /* the fvmlib initialization */ + /* section */ +#define SECT_FVMLIB_INIT1 "__fvmlib_init1" /* the section following the */ + /* fvmlib initialization */ + /* section */ + +#define SEG_DATA "__DATA" /* the traditional UNIX data segment */ +#define SECT_DATA "__data" /* the real initialized data section */ + /* no padding, no bss overlap */ +#define SECT_BSS "__bss" /* the real uninitialized data section*/ + /* no padding */ +#define SECT_COMMON "__common" /* the section common symbols are */ + /* allocated in by the link editor */ + +#define SEG_OBJC "__OBJC" /* objective-C runtime segment */ +#define SECT_OBJC_SYMBOLS "__symbol_table" /* symbol table */ +#define SECT_OBJC_MODULES "__module_info" /* module information */ +#define SECT_OBJC_STRINGS "__selector_strs" /* string table */ +#define SECT_OBJC_REFS "__selector_refs" /* string table */ + +#define SEG_ICON "__ICON" /* the icon segment */ +#define SECT_ICON_HEADER "__header" /* the icon headers */ +#define SECT_ICON_TIFF "__tiff" /* the icons in tiff format */ + +#define SEG_LINKEDIT "__LINKEDIT" /* the segment containing all structs */ + /* created and maintained by the link */ + /* editor. 
Created with -seglinkedit */ + /* option to ld(1) for MH_EXECUTE and */ + /* FVMLIB file types only */ + +#define SEG_UNIXSTACK "__UNIXSTACK" /* the unix stack segment */ + +#define SEG_IMPORT "__IMPORT" /* the segment for the self (dyld) */ + /* modifying code stubs that has read, */ + /* write and execute permissions */ + +/* + * Fixed virtual memory shared libraries are identified by two things. The + * target pathname (the name of the library as found for execution), and the + * minor version number. The address of where the headers are loaded is in + * header_addr. (THIS IS OBSOLETE and no longer supported). + */ +struct fvmlib { + union lc_str name; /* library's target pathname */ + uint32_t minor_version; /* library's minor version number */ + uint32_t header_addr; /* library's header address */ +}; + +/* + * A fixed virtual shared library (filetype == MH_FVMLIB in the mach header) + * contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library. + * An object that uses a fixed virtual shared library also contains a + * fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses. + * (THIS IS OBSOLETE and no longer supported). + */ +struct fvmlib_command { + uint32_t cmd; /* LC_IDFVMLIB or LC_LOADFVMLIB */ + uint32_t cmdsize; /* includes pathname string */ + struct fvmlib fvmlib; /* the library identification */ +}; + +/* + * Dynamically linked shared libraries are identified by two things. The + * pathname (the name of the library as found for execution), and the + * compatibility version number. The pathname must match and the compatibility + * number in the user of the library must be greater than or equal to the + * library being used. The time stamp is used to record the time a library was + * built and copied into user so it can be used to determine if the library used + * at runtime is exactly the same as used to build the program. 
+ */ +struct dylib { + union lc_str name; /* library's path name */ + uint32_t timestamp; /* library's build time stamp */ + uint32_t current_version; /* library's current version number */ + uint32_t compatibility_version; /* library's compatibility vers number*/ +}; + +/* + * A dynamically linked shared library (filetype == MH_DYLIB in the mach header) + * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. + * An object that uses a dynamically linked shared library also contains a + * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or + * LC_REEXPORT_DYLIB) for each library it uses. + */ +struct dylib_command { + uint32_t cmd; /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB, + LC_REEXPORT_DYLIB */ + uint32_t cmdsize; /* includes pathname string */ + struct dylib dylib; /* the library identification */ +}; + +/* + * A dynamically linked shared library may be a subframework of an umbrella + * framework. If so it will be linked with "-umbrella umbrella_name" where + * "umbrella_name" is the name of the umbrella framework. A subframework + * can only be linked against by its umbrella framework or other subframeworks + * that are part of the same umbrella framework. Otherwise the static link + * editor produces an error and states to link against the umbrella framework. + * The name of the umbrella framework for subframeworks is recorded in the + * following structure. + */ +struct sub_framework_command { + uint32_t cmd; /* LC_SUB_FRAMEWORK */ + uint32_t cmdsize; /* includes umbrella string */ + union lc_str umbrella; /* the umbrella framework name */ +}; + +/* + * For dynamically linked shared libraries that are subframeworks of an umbrella + * framework they can allow clients other than the umbrella framework or other + * subframeworks in the same umbrella framework. To do this the subframework + * is built with "-allowable_client client_name" and an LC_SUB_CLIENT load + * command is created for each -allowable_client flag. 
The client_name is + * usually a framework name. It can also be a name used for bundles clients + * where the bundle is built with "-client_name client_name". + */ +struct sub_client_command { + uint32_t cmd; /* LC_SUB_CLIENT */ + uint32_t cmdsize; /* includes client string */ + union lc_str client; /* the client name */ +}; + +/* + * A dynamically linked shared library may be a sub_umbrella of an umbrella + * framework. If so it will be linked with "-sub_umbrella umbrella_name" where + * "umbrella_name" is the name of the sub_umbrella framework. When + * statically linking when -twolevel_namespace is in effect a twolevel namespace + * umbrella framework will only cause its subframeworks and those frameworks + * listed as sub_umbrella frameworks to be implicitly linked in. Any other + * dependent dynamic libraries will not be linked in when -twolevel_namespace + * is in effect. The primary library recorded by the static linker when + * resolving a symbol in these libraries will be the umbrella framework. + * Zero or more sub_umbrella frameworks may be used by an umbrella framework. + * The name of a sub_umbrella framework is recorded in the following structure. + */ +struct sub_umbrella_command { + uint32_t cmd; /* LC_SUB_UMBRELLA */ + uint32_t cmdsize; /* includes sub_umbrella string */ + union lc_str sub_umbrella; /* the sub_umbrella framework name */ +}; + +/* + * A dynamically linked shared library may be a sub_library of another shared + * library. If so it will be linked with "-sub_library library_name" where + * "library_name" is the name of the sub_library shared library. When + * statically linking when -twolevel_namespace is in effect a twolevel namespace + * shared library will only cause its subframeworks and those frameworks + * listed as sub_umbrella frameworks and libraries listed as sub_libraries to + * be implicitly linked in. Any other dependent dynamic libraries will not be + * linked in when -twolevel_namespace is in effect. 
The primary library + * recorded by the static linker when resolving a symbol in these libraries + * will be the umbrella framework (or dynamic library). Zero or more sub_library + * shared libraries may be used by an umbrella framework (or dynamic library). + * The name of a sub_library framework is recorded in the following structure. + * For example /usr/lib/libobjc_profile.A.dylib would be recorded as "libobjc". + */ +struct sub_library_command { + uint32_t cmd; /* LC_SUB_LIBRARY */ + uint32_t cmdsize; /* includes sub_library string */ + union lc_str sub_library; /* the sub_library name */ +}; + +/* + * A program (filetype == MH_EXECUTE) that is + * prebound to its dynamic libraries has one of these for each library that + * the static linker used in prebinding. It contains a bit vector for the + * modules in the library. The bits indicate which modules are bound (1) and + * which are not (0) from the library. The bit for module 0 is the low bit + * of the first byte. So the bit for the Nth module is: + * (linked_modules[N/8] >> N%8) & 1 + */ +struct prebound_dylib_command { + uint32_t cmd; /* LC_PREBOUND_DYLIB */ + uint32_t cmdsize; /* includes strings */ + union lc_str name; /* library's path name */ + uint32_t nmodules; /* number of modules in library */ + union lc_str linked_modules; /* bit vector of linked modules */ +}; + +/* + * A program that uses a dynamic linker contains a dylinker_command to identify + * the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker + * contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER). + * A file can have at most one of these. + */ +struct dylinker_command { + uint32_t cmd; /* LC_ID_DYLINKER or LC_LOAD_DYLINKER */ + uint32_t cmdsize; /* includes pathname string */ + union lc_str name; /* dynamic linker's path name */ +}; + +/* + * Thread commands contain machine-specific data structures suitable for + * use in the thread state primitives. 
The machine specific data structures + * follow the struct thread_command as follows. + * Each flavor of machine specific data structure is preceded by an unsigned + * long constant for the flavor of that data structure, an uint32_t + * that is the count of longs of the size of the state data structure and then + * the state data structure follows. This triple may be repeated for many + * flavors. The constants for the flavors, counts and state data structure + * definitions are expected to be in the header file <machine/thread_status.h>. + * These machine specific data structures sizes must be multiples of + * 4 bytes. The cmdsize reflects the total size of the thread_command + * and all of the sizes of the constants for the flavors, counts and state + * data structures. + * + * For executable objects that are unix processes there will be one + * thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor. + * This is the same as a LC_THREAD, except that a stack is automatically + * created (based on the shell's limit for the stack size). Command arguments + * and environment variables are copied onto that stack. + */ +struct thread_command { + uint32_t cmd; /* LC_THREAD or LC_UNIXTHREAD */ + uint32_t cmdsize; /* total size of this command */ + /* uint32_t flavor flavor of thread state */ + /* uint32_t count count of longs in thread state */ + /* struct XXX_thread_state state thread state for this flavor */ + /* ... */ +}; + +/* + * The routines command contains the address of the dynamic shared library + * initialization routine and an index into the module table for the module + * that defines the routine. Before any modules are used from the library the + * dynamic linker fully binds the module that defines the initialization routine + * and then calls it. This gets called before any module initialization + * routines (used for C++ static constructors) in the library. 
+ */ +struct routines_command { /* for 32-bit architectures */ + uint32_t cmd; /* LC_ROUTINES */ + uint32_t cmdsize; /* total size of this command */ + uint32_t init_address; /* address of initialization routine */ + uint32_t init_module; /* index into the module table that */ + /* the init routine is defined in */ + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + uint32_t reserved6; +}; + +/* + * The 64-bit routines command. Same use as above. + */ +struct routines_command_64 { /* for 64-bit architectures */ + uint32_t cmd; /* LC_ROUTINES_64 */ + uint32_t cmdsize; /* total size of this command */ + uint64_t init_address; /* address of initialization routine */ + uint64_t init_module; /* index into the module table that */ + /* the init routine is defined in */ + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + uint64_t reserved5; + uint64_t reserved6; +}; + +/* + * The symtab_command contains the offsets and sizes of the link-edit 4.3BSD + * "stab" style symbol table information as described in the header files + * <nlist.h> and <stab.h>. + */ +struct symtab_command { + uint32_t cmd; /* LC_SYMTAB */ + uint32_t cmdsize; /* sizeof(struct symtab_command) */ + uint32_t symoff; /* symbol table offset */ + uint32_t nsyms; /* number of symbol table entries */ + uint32_t stroff; /* string table offset */ + uint32_t strsize; /* string table size in bytes */ +}; + +/* + * This is the second set of the symbolic information which is used to support + * the data structures for the dynamic link editor. + * + * The original set of symbolic information in the symtab_command which contains + * the symbol and string tables must also be present when this load command is + * present. 
When this load command is present the symbol table is organized + * into three groups of symbols: + * local symbols (static and debugging symbols) - grouped by module + * defined external symbols - grouped by module (sorted by name if not lib) + * undefined external symbols (sorted by name if MH_BINDATLOAD is not set, + * and in order they were seen by the static + * linker if MH_BINDATLOAD is set) + * In this load command there are offsets and counts to each of the three groups + * of symbols. + * + * This load command contains the offsets and sizes of the following new + * symbolic information tables: + * table of contents + * module table + * reference symbol table + * indirect symbol table + * The first three tables above (the table of contents, module table and + * reference symbol table) are only present if the file is a dynamically linked + * shared library. For executable and object modules, which are files + * containing only one module, the information that would be in these three + * tables is determined as follows: + * table of contents - the defined external symbols are sorted by name + * module table - the file contains only one module so everything in the + * file is part of the module. + * reference symbol table - is the defined and undefined external symbols + * + * For dynamically linked shared library files this load command also contains + * offsets and sizes to the pool of relocation entries for all sections + * separated into two groups: + * external relocation entries + * local relocation entries + * For executable and object modules the relocation entries continue to hang + * off the section structures. 
+ */ +struct dysymtab_command { + uint32_t cmd; /* LC_DYSYMTAB */ + uint32_t cmdsize; /* sizeof(struct dysymtab_command) */ + + /* + * The symbols indicated by symoff and nsyms of the LC_SYMTAB load command + * are grouped into the following three groups: + * local symbols (further grouped by the module they are from) + * defined external symbols (further grouped by the module they are from) + * undefined symbols + * + * The local symbols are used only for debugging. The dynamic binding + * process may have to use them to indicate to the debugger the local + * symbols for a module that is being bound. + * + * The last two groups are used by the dynamic binding process to do the + * binding (indirectly through the module table and the reference symbol + * table when this is a dynamically linked shared library file). + */ + uint32_t ilocalsym; /* index to local symbols */ + uint32_t nlocalsym; /* number of local symbols */ + + uint32_t iextdefsym;/* index to externally defined symbols */ + uint32_t nextdefsym;/* number of externally defined symbols */ + + uint32_t iundefsym; /* index to undefined symbols */ + uint32_t nundefsym; /* number of undefined symbols */ + + /* + * For the dynamic binding process to find which module a symbol + * is defined in the table of contents is used (analogous to the ranlib + * structure in an archive) which maps defined external symbols to modules + * they are defined in. This exists only in a dynamically linked shared + * library file. For executable and object modules the defined external + * symbols are sorted by name and are used as the table of contents. + */ + uint32_t tocoff; /* file offset to table of contents */ + uint32_t ntoc; /* number of entries in table of contents */ + + /* + * To support dynamic binding of "modules" (whole object files) the symbol + * table must reflect the modules that the file was created from. 
This is + * done by having a module table that has indexes and counts into the merged + * tables for each module. The module structure that these two entries + * refer to is described below. This exists only in a dynamically linked + * shared library file. For executable and object modules the file only + * contains one module so everything in the file belongs to the module. + */ + uint32_t modtaboff; /* file offset to module table */ + uint32_t nmodtab; /* number of module table entries */ + + /* + * To support dynamic module binding the module structure for each module + * indicates the external references (defined and undefined) each module + * makes. For each module there is an offset and a count into the + * reference symbol table for the symbols that the module references. + * This exists only in a dynamically linked shared library file. For + * executable and object modules the defined external symbols and the + * undefined external symbols indicates the external references. + */ + uint32_t extrefsymoff; /* offset to referenced symbol table */ + uint32_t nextrefsyms; /* number of referenced symbol table entries */ + + /* + * The sections that contain "symbol pointers" and "routine stubs" have + * indexes and (implied counts based on the size of the section and fixed + * size of the entry) into the "indirect symbol" table for each pointer + * and stub. For every section of these two types the index into the + * indirect symbol table is stored in the section header in the field + * reserved1. An indirect symbol table entry is simply a 32bit index into + * the symbol table to the symbol that the pointer or stub is referring to. + * The indirect symbol table is ordered to match the entries in the section. 
+ */ + uint32_t indirectsymoff; /* file offset to the indirect symbol table */ + uint32_t nindirectsyms; /* number of indirect symbol table entries */ + + /* + * To support relocating an individual module in a library file quickly the + * external relocation entries for each module in the library need to be + * accessed efficiently. Since the relocation entries can't be accessed + * through the section headers for a library file they are separated into + * groups of local and external entries further grouped by module. In this + * case the presence of this load command whose extreloff, nextrel, + * locreloff and nlocrel fields are non-zero indicates that the relocation + * entries of non-merged sections are not referenced through the section + * structures (and the reloff and nreloc fields in the section headers are + * set to zero). + * + * Since the relocation entries are not accessed through the section headers + * this requires the r_address field to be something other than a section + * offset to identify the item to be relocated. In this case r_address is + * set to the offset from the vmaddr of the first LC_SEGMENT command. + * For MH_SPLIT_SEGS images r_address is set to the offset from the + * vmaddr of the first read-write LC_SEGMENT command. + * + * The relocation entries are grouped by module and the module table + * entries have indexes and counts into them for the group of external + * relocation entries for that module. + * + * For sections that are merged across modules there must not be any + * remaining external relocation entries for them (for merged sections + * remaining relocation entries must be local). 
+ */ + uint32_t extreloff; /* offset to external relocation entries */ + uint32_t nextrel; /* number of external relocation entries */ + + /* + * All the local relocation entries are grouped together (they are not + * grouped by their module since they are only used if the object is moved + * from its statically link edited address). 
+ */ + uint32_t locreloff; /* offset to local relocation entries */ + uint32_t nlocrel; /* number of local relocation entries */ + +}; + +/* + * An indirect symbol table entry is simply a 32bit index into the symbol table + * to the symbol that the pointer or stub is referring to. Unless it is for a + * non-lazy symbol pointer section for a defined symbol which strip(1) has + * removed. In which case it has the value INDIRECT_SYMBOL_LOCAL. If the + * symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that. + */ +#define INDIRECT_SYMBOL_LOCAL 0x80000000 +#define INDIRECT_SYMBOL_ABS 0x40000000 + + +/* a table of contents entry */ +struct dylib_table_of_contents { + uint32_t symbol_index; /* the defined external symbol + (index into the symbol table) */ + uint32_t module_index; /* index into the module table this symbol + is defined in */ +}; + +/* a module table entry */ +struct dylib_module { + uint32_t module_name; /* the module name (index into string table) */ + + uint32_t iextdefsym; /* index into externally defined symbols */ + uint32_t nextdefsym; /* number of externally defined symbols */ + uint32_t irefsym; /* index into reference symbol table */ + uint32_t nrefsym; /* number of reference symbol table entries */ + uint32_t ilocalsym; /* index into symbols for local symbols */ + uint32_t nlocalsym; /* number of local symbols */ + + uint32_t iextrel; /* index into external relocation entries */ + uint32_t nextrel; /* number of external relocation entries */ + + uint32_t iinit_iterm; /* low 16 bits are the index into the init + section, high 16 bits are the index into + the term section */ + uint32_t ninit_nterm; /* low 16 bits are the number of init section + entries, high 16 bits are the number of + term section entries */ + + uint32_t /* for this module address of the start of */ + objc_module_info_addr; /* the (__OBJC,__module_info) section */ + uint32_t /* for this module size of */ + objc_module_info_size; /* the (__OBJC,__module_info) section */ 
+}; + +/* a 64-bit module table entry */ +struct dylib_module_64 { + uint32_t module_name; /* the module name (index into string table) */ + + uint32_t iextdefsym; /* index into externally defined symbols */ + uint32_t nextdefsym; /* number of externally defined symbols */ + uint32_t irefsym; /* index into reference symbol table */ + uint32_t nrefsym; /* number of reference symbol table entries */ + uint32_t ilocalsym; /* index into symbols for local symbols */ + uint32_t nlocalsym; /* number of local symbols */ + + uint32_t iextrel; /* index into external relocation entries */ + uint32_t nextrel; /* number of external relocation entries */ + + uint32_t iinit_iterm; /* low 16 bits are the index into the init + section, high 16 bits are the index into + the term section */ + uint32_t ninit_nterm; /* low 16 bits are the number of init section + entries, high 16 bits are the number of + term section entries */ + + uint32_t /* for this module size of */ + objc_module_info_size; /* the (__OBJC,__module_info) section */ + uint64_t /* for this module address of the start of */ + objc_module_info_addr; /* the (__OBJC,__module_info) section */ +}; + +/* + * The entries in the reference symbol table are used when loading the module + * (both by the static and dynamic link editors) and if the module is unloaded + * or replaced. Therefore all external symbols (defined and undefined) are + * listed in the module's reference table. The flags describe the type of + * reference that is being made. The constants for the flags are defined in + * <mach-o/nlist.h> as they are also used for symbol table entries. + */ +struct dylib_reference { + uint32_t isym:24, /* index into the symbol table */ + flags:8; /* flags to indicate the type of reference */ +}; + +/* + * The twolevel_hints_command contains the offset and number of hints in the + * two-level namespace lookup hints table. 
+ */ +struct twolevel_hints_command { + uint32_t cmd; /* LC_TWOLEVEL_HINTS */ + uint32_t cmdsize; /* sizeof(struct twolevel_hints_command) */ + uint32_t offset; /* offset to the hint table */ + uint32_t nhints; /* number of hints in the hint table */ +}; + +/* + * The entries in the two-level namespace lookup hints table are twolevel_hint + * structs. These provide hints to the dynamic link editor where to start + * looking for an undefined symbol in a two-level namespace image. The + * isub_image field is an index into the sub-images (sub-frameworks and + * sub-umbrellas list) that made up the two-level image that the undefined + * symbol was found in when it was built by the static link editor. If + * isub-image is 0 then the symbol is expected to be defined in the library and not + * in the sub-images. If isub-image is non-zero it is an index into the array + * of sub-images for the umbrella with the first index in the sub-images being + * 1. The array of sub-images is the ordered list of sub-images of the umbrella + * that would be searched for a symbol that has the umbrella recorded as its + * primary library. The table of contents index is an index into the + * library's table of contents. This is used as the starting point of the + * binary search or a directed linear search. + */ +struct twolevel_hint { + uint32_t + isub_image:8, /* index into the sub images */ + itoc:24; /* index into the table of contents */ +}; + +/* + * The prebind_cksum_command contains the value of the original check sum for + * prebound files or zero. When a prebound file is first created or modified + * for other than updating its prebinding information the value of the check sum + * is set to zero. When the file has its prebinding re-done and if the value of + * the check sum is zero the original check sum is calculated and stored in + * cksum field of this load command in the output file. 
If when the prebinding + * is re-done and the cksum field is non-zero it is left unchanged from the + * input file. + */ +struct prebind_cksum_command { + uint32_t cmd; /* LC_PREBIND_CKSUM */ + uint32_t cmdsize; /* sizeof(struct prebind_cksum_command) */ + uint32_t cksum; /* the check sum or zero */ +}; + +/* + * The uuid load command contains a single 128-bit unique random number that + * identifies an object produced by the static link editor. + */ +struct uuid_command { + uint32_t cmd; /* LC_UUID */ + uint32_t cmdsize; /* sizeof(struct uuid_command) */ + uint8_t uuid[16]; /* the 128-bit uuid */ +}; + +/* + * The rpath_command contains a path which at runtime should be added to + * the current run path used to find @rpath prefixed dylibs. + */ +struct rpath_command { + uint32_t cmd; /* LC_RPATH */ + uint32_t cmdsize; /* includes string */ + union lc_str path; /* path to add to run path */ +}; + +/* + * The linkedit_data_command contains the offsets and sizes of a blob + * of data in the __LINKEDIT segment. + */ +struct linkedit_data_command { + uint32_t cmd; /* LC_CODE_SIGNATURE or LC_SEGMENT_SPLIT_INFO */ + uint32_t cmdsize; /* sizeof(struct linkedit_data_command) */ + uint32_t dataoff; /* file offset of data in __LINKEDIT segment */ + uint32_t datasize; /* file size of data in __LINKEDIT segment */ +}; + +/* + * The encryption_info_command contains the file offset and size of an + * encrypted segment. + */ +struct encryption_info_command { + uint32_t cmd; /* LC_ENCRYPTION_INFO */ + uint32_t cmdsize; /* sizeof(struct encryption_info_command) */ + uint32_t cryptoff; /* file offset of encrypted range */ + uint32_t cryptsize; /* file size of encrypted range */ + uint32_t cryptid; /* which encryption system, + 0 means not-encrypted yet */ +}; + +/* + * The dyld_info_command contains the file offsets and sizes of + * the new compressed form of the information dyld needs to + * load the image. This information is used by dyld on Mac OS X + * 10.6 and later. 
All information pointed to by this command + * is encoded using byte streams, so no endian swapping is needed + * to interpret it. + */ +struct dyld_info_command { + uint32_t cmd; /* LC_DYLD_INFO or LC_DYLD_INFO_ONLY */ + uint32_t cmdsize; /* sizeof(struct dyld_info_command) */ + + /* + * Dyld rebases an image whenever dyld loads it at an address different + * from its preferred address. The rebase information is a stream + * of byte sized opcodes whose symbolic names start with REBASE_OPCODE_. + * Conceptually the rebase information is a table of tuples: + * + * The opcodes are a compressed way to encode the table by only + * encoding when a column changes. In addition simple patterns + * like "every n'th offset for m times" can be encoded in a few + * bytes. + */ + uint32_t rebase_off; /* file offset to rebase info */ + uint32_t rebase_size; /* size of rebase info */ + + /* + * Dyld binds an image during the loading process, if the image + * requires any pointers to be initialized to symbols in other images. + * The rebase information is a stream of byte sized + * opcodes whose symbolic names start with BIND_OPCODE_. + * Conceptually the bind information is a table of tuples: + * + * The opcodes are a compressed way to encode the table by only + * encoding when a column changes. In addition simple patterns + * like for runs of pointers initialzed to the same value can be + * encoded in a few bytes. + */ + uint32_t bind_off; /* file offset to binding info */ + uint32_t bind_size; /* size of binding info */ + + /* + * Some C++ programs require dyld to unique symbols so that all + * images in the process use the same copy of some code/data. + * This step is done after binding. The content of the weak_bind + * info is an opcode stream like the bind_info. But it is sorted + * alphabetically by symbol name. This enable dyld to walk + * all images with weak binding information in order and look + * for collisions. If there are no collisions, dyld does + * no updating. 
That means that some fixups are also encoded + * in the bind_info. For instance, all calls to "operator new" + * are first bound to libstdc++.dylib using the information + * in bind_info. Then if some image overrides operator new + * that is detected when the weak_bind information is processed + * and the call to operator new is then rebound. + */ + uint32_t weak_bind_off; /* file offset to weak binding info */ + uint32_t weak_bind_size; /* size of weak binding info */ + + /* + * Some uses of external symbols do not need to be bound immediately. + * Instead they can be lazily bound on first use. The lazy_bind + * area contains a stream of BIND opcodes to bind all lazy symbols. + * Normal use is that dyld ignores the lazy_bind section when + * loading an image. Instead the static linker arranged for the + * lazy pointer to initially point to a helper function which + * pushes the offset into the lazy_bind area for the symbol + * needing to be bound, then jumps to dyld which simply adds + * the offset to lazy_bind_off to get the information on what + * to bind. + */ + uint32_t lazy_bind_off; /* file offset to lazy binding info */ + uint32_t lazy_bind_size; /* size of lazy binding info */ + + /* + * The symbols exported by a dylib are encoded in a trie. This + * is a compact representation that factors out common prefixes. + * It also reduces LINKEDIT pages in RAM because it encodes all + * information (name, address, flags) in one small, contiguous range. + * The export area is a stream of nodes. The first node sequentially + * is the start node for the trie. + * + * Nodes for a symbol start with a byte that is the length of + * the exported symbol information for the string so far. + * If there is no exported symbol, the byte is zero. If there + * is exported info, it follows the length byte. The exported + * info normally consists of a flags and offset both encoded + * in uleb128. The offset is location of the content named + * by the symbol. 
It is the offset from the mach_header for + * the image. + * + * After the initial byte and optional exported symbol information + * is a byte of how many edges (0-255) that this node has leaving + * it, followed by each edge. + * Each edge is a zero terminated cstring of the addition chars + * in the symbol, followed by a uleb128 offset for the node that + * edge points to. + * + */ + uint32_t export_off; /* file offset to export info */ + uint32_t export_size; /* size of export info */ +}; + +/* + * The following are used to encode rebasing information + */ +#define REBASE_TYPE_POINTER 1 +#define REBASE_TYPE_TEXT_ABSOLUTE32 2 +#define REBASE_TYPE_TEXT_PCREL32 3 + +#define REBASE_OPCODE_MASK 0xF0 +#define REBASE_IMMEDIATE_MASK 0x0F +#define REBASE_OPCODE_DONE 0x00 +#define REBASE_OPCODE_SET_TYPE_IMM 0x10 +#define REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x20 +#define REBASE_OPCODE_ADD_ADDR_ULEB 0x30 +#define REBASE_OPCODE_ADD_ADDR_IMM_SCALED 0x40 +#define REBASE_OPCODE_DO_REBASE_IMM_TIMES 0x50 +#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES 0x60 +#define REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB 0x70 +#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB 0x80 + + +/* + * The following are used to encode binding information + */ +#define BIND_TYPE_POINTER 1 +#define BIND_TYPE_TEXT_ABSOLUTE32 2 +#define BIND_TYPE_TEXT_PCREL32 3 + +#define BIND_SPECIAL_DYLIB_SELF 0 +#define BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE -1 +#define BIND_SPECIAL_DYLIB_FLAT_LOOKUP -2 + +#define BIND_SYMBOL_FLAGS_WEAK_IMPORT 0x1 +#define BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION 0x8 + +#define BIND_OPCODE_MASK 0xF0 +#define BIND_IMMEDIATE_MASK 0x0F +#define BIND_OPCODE_DONE 0x00 +#define BIND_OPCODE_SET_DYLIB_ORDINAL_IMM 0x10 +#define BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB 0x20 +#define BIND_OPCODE_SET_DYLIB_SPECIAL_IMM 0x30 +#define BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM 0x40 +#define BIND_OPCODE_SET_TYPE_IMM 0x50 +#define BIND_OPCODE_SET_ADDEND_SLEB 0x60 +#define 
BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x70 +#define BIND_OPCODE_ADD_ADDR_ULEB 0x80 +#define BIND_OPCODE_DO_BIND 0x90 +#define BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB 0xA0 +#define BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED 0xB0 +#define BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB 0xC0 + + +/* + * The following are used on the flags byte of a terminal node + * in the export information. + */ +#define EXPORT_SYMBOL_FLAGS_KIND_MASK 0x03 +#define EXPORT_SYMBOL_FLAGS_KIND_REGULAR 0x00 +#define EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL 0x01 +#define EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION 0x04 +#define EXPORT_SYMBOL_FLAGS_INDIRECT_DEFINITION 0x08 +#define EXPORT_SYMBOL_FLAGS_HAS_SPECIALIZATIONS 0x10 + +/* + * The symseg_command contains the offset and size of the GNU style + * symbol table information as described in the header file <symseg.h>. + * The symbol roots of the symbol segments must also be aligned properly + * in the file. So the requirement of keeping the offsets aligned to a + * multiple of 4 bytes translates to the length field of the symbol + * roots also being a multiple of a long. Also the padding must again be + * zeroed. (THIS IS OBSOLETE and no longer supported). + */ +struct symseg_command { + uint32_t cmd; /* LC_SYMSEG */ + uint32_t cmdsize; /* sizeof(struct symseg_command) */ + uint32_t offset; /* symbol segment offset */ + uint32_t size; /* symbol segment size in bytes */ +}; + +/* + * The ident_command contains a free format string table following the + * ident_command structure. The strings are null terminated and the size of + * the command is padded out with zero bytes to a multiple of 4 bytes. + * (THIS IS OBSOLETE and no longer supported). + */ +struct ident_command { + uint32_t cmd; /* LC_IDENT */ + uint32_t cmdsize; /* strings that follow this command */ +}; + +/* + * The fvmfile_command contains a reference to a file to be loaded at the + * specified virtual address. (Presently, this command is reserved for + * internal use. 
The kernel ignores this command when loading a program into + * memory). + */ +struct fvmfile_command { + uint32_t cmd; /* LC_FVMFILE */ + uint32_t cmdsize; /* includes pathname string */ + union lc_str name; /* files pathname */ + uint32_t header_addr; /* files virtual address */ +}; + +#endif /* _MACHO_LOADER_H_ */ diff --git a/include/mach/machine.h b/include/mach/machine.h new file mode 100644 index 0000000..53e97ab --- /dev/null +++ b/include/mach/machine.h @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* File: machine.h + * Author: Avadis Tevanian, Jr. + * Date: 1986 + * + * Machine independent machine abstraction. + */ + +#ifndef _MACH_MACHINE_H_ +#define _MACH_MACHINE_H_ + +#include +//#include +//#include + +typedef uint32_t integer_t; + +typedef integer_t cpu_type_t; +typedef integer_t cpu_subtype_t; +typedef integer_t cpu_threadtype_t; + +#define CPU_STATE_MAX 4 + +#define CPU_STATE_USER 0 +#define CPU_STATE_SYSTEM 1 +#define CPU_STATE_IDLE 2 +#define CPU_STATE_NICE 3 + + + +/* + * Capability bits used in the definition of cpu_type. + */ +#define CPU_ARCH_MASK 0xff000000 /* mask for architecture bits */ +#define CPU_ARCH_ABI64 0x01000000 /* 64 bit ABI */ + +/* + * Machine types known by all. 
+ */ + +#define CPU_TYPE_ANY ((cpu_type_t) -1) + +#define CPU_TYPE_VAX ((cpu_type_t) 1) +/* skip ((cpu_type_t) 2) */ +/* skip ((cpu_type_t) 3) */ +/* skip ((cpu_type_t) 4) */ +/* skip ((cpu_type_t) 5) */ +#define CPU_TYPE_MC680x0 ((cpu_type_t) 6) +#define CPU_TYPE_X86 ((cpu_type_t) 7) +#define CPU_TYPE_I386 CPU_TYPE_X86 /* compatibility */ +#define CPU_TYPE_X86_64 (CPU_TYPE_X86 | CPU_ARCH_ABI64) + +/* skip CPU_TYPE_MIPS ((cpu_type_t) 8) */ +/* skip ((cpu_type_t) 9) */ +#define CPU_TYPE_MC98000 ((cpu_type_t) 10) +#define CPU_TYPE_HPPA ((cpu_type_t) 11) +#define CPU_TYPE_ARM ((cpu_type_t) 12) +#define CPU_TYPE_MC88000 ((cpu_type_t) 13) +#define CPU_TYPE_SPARC ((cpu_type_t) 14) +#define CPU_TYPE_I860 ((cpu_type_t) 15) +/* skip CPU_TYPE_ALPHA ((cpu_type_t) 16) */ +/* skip ((cpu_type_t) 17) */ +#define CPU_TYPE_POWERPC ((cpu_type_t) 18) +#define CPU_TYPE_POWERPC64 (CPU_TYPE_POWERPC | CPU_ARCH_ABI64) + +/* + * Machine subtypes (these are defined here, instead of in a machine + * dependent directory, so that any program can get all definitions + * regardless of where is it compiled). + */ + +/* + * Capability bits used in the definition of cpu_subtype. + */ +#define CPU_SUBTYPE_MASK 0xff000000 /* mask for feature flags */ +#define CPU_SUBTYPE_LIB64 0x80000000 /* 64 bit libraries */ + + +/* + * Object files that are hand-crafted to run on any + * implementation of an architecture are tagged with + * CPU_SUBTYPE_MULTIPLE. This functions essentially the same as + * the "ALL" subtype of an architecture except that it allows us + * to easily find object files that may need to be modified + * whenever a new implementation of an architecture comes out. + * + * It is the responsibility of the implementor to make sure the + * software handles unsupported implementations elegantly. + */ +#define CPU_SUBTYPE_MULTIPLE ((cpu_subtype_t) -1) +#define CPU_SUBTYPE_LITTLE_ENDIAN ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_BIG_ENDIAN ((cpu_subtype_t) 1) + +/* + * Machine threadtypes. 
+ * This is none - not defined - for most machine types/subtypes. + */ +#define CPU_THREADTYPE_NONE ((cpu_threadtype_t) 0) + +/* + * VAX subtypes (these do *not* necessarily conform to the actual cpu + * ID assigned by DEC available via the SID register). + */ + +#define CPU_SUBTYPE_VAX_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_VAX780 ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_VAX785 ((cpu_subtype_t) 2) +#define CPU_SUBTYPE_VAX750 ((cpu_subtype_t) 3) +#define CPU_SUBTYPE_VAX730 ((cpu_subtype_t) 4) +#define CPU_SUBTYPE_UVAXI ((cpu_subtype_t) 5) +#define CPU_SUBTYPE_UVAXII ((cpu_subtype_t) 6) +#define CPU_SUBTYPE_VAX8200 ((cpu_subtype_t) 7) +#define CPU_SUBTYPE_VAX8500 ((cpu_subtype_t) 8) +#define CPU_SUBTYPE_VAX8600 ((cpu_subtype_t) 9) +#define CPU_SUBTYPE_VAX8650 ((cpu_subtype_t) 10) +#define CPU_SUBTYPE_VAX8800 ((cpu_subtype_t) 11) +#define CPU_SUBTYPE_UVAXIII ((cpu_subtype_t) 12) + +/* + * 680x0 subtypes + * + * The subtype definitions here are unusual for historical reasons. + * NeXT used to consider 68030 code as generic 68000 code. For + * backwards compatibility: + * + * CPU_SUBTYPE_MC68030 symbol has been preserved for source code + * compatibility. + * + * CPU_SUBTYPE_MC680x0_ALL has been defined to be the same + * subtype as CPU_SUBTYPE_MC68030 for binary compatibility. + * + * CPU_SUBTYPE_MC68030_ONLY has been added to allow new object + * files to be tagged as containing 68030-specific instructions. 
+ */ + +#define CPU_SUBTYPE_MC680x0_ALL ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_MC68030 ((cpu_subtype_t) 1) /* compat */ +#define CPU_SUBTYPE_MC68040 ((cpu_subtype_t) 2) +#define CPU_SUBTYPE_MC68030_ONLY ((cpu_subtype_t) 3) + +/* + * I386 subtypes + */ + +#define CPU_SUBTYPE_INTEL(f, m) ((cpu_subtype_t) (f) + ((m) << 4)) + +#define CPU_SUBTYPE_I386_ALL CPU_SUBTYPE_INTEL(3, 0) +#define CPU_SUBTYPE_386 CPU_SUBTYPE_INTEL(3, 0) +#define CPU_SUBTYPE_486 CPU_SUBTYPE_INTEL(4, 0) +#define CPU_SUBTYPE_486SX CPU_SUBTYPE_INTEL(4, 8) // 8 << 4 = 128 +#define CPU_SUBTYPE_586 CPU_SUBTYPE_INTEL(5, 0) +#define CPU_SUBTYPE_PENT CPU_SUBTYPE_INTEL(5, 0) +#define CPU_SUBTYPE_PENTPRO CPU_SUBTYPE_INTEL(6, 1) +#define CPU_SUBTYPE_PENTII_M3 CPU_SUBTYPE_INTEL(6, 3) +#define CPU_SUBTYPE_PENTII_M5 CPU_SUBTYPE_INTEL(6, 5) +#define CPU_SUBTYPE_CELERON CPU_SUBTYPE_INTEL(7, 6) +#define CPU_SUBTYPE_CELERON_MOBILE CPU_SUBTYPE_INTEL(7, 7) +#define CPU_SUBTYPE_PENTIUM_3 CPU_SUBTYPE_INTEL(8, 0) +#define CPU_SUBTYPE_PENTIUM_3_M CPU_SUBTYPE_INTEL(8, 1) +#define CPU_SUBTYPE_PENTIUM_3_XEON CPU_SUBTYPE_INTEL(8, 2) +#define CPU_SUBTYPE_PENTIUM_M CPU_SUBTYPE_INTEL(9, 0) +#define CPU_SUBTYPE_PENTIUM_4 CPU_SUBTYPE_INTEL(10, 0) +#define CPU_SUBTYPE_PENTIUM_4_M CPU_SUBTYPE_INTEL(10, 1) +#define CPU_SUBTYPE_ITANIUM CPU_SUBTYPE_INTEL(11, 0) +#define CPU_SUBTYPE_ITANIUM_2 CPU_SUBTYPE_INTEL(11, 1) +#define CPU_SUBTYPE_XEON CPU_SUBTYPE_INTEL(12, 0) +#define CPU_SUBTYPE_XEON_MP CPU_SUBTYPE_INTEL(12, 1) + +#define CPU_SUBTYPE_INTEL_FAMILY(x) ((x) & 15) +#define CPU_SUBTYPE_INTEL_FAMILY_MAX 15 + +#define CPU_SUBTYPE_INTEL_MODEL(x) ((x) >> 4) +#define CPU_SUBTYPE_INTEL_MODEL_ALL 0 + +/* + * X86 subtypes. + */ + +#define CPU_SUBTYPE_X86_ALL ((cpu_subtype_t)3) +#define CPU_SUBTYPE_X86_64_ALL ((cpu_subtype_t)3) +#define CPU_SUBTYPE_X86_ARCH1 ((cpu_subtype_t)4) + + +#define CPU_THREADTYPE_INTEL_HTT ((cpu_threadtype_t) 1) + +/* + * Mips subtypes. 
+ */ + +#define CPU_SUBTYPE_MIPS_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_MIPS_R2300 ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_MIPS_R2600 ((cpu_subtype_t) 2) +#define CPU_SUBTYPE_MIPS_R2800 ((cpu_subtype_t) 3) +#define CPU_SUBTYPE_MIPS_R2000a ((cpu_subtype_t) 4) /* pmax */ +#define CPU_SUBTYPE_MIPS_R2000 ((cpu_subtype_t) 5) +#define CPU_SUBTYPE_MIPS_R3000a ((cpu_subtype_t) 6) /* 3max */ +#define CPU_SUBTYPE_MIPS_R3000 ((cpu_subtype_t) 7) + +/* + * MC98000 (PowerPC) subtypes + */ +#define CPU_SUBTYPE_MC98000_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_MC98601 ((cpu_subtype_t) 1) + +/* + * HPPA subtypes for Hewlett-Packard HP-PA family of + * risc processors. Port by NeXT to 700 series. + */ + +#define CPU_SUBTYPE_HPPA_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_HPPA_7100 ((cpu_subtype_t) 0) /* compat */ +#define CPU_SUBTYPE_HPPA_7100LC ((cpu_subtype_t) 1) + +/* + * MC88000 subtypes. + */ +#define CPU_SUBTYPE_MC88000_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_MC88100 ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_MC88110 ((cpu_subtype_t) 2) + +/* + * SPARC subtypes + */ +#define CPU_SUBTYPE_SPARC_ALL ((cpu_subtype_t) 0) + +/* + * I860 subtypes + */ +#define CPU_SUBTYPE_I860_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_I860_860 ((cpu_subtype_t) 1) + +/* + * PowerPC subtypes + */ +#define CPU_SUBTYPE_POWERPC_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_POWERPC_601 ((cpu_subtype_t) 1) +#define CPU_SUBTYPE_POWERPC_602 ((cpu_subtype_t) 2) +#define CPU_SUBTYPE_POWERPC_603 ((cpu_subtype_t) 3) +#define CPU_SUBTYPE_POWERPC_603e ((cpu_subtype_t) 4) +#define CPU_SUBTYPE_POWERPC_603ev ((cpu_subtype_t) 5) +#define CPU_SUBTYPE_POWERPC_604 ((cpu_subtype_t) 6) +#define CPU_SUBTYPE_POWERPC_604e ((cpu_subtype_t) 7) +#define CPU_SUBTYPE_POWERPC_620 ((cpu_subtype_t) 8) +#define CPU_SUBTYPE_POWERPC_750 ((cpu_subtype_t) 9) +#define CPU_SUBTYPE_POWERPC_7400 ((cpu_subtype_t) 10) +#define CPU_SUBTYPE_POWERPC_7450 ((cpu_subtype_t) 11) +#define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100) + 
+/* + * ARM subtypes + */ +#define CPU_SUBTYPE_ARM_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_ARM_V4T ((cpu_subtype_t) 5) +#define CPU_SUBTYPE_ARM_V6 ((cpu_subtype_t) 6) +#define CPU_SUBTYPE_ARM_V5TEJ ((cpu_subtype_t) 7) +#define CPU_SUBTYPE_ARM_XSCALE ((cpu_subtype_t) 8) +#define CPU_SUBTYPE_ARM_V7 ((cpu_subtype_t) 9) + +/* + * CPU families (sysctl hw.cpufamily) + * + * These are meant to identify the CPU's marketing name - an + * application can map these to (possibly) localized strings. + * NB: the encodings of the CPU families are intentionally arbitrary. + * There is no ordering, and you should never try to deduce whether + * or not some feature is available based on the family. + * Use feature flags (eg, hw.optional.altivec) to test for optional + * functionality. + */ +#define CPUFAMILY_UNKNOWN 0 +#define CPUFAMILY_POWERPC_G3 0xcee41549 +#define CPUFAMILY_POWERPC_G4 0x77c184ae +#define CPUFAMILY_POWERPC_G5 0xed76d8aa +#define CPUFAMILY_INTEL_6_13 0xaa33392b +#define CPUFAMILY_INTEL_6_14 0x73d67300 /* "Intel Core Solo" and "Intel Core Duo" (32-bit Pentium-M with SSE3) */ +#define CPUFAMILY_INTEL_6_15 0x426f69ef /* "Intel Core 2 Duo" */ +#define CPUFAMILY_INTEL_6_23 0x78ea4fbc /* Penryn */ +#define CPUFAMILY_INTEL_6_26 0x6b5a4cd2 /* Nehalem */ +#define CPUFAMILY_ARM_9 0xe73283ae +#define CPUFAMILY_ARM_11 0x8ff620d8 +#define CPUFAMILY_ARM_XSCALE 0x53b005f5 +#define CPUFAMILY_ARM_13 0x0cc90e64 + +#define CPUFAMILY_INTEL_YONAH CPUFAMILY_INTEL_6_14 +#define CPUFAMILY_INTEL_MEROM CPUFAMILY_INTEL_6_15 +#define CPUFAMILY_INTEL_PENRYN CPUFAMILY_INTEL_6_23 +#define CPUFAMILY_INTEL_NEHALEM CPUFAMILY_INTEL_6_26 + +#define CPUFAMILY_INTEL_CORE CPUFAMILY_INTEL_6_14 +#define CPUFAMILY_INTEL_CORE2 CPUFAMILY_INTEL_6_15 + +#endif /* _MACH_MACHINE_H_ */ diff --git a/include/mach/vm_prot.h b/include/mach/vm_prot.h new file mode 100644 index 0000000..eed5107 --- /dev/null +++ b/include/mach/vm_prot.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2000-2002 Apple Computer, Inc. 
All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +/* + * File: mach/vm_prot.h + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * + * Virtual memory protection definitions. + * + */ + +#ifndef _MACH_VM_PROT_H_ +#define _MACH_VM_PROT_H_ + +/* + * Types defined: + * + * vm_prot_t VM protection values. + */ + +typedef int vm_prot_t; + +/* + * Protection values, defined as bits within the vm_prot_t type + */ + +#define VM_PROT_NONE ((vm_prot_t) 0x00) + +#define VM_PROT_READ ((vm_prot_t) 0x01) /* read permission */ +#define VM_PROT_WRITE ((vm_prot_t) 0x02) /* write permission */ +#define VM_PROT_EXECUTE ((vm_prot_t) 0x04) /* execute permission */ + +/* + * The default protection for newly-created virtual memory + */ + +#define VM_PROT_DEFAULT (VM_PROT_READ|VM_PROT_WRITE) + +/* + * The maximum privileges possible, for parameter checking. + */ + +#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) + +/* + * An invalid protection value. + * Used only by memory_object_lock_request to indicate no change + * to page locks. Using -1 here is a bad idea because it + * looks like VM_PROT_ALL and then some. + */ + +#define VM_PROT_NO_CHANGE ((vm_prot_t) 0x08) + +/* + * When a caller finds that he cannot obtain write permission on a + * mapped entry, the following flag can be used. The entry will + * be made "needs copy" effectively copying the object (using COW), + * and write permission will be added to the maximum protections + * for the associated entry. 
+ */ + +#define VM_PROT_COPY ((vm_prot_t) 0x10) + + +/* + * Another invalid protection value. + * Used only by memory_object_data_request upon an object + * which has specified a copy_call copy strategy. It is used + * when the kernel wants a page belonging to a copy of the + * object, and is only asking the object as a result of + * following a shadow chain. This solves the race between pages + * being pushed up by the memory manager and the kernel + * walking down the shadow chain. + */ + +#define VM_PROT_WANTS_COPY ((vm_prot_t) 0x10) + + +#endif /* _MACH_VM_PROT_H_ */ diff --git a/include/runetype.h b/include/runetype.h new file mode 100644 index 0000000..9458b04 --- /dev/null +++ b/include/runetype.h @@ -0,0 +1,140 @@ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)runetype.h 8.1 (Berkeley) 6/2/93 + */ + +#ifndef _RUNETYPE_H_ +#define _RUNETYPE_H_ + +#include <_types.h> +#include + +typedef wchar_t __darwin_rune_t; +typedef size_t __darwin_size_t; + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) + +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif + +#ifndef _CT_RUNE_T +#define _CT_RUNE_T +typedef __darwin_ct_rune_t ct_rune_t; +#endif + +#ifndef _RUNE_T +#define _RUNE_T +typedef __darwin_rune_t rune_t; +#endif + +#ifndef __cplusplus +#ifndef _WCHAR_T +#define _WCHAR_T +typedef __darwin_wchar_t wchar_t; +#endif /* _WCHAR_T */ +#endif /* __cplusplus */ + +#ifndef _WINT_T +#define _WINT_T +typedef __darwin_wint_t wint_t; +#endif + +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +#define _CACHED_RUNES (1 <<8 ) /* Must be a power of 2 */ +#define _CRMASK (~(_CACHED_RUNES - 1)) + +/* + * The lower 8 bits of runetype[] contain the digit value of the rune. 
+ */ +typedef struct { + __darwin_rune_t __min; /* First rune of the range */ + __darwin_rune_t __max; /* Last rune (inclusive) of the range */ + __darwin_rune_t __map; /* What first maps to in maps */ + uint32_t *__types; /* Array of types in range */ +} _RuneEntry; + +typedef struct { + int __nranges; /* Number of ranges stored */ + _RuneEntry *__ranges; /* Pointer to the ranges */ +} _RuneRange; + +typedef struct { + char __name[14]; /* CHARCLASS_NAME_MAX = 14 */ + uint32_t __mask; /* charclass mask */ +} _RuneCharClass; + +typedef struct { + char __magic[8]; /* Magic saying what version we are */ + char __encoding[32]; /* ASCII name of this encoding */ + + __darwin_rune_t (*__sgetrune)(const char *, __darwin_size_t, char const **); + int (*__sputrune)(__darwin_rune_t, char *, __darwin_size_t, char **); + __darwin_rune_t __invalid_rune; + + uint32_t __runetype[_CACHED_RUNES]; + __darwin_rune_t __maplower[_CACHED_RUNES]; + __darwin_rune_t __mapupper[_CACHED_RUNES]; + + /* + * The following are to deal with Runes larger than _CACHED_RUNES - 1. + * Their data is actually contiguous with this structure so as to make + * it easier to read/write from/to disk. + */ + _RuneRange __runetype_ext; + _RuneRange __maplower_ext; + _RuneRange __mapupper_ext; + + void *__variable; /* Data which depends on the encoding */ + int __variable_len; /* how long that data is */ + + /* + * extra fields to deal with arbitrary character classes + */ + int __ncharclasses; + _RuneCharClass *__charclasses; +} _RuneLocale; + +#define _RUNE_MAGIC_A "RuneMagA" /* Indicates version A of RuneLocale */ + +__BEGIN_DECLS +extern _RuneLocale _DefaultRuneLocale; +extern _RuneLocale *_CurrentRuneLocale; +__END_DECLS + +#endif /* !_RUNETYPE_H_ */ diff --git a/ld-mac.cc b/ld-mac.cc new file mode 100644 index 0000000..ad37f0d --- /dev/null +++ b/ld-mac.cc @@ -0,0 +1,526 @@ +// Copyright 2011 Shinichiro Hamaji. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY Shinichiro Hamaji ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Shinichiro Hamaji OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +// A Mach-O loader for linux. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "mach-o.h" + +#ifdef NOLOG +# define LOG if (0) cout +#else +# define LOG cerr +#endif +#define ERR cerr + +using namespace std; + +class MachO; + +static map g_rename; +static const MachO* g_mach = NULL; +// TODO(hamaji): We might want to control this behavior with a flag. 
+#ifdef NOLOG +static bool g_use_trampoline = false; +#else +static bool g_use_trampoline = true; +#endif +static vector g_bound_names; +static set g_no_trampoline; + +static void initRename() { /* Populate g_rename from rename.tab: RENAME(src, dst) maps src to dst, WRAP(src) maps src to __darwin_src. */ +#define RENAME(src, dst) g_rename.insert(make_pair(#src, #dst)); +#define WRAP(src) RENAME(src, __darwin_ ## src) +#include "rename.tab" +#undef RENAME +#undef WRAP +} + +static void initNoTrampoline() { /* Populate g_no_trampoline with every name listed in no_trampoline.tab. */ +#define NO_TRAMPOLINE(name) g_no_trampoline.insert(#name); +#include "no_trampoline.tab" +#undef NO_TRAMPOLINE +} + +static void undefinedFunction() { /* Placeholder for unresolved symbols: reports the call on stderr and aborts. */ + fprintf(stderr, "Undefined function called\n"); + abort(); +} + +static uint64_t alignMem(uint64_t p, uint64_t a) { /* Round p up to a multiple of a; the mask arithmetic is only correct when a is a power of two. */ + a--; + return (p + a) & ~a; +} + +static void dumpInt(int bound_name_id) { /* Print the name recorded in g_bound_names for this id to stdout, or a diagnostic to stderr when the id is negative, out of range, or has no name. */ + if (bound_name_id < 0) { + fprintf(stderr, "%d: negative bound function id\n", bound_name_id); + return; + } + if (bound_name_id >= (int)g_bound_names.size()) { + fprintf(stderr, "%d: bound function id overflow\n", bound_name_id); + return; + } + if (!g_bound_names[bound_name_id]) { + fprintf(stderr, "%d: unbound function id\n", bound_name_id); + return; + } + printf("calling %s(%d)\n", g_bound_names[bound_name_id], bound_name_id); + fflush(stdout); +} + +template +struct BitsHelpers { /* Selects the 64-bit types: uint64_t addresses and segment_command_64 segments. */ + typedef uint64_t intptr; + typedef segment_command_64 mach_segment; + + static const vector& segments(const MachO& mach) { + return mach.segments64(); + } +}; + +template <> +struct BitsHelpers { /* Selects the 32-bit types: uint32_t addresses and segment_command segments. */ + typedef uint32_t intptr; + typedef segment_command mach_segment; + + static const vector& segments(const MachO& mach) { + return mach.segments(); + } +}; + +template +class MachOLoader { + typedef BitsHelpers Helpers; + typedef typename Helpers::intptr intptr; + typedef typename Helpers::mach_segment Segment; + public: + MachOLoader() { + if (g_use_trampoline) { + // Push all arguments into stack. 
+ + // push %rax + pushTrampolineCode(0x50); + // push %rdi + pushTrampolineCode(0x57); + // push %rsi + pushTrampolineCode(0x56); + // push %rdx + pushTrampolineCode(0x52); + // push %rcx + pushTrampolineCode(0x51); + // push %r8 + pushTrampolineCode(0x5041); + // push %r9 + pushTrampolineCode(0x5141); + + // push %xmm0..%xmm7 + for (int i = 0; i < 8; i++) { + // sub $8, %rsp + pushTrampolineCode(0x08ec8348); + + // movq %xmmN, (%rsp) + pushTrampolineCode(0xd60f66); + pushTrampolineCode(4 + i * 8); + pushTrampolineCode(0x24); + } + + // mov %r10, %rdi + pushTrampolineCode(0xd7894c); + + // mov $func, %rdx + pushTrampolineCode(0xba48); + pushTrampolineCode64((unsigned long long)(void*)&dumpInt); + + // call *%rdx + pushTrampolineCode(0xd2ff); + + // pop %xmm7..%xmm0 + for (int i = 7; i >= 0; i--) { + // movq (%rsp), %xmmN + pushTrampolineCode(0x7e0ff3); + pushTrampolineCode(4 + i * 8); + pushTrampolineCode(0x24); + + // add $8, %rsp + pushTrampolineCode(0x08c48348); + } + + // pop %r9 + pushTrampolineCode(0x5941); + // pop %r8 + pushTrampolineCode(0x5841); + // pop %rcx + pushTrampolineCode(0x59); + // pop %rdx + pushTrampolineCode(0x5a); + // pop %rsi + pushTrampolineCode(0x5e); + // pop %rdi + pushTrampolineCode(0x5f); + // pop %rax + pushTrampolineCode(0x58); + + // ret + pushTrampolineCode(0xc3); + } + } + + void load(const MachO& mach, int argc, char** argv, char** envp) { + const vector& segments = Helpers::segments(mach); + for (size_t i = 0; i < segments.size(); i++) { + Segment* seg = segments[i]; + const char* name = seg->segname; + if (!strcmp(name, SEG_PAGEZERO)) { + continue; + } + + LOG << seg->segname << ": " + << "fileoff=" << seg->fileoff + << "vmaddr=" << seg->vmaddr << endl; + + int prot = 0; + if (seg->initprot & VM_PROT_READ) { + prot |= PROT_READ; + } + if (seg->initprot & VM_PROT_WRITE) { + prot |= PROT_WRITE; + } + if (seg->initprot & VM_PROT_EXECUTE) { + prot |= PROT_EXEC; + } + + intptr filesize = alignMem(seg->filesize, 0x1000); + intptr 
vmsize = seg->vmsize; + void* mapped = mmap((void*)seg->vmaddr, filesize, prot, + MAP_PRIVATE | MAP_FIXED, + mach.fd(), seg->fileoff); + if (mapped == MAP_FAILED) { + perror("mmap failed"); + abort(); + } + + if (vmsize != filesize) { + assert(vmsize > filesize); + void* mapped = mmap((void*)(seg->vmaddr + filesize), + vmsize - filesize, prot, + MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, + 0, 0); + if (mapped == MAP_FAILED) { + perror("mmap failed"); + abort(); + } + } + } + + unsigned int common_code_size = (unsigned int)trampoline_.size(); + // Ensure that we won't change the address. + trampoline_.reserve(common_code_size + + (1 + 6 + 5 + 10 + 3 + 2 + 1) * mach.binds().size()); + g_bound_names.resize(mach.binds().size()); + for (size_t i = 0; i < mach.binds().size(); i++) { + MachO::Bind* bind = mach.binds()[i]; + if (bind->name[0] != '_') { + LOG << bind->name << ": skipping" << endl; + continue; + } + + if (bind->type == BIND_TYPE_POINTER) { + const char* name = bind->name + 1; + + map::const_iterator found = + g_rename.find(name); + if (found != g_rename.end()) { + LOG << "Applying renaming: " << name + << " => " << found->second.c_str() << endl; + name = found->second.c_str(); + } + + void** ptr = (void**)bind->vmaddr; + void* sym = dlsym(RTLD_DEFAULT, name); + if (!sym) { + ERR << name << ": undefined symbol" << endl; + sym = (void*)&undefinedFunction; + } + + LOG << "bind " << name << ": " + << *ptr << " => " << sym << " @" << ptr << endl; + + if (g_use_trampoline && !g_no_trampoline.count(name)) { + LOG << "Generating trampoline for " << name << "..." 
<< endl; + + *ptr = &trampoline_[0] + trampoline_.size(); + g_bound_names[i] = name; + + // push %rax ; to adjust alignment for sse + pushTrampolineCode(0x50); + + // mov $i, %r10d + pushTrampolineCode(0xba41); + pushTrampolineCode32((unsigned int)i); + + // call &trampoline_[0] + pushTrampolineCode(0xe8); + pushTrampolineCode32((unsigned int)(-4-trampoline_.size())); + + // mov $sym, %r10 + pushTrampolineCode(0xba49); + pushTrampolineCode64((unsigned long long)(void*)sym); + // call *%r10 + pushTrampolineCode(0xd2ff41); + + // pop %r10 + pushTrampolineCode(0x5a41); + + // ret + pushTrampolineCode(0xc3); + } else { + *ptr = sym; + } + } else { + fprintf(stderr, "Unknown bind type: %d\n", bind->type); + abort(); + } + } + + char* trampoline_start_addr = + (char*)(((uintptr_t)&trampoline_[0]) & ~0xfff); + uint64_t trampoline_size = + alignMem(&trampoline_[0] + trampoline_.size() - trampoline_start_addr, + 0x1000); + mprotect(trampoline_start_addr, trampoline_size, + PROT_READ | PROT_WRITE | PROT_EXEC); + + LOG << "booting from " << mach.entry() << "..." 
<< endl; + fflush(stdout); + assert(argc > 0); + boot(mach.entry(), argc, argv, envp); + /* + int (*fp)(int, char**, char**) = + (int(*)(int, char**, char**))mach.entry(); + int ret = fp(argc, argv, envp); + exit(ret); + */ + } + + private: + void boot(uint64_t entry, int argc, char** argv, char** envp); + + void pushTrampolineCode(unsigned int c) { + while (c) { + trampoline_.push_back(c & 255); + c = c >> 8; + } + } + + void pushTrampolineCode64(unsigned long long c) { + for (int i = 0; i < 8; i++) { + trampoline_.push_back(c & 255); + c = c >> 8; + } + } + + void pushTrampolineCode32(unsigned int c) { + for (int i = 0; i < 4; i++) { + trampoline_.push_back(c & 255); + c = c >> 8; + } + } + + string trampoline_; +}; + +template <> +void MachOLoader::boot( + uint64_t entry, int argc, char** argv, char** envp) { + __asm__ volatile(" mov %1, %%eax;\n" + " mov %2, %%rdx;\n" + " push $0;\n" + ".loop64:\n" + " sub $8, %%rdx;\n" + " push (%%rdx);\n" + " dec %%eax;\n" + " jnz .loop64;\n" + " mov %1, %%eax;\n" + " push %%rax;\n" + " jmp *%0;\n" + ::"r"(entry), "r"(argc), "r"(argv + argc), "r"(envp) + :"%rax", "%rdx"); + //fprintf(stderr, "done!\n"); +} + +template <> +void MachOLoader::boot( + uint64_t entry, int argc, char** argv, char** envp) { + __asm__ volatile("" + ::"r"(entry), "r"(argc), "r"(argv), "r"(envp)); +} + +template +void loadMachO(const MachO& mach, int argc, char** argv, char** envp) { + MachOLoader loader; + loader.load(mach, argc, argv, envp); +} + +static void lookupSymbol(const MachO& mach, void* addr, + const char** out_sym, ptrdiff_t* out_diff) { + *out_sym = NULL; + *out_diff = INT_MAX; + for (size_t i = 0; i < mach.binds().size(); i++) { + MachO::Bind* bind = mach.binds()[i]; + ptrdiff_t diff = (char*)addr - (char*)bind->vmaddr; + if (diff >= 0 && diff < *out_diff) { + *out_sym = bind->name; + *out_diff = diff; + } + } +} + +#if 0 +static int getBacktrace(void** trace, int max_depth) { + typedef struct frame { + struct frame *bp; + void *ret; + } 
frame; + + int depth; + frame* bp = (frame*)__builtin_frame_address(0); + for (depth = 0; bp && depth < max_depth; depth++) { + trace[depth] = bp->ret; + bp = bp->bp; + } + return depth; +} +#endif + +/* signal handler for fatal errors */ +static void handleSignal(int signum, siginfo_t* siginfo, void* vuc) { + ucontext_t *uc = (ucontext_t*)vuc; + void* pc = (void*)uc->uc_mcontext.gregs[REG_RIP]; + + fprintf(stderr, "%s(%d) %d (@%p) PC: %p\n\n", + strsignal(signum), signum, siginfo->si_code, siginfo->si_addr, pc); + + void* trace[100]; + int len = backtrace(trace, 99); + //int len = getBacktrace(trace, 99); + char** syms = backtrace_symbols(trace, len); + for (int i = 0; i < len; i++) { + if (syms[i] && syms[i][0] != '[') { + fprintf(stderr, "%s\n", syms[i]); + } else { + const char* sym = NULL; + if (g_mach) { + ptrdiff_t diff; + lookupSymbol(*g_mach, trace[i], &sym, &diff); + if (sym) { + fprintf(stderr, "%s(+%ld) %p\n", sym, (long)diff, trace[i]); + } + } + if (!sym) { + fprintf(stderr, "%p\n", trace[i]); + } + } + } +} + +/* Generate a stack backtrace when a CPU exception occurs. 
*/ +static void initSignalHandler() { + struct sigaction sigact; + sigact.sa_flags = SA_SIGINFO | SA_RESETHAND; + sigact.sa_sigaction = handleSignal; + sigemptyset(&sigact.sa_mask); + sigaction(SIGFPE, &sigact, NULL); + sigaction(SIGILL, &sigact, NULL); + sigaction(SIGSEGV, &sigact, NULL); + sigaction(SIGBUS, &sigact, NULL); + sigaction(SIGABRT, &sigact, NULL); +} + +int main(int argc, char* argv[], char* envp[]) { + initSignalHandler(); + initRename(); + initNoTrampoline(); + + if (!dlopen("libmac.so", RTLD_NOW | RTLD_GLOBAL)) { + if (!dlopen("libmac/libmac.so", RTLD_NOW | RTLD_GLOBAL)) { + fprintf(stderr, "libmac not found\n"); + exit(1); + } + } + + char* loader_path = (char*)dlsym(RTLD_DEFAULT, "__loader_path"); + realpath(argv[0], loader_path); + + argc--; + argv++; + for (;;) { + if (argc == 0) { + fprintf(stderr, "An argument required.\n"); + exit(1); + } + + const char* arg = argv[0]; + if (arg[0] != '-') { + break; + } + + // TODO(hamaji): Do something for switches. + + argc--; + argv++; + } + + char* darwin_executable_path = + (char*)dlsym(RTLD_DEFAULT, "__darwin_executable_path"); + realpath(argv[0], darwin_executable_path); + + MachO mach(argv[0]); + g_mach = &mach; + if (mach.is64()) { + loadMachO(mach, argc, argv, envp); + } else { + loadMachO(mach, argc, argv, envp); + } +} diff --git a/libmac/errno.c b/libmac/errno.c new file mode 100644 index 0000000..9ceedd1 --- /dev/null +++ b/libmac/errno.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved + */ +/* + * The world-renowned global variable + */ +#include + +int *__error(void) { + return &errno; +} + +int cthread_errno(void) { + return *__error(); +} + diff --git a/libmac/mac.c b/libmac/mac.c new file mode 100644 index 0000000..a380fba --- /dev/null +++ b/libmac/mac.c @@ -0,0 +1,730 @@ +// Copyright 2011 Shinichiro Hamaji. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY Shinichiro Hamaji ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL Shinichiro Hamaji OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +// Emulation for functions in Mac. + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef NOLOG +# define LOGF(fmt, ...) if (0) fprintf(stderr, fmt, __VA_ARGS__) +#else +# define LOGF(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__) +#endif + +typedef __darwin_rune_t rune_t; +typedef int __darwin_ct_rune_t; /* ct_rune_t */ + +#define _INVALID_RUNE _DefaultRuneLocale.__invalid_rune + +#include "errno.c" +#include "none.c" +#include "popcountdi2.c" +#include "runetable.c" +#include "stack_protector-obsd.c" + +struct __darwin_timespec { + time_t tv_sec; + long tv_nsec; +}; + +typedef int32_t __darwin_dev_t; +typedef uint16_t __darwin_mode_t; +typedef uint16_t __darwin_nlink_t; +typedef uint64_t __darwin_ino64_t; +typedef uint32_t __darwin_uid_t; +typedef uint32_t __darwin_gid_t; + +// From /usr/include/sys/stat.h +typedef struct __darwin_stat64 { + __darwin_dev_t st_dev; + __darwin_mode_t st_mode; + __darwin_nlink_t st_nlink; + __darwin_ino64_t st_ino; + __darwin_uid_t st_uid; + __darwin_gid_t st_gid; + __darwin_dev_t st_rdev; + struct __darwin_timespec st_atimespec; + struct __darwin_timespec st_mtimespec; + struct __darwin_timespec st_ctimespec; + struct __darwin_timespec st_birthtimespec; + off_t st_size; + // TODO(hamaji): the size is not checked after this field. 
+ blkcnt_t st_blocks; + blksize_t st_blksize; + __uint32_t st_flags; + __uint32_t st_gen; + __int32_t st_lspare; + __int64_t st_qspare[2]; +} __darwin_stat64; + +static void __translate_stat(struct stat64* linux_buf, + struct __darwin_stat64* mac) { + // TODO(hamaji): this memset seems to cause overflow... why? + //memset(mac, 0, sizeof(*mac)); + mac->st_dev = linux_buf->st_dev; + mac->st_mode = linux_buf->st_mode; + mac->st_nlink = linux_buf->st_nlink; + mac->st_ino = linux_buf->st_ino; + mac->st_uid = linux_buf->st_uid; + mac->st_gid = linux_buf->st_gid; + mac->st_rdev = linux_buf->st_rdev; + mac->st_size = linux_buf->st_size; + mac->st_blksize = linux_buf->st_blksize; + mac->st_blocks = linux_buf->st_blocks; + mac->st_atimespec.tv_sec = linux_buf->st_atime; + mac->st_mtimespec.tv_sec = linux_buf->st_mtime; + mac->st_ctimespec.tv_sec = linux_buf->st_ctime; +} + +int stat$INODE64(const char* path, struct __darwin_stat64* mac) { + LOGF("stat: path=%s buf=%p\n", path, mac); + struct stat64 linux_buf; + int ret = stat64(path, &linux_buf); + __translate_stat(&linux_buf, mac); + return ret; +} + +int fstat$INODE64(int fd, struct __darwin_stat64* mac) { + LOGF("fstat: fd=%d buf=%p\n", fd, mac); + LOGF("fstat: size_offset=%ld\n", (char*)&mac->st_size - (char*)mac); + fflush(stdout); + struct stat64 linux_buf; + int ret = fstat64(fd, &linux_buf); + __translate_stat(&linux_buf, mac); + return ret; +} + +// From /usr/include/sys/dirent.h +#define __DARWIN_MAXNAMLEN 255 +struct __darwin_dirent { + __darwin_ino64_t d_ino; + uint16_t d_reclen; + uint8_t d_type; + uint8_t d_namlen; + char d_name[__DARWIN_MAXNAMLEN + 1]; +}; + +struct __darwin_dirent* readdir$INODE64(DIR* dirp) { + static struct __darwin_dirent mac; + struct dirent* linux_buf = readdir(dirp); + if (!linux_buf) { + return NULL; + } + mac.d_ino = linux_buf->d_ino; + mac.d_reclen = linux_buf->d_reclen; + mac.d_type = linux_buf->d_type; + mac.d_namlen = strlen(linux_buf->d_name); + strcpy(mac.d_name, 
/* Not implemented: aborts when called. */
void libiconv_set_relocation_prefix(const char* orig, const char* curr) {
  // TODO: What should we do?
  (void)orig;
  (void)curr;
  abort();
}

// TODO: We need rdtsc.
struct mach_timebase_info {
  uint32_t numer;
  uint32_t denom;
};

/* Returns the current tick count, taken from clock(). */
uint64_t mach_absolute_time() {
  return clock();
}

/*
 * Fills |info| with the fraction that converts mach_absolute_time() ticks
 * to nanoseconds (ns = ticks * numer / denom).  Always returns 0.
 *
 * Ticks come from clock(), i.e. CLOCKS_PER_SEC ticks per second, so the
 * fraction is 10^9 / CLOCKS_PER_SEC.
 */
int mach_timebase_info(struct mach_timebase_info* info) {
  info->numer = 1000000000u;
  info->denom = (uint32_t)CLOCKS_PER_SEC;
  return 0;
}

void* mach_task_self_;

// From /usr/include/mach/host_info.h
struct __darwin_host_basic_info {
  int max_cpus;
  int avail_cpus;
  int memory_size;
  unsigned int cpu_type;
  unsigned int cpu_subtype;
  unsigned int cpu_threadtype;
  int physical_cpu;
  int physical_cpu_max;
  int logical_cpu;
  int logical_cpu_max;
  uint64_t max_mem;
};

/*
 * Minimal host_info(): only flavor 1 is handled, any other flavor aborts.
 * |info| is zeroed and then cpu_type/cpu_subtype are set to the values this
 * file uses for x86-64.  |host| and |info_cnt| are ignored.  Returns 0.
 */
int host_info(int host, int flavor,
              struct __darwin_host_basic_info* info, void* info_cnt) {
  (void)host;
  (void)info_cnt;
  if (flavor != 1) {
    fprintf(stderr, "host_info with flavor=%d isn't supported yet.\n", flavor);
    abort();
  }

  // TODO(hamaji): only supports x86-64.
  memset(info, 0, sizeof(*info));
  info->cpu_type = 16777223;
  info->cpu_subtype = 3;
  return 0;
}
/* Placeholder: returns the same fixed string for every error. */
char* mach_error_string() {
  return (char*)"mach_error_string";
}

/* Placeholder: there is no host port, always NULL. */
void* mach_host_self() {
  return NULL;
}

/* Placeholder: nothing is released, always reports success. */
int mach_port_deallocate() {
  // TODO(hamaji): leak
  return 0;
}

/*
 * Allocates |size| zero-filled bytes with calloc() and stores the address
 * in |*addr|.  |target_task| and |flags| are ignored.
 *
 * Returns 0 on success, 4 (Mach's KERN_INVALID_ARGUMENT) when |addr| is
 * NULL, and 3 (KERN_NO_SPACE) when the allocation fails; |*addr| is left
 * untouched on failure.
 */
int vm_allocate(int target_task, void** addr, size_t size, int flags) {
  enum { kSuccess = 0, kNoSpace = 3, kInvalidArgument = 4 };
  (void)target_task;
  (void)flags;

  if (!addr) {
    return kInvalidArgument;
  }
  void* mem = calloc(size, 1);
  /* calloc(0, 1) may legitimately return NULL; that is not a failure. */
  if (!mem && size != 0) {
    return kNoSpace;
  }
  *addr = mem;
  return kSuccess;
}

/* Placeholder: nothing is released, always reports success. */
int vm_deallocate() {
  // TODO(hamaji): munmap, maybe
  return 0;
}
MAP_ANONYMOUS : 0); + return mmap(addr, length, prot, flags, fd, offset); +} + +// From /usr/include/sys/sysctl.h + +#define CTL_KERN 1 +#define CTL_HW 6 + +#define HW_NCPU 3 +#define HW_PHYSMEM 5 +#define HW_AVAILCPU 25 + +#define KERN_OSRELEASE 2 + +int __darwin_sysctl(int* name, u_int namelen, + unsigned int* oldp, size_t* oldlenp, + void* newp, size_t newlen) { + int i; + LOGF("sysctl: namelen=%u", namelen); + for (i = 0; i < namelen; i++) { + LOGF(" name[%d]=%d", i, name[i]); + } + LOGF(" newp=%p\n", newp); + + if (newp) { + fprintf(stderr, "sysctl with newp isn't supported yet.\n"); + abort(); + } + + if (namelen != 2) { + fprintf(stderr, "sysctl with namelen=%u isn't supported yet.\n", namelen); + abort(); + } + + switch (name[0]) { + case CTL_HW: { + if (*oldlenp != sizeof(unsigned int)) { + fprintf(stderr, + "sysctl(HW) with oldlenp=%lu isn't supported yet.\n", *oldlenp); + abort(); + } + + unsigned int val = 0; + switch (name[1]) { + case HW_NCPU: + case HW_AVAILCPU: + val = 1; + break; + + case HW_PHYSMEM: + val = 2147483648; + break; + + default: + fprintf(stderr, "sysctl(HW) with oldp=%u isn't supported yet.\n", + *oldp); + abort(); + } + + *oldp = val; + return 0; + } + + case CTL_KERN: { + switch (name[1]) { + case KERN_OSRELEASE: + strcpy((char*)oldp, "10.6.0"); + *oldlenp = 7; + break; + + default: + fprintf(stderr, "sysctl(KERN) with oldp=%u isn't supported yet.\n", + *oldp); + abort(); + } + return 0; + } + + default: + fprintf(stderr, + "sysctl with name[0]=%d isn't supported yet.\n", name[0]); + abort(); + } +} + +// Unfortunately, putc_nolock depends on FILE's layout, +// so we need to wrap linux's FILE with darwin's layout. +typedef struct __darwin_sFILE { + unsigned char *_p; /* current position in (some) buffer */ + int _r; /* read space left for getc() */ + int _w; /* write space left for putc() */ + // TODO(hamaji): we need to modify this value with ferror and feof... 
// Unfortunately, putc_nolock depends on FILE's layout,
// so we need to wrap linux's FILE with darwin's layout.
typedef struct __darwin_sFILE {
  unsigned char *_p;      /* current position in (some) buffer */
  int _r;                 /* read space left for getc() */
  int _w;                 /* write space left for putc() */
  // TODO(hamaji): we need to modify this value with ferror and feof...
  short _flags;           /* flags, below; this FILE is free if 0 */
  short _file;            /* fileno, if Unix descriptor, else -1 */
#if 0
  struct  __sbuf _bf;     /* the buffer (at least 1 byte, if !NULL) */
  int     _lbfsize;       /* 0 or -_bf._size, for inline putc */

  /* operations */
  void    *_cookie;       /* cookie passed to io functions */
  int     (*_close)(void *);
  int     (*_read) (void *, char *, int);
  fpos_t  (*_seek) (void *, fpos_t, int);
  int     (*_write)(void *, const char *, int);

  /* separate buffer for long sequences of ungetc() */
  struct  __sbuf _ub;     /* ungetc buffer */
  struct __sFILEX *_extra; /* additions to FILE to not break ABI */
  int     _ur;            /* saved _r when _r is counting ungetc data */

  /* tricks to meet minimum requirements even when malloc() fails */
  unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */
  unsigned char _nbuf[1]; /* guarantee a getc() buffer */

  /* separate buffer for fgetln() when line crosses buffer boundary */
  struct  __sbuf _lb;     /* buffer for fgetln() */

  /* Unix stdio files get aligned to block boundaries on fseek() */
  int     _blksize;       /* stat.st_blksize (may be != _bf._size) */
  fpos_t  _offset;        /* current lseek offset (see WARNING) */

#endif

  FILE* linux_fp;         /* the real stream every wrapper forwards to */
} __darwin_FILE;

__darwin_FILE* __darwin_stdin;
__darwin_FILE* __darwin_stdout;
__darwin_FILE* __darwin_stderr;

/*
 * Allocates a Darwin-layout wrapper around |linux_fp|.
 * _r/_w are set to -1 and _p to NULL.  Returns NULL when out of memory.
 */
static __darwin_FILE* __init_darwin_FILE(FILE* linux_fp) {
  __darwin_FILE* fp = (__darwin_FILE*)malloc(sizeof(__darwin_FILE));
  if (!fp) {
    return NULL;
  }
  fp->_p = NULL;
  fp->_r = -1;
  fp->_w = -1;
  fp->_flags = 0;
  fp->_file = fileno(linux_fp);
  fp->linux_fp = linux_fp;
  return fp;
}

/* Wraps a freshly opened stream; closes it again if wrapping fails. */
static __darwin_FILE* __wrap_new_stream(FILE* linux_fp) {
  if (!linux_fp) {
    return NULL;
  }
  __darwin_FILE* fp = __init_darwin_FILE(linux_fp);
  if (!fp) {
    fclose(linux_fp);
  }
  return fp;
}

/* fopen(3): returns a new wrapper, or NULL on failure. */
__darwin_FILE* __darwin_fopen(const char* path, const char* mode) {
  return __wrap_new_stream(fopen(path, mode));
}

/* fdopen(3): returns a new wrapper, or NULL on failure. */
__darwin_FILE* __darwin_fdopen(int fd, const char* mode) {
  return __wrap_new_stream(fdopen(fd, mode));
}

/*
 * freopen(3).  On success the existing wrapper |fp| is re-pointed at the
 * reopened stream and returned, so the caller's handle keeps its identity
 * and no second wrapper for the same stream is created.
 * Returns NULL when freopen() fails.
 */
__darwin_FILE* __darwin_freopen(const char* path, const char* mode,
                                __darwin_FILE* fp) {
  FILE* new_fp = freopen(path, mode, fp->linux_fp);
  if (!new_fp) {
    return NULL;
  }
  fp->linux_fp = new_fp;
  fp->_file = fileno(new_fp);
  return fp;
}

/* fclose(3): closes the stream and frees the wrapper. */
int __darwin_fclose(__darwin_FILE* fp) {
  int r = fclose(fp->linux_fp);
  free(fp);
  return r;
}

/* The functions below forward to the libc call of the same name. */

size_t __darwin_fread(void* ptr, size_t size, size_t nmemb,
                      __darwin_FILE* fp) {
  return fread(ptr, size, nmemb, fp->linux_fp);
}

size_t __darwin_fwrite(void* ptr, size_t size, size_t nmemb,
                       __darwin_FILE* fp) {
  return fwrite(ptr, size, nmemb, fp->linux_fp);
}

int __darwin_fseek(__darwin_FILE* fp, long offset, int whence) {
  return fseek(fp->linux_fp, offset, whence);
}

long __darwin_ftell(__darwin_FILE* fp) {
  return ftell(fp->linux_fp);
}

int __darwin_fgetc(__darwin_FILE* fp) {
  return fgetc(fp->linux_fp);
}

int __darwin_ungetc(int c, __darwin_FILE* fp) {
  return ungetc(c, fp->linux_fp);
}

int __darwin_fputc(int c, __darwin_FILE* fp) {
  return fputc(c, fp->linux_fp);
}

int __darwin_fputs(const char* s, __darwin_FILE* fp) {
  return fputs(s, fp->linux_fp);
}

int __darwin_fprintf(__darwin_FILE* fp, const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  int r = vfprintf(fp->linux_fp, fmt, ap);
  va_end(ap);
  return r;
}

int __darwin_vfscanf(__darwin_FILE* fp, const char* fmt, va_list ap) {
  return vfscanf(fp->linux_fp, fmt, ap);
}

int __darwin_fscanf(__darwin_FILE* fp, const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  int r = vfscanf(fp->linux_fp, fmt, ap);
  va_end(ap);
  return r;
}
{ + va_list ap; + va_start(ap, fmt); + int r = vfscanf(fp->linux_fp, fmt, ap); + va_end(ap); + return r; +} + +int __darwin_vfprintf(__darwin_FILE* fp, const char* fmt, va_list ap) { + return vfprintf(fp->linux_fp, fmt, ap); +} + +int __darwin_fflush(__darwin_FILE* fp) { + return fflush(fp->linux_fp); +} + +void __darwin_setbuf(__darwin_FILE* fp, char* buf) { + setbuf(fp->linux_fp, buf); +} + +void __darwin_setbuffer(__darwin_FILE* fp, char* buf, size_t size) { + setbuffer(fp->linux_fp, buf, size); +} + +int __darwin_ferror(__darwin_FILE* fp) { + LOGF("ferror: %d\n", ferror(fp->linux_fp)); + return ferror(fp->linux_fp); +} + +int __darwin_fileno(__darwin_FILE* fp) { + return fileno(fp->linux_fp); +} + +char __darwin_executable_path[PATH_MAX]; +char __loader_path[PATH_MAX]; + +int _NSGetExecutablePath(char* buf, unsigned int* size) { + strcpy(buf, __darwin_executable_path); + *size = strlen(__darwin_executable_path); + return 0; +} + +int __darwin_open(const char* path, int flags, mode_t mode) { + LOGF("open path=%s flags=%d\n", path, flags); + int linux_flags = 0; + linux_flags |= flags & O_ACCMODE; + // O_SHLOCK + if (flags & 0x10) { + fprintf(stderr, "Unsupported open flag=%d\n", flags); + abort(); + } + // O_EXLOCK + if (flags & 0x20) { + fprintf(stderr, "Unsupported open flag=%d\n", flags); + abort(); + } + if (flags & 0x40) linux_flags |= O_ASYNC; + if (flags & 0x80) linux_flags |= O_SYNC; + if (flags & 0x100) linux_flags |= O_NOFOLLOW; + if (flags & 0x200) linux_flags |= O_CREAT; + if (flags & 0x400) linux_flags |= O_TRUNC; + if (flags & 0x800) linux_flags |= O_EXCL; + // O_EVTONLY + if (flags & 0x8000) { + fprintf(stderr, "Unsupported open flag=%d\n", flags); + abort(); + } + if (flags & 0x20000) linux_flags |= O_NOCTTY; + if (flags & 0x100000) linux_flags |= O_DIRECTORY; + // O_SYMLINK + if (flags & 0x200000) { + fprintf(stderr, "Unsupported open flag=%d\n", flags); + abort(); + } + if (flags & 0x400000) linux_flags |= O_DSYNC; + // O_POPUP + if (flags & 
0x80000000) { + fprintf(stderr, "Unsupported open flag=%d\n", flags); + abort(); + } + // O_ALERT + if (flags & 0x20000000) { + fprintf(stderr, "Unsupported open flag=%d\n", flags); + abort(); + } + + return open(path, linux_flags, mode); +} + +int __darwin_execv(const char* path, char* argv[]) { + int i, argc; + LOGF("execv: path=%s\n", path); + for (argc = 0; argv[argc]; argc++) {} + char** new_argv = malloc(sizeof(char*) * (argc + 2)); + new_argv[0] = __loader_path; + for (i = 0; i < argc + 1; i++) { + new_argv[i + 1] = argv[i]; + } + +#if 0 + printf("*** execv: "); + for (i = 0; i < argc + 1; i++) { + printf("%s ", new_argv[i]); + } + puts(""); +#endif + + return execvp(__loader_path, new_argv); +} + +// Finds executable "file" in path. Returns path name to execute. +// It leaks memory, but we don't care because it will be released +// in exec(3) call. +static char* find_in_path(const char* file) { + size_t len = strlen(file) + 1; + if (*file == '/') { + char* name = malloc(len + 1); + strcpy(name, file); + return name; + } + char* path = getenv("PATH"); + if (path == NULL) { + // Get default path. + size_t len = confstr(_CS_PATH, (char*)NULL, 0); + path = (char*)alloca(len + 1); + path[0] = ':'; + (void)confstr(_CS_PATH, path + 1, len); + } + + size_t pathlen = strlen(path); + + char* name = malloc(pathlen + len + 1); + // Puts "file" at the end of "name". + name = (char*)memcpy(name + pathlen + 1, file, len); + // "name" points '/file". + *--name = '/'; + + char* p = path; // next path to try. + do { + char* startp; + path = p; + p = strchrnul(path, ':'); + if (p == path) { + // empty path, so it means current directory. + // so try relative path (== file). + startp = name + 1; + } else { + // add path before "/file". + startp = (char*)memcpy(name - (p - path), path, p - path); + } + // Otherwise, use startp as relative path. + int r = access(startp, X_OK); + if (r == 0) { + // If it is X_OK, assume we can execute it. 
/*
 * Finds executable "file" the way execvp(3) would.
 *
 * A name containing '/' is returned as a copy without any search.
 * Otherwise each entry of $PATH is tried in order (an empty entry means the
 * current directory); when $PATH is unset the search list is the current
 * directory followed by confstr(_CS_PATH).  Returns a malloc()ed path for
 * the first candidate that passes access(X_OK), or NULL when nothing
 * matches or memory runs out.
 */
static char* find_in_path(const char* file) {
  size_t file_len = strlen(file);

  if (strchr(file, '/')) {
    char* copy = (char*)malloc(file_len + 1);
    if (copy) {
      memcpy(copy, file, file_len + 1);
    }
    return copy;
  }

  const char* search = getenv("PATH");
  char* default_path = NULL;
  if (search == NULL) {
    // Get default path, prefixed with ':' so the current directory is
    // tried first.
    size_t n = confstr(_CS_PATH, (char*)NULL, 0);
    default_path = (char*)malloc(n + 2);
    if (!default_path) {
      return NULL;
    }
    default_path[0] = ':';
    default_path[1] = '\0';
    if (n > 0) {
      (void)confstr(_CS_PATH, default_path + 1, n);
    }
    search = default_path;
  }

  /* Large enough for the longest entry + '/' + file + NUL. */
  char* candidate = (char*)malloc(strlen(search) + 1 + file_len + 1);
  char* found = NULL;
  if (candidate) {
    const char* dir = search;
    for (;;) {
      const char* end = strchr(dir, ':');
      if (!end) {
        end = dir + strlen(dir);
      }
      size_t dir_len = (size_t)(end - dir);
      if (dir_len == 0) {
        // empty path, so it means current directory.
        memcpy(candidate, file, file_len + 1);
      } else {
        memcpy(candidate, dir, dir_len);
        candidate[dir_len] = '/';
        memcpy(candidate + dir_len + 1, file, file_len + 1);
      }
      // If it is X_OK, assume we can execute it.
      if (access(candidate, X_OK) == 0) {
        found = candidate;
        break;
      }
      if (*end == '\0') {
        break;
      }
      dir = end + 1;
    }
    if (!found) {
      free(candidate);
    }
  }
  free(default_path);
  return found;
}

/*
 * execvp(3) for Darwin binaries.
 *
 * Returns only on failure: -1 with errno set (ENOENT when |file| cannot be
 * found).
 */
int __darwin_execvp(const char* file, char* argv[]) {
  // TODO(hamaji): Incorrect.
  if (access(file, X_OK) == 0) {
    return __darwin_execv(file, argv);
  }

  char* path = find_in_path(file);
  if (!path) {
    LOGF("execvp: file=%s not found\n", file);
    errno = ENOENT;
    return -1;
  }
  LOGF("execvp: file=%s path=%s\n", file, path);
  argv[0] = path;
  int r = __darwin_execv(path, argv);

  /* Only reached when the exec failed. */
  int saved_errno = errno;
  free(path);
  errno = saved_errno;
  return r;
}

/* One architecture entry as returned by the NXGet*ArchInfo* functions. */
typedef struct {
  const char *name;
  unsigned int cputype;
  unsigned int cpusubtype;
  int byteorder;
  const char *description;
} NXArchInfo;
2, "PowerPC 604e" }, + { "ppc750", 18, 9, 2, "PowerPC 750" }, + { "ppc7400", 18, 10, 2, "PowerPC 7400" }, + { "ppc7450", 18, 11, 2, "PowerPC 7450" }, + { "ppc970", 18, 100, 2, "PowerPC 970" }, + { "ppc970-64", 16777234, 100, 2, "PowerPC 970 64-bit" }, + { "armv4t", 12, 5, 1, "arm v4t" }, + { "armv5", 12, 7, 1, "arm v5" }, + { "xscale", 12, 8, 1, "arm xscale" }, + { "armv6", 12, 6, 1, "arm v6" }, + { "armv7", 12, 9, 1, "arm v7" }, + { "little", -1, 0, 1, "Little Endian" }, + { "big", -1, 1, 2, "Big Endian" }, + { "veo1", 255, 1, 2, "veo 1" }, + { "veo2", 255, 2, 2, "veo 2" }, + { NULL, 0, 0, 0, NULL }, +}; + +NXArchInfo __darwin_local_arch_info = { "i486", 7, 4, 1, "Intel 80486" }; + +const NXArchInfo* NXGetAllArchInfos() { + return __darwin_all_arch_infos; +} + +const NXArchInfo* NXGetLocalArchInfo() { + return &__darwin_local_arch_info; +} + +const NXArchInfo* NXGetArchInfoFromName(const char *name) { + NXArchInfo* info = __darwin_all_arch_infos; + for (; info->name; info++) { + if (!strcmp(info->name, name)) + return info; + } + return NULL; +} + +__attribute__((constructor)) void initMac() { + __darwin_stdin = __init_darwin_FILE(stdin); + __darwin_stdout = __init_darwin_FILE(stdout); + __darwin_stderr = __init_darwin_FILE(stderr); +} diff --git a/libmac/none.c b/libmac/none.c new file mode 100644 index 0000000..6edc461 --- /dev/null +++ b/libmac/none.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +rune_t +_none_sgetrune(string, n, result) + const char *string; + size_t n; + char const **result; +{ + if (n < 1) { + if (result) + *result = string; + return(_INVALID_RUNE); + } + if (result) + *result = string + 1; + return(*string & 0xff); +} + +int +_none_sputrune(c, string, n, result) + rune_t c; + char *string, **result; + size_t n; +{ + if (n >= 1) { + if (string) + *string = c; + if (result) + *result = string + 1; + } else if (result) + *result = (char *)0; + return(1); +} diff --git a/libmac/popcountdi2.c b/libmac/popcountdi2.c new file mode 100644 index 0000000..84dacc2 --- /dev/null +++ b/libmac/popcountdi2.c @@ -0,0 +1,32 @@ +/* ===-- popcountdi2.c - Implement __popcountdi2 ----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __popcountdi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +int +__popcountdi2(int64_t a) +{ + uint64_t x2 = (uint64_t)a; + x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL); + /* Every 2 bits holds the sum of every pair of bits (32) */ + x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) */ + x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL; + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) */ + unsigned int x = (unsigned int)(x2 + (x2 >> 32)); + /* The lower 32 bits hold four 16 bit sums (5 significant bits). */ + /* Upper 32 bits are garbage */ + x = x + (x >> 16); + /* The lower 16 bits hold two 32 bit sums (6 significant bits). */ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */ +} diff --git a/libmac/runetable.c b/libmac/runetable.c new file mode 100644 index 0000000..432cae2 --- /dev/null +++ b/libmac/runetable.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
*/

/*
 * Default rune locale record.  It names its encoding "none" and plugs in the
 * single-byte _none_sgetrune/_none_sputrune converters; 0xFFFD is presumably
 * the value reported for an invalid rune (TODO confirm against the
 * _RuneLocale declaration, which is not part of this file).
 */
_RuneLocale _DefaultRuneLocale = {
    _RUNE_MAGIC_A,
    "none",
    _none_sgetrune,
    _none_sputrune,
    0xFFFD,

    /*
     * Character-class flags, one entry per code point 0x00-0x7f; the
     * marker comments give the code point of the first entry in each pair of
     * rows.  Digits and hex letters also carry their numeric value (0..15)
     * OR-ed into the flags.  Only 128 entries are listed; if the array is
     * longer, the remaining entries are zero-initialized (TODO confirm the
     * array size in the _RuneLocale declaration).
     */
    {   /*00*/  _C,             _C,             _C,             _C,
                _C,             _C,             _C,             _C,
        /*08*/  _C,             _C|_S|_B,       _C|_S,          _C|_S,
                _C|_S,          _C|_S,          _C,             _C,
        /*10*/  _C,             _C,             _C,             _C,
                _C,             _C,             _C,             _C,
        /*18*/  _C,             _C,             _C,             _C,
                _C,             _C,             _C,             _C,
        /*20*/  _S|_B|_R,       _P|_R|_G,       _P|_R|_G,       _P|_R|_G,
                _P|_R|_G,       _P|_R|_G,       _P|_R|_G,       _P|_R|_G,
        /*28*/  _P|_R|_G,       _P|_R|_G,       _P|_R|_G,       _P|_R|_G,
                _P|_R|_G,       _P|_R|_G,       _P|_R|_G,       _P|_R|_G,
        /*30*/  _D|_R|_G|_X|0,  _D|_R|_G|_X|1,  _D|_R|_G|_X|2,  _D|_R|_G|_X|3,
                _D|_R|_G|_X|4,  _D|_R|_G|_X|5,  _D|_R|_G|_X|6,  _D|_R|_G|_X|7,
        /*38*/  _D|_R|_G|_X|8,  _D|_R|_G|_X|9,  _P|_R|_G,       _P|_R|_G,
                _P|_R|_G,       _P|_R|_G,       _P|_R|_G,       _P|_R|_G,
        /*40*/  _P|_R|_G,       _U|_X|_R|_G|_A|10, _U|_X|_R|_G|_A|11, _U|_X|_R|_G|_A|12,
                _U|_X|_R|_G|_A|13, _U|_X|_R|_G|_A|14, _U|_X|_R|_G|_A|15, _U|_R|_G|_A,
        /*48*/  _U|_R|_G|_A,    _U|_R|_G|_A,    _U|_R|_G|_A,    _U|_R|_G|_A,
                _U|_R|_G|_A,    _U|_R|_G|_A,    _U|_R|_G|_A,    _U|_R|_G|_A,
        /*50*/  _U|_R|_G|_A,    _U|_R|_G|_A,    _U|_R|_G|_A,    _U|_R|_G|_A,
                _U|_R|_G|_A,    _U|_R|_G|_A,    _U|_R|_G|_A,    _U|_R|_G|_A,
        /*58*/  _U|_R|_G|_A,    _U|_R|_G|_A,    _U|_R|_G|_A,    _P|_R|_G,
                _P|_R|_G,       _P|_R|_G,       _P|_R|_G,       _P|_R|_G,
        /*60*/  _P|_R|_G,       _L|_X|_R|_G|_A|10, _L|_X|_R|_G|_A|11, _L|_X|_R|_G|_A|12,
                _L|_X|_R|_G|_A|13, _L|_X|_R|_G|_A|14, _L|_X|_R|_G|_A|15, _L|_R|_G|_A,
        /*68*/  _L|_R|_G|_A,    _L|_R|_G|_A,    _L|_R|_G|_A,    _L|_R|_G|_A,
                _L|_R|_G|_A,    _L|_R|_G|_A,    _L|_R|_G|_A,    _L|_R|_G|_A,
        /*70*/  _L|_R|_G|_A,    _L|_R|_G|_A,    _L|_R|_G|_A,    _L|_R|_G|_A,
                _L|_R|_G|_A,    _L|_R|_G|_A,    _L|_R|_G|_A,    _L|_R|_G|_A,
        /*78*/  _L|_R|_G|_A,    _L|_R|_G|_A,    _L|_R|_G|_A,    _P|_R|_G,
                _P|_R|_G,       _P|_R|_G,       _P|_R|_G,       _C,
    },
    /*
     * 256-entry map to lower case: the identity everywhere except that the
     * positions of 'A'..'Z' (0x41-0x5a) hold 'a'..'z'.
     */
    {   0x00,   0x01,   0x02,   0x03,   0x04,   0x05,   0x06,   0x07,
        0x08,   0x09,   0x0a,   0x0b,   0x0c,   0x0d,   0x0e,   0x0f,
        0x10,   0x11,   0x12,   0x13,   0x14,   0x15,   0x16,   0x17,
        0x18,   0x19,   0x1a,   0x1b,   0x1c,   0x1d,   0x1e,   0x1f,
        0x20,   0x21,   0x22,   0x23,   0x24,   0x25,   0x26,   0x27,
        0x28,   0x29,   0x2a,   0x2b,   0x2c,   0x2d,   0x2e,   0x2f,
        0x30,   0x31,   0x32,   0x33,   0x34,   0x35,   0x36,   0x37,
        0x38,   0x39,   0x3a,   0x3b,   0x3c,   0x3d,   0x3e,   0x3f,
        0x40,   'a',    'b',    'c',    'd',    'e',    'f',    'g',
        'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',
        'p',    'q',    'r',    's',    't',    'u',    'v',    'w',
        'x',    'y',    'z',    0x5b,   0x5c,   0x5d,   0x5e,   0x5f,
        0x60,   'a',    'b',    'c',    'd',    'e',    'f',    'g',
        'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',
        'p',    'q',    'r',    's',    't',    'u',    'v',    'w',
        'x',    'y',    'z',    0x7b,   0x7c,   0x7d,   0x7e,   0x7f,
        0x80,   0x81,   0x82,   0x83,   0x84,   0x85,   0x86,   0x87,
        0x88,   0x89,   0x8a,   0x8b,   0x8c,   0x8d,   0x8e,   0x8f,
        0x90,   0x91,   0x92,   0x93,   0x94,   0x95,   0x96,   0x97,
        0x98,   0x99,   0x9a,   0x9b,   0x9c,   0x9d,   0x9e,   0x9f,
        0xa0,   0xa1,   0xa2,   0xa3,   0xa4,   0xa5,   0xa6,   0xa7,
        0xa8,   0xa9,   0xaa,   0xab,   0xac,   0xad,   0xae,   0xaf,
        0xb0,   0xb1,   0xb2,   0xb3,   0xb4,   0xb5,   0xb6,   0xb7,
        0xb8,   0xb9,   0xba,   0xbb,   0xbc,   0xbd,   0xbe,   0xbf,
        0xc0,   0xc1,   0xc2,   0xc3,   0xc4,   0xc5,   0xc6,   0xc7,
        0xc8,   0xc9,   0xca,   0xcb,   0xcc,   0xcd,   0xce,   0xcf,
        0xd0,   0xd1,   0xd2,   0xd3,   0xd4,   0xd5,   0xd6,   0xd7,
        0xd8,   0xd9,   0xda,   0xdb,   0xdc,   0xdd,   0xde,   0xdf,
        0xe0,   0xe1,   0xe2,   0xe3,   0xe4,   0xe5,   0xe6,   0xe7,
        0xe8,   0xe9,   0xea,   0xeb,   0xec,   0xed,   0xee,   0xef,
        0xf0,   0xf1,   0xf2,   0xf3,   0xf4,   0xf5,   0xf6,   0xf7,
        0xf8,   0xf9,   0xfa,   0xfb,   0xfc,   0xfd,   0xfe,   0xff,
    },
    /*
     * 256-entry map to upper case: the identity everywhere except that the
     * positions of 'a'..'z' (0x61-0x7a) hold 'A'..'Z'.
     */
    {   0x00,   0x01,   0x02,   0x03,   0x04,   0x05,   0x06,   0x07,
        0x08,   0x09,   0x0a,   0x0b,   0x0c,   0x0d,   0x0e,   0x0f,
        0x10,   0x11,   0x12,   0x13,   0x14,   0x15,   0x16,   0x17,
        0x18,   0x19,   0x1a,   0x1b,   0x1c,   0x1d,   0x1e,   0x1f,
        0x20,   0x21,   0x22,   0x23,   0x24,   0x25,   0x26,   0x27,
        0x28,   0x29,   0x2a,   0x2b,   0x2c,   0x2d,   0x2e,   0x2f,
        0x30,   0x31,   0x32,   0x33,   0x34,   0x35,   0x36,   0x37,
        0x38,   0x39,   0x3a,   0x3b,   0x3c,   0x3d,   0x3e,   0x3f,
        0x40,   'A',    'B',    'C',    'D',    'E',    'F',    'G',
        'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O',
        'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W',
        'X',    'Y',    'Z',    0x5b,   0x5c,   0x5d,   0x5e,   0x5f,
        0x60,   'A',    'B',    'C',    'D',    'E',    'F',    'G',
        'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O',
        'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W',
        'X',    'Y',    'Z',    0x7b,   0x7c,   0x7d,   0x7e,   0x7f,
        0x80,   0x81,   0x82,   0x83,   0x84,   0x85,   0x86,   0x87,
        0x88,   0x89,   0x8a,   0x8b,   0x8c,   0x8d,   0x8e,   0x8f,
        0x90,   0x91,   0x92,   0x93,   0x94,   0x95,   0x96,   0x97,
        0x98,   0x99,   0x9a,   0x9b,   0x9c,   0x9d,   0x9e,   0x9f,
        0xa0,   0xa1,   0xa2,   0xa3,   0xa4,   0xa5,   0xa6,   0xa7,
        0xa8,   0xa9,   0xaa,   0xab,   0xac,   0xad,   0xae,   0xaf,
        0xb0,   0xb1,   0xb2,   0xb3,   0xb4,   0xb5,   0xb6,   0xb7,
        0xb8,   0xb9,   0xba,   0xbb,   0xbc,   0xbd,   0xbe,   0xbf,
        0xc0,   0xc1,   0xc2,   0xc3,   0xc4,   0xc5,   0xc6,   0xc7,
        0xc8,   0xc9,   0xca,   0xcb,   0xcc,   0xcd,   0xce,   0xcf,
        0xd0,   0xd1,   0xd2,   0xd3,   0xd4,   0xd5,   0xd6,   0xd7,
        0xd8,   0xd9,   0xda,   0xdb,   0xdc,   0xdd,   0xde,   0xdf,
        0xe0,   0xe1,   0xe2,   0xe3,   0xe4,   0xe5,   0xe6,   0xe7,
        0xe8,   0xe9,   0xea,   0xeb,   0xec,   0xed,   0xee,   0xef,
        0xf0,   0xf1,   0xf2,   0xf3,   0xf4,   0xf5,   0xf6,   0xf7,
        0xf8,   0xf9,   0xfa,   0xfb,   0xfc,   0xfd,   0xfe,   0xff,
    },
};
diff --git a/libmac/stack_protector-obsd.c b/libmac/stack_protector-obsd.c
new file mode 100644
index 0000000..0dd861b
--- /dev/null
+++ b/libmac/stack_protector-obsd.c
@@ -0,0 +1,72 @@
/*
 * Copyright (c) 2002 Hiroaki Etoh, Federico G. Schwindt, and Miodrag Vallat.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#if defined(LIBC_SCCS) && !defined(list) +static char rcsid[] = "$OpenBSD: stack_protector.c,v 1.3 2002/12/10 08:53:42 etoh Exp $"; +#endif + +#include +#include +#include +#include +#include +#include +#include + +long __stack_chk_guard[8] = {0, 0, 0, 0, 0, 0, 0, 0}; +void __guard_setup(void) __attribute__ ((visibility ("hidden"))); +void __stack_chk_fail(void); + +void +__guard_setup(void) +{ + int fd; + if (__stack_chk_guard[0]!=0) return; + fd = open ("/dev/urandom", 0); + if (fd != -1) { + ssize_t size = read (fd, (char*)&__stack_chk_guard, + sizeof(__stack_chk_guard)); + close (fd) ; + if (size == sizeof(__stack_chk_guard) + && *__stack_chk_guard != 0) return; + } + /* If a random generator can't be used, the protector switches the guard + to the "terminator canary" */ + ((char*)__stack_chk_guard)[0] = 0; ((char*)__stack_chk_guard)[1] = 0; + ((char*)__stack_chk_guard)[2] = '\n'; ((char*)__stack_chk_guard)[3] = 255; +} + +void +__stack_chk_fail() +{ + const char message[] = "[%d] stack overflow"; + + /* this may fail on a chroot jail, though luck */ + syslog(LOG_CRIT, message, getpid()); + + abort(); +} diff --git a/mach-o.cc b/mach-o.cc new file mode 100644 index 0000000..68c4012 --- /dev/null +++ b/mach-o.cc @@ -0,0 +1,421 @@ +// Copyright 2011 Shinichiro Hamaji. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY Shinichiro Hamaji ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Shinichiro Hamaji OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mach-o.h" +#include "mach-o/loader.h" + +#ifdef NOLOG +# define LOGF(...) if (0) fprintf(stderr, __VA_ARGS__) +#else +# define LOGF(...) 
fprintf(stderr, __VA_ARGS__) +#endif + +typedef long long ll; +typedef unsigned long long ull; + +struct sym { + uint32_t name; + uint32_t addr; + uint32_t flag; +}; + +struct sym64 { + uint32_t name; + uint64_t addr; + uint32_t flag; +}; + +static uint64_t uleb128(const uint8_t*& p) { + uint64_t r = 0; + int s = 0; + for (;;) { + uint8_t b = *p++; + if (b < 0x80) { + r += b << s; + break; + } + r += (b & 0x7f) << s; + s += 7; + } + return r; +} + +static int64_t sleb128(const uint8_t*& p) { + int64_t r = 0; + int s = 0; + for (;;) { + uint8_t b = *p++; + if (b < 0x80) { + if (b & 0x40) { + r -= (0x80 - b) << s; + } + else { + r += (b & 0x3f) << s; + } + break; + } + r += (b & 0x7f) << s; + s += 7; + } + return r; +} + +void MachO::readBind(const uint8_t* p, const uint8_t* end) { + uint8_t ordinal = 0; + const char* sym_name; + uint8_t type = BIND_TYPE_POINTER; + int64_t addend = 0; + int seg_index = 0; + uint64_t seg_offset = 0; + + while (p < end) { + uint8_t op = *p & BIND_OPCODE_MASK; + uint8_t imm = *p & BIND_IMMEDIATE_MASK; + p++; + LOGF("bind: op=%x imm=%d\n", op, imm); + switch (op) { + case BIND_OPCODE_DONE: + break; + + case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: + ordinal = imm; + break; + + case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: + ordinal = uleb128(p); + break; + + case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: + if (imm == 0) { + ordinal = 0; + } else { + ordinal = BIND_OPCODE_MASK | imm; + } + break; + + case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: + sym_name = reinterpret_cast(p); + p += strlen(sym_name) + 1; + LOGF("sym_name=%s\n", sym_name); + break; + + case BIND_OPCODE_SET_TYPE_IMM: + type = imm; + break; + + case BIND_OPCODE_SET_ADDEND_SLEB: + addend = sleb128(p); + break; + + case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + seg_index = imm; + seg_offset = uleb128(p); + break; + + case BIND_OPCODE_ADD_ADDR_ULEB: + seg_offset += uleb128(p); + break; + + case BIND_OPCODE_DO_BIND: { + MachO::Bind* bind = new MachO::Bind(); + uint64_t vmaddr; + if (is64_) 
{ + vmaddr = segments64_[seg_index]->vmaddr; + } else { + vmaddr = segments_[seg_index]->vmaddr; + } + LOGF("do bind! %s seg_index=%d seg_offset=%llu " + "type=%d ordinal=%d addend=%lld vmaddr=%p\n", + sym_name, seg_index, (ull)seg_offset, + type, ordinal, (ll)addend, (void*)vmaddr); + bind->name = sym_name; + bind->vmaddr = vmaddr + seg_offset; + bind->type = type; + bind->ordinal = ordinal; + binds_.push_back(bind); + + seg_offset += is64_ ? 8 : 4; + + break; + } + + case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: + fprintf(stderr, "not impl\n"); + break; + + case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: + case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: + + default: + fprintf(stderr, "unknown op\n"); + } + } +} + +MachO::MachO(const char* filename) { + fd_ = open(filename, O_RDONLY); + if (fd_ < 0) { + fprintf(stderr, "open %s: %s\n", filename, strerror(errno)); + exit(1); + } + + off_t len = lseek(fd_, 0, SEEK_END); + char* bin = reinterpret_cast( + mmap(NULL, len, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd_, 0)); + base_ = bin; + + mach_header* header = reinterpret_cast(bin); + LOGF("magic=%x cpu=%d cpusub=%d file=%d ncmds=%d sizecmd=%d flags=%x\n", + header->magic, header->cputype, header->cpusubtype, + header->filetype, header->ncmds, header->sizeofcmds, + header->flags); + + is64_ = false; + if (header->magic == MH_MAGIC_64) { + is64_ = true; + } else if (header->magic != MH_MAGIC) { + fprintf(stderr, "Not mach-o\n"); + exit(1); + } + + if ((header->cputype & 0x00ffffff) != CPU_TYPE_X86) { + fprintf(stderr, "Unsupported CPU\n"); + exit(1); + } + + char* cmds_ptr = bin + sizeof(mach_header); + if (is64_) { + cmds_ptr += sizeof(uint32_t); + } + + uint32_t* symtab = NULL; + const char* symstrtab = NULL; + + for (uint32_t i = 0; i < header->ncmds; i++) { + uint32_t cmd = *reinterpret_cast(cmds_ptr); + LOGF("%x\n", cmd); + + switch (cmd) { + case LC_SEGMENT_64: { + segment_command_64* segment = + reinterpret_cast(cmds_ptr); + segments64_.push_back(segment); 
+ + LOGF("segment %s: vmaddr=%p vmsize=%llu " + "fileoff=%llu filesize=%llu " + "maxprot=%d initprot=%d nsects=%u flags=%u\n", + segment->segname, + (void*)segment->vmaddr, (ull)segment->vmsize, + (ull)segment->fileoff, (ull)segment->filesize, + segment->maxprot, segment->initprot, + segment->nsects, segment->flags); + + section_64* sections = reinterpret_cast( + cmds_ptr + sizeof(segment_command_64)); + for (uint32_t j = 0; j < segment->nsects; j++) { + const section_64& sec = sections[j]; + LOGF("section %s in %s: " + "addr=%p size=%llu offset=%u align=%u " + "reloff=%u nreloc=%u flags=%u " + "reserved1=%u reserved2=%u reserved3=%u\n", + sec.sectname, sec.segname, + (void*)sec.addr, (ull)sec.size, + sec.offset, sec.align, + sec.reloff, sec.nreloc, sec.flags, + sec.reserved1, sec.reserved2, sec.reserved3); + } + + break; + } + + case LC_DYLD_INFO_ONLY: { + dyld_info_command* dyinfo = + reinterpret_cast(cmds_ptr); + LOGF("dyld info: rebase_off=%u rebase_size=%u " + "bind_off=%u bind_size=%u " + "weak_bind_off=%u weak_bind_size=%u " + "lazy_bind_off=%u lazy_bind_size=%u " + "export_off=%u export_size=%u\n", + dyinfo->rebase_off, dyinfo->rebase_size, + dyinfo->bind_off, dyinfo->bind_size, + dyinfo->weak_bind_off, dyinfo->weak_bind_size, + dyinfo->lazy_bind_off, dyinfo->lazy_bind_size, + dyinfo->export_off, dyinfo->export_size); + + { + const uint8_t* p = reinterpret_cast( + bin + dyinfo->bind_off); + const uint8_t* end = p + dyinfo->bind_size; + readBind(p, end); + } + + { + const uint8_t* p = reinterpret_cast( + bin + dyinfo->lazy_bind_off); + const uint8_t* end = p + dyinfo->lazy_bind_size; + readBind(p, end); + } + + break; + } + + case LC_SYMTAB: { + symtab_command* symtab_cmd = + reinterpret_cast(cmds_ptr); + + LOGF("symoff=%u nsysm=%u stroff=%u strsize=%u\n", + symtab_cmd->symoff, symtab_cmd->nsyms, + symtab_cmd->stroff, symtab_cmd->strsize); + + symtab = reinterpret_cast(bin + symtab_cmd->symoff); + symstrtab = bin + symtab_cmd->stroff; + for (uint32_t i = 0; 
i < symtab_cmd->nsyms; i++) { + if (is64_) { + LOGF("%d %s(%d) %p %d\n", + i, symstrtab + symtab[0], symtab[0], + (void*)*(uint64_t*)(symtab + 1), + symtab[3]); + symtab += 4; + } else { + LOGF("%d %s(%d) %p %d\n", + i, symstrtab + symtab[0], symtab[0], + (void*)*(uint32_t*)(symtab + 1), + symtab[2]); + symtab += 3; + } + } + symtab = reinterpret_cast(bin + symtab_cmd->symoff); + break; + } + + case LC_DYSYMTAB: { + dysymtab_command* dysymtab_cmd = + reinterpret_cast(cmds_ptr); + + LOGF("dysym:\n" + " ilocalsym=%u nlocalsym=%u\n" + " iextdefsym=%u nextdefsym=%u\n" + " iundefsym=%u nundefsym=%u\n" + " tocoff=%u ntoc=%u\n" + " modtaboff=%u nmodtab=%u\n" + " extrefsymoff=%u nextrefsyms=%u\n" + " indirectsymoff=%u nindirectsyms=%u\n" + " extreloff=%u nextrel=%u\n" + " locreloff=%u nlocrel=%u\n" + , + dysymtab_cmd->ilocalsym, dysymtab_cmd->nlocalsym, + dysymtab_cmd->iextdefsym, dysymtab_cmd->nextdefsym, + dysymtab_cmd->iundefsym, dysymtab_cmd->nundefsym, + dysymtab_cmd->tocoff, dysymtab_cmd->ntoc, + dysymtab_cmd->modtaboff, dysymtab_cmd->nmodtab, + dysymtab_cmd->extrefsymoff, dysymtab_cmd->nextrefsyms, + dysymtab_cmd->indirectsymoff, dysymtab_cmd->nindirectsyms, + dysymtab_cmd->extreloff, dysymtab_cmd->nextrel, + dysymtab_cmd->locreloff, dysymtab_cmd->nlocrel); + + uint32_t* dysyms = reinterpret_cast( + bin + dysymtab_cmd->indirectsymoff); + for (uint32_t j = 0; j < dysymtab_cmd->nindirectsyms; j++) { + uint32_t dysym = dysyms[j]; + uint32_t index = dysym & 0x3fffffff; + const char* local = + (dysym & INDIRECT_SYMBOL_LOCAL) ? " local" : ""; + const char* abs = + (dysym & INDIRECT_SYMBOL_ABS) ? " abs" : ""; + + uint32_t* sym = symtab; + sym += index * (is64_ ? 
4 : 3); + + LOGF("dylib %d %s(%u)%s%s\n", + j, symstrtab + sym[0], index, local, abs); + } + + uint32_t* dymods = reinterpret_cast( + bin + dysymtab_cmd->modtaboff); + for (uint32_t j = 0; j < dysymtab_cmd->nmodtab; j++) { + LOGF("dymods: %u\n", dymods[j]); + } + + break; + } + + case LC_LOAD_DYLINKER: { + lc_str name = *reinterpret_cast( + cmds_ptr + sizeof(uint32_t) * 2); + LOGF("dylinker: %s\n", cmds_ptr + name.offset); + break; + } + + case LC_UUID: + break; + + case LC_UNIXTHREAD: { + uint32_t* p = reinterpret_cast(cmds_ptr); + LOGF("UNIXTHREAD"); + for (uint32_t i = 2; i < p[1]; i++) { + LOGF(" %d:%x", i, p[i]); + } + LOGF("\n"); + entry_ = reinterpret_cast(cmds_ptr)[18]; + LOGF("entry=%llx\n", (ull)entry_); + break; + } + + case LC_LOAD_DYLIB: { + dylib* lib = reinterpret_cast( + cmds_ptr + sizeof(uint32_t) * 2); + LOGF("dylib: %s\n", cmds_ptr + lib->name.offset); + break; + } + + } + + cmds_ptr += reinterpret_cast(cmds_ptr)[1]; + } + + LOGF("%p vs %p\n", cmds_ptr, bin + len); +} + +MachO::~MachO() { + for (size_t i = 0; i < binds_.size(); i++) { + delete binds_[i]; + } + // need munmap + close(fd_); +} diff --git a/mach-o.h b/mach-o.h new file mode 100644 index 0000000..099bc9d --- /dev/null +++ b/mach-o.h @@ -0,0 +1,83 @@ +// Copyright 2011 Shinichiro Hamaji. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials +// provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY Shinichiro Hamaji ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Shinichiro Hamaji OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#ifndef MACH_O_H_ +#define MACH_O_H_ + +#include + +#include + +#include "mach-o/loader.h" + +using namespace std; + +class MachO { + public: + struct Bind { + uint64_t vmaddr; + const char* name; + int64_t addend; + uint8_t type; + uint8_t ordinal; + }; + + explicit MachO(const char* filename); + + ~MachO(); + + const vector& segments64() const { + return segments64_; + } + + const vector& segments() const { + return segments_; + } + + const vector binds() const { return binds_; } + + const char* base() const { return base_; } + + uint64_t entry() const { return entry_; } + + bool is64() const { return is64_; } + + int fd() const { return fd_; } + + private: + void readBind(const uint8_t* p, const uint8_t* end); + + vector segments64_; + vector segments_; + vector binds_; + const char* base_; + uint64_t entry_; + bool is64_; + int fd_; +}; + +#endif // MACH_O_H_ diff --git a/mach/argv.c b/mach/argv.c new file mode 100644 index 0000000..4fc9fb8 --- /dev/null +++ b/mach/argv.c @@ -0,0 +1,11 @@ +#include + +int main(int argc, char* argv[]) { + printf("argc=%d\n", argc); + printf("argv[0]=%p\n", argv[0]); + printf("argv[0]=%s\n", argv[0]); + printf("argv[1]=%p\n", argv[1]); + 
printf("argv[1]=%s\n", argv[1]); + return 42; +} + diff --git a/mach/getenv.c b/mach/getenv.c new file mode 100644 index 0000000..d68092b --- /dev/null +++ b/mach/getenv.c @@ -0,0 +1,5 @@ +#include +#include +int main() { + printf("%s\n", getenv("HOME")); +} diff --git a/mach/getrusage.c b/mach/getrusage.c new file mode 100644 index 0000000..31e7994 --- /dev/null +++ b/mach/getrusage.c @@ -0,0 +1,22 @@ +#include +#include +#include + +int main() { + struct rusage ru; + int r = getrusage(RUSAGE_SELF, &ru); + printf("ret=%d\n", r); + printf("utime=%d.%d stime=%d.%d\n" + "maxrss=%ld ixrss=%ld idrss=%ld isrss=%ld\n" + "minflt=%ld majflt=%ld nswap=%ld\n" + "inblock=%ld oublock=%ld msgsnd=%ld msgrcv=%ld\n" + "nsignals=%ld nvcsw=%ld nivcsw=%ld\n" + , + (int)ru.ru_utime.tv_sec, (int)ru.ru_utime.tv_usec, + (int)ru.ru_stime.tv_sec, (int)ru.ru_stime.tv_usec, + ru.ru_maxrss, ru.ru_ixrss, ru.ru_idrss, ru.ru_isrss, + ru.ru_minflt, ru.ru_majflt, ru.ru_nswap, + ru.ru_inblock, ru.ru_oublock, ru.ru_msgsnd, ru.ru_msgrcv, + ru.ru_nsignals, ru.ru_nvcsw, ru.ru_nivcsw); + return 0; +} diff --git a/mach/gettimeofday.c b/mach/gettimeofday.c new file mode 100644 index 0000000..caa64bf --- /dev/null +++ b/mach/gettimeofday.c @@ -0,0 +1,10 @@ +#include +#include + +int main() { + struct timeval tv; + int r = gettimeofday(&tv, NULL); + printf("%d %d\n", tv.tv_sec, tv.tv_usec); + return 0; +} + diff --git a/mach/hello.c b/mach/hello.c new file mode 100644 index 0000000..081e786 --- /dev/null +++ b/mach/hello.c @@ -0,0 +1,6 @@ +#include +int main() { + printf("Hello, world!\n"); + return 0; +} + diff --git a/mach/mycat.c b/mach/mycat.c new file mode 100644 index 0000000..c9f2f33 --- /dev/null +++ b/mach/mycat.c @@ -0,0 +1,18 @@ +#include + +int main(int argc, char* argv[]) { + if (argc < 2) { + fprintf(stderr, "need file name\n"); + return 1; + } + + FILE* fp = fopen(argv[1], "rb"); + char buf[4096]; + size_t len; + while (1) { + len = fread(buf, 1, 4096, fp); + fwrite(buf, 1, len, stdout); + if 
(len != 4096) break; + } + fclose(fp); +} diff --git a/mach/nsgetexecpath.c b/mach/nsgetexecpath.c new file mode 100644 index 0000000..a63d886 --- /dev/null +++ b/mach/nsgetexecpath.c @@ -0,0 +1,10 @@ +#include +#include + +int main() { + char buf[4096]; + unsigned int size = 4095; + int r = _NSGetExecutablePath(buf, &size); + printf("r=%d %s\n", r, buf); + return 0; +} diff --git a/mach/printf.c b/mach/printf.c new file mode 100644 index 0000000..1362ffe --- /dev/null +++ b/mach/printf.c @@ -0,0 +1,3 @@ +int main(int argc) { + printf("%d\n", argc); +} diff --git a/mach/putc.c b/mach/putc.c new file mode 100644 index 0000000..88433e1 --- /dev/null +++ b/mach/putc.c @@ -0,0 +1,12 @@ +// It's very unfortunate that apple's putc_unlocked is a macro and depends on +// the layout of FILE. +#include + +int main() { + putc_unlocked('h', stdout); + putc_unlocked('o', stdout); + putc_unlocked('g', stdout); + putc_unlocked('e', stdout); + putc_unlocked('\n', stdout); + return 0; +} diff --git a/mach/setlocale.c b/mach/setlocale.c new file mode 100644 index 0000000..d289644 --- /dev/null +++ b/mach/setlocale.c @@ -0,0 +1,9 @@ +#include +#include + +int main() { + char* p = setlocale(2, ""); + printf("%p\n", p); + printf("%s\n", p); +} + diff --git a/mach/stack.c b/mach/stack.c new file mode 100644 index 0000000..5269660 --- /dev/null +++ b/mach/stack.c @@ -0,0 +1,12 @@ +#include + +int fib(int n) { + if (n < 2) return 1; + return fib(n-1) + fib(n-2); +} + +int main() { + printf("%d\n", fib(20)); + return 0; +} + diff --git a/mach/stat.c b/mach/stat.c new file mode 100644 index 0000000..1e8ef92 --- /dev/null +++ b/mach/stat.c @@ -0,0 +1,12 @@ +#include +#include +#include + +int main(int argc, char* argv[]) { + struct stat buf; + int fd = open(argv[0], O_RDONLY); + int ret = fstat(fd, &buf); + printf("ret=%d size=%d size_offset=%ld\n", + ret, buf.st_size, (char*)&buf.st_size - (char*)&buf); + return 0; +} diff --git a/mach/stderr.c b/mach/stderr.c new file mode 100644 index 
0000000..b61cf47 --- /dev/null +++ b/mach/stderr.c @@ -0,0 +1,6 @@ +#include + +int main() { + fprintf(stderr, "Hello, world!\n"); + return 0; +} diff --git a/mach/strcmp.c b/mach/strcmp.c new file mode 100644 index 0000000..59e926b --- /dev/null +++ b/mach/strcmp.c @@ -0,0 +1,11 @@ +#include + +int main(int argc, char* argv[]) { + puts("run"); + if (strcmp(argv[argc-1], "a.out")) { + puts("yes"); + } else { + puts("no"); + } + puts("done"); +} diff --git a/macho2elf.cc b/macho2elf.cc new file mode 100644 index 0000000..8d463b1 --- /dev/null +++ b/macho2elf.cc @@ -0,0 +1,674 @@ +// Copyright 2011 Shinichiro Hamaji. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY Shinichiro Hamaji ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Shinichiro Hamaji OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. 
+ +// An attempt to translate from Mach-O to ELF. +// +// Note that programs generated by this program won't run because this +// doesn't initialize glibc properly. + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mach-o.h" + +using namespace std; + +static map g_rename; +static vector g_sos; + +static void initRename() { +#define RENAME(src, dst) g_rename.insert(make_pair(#src, #dst)); +#define WRAP(src) RENAME(src, __darwin_ ## src) +#include "rename.tab" +#undef RENAME +#undef WRAP +} + +static uint64_t alignMem(uint64_t p, uint64_t a) { + a--; + return (p + a) & ~a; +} + +template +struct BitsHelpers { + typedef Elf64_Ehdr Elf_Ehdr; + typedef Elf64_Phdr Elf_Phdr; + typedef Elf64_Shdr Elf_Shdr; + typedef Elf64_Dyn Elf_Dyn; + typedef Elf64_Sym Elf_Sym; + typedef Elf64_Rela Elf_Rel; + typedef uint64_t intptr; + typedef segment_command_64 mach_segment; + + static const vector& segments(const MachO& mach) { + return mach.segments64(); + } + + static int elf_st_bind(int val) { + return ELF64_ST_BIND(val); + } +}; + +template <> +struct BitsHelpers { + typedef Elf32_Ehdr Elf_Ehdr; + typedef Elf32_Phdr Elf_Phdr; + typedef Elf32_Shdr Elf_Shdr; + typedef Elf32_Dyn Elf_Dyn; + typedef Elf32_Sym Elf_Sym; + typedef Elf32_Rela Elf_Rel; + typedef uint32_t intptr; + typedef segment_command mach_segment; + + static const vector& segments(const MachO& mach) { + return mach.segments(); + } + + static int elf_st_bind(int val) { + return ELF32_ST_BIND(val); + } +}; + +template +class ELFBuilder { + typedef BitsHelpers Helpers; + typedef typename Helpers::Elf_Ehdr Ehdr; + typedef typename Helpers::Elf_Phdr Phdr; + typedef typename Helpers::Elf_Shdr Shdr; + typedef typename Helpers::Elf_Dyn Dyn; + typedef typename Helpers::Elf_Sym Sym; + typedef typename Helpers::Elf_Rel Rel; + typedef typename Helpers::intptr intptr; + typedef typename Helpers::mach_segment Segment; + + public: + ~ELFBuilder() { + for (size_t i = 0; i < 
sections_.size(); i++) { + delete sections_[i]; + } + + fclose(fp_); + } + + void emit(const MachO& mach, const char* filename) { + Section* null_section = newSection("", SHT_NULL); + null_section->flags = 0; + + fp_ = fopen(filename, "wb"); + if (!fp_) { + fprintf(stderr, "Cannot write %s\n", filename); + exit(1); + } + + int num_phdr = 3; // PT_INTERP + PT_DYNAMIC + PT_LOAD for dynamic + intptr base_vaddr = 0; + intptr max_vaddr = 0; + + const vector& segments = Helpers::segments(mach); + for (size_t i = 0; i < segments.size(); i++) { + Segment* seg = segments[i]; + const char* name = seg->segname; + if (!strcmp(name, SEG_PAGEZERO)) { + continue; + } + if (!strcmp(name, SEG_TEXT)) { + base_vaddr = seg->vmaddr; + } + max_vaddr = max(max_vaddr, seg->vmaddr + seg->vmsize); + num_phdr++; + } + + Ehdr ehdr; + memset(&ehdr, 0, sizeof(ehdr)); + ehdr.e_ident[EI_MAG0] = ELFMAG0; + ehdr.e_ident[EI_MAG1] = ELFMAG1; + ehdr.e_ident[EI_MAG2] = ELFMAG2; + ehdr.e_ident[EI_MAG3] = ELFMAG3; + ehdr.e_ident[EI_CLASS] = is64 ? ELFCLASS64 : ELFCLASS32; + ehdr.e_ident[EI_DATA] = ELFDATA2LSB; + ehdr.e_ident[EI_VERSION] = EV_CURRENT; + ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV; + ehdr.e_type = ET_EXEC; + ehdr.e_machine = is64 ? EM_X86_64 : EM_386; + ehdr.e_version = EV_CURRENT; + ehdr.e_entry = mach.entry(); + ehdr.e_phoff = sizeof(ehdr); + ehdr.e_shoff = 0; + ehdr.e_flags = 0; + ehdr.e_ehsize = sizeof(ehdr); + ehdr.e_phentsize = sizeof(Phdr); + ehdr.e_phnum = num_phdr; + ehdr.e_shentsize = sizeof(Shdr); + ehdr.e_shnum = 0; + ehdr.e_shstrndx = 0; + fwrite(&ehdr, sizeof(ehdr), 1, fp_); + + intptr offset = sizeof(Ehdr) + sizeof(Phdr) * num_phdr; + Phdr phdr; + + const char* loader = + is64 ? 
"/lib64/ld-linux-x86-64.so.2" : "/lib/ld-linux.so.2"; + phdr.p_type = PT_INTERP; + phdr.p_offset = offset; + phdr.p_vaddr = base_vaddr + phdr.p_offset; + phdr.p_paddr = phdr.p_vaddr; + phdr.p_filesz = strlen(loader) + 1; + phdr.p_memsz = phdr.p_filesz; + phdr.p_flags = PF_R; + phdr.p_align = 1; + fwrite(&phdr, sizeof(phdr), 1, fp_); + + offset += phdr.p_filesz; + + for (size_t i = 0; i < segments.size(); i++) { + Segment* seg = segments[i]; + const char* name = seg->segname; + if (!strcmp(name, SEG_PAGEZERO)) { + continue; + } + + phdr.p_type = PT_LOAD; + phdr.p_offset = seg->fileoff; + phdr.p_vaddr = seg->vmaddr; + phdr.p_paddr = phdr.p_vaddr; + // TODO! + //phdr.p_filesz = max(0x1000, seg->filesize); + phdr.p_filesz = alignMem(seg->filesize, 0x1000); + phdr.p_memsz = seg->vmsize; + phdr.p_flags = 0; + if (seg->initprot & VM_PROT_READ) { + phdr.p_flags |= PF_R; + } + if (seg->initprot & VM_PROT_WRITE) { + phdr.p_flags |= PF_W; + } + if (seg->initprot & VM_PROT_EXECUTE) { + phdr.p_flags |= PF_X; + } + phdr.p_align = 0x1000; + fwrite(&phdr, sizeof(phdr), 1, fp_); + + const char* sec_name = seg->segname; + int flags = SHF_ALLOC; + int align = 8; + //size = phdr.p_filesz; + uint64_t size = seg->filesize; + if (!strcmp(sec_name, SEG_TEXT)) { + sec_name = ".text"; + flags |= SHF_EXECINSTR; + align = 16; + // TODO: maybe OK? 
+ size = alignMem(size, 0x1000) - offset; + seg->fileoff = offset; + seg->filesize = size; + } else if (!strcmp(sec_name, SEG_DATA)) { + sec_name = ".data"; + flags |= SHF_WRITE; + } + Section* sec = newSection(sec_name, SHT_PROGBITS); + sec->override_data_size = phdr.p_filesz; + sec->vmaddr = seg->vmaddr; + sec->offset = phdr.p_offset; + sec->flags |= flags; + sec->align = align; + + offset += size; + } + + size_t num_dyns = 9 + g_sos.size(); + + intptr dynamic_offset = max_vaddr + (offset & 0xfff); + + phdr.p_type = PT_LOAD; + phdr.p_offset = offset; + phdr.p_vaddr = dynamic_offset; + phdr.p_paddr = phdr.p_vaddr; + // TODO: need to rewrite the size + phdr.p_filesz = 0x4000; + phdr.p_memsz = phdr.p_filesz; + phdr.p_flags = PF_R | PF_W; + phdr.p_align = 8; + fwrite(&phdr, sizeof(phdr), 1, fp_); + + phdr.p_type = PT_DYNAMIC; + phdr.p_filesz = sizeof(Dyn) * num_dyns; + fwrite(&phdr, sizeof(phdr), 1, fp_); + + Section* dynamic = newSection(".dynamic", SHT_DYNAMIC); + dynamic->entsize = sizeof(Dyn); + dynamic->offset = phdr.p_offset; + dynamic->override_data_size = phdr.p_filesz; + dynamic->flags |= SHF_WRITE; + + offset += phdr.p_filesz; + dynamic_offset += phdr.p_filesz; + + fwrite(loader, strlen(loader) + 1, 1, fp_); + + for (size_t i = 0; i < segments.size(); i++) { + Segment* seg = segments[i]; + const char* name = seg->segname; + if (!strcmp(name, SEG_PAGEZERO)) { + continue; + } + + fwrite(mach.base() + seg->fileoff, 1, seg->filesize, fp_); + } + + Symtab* symtab = newSymtab(".dynsym", SHT_DYNSYM, ".dynstr", ".hash"); + dynamic->link = symtab->str; + + vector rels; + for (size_t i = 0; i < mach.binds().size(); i++) { + MachO::Bind* bind = mach.binds()[i]; + + if (bind->name[0] != '_') { + continue; + } + + if (bind->type == BIND_TYPE_POINTER) { + const char* name = bind->name + 1; + + printf("Putting ELF symbol: %s\n", name); + + map::const_iterator found = + g_rename.find(name); + if (found != g_rename.end()) { + printf("Applying renaming: %s => %s\n", + name, 
found->second.c_str()); + name = found->second.c_str(); + } + + int sym_index = putELFSym(symtab, bind->vmaddr, 0, + ELF64_ST_INFO(STB_GLOBAL, STT_FUNC), + 0, 0, name); + + Rel rel; + rel.r_offset = bind->vmaddr; + rel.r_info = ELF64_R_INFO(sym_index, R_X86_64_JUMP_SLOT); + rel.r_addend = bind->addend; + rels.push_back(rel); + } else { + printf("Unknown bind type: %d\n", bind->type); + abort(); + } + } + + vector dyns; + for (size_t i = 0; i < g_sos.size(); i++) { + addDynVal(DT_NEEDED, putELFStr(symtab->str, g_sos[i].c_str()), + &dyns); + } + + addDynVal(DT_HASH, dynamic_offset, &dyns); + symtab->hash->offset = offset; + offset += symtab->hash->data.size(); + dynamic_offset += symtab->hash->data.size(); + + addDynVal(DT_SYMTAB, dynamic_offset, &dyns); + symtab->sym->offset = offset; + offset += symtab->sym->data.size(); + dynamic_offset += symtab->sym->data.size(); + + addDynVal(DT_STRTAB, dynamic_offset, &dyns); + symtab->str->offset = offset; + offset += symtab->str->data.size(); + dynamic_offset += symtab->str->data.size(); + + addDynVal(DT_STRSZ, symtab->str->data.size(), &dyns); + addDynVal(DT_SYMENT, sizeof(Sym), &dyns); + addDynVal(DT_RELA, dynamic_offset, &dyns); + + addDynVal(DT_RELASZ, sizeof(Rel) * rels.size(), &dyns); + Section* rel_sec = newSection(".rela.got", SHT_RELA); + rel_sec->offset = offset; + rel_sec->override_data_size = sizeof(Rel) * rels.size(); + rel_sec->entsize = sizeof(Rel); + rel_sec->link = symtab->sym; + offset += sizeof(Rel) * rels.size(); + + addDynVal(DT_RELAENT, sizeof(Rel), &dyns); + addDynVal(DT_NULL, 0, &dyns); + + assert(num_dyns == dyns.size()); + + for (size_t i = 0; i < dyns.size(); i++) { + fwrite(&dyns[i], 1, sizeof(Dyn), fp_); + } + fwrite(symtab->hash->data.data(), symtab->hash->data.size(), 1, fp_); + fwrite(symtab->sym->data.data(), symtab->sym->data.size(), 1, fp_); + fwrite(symtab->str->data.data(), symtab->str->data.size(), 1, fp_); + for (size_t i = 0; i < rels.size(); i++) { + fwrite(&rels[i], 1, sizeof(Rel), 
fp_); + } + + delete symtab; + + Section* shstrtab = newSection(".shstrtab", SHT_STRTAB); + shstrtab->offset = offset; + + for (size_t i = 0; i < sections_.size(); i++) { + Section* sec = sections_[i]; + sec->name_offset = putELFStr(shstrtab, sec->name.c_str()); + } + + fwrite(shstrtab->data.data(), shstrtab->data.size(), 1, fp_); + offset += shstrtab->data.size(); + + ehdr.e_shoff = offset; + ehdr.e_shstrndx = sections_.size() - 1; + + for (size_t i = 0; i < sections_.size(); i++) { + Section* sec = sections_[i]; + Shdr shdr; + shdr.sh_name = sec->name_offset; + shdr.sh_type = sec->type; + shdr.sh_flags = sec->flags; + shdr.sh_addr = sec->offset + base_vaddr; + if (sec->vmaddr) { + shdr.sh_addr = sec->vmaddr; + } + shdr.sh_offset = sec->offset; + shdr.sh_size = sec->data.size(); + if (sec->override_data_size) { + shdr.sh_size = sec->override_data_size; + } + shdr.sh_link = 0; + if (sec->link) { + shdr.sh_link = sec->link->index; + } + shdr.sh_info = 0; + //shdr.sh_addralign = sec->align; + shdr.sh_addralign = 0; + shdr.sh_entsize = sec->entsize; + fwrite(&shdr, 1, sizeof(shdr), fp_); + } + ehdr.e_shnum = sections_.size(); + + char padding[4096]; + fwrite(padding, 1, sizeof(padding), fp_); + + fseek(fp_, 0, SEEK_SET); + fwrite(&ehdr, sizeof(ehdr), 1, fp_); + } + + private: + struct Section { + string name; + int name_offset; + string data; + uint64_t offset; + uint64_t vmaddr; + uint64_t override_data_size; + uint32_t type; + int nb_hashed_syms; + int entsize; + int index; + int flags; + int align; + Section* link; + }; + + struct Symtab { + Section* sym; + Section* str; + Section* hash; + + ~Symtab() { + } + }; + + static char* addSectionPtr(Section* sec, uint64_t size) { + size_t offset = sec->data.size(); + sec->data.resize(offset + size); + return &sec->data[offset]; + } + + Section* newSection(const char *name, uint32_t type) { + Section* sec; + sec = new Section(); + sec->name = name; + sec->type = type; + sec->flags = SHF_ALLOC; + sec->index = 
static_cast(sections_.size()); + + sec->name_offset = 0; + sec->offset = 0; + sec->vmaddr = 0; + sec->override_data_size = 0; + sec->nb_hashed_syms = 0; + sec->entsize = 0; + sec->align = 0; + sec->link = NULL; + + sections_.push_back(sec); + return sec; + } + + Symtab* newSymtab(const char* symtab_name, uint32_t symtab_type, + const char* strtab_name, + const char* hash_name) { + Symtab* symtab = new Symtab(); + symtab->sym = newSection(symtab_name, symtab_type); + symtab->sym->entsize = sizeof(Sym); + symtab->str = newSection(strtab_name, SHT_STRTAB); + putELFStr(symtab->str, ""); + symtab->sym->link = symtab->str; + putELFSym(symtab, 0, 0, 0, 0, 0, NULL); + + int nb_buckets = 1; + symtab->hash = newSection(hash_name, SHT_HASH); + symtab->hash->entsize = sizeof(int); + symtab->hash->link = symtab->sym; + + int* ptr = (int*)addSectionPtr(symtab->hash, + (2 + nb_buckets + 1) * sizeof(int)); + ptr[0] = nb_buckets; + ptr[1] = 1; + memset(ptr + 2, 0, (nb_buckets + 1) * sizeof(int)); + + return symtab; + } + + /* return the symbol number */ + static int putELFSym(Symtab* symtab, + unsigned long value, unsigned long size, + int info, int other, int shndx, const char *name) { + int name_offset, sym_index; + int nbuckets, h; + Sym* sym; + Section* hs; + Section* s = symtab->sym; + + sym = reinterpret_cast(addSectionPtr(s, sizeof(Sym))); + if (name) { + name_offset = putELFStr(symtab->str, name); + } else { + name_offset = 0; + } + /* XXX: endianness */ + sym->st_name = name_offset; + sym->st_value = value; + sym->st_size = size; + sym->st_info = info; + sym->st_other = other; + sym->st_shndx = shndx; + sym_index = sym - (Sym*)s->data.data(); + printf("sym: %x %p index=%d\n", + sym->st_name, (void*)sym->st_value, sym_index); + hs = symtab->hash; + if (hs) { + int *ptr, *base; + ptr = reinterpret_cast(addSectionPtr(hs, sizeof(int))); + base = (int*)hs->data.data(); + /* only add global or weak symbols */ + if (Helpers::elf_st_bind(info) != STB_LOCAL) { + /* add another 
hashing entry */ + nbuckets = base[0]; + h = calcHash(name) % nbuckets; + *ptr = base[2 + h]; + base[2 + h] = sym_index; + base[1]++; + /* we resize the hash table */ + hs->nb_hashed_syms++; + if (hs->nb_hashed_syms > 2 * nbuckets) { + rebuildHash(symtab, 2 * nbuckets); + } + } else { + *ptr = 0; + base[1]++; + } + } + return sym_index; + } + + /* rebuild hash table of section s */ + /* NOTE: we do factorize the hash table code to go faster */ + static void rebuildHash(Symtab* symtab, unsigned int nb_buckets) { + Sym* sym; + int* ptr; + int* hash; + int nb_syms, sym_index, h; + char* strtab; + Section* s = symtab->sym; + + strtab = (char*)symtab->str->data.data(); + nb_syms = s->data.size() / sizeof(Sym); + + symtab->hash->data.clear(); + ptr = (int*)addSectionPtr(symtab->hash, + (2 + nb_buckets + nb_syms) * sizeof(int)); + ptr[0] = nb_buckets; + ptr[1] = nb_syms; + ptr += 2; + hash = ptr; + memset(hash, 0, (nb_buckets + 1) * sizeof(int)); + ptr += nb_buckets + 1; + + sym = (Sym*)s->data.data() + 1; + for(sym_index = 1; sym_index < nb_syms; sym_index++) { + if (Helpers::elf_st_bind(sym->st_info) != STB_LOCAL) { + h = calcHash(strtab + sym->st_name) % nb_buckets; + *ptr = hash[h]; + hash[h] = sym_index; + } else { + *ptr = 0; + } + ptr++; + sym++; + } + } + + static uint64_t putELFStr(Section* sec, const char* sym) { + uint64_t offset = sec->data.size(); + sec->data += sym; + sec->data += '\0'; + return offset; + } + + static void addDynVal(int64_t tag, uint64_t val, vector* dyns) { + Dyn dyn; + dyn.d_tag = tag; + dyn.d_un.d_val = val; + dyns->push_back(dyn); + } + + static void addDynPtr(int64_t tag, uint64_t ptr, vector* dyns) { + Dyn dyn; + dyn.d_tag = tag; + dyn.d_un.d_ptr = ptr; + dyns->push_back(dyn); + } + + static uint64_t addStr(const string& s, string* o) { + uint64_t r = s.size(); + *o += s; + *o += '\0'; + return r; + } + + static uint32_t calcHash(const char* name) { + const uint8_t* ptr = reinterpret_cast(name); + uint32_t h = 0; + while (*ptr) { + h 
<<= 4; + h += *ptr++; + uint32_t g = h & 0xf0000000; + h ^= g >> 24; + h &= ~g; + } + return h; + } + + FILE* fp_; + vector sections_; +}; + +template +void emitELF(const MachO& mach, const char* filename) { + ELFBuilder builder; + builder.emit(mach, filename); +} + +int main(int argc, char* argv[]) { + initRename(); + + vector args; + g_sos.push_back("/lib/libc.so.6"); + for (int i = 1; i < argc; i++) { + const char* arg = argv[i]; + if (arg[0] != '-') { + args.push_back(arg); + continue; + } + + if (arg[1] == 'l') { + g_sos.push_back(arg + 2); + } else { + fprintf(stderr, "Unknown switch '%s'\n", arg); + exit(1); + } + } + + if (args.size() < 2) { + exit(1); + } + + MachO mach(args[0].c_str()); + if (mach.is64()) { + emitELF(mach, args[1].c_str()); + } else { + emitELF(mach, args[1].c_str()); + } +} diff --git a/no_trampoline.tab b/no_trampoline.tab new file mode 100644 index 0000000..1683128 --- /dev/null +++ b/no_trampoline.tab @@ -0,0 +1,41 @@ +// Copyright 2011 Shinichiro Hamaji. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY Shinichiro Hamaji ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL Shinichiro Hamaji OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +// A list of symbols which should not be wrapped by trampolines. +// Symbols for data should be listed here. +// TODO(hamaji): Investigate a way to get rid of this exemption list. + +NO_TRAMPOLINE(__darwin_stdin) +NO_TRAMPOLINE(__darwin_stdout) +NO_TRAMPOLINE(__darwin_stderr) + +NO_TRAMPOLINE(optind) +NO_TRAMPOLINE(opterr) +NO_TRAMPOLINE(optopt) + +NO_TRAMPOLINE(_DefaultRuneLocale) +NO_TRAMPOLINE(__stack_chk_guard) diff --git a/rename.tab b/rename.tab new file mode 100644 index 0000000..49ce73d --- /dev/null +++ b/rename.tab @@ -0,0 +1,78 @@ +// Copyright 2011 Shinichiro Hamaji. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY Shinichiro Hamaji ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL Shinichiro Hamaji OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +// A translation table from Mac symbols to Linux's. + +RENAME(__stderrp, __darwin_stderr) +RENAME(__stdoutp, __darwin_stdout) +RENAME(__stdinp, __darwin_stdin) +WRAP(stderr) +WRAP(stdout) +WRAP(stdin) + +WRAP(fopen) +WRAP(fdopen) +WRAP(freopen) +WRAP(fclose) +WRAP(fread) +WRAP(fwrite) +WRAP(fseek) +WRAP(ftell) +WRAP(fgetc) +WRAP(ungetc) +WRAP(fputc) +WRAP(fputs) +WRAP(fscanf) +WRAP(vfscanf) +WRAP(fprintf) +WRAP(vfprintf) +WRAP(fflush) +WRAP(setbuf) +WRAP(setbuffer) +WRAP(ferror) +RENAME(_ferror, __darwin_ferror) +WRAP(fileno) + +RENAME(__srget, __darwin_fgetc) +RENAME(__swbuf, __darwin_fputc) + +RENAME(__tolower, tolower) + +RENAME(opendir$INODE64, opendir) +RENAME(readdir$INODE64, readdir) + +WRAP(open) + +RENAME(strlcpy, strncpy) + +WRAP(mmap) +WRAP(sysctl) + +RENAME(realpath$DARWIN_EXTSN, realpath) + +WRAP(execv) +WRAP(execvp)