Skip to content

Commit

Permalink
Refactored traceconfig into separate files. (#758)
Browse files Browse the repository at this point in the history
* Refactored traceconfig into separate files.

* Fixed setup to include traceconfig.cpp for pywhere build.
  • Loading branch information
emeryberger committed Jan 18, 2024
1 parent 4773a98 commit 87520b4
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 122 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def copy_extensions_to_source(self):

pywhere = Extension('scalene.pywhere',
include_dirs=['.', 'src', 'src/include'],
sources = ['src/source/pywhere.cpp'],
sources = ['src/source/pywhere.cpp', 'src/source/traceconfig.cpp'],
extra_compile_args=extra_compile_args(),
py_limited_api=False,
language="c++")
Expand Down
129 changes: 129 additions & 0 deletions src/include/traceconfig.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#pragma once

#ifndef __TRACECONFIG_H
#define __TRACECONFIG_H

#include <Python.h>

#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#if defined(_WIN32)
#include <direct.h>
#else
#include <unistd.h>
#endif

class TraceConfig {
public:
TraceConfig(PyObject* list_wrapper, PyObject* base_path, bool profile_all_b) {
// Assumes that each item is a bytes object
owner = list_wrapper;
path_owner = base_path;
Py_IncRef(owner);
Py_IncRef(path_owner);
profile_all = profile_all_b;
auto size = PyList_Size(owner);
items.reserve(size);
for (int i = 0; i < size; i++) {
auto item = PyList_GetItem(owner, i);
auto unic = PyUnicode_AsASCIIString(item);
auto s = PyBytes_AsString(unic);
items.push_back(s);
}
scalene_base_path = PyBytes_AsString(PyUnicode_AsASCIIString(base_path));
}

bool should_trace(char* filename) {
auto res = _memoize.find(filename);
if ( res != _memoize.end()) {
return res->second;
}
// Return false if filename contains paths corresponding to the native Python libraries.
// This is to avoid profiling the Python interpreter itself.
// Also exclude site-packages and any IPython files.

#if defined(_WIN32)
// If on Windows, use \\ as the path separator.
const auto PATH_SEP = "\\";
#else
// Assume all others are POSIX.
const auto PATH_SEP = "/";
#endif

auto python_lib = std::string("lib") + std::string(PATH_SEP) + std::string("python");
auto scalene_lib = std::string("scalene") + std::string(PATH_SEP) + std::string("scalene");
auto anaconda_lib = std::string("anaconda3") + std::string(PATH_SEP) + std::string("lib");

if (strstr(filename, python_lib.c_str()) != nullptr ||
strstr(filename, scalene_lib.c_str()) != nullptr ||
strstr(filename, anaconda_lib.c_str()) != nullptr ||
// strstr(filename, "site-packages") != nullptr ||
(*filename == '<' && strstr(filename, "<ipython") != nullptr)) {
_memoize.insert(std::pair<std::string, bool>(std::string(filename), false));
return false;
}

if (owner != nullptr) {
for (char* traceable : items) {
if (strstr(filename, traceable)) {
_memoize.insert(std::pair<std::string, bool>(std::string(filename), true));
return true;
}
}
}

// Temporarily change the current working directory to the original program
// path.
char original_cwd_buf[PATH_MAX];
#ifdef _WIN32
auto oldcwd = _getcwd(original_cwd_buf, PATH_MAX);
#else
auto oldcwd = getcwd(original_cwd_buf, PATH_MAX);
#endif
chdir(scalene_base_path);
char resolved_path[PATH_MAX];

// Check to see if the file we are profiling is in the original path.
bool did_resolve_path = realpath(filename, resolved_path);
bool result = false;
if (did_resolve_path) {
// True if we found this file in the original path.
result = (strstr(resolved_path, scalene_base_path) != nullptr);
}

// Now change back to the original current working directory.
chdir(oldcwd);
_memoize.insert(std::pair<std::string, bool>(std::string(filename), result));
return result;
}

void print() {
printf("Profile all? %d\nitems {", profile_all);
for (auto c : items) {
printf("\t%s\n", c);
}
printf("}\n");
}

static void setInstance(TraceConfig* instance) {
std::lock_guard<decltype(_instanceMutex)> g(_instanceMutex);
delete _instance;
_instance = instance;
}

static TraceConfig* getInstance() {
std::lock_guard<decltype(_instanceMutex)> g(_instanceMutex);
return _instance;
}

private:
std::vector<char*> items;
char* scalene_base_path;
// This is to keep the object in scope so that
// the data pointers are always valid
PyObject* owner;
PyObject* path_owner;
bool profile_all;

static std::mutex _instanceMutex;
static TraceConfig* _instance;
static std::unordered_map<std::string, bool> _memoize;
};

#endif
123 changes: 2 additions & 121 deletions src/source/pywhere.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "pywhere.hpp"
#include "traceconfig.hpp"

#include <Python.h>
#include <dlfcn.h>
Expand All @@ -16,126 +17,6 @@
//
// #include "printf.h"
const int NEWLINE_TRIGGER_LENGTH = 98820;
// Decides whether a given file should be traced. Built from a Python list of
// filename fragments and a base path; one instance is shared through
// setInstance()/getInstance(), and verdicts are cached per filename in the
// static _memoize table.
class TraceConfig {
public:
// list_wrapper: Python list whose items are ASCII-encoded below.
// base_path: Python object that is ASCII-encoded into scalene_base_path.
// profile_all_b: stored in profile_all and reported by print().
TraceConfig(PyObject* list_wrapper, PyObject* base_path, bool profile_all_b) {
// Each item is passed to PyUnicode_AsASCIIString, i.e. it is treated as a
// str object that gets encoded to bytes.
owner = list_wrapper;
path_owner = base_path;
Py_IncRef(owner);
Py_IncRef(path_owner);
profile_all = profile_all_b;
auto size = PyList_Size(owner);
items.reserve(size);
for (int i = 0; i < size; i++) {
auto item = PyList_GetItem(owner, i);
// NOTE(review): the encoded bytes object is not released in this block,
// and its result is not checked before being used.
auto unic = PyUnicode_AsASCIIString(item);
auto s = PyBytes_AsString(unic);
// Stores a pointer into the bytes object's buffer, not a copy.
items.push_back(s);
}
scalene_base_path = PyBytes_AsString(PyUnicode_AsASCIIString(base_path));
}

// Returns whether `filename` should be traced; the answer is memoized.
bool should_trace(char* filename) {
auto res = _memoize.find(filename);
if ( res != _memoize.end()) {
return res->second;
}
// Return false if filename contains paths corresponding to the native Python libraries.
// This is to avoid profiling the Python interpreter itself.
// Also exclude site-packages and any IPython files.

#if defined(_WIN32)
// If on Windows, use \\ as the path separator.
const auto PATH_SEP = "\\";
#else
// Assume all others are POSIX.
const auto PATH_SEP = "/";
#endif

auto python_lib = std::string("lib") + std::string(PATH_SEP) + std::string("python");
auto scalene_lib = std::string("scalene") + std::string(PATH_SEP) + std::string("scalene");
auto anaconda_lib = std::string("anaconda3") + std::string(PATH_SEP) + std::string("lib");

if (strstr(filename, python_lib.c_str()) != nullptr ||
strstr(filename, scalene_lib.c_str()) != nullptr ||
strstr(filename, anaconda_lib.c_str()) != nullptr ||
strstr(filename, "site-packages") != nullptr ||
(*filename == '<' && strstr(filename, "<ipython") != nullptr)) {
_memoize.insert(std::pair<std::string, bool>(std::string(filename), false));
return false;
}

// Trace the file if its name contains any of the configured fragments.
if (owner != nullptr) {
for (char* traceable : items) {
if (strstr(filename, traceable)) {
_memoize.insert(std::pair<std::string, bool>(std::string(filename), true));
return true;
}
}
}

// Temporarily change the current working directory to the original program
// path.
char original_cwd_buf[PATH_MAX];
#ifdef _WIN32
auto oldcwd = _getcwd(original_cwd_buf, PATH_MAX);
#else
auto oldcwd = getcwd(original_cwd_buf, PATH_MAX);
#endif
// NOTE(review): the results of getcwd and chdir are not checked here.
chdir(scalene_base_path);
char resolved_path[PATH_MAX];

// Check to see if the file we are profiling is in the original path.
bool did_resolve_path = realpath(filename, resolved_path);
bool result = false;
if (did_resolve_path) {
// True if we found this file in the original path.
result = (strstr(resolved_path, scalene_base_path) != nullptr);
}

// Now change back to the original current working directory.
chdir(oldcwd);
_memoize.insert(std::pair<std::string, bool>(std::string(filename), result));
return result;
}

// Prints profile_all and every configured fragment to stdout.
void print() {
printf("Profile all? %d\nitems {", profile_all);
for (auto c : items) {
printf("\t%s\n", c);
}
printf("}\n");
}

// Replaces the shared instance under _instanceMutex; the previous instance
// is deleted. The static _memoize table is left untouched.
static void setInstance(TraceConfig* instance) {
std::lock_guard<decltype(_instanceMutex)> g(_instanceMutex);
delete _instance;
_instance = instance;
}

// Returns the shared instance (read under _instanceMutex).
static TraceConfig* getInstance() {
std::lock_guard<decltype(_instanceMutex)> g(_instanceMutex);
return _instance;
}

private:
// Filename fragments; each points into a Python bytes object's buffer.
std::vector<char*> items;
char* scalene_base_path;
// This is to keep the object in scope so that
// the data pointers are always valid
PyObject* owner;
PyObject* path_owner;
bool profile_all;

static std::mutex _instanceMutex;
static TraceConfig* _instance;
// Cache of should_trace() verdicts keyed by filename; shared by all instances.
static std::unordered_map<std::string, bool> _memoize;
};

// Out-of-line definitions of TraceConfig's static data members: the shared
// instance pointer (initially null), the mutex used by setInstance() and
// getInstance(), and the should_trace() memo table.
TraceConfig* TraceConfig::_instance = 0;
std::mutex TraceConfig::_instanceMutex;
std::unordered_map<std::string, bool> TraceConfig::_memoize;

static bool last_profiled_invalidated = false;
// An RAII class to simplify acquiring and releasing the GIL.
Expand Down Expand Up @@ -298,7 +179,7 @@ int whereInPython(std::string& filename, int& lineno, int& bytei) {
continue;
}

if (!strstr(filenameStr, "<") && !strstr(filenameStr, "/python") &&
if (!strstr(filenameStr, "<") && !strstr(filenameStr, "/python") &&
!strstr(filenameStr, "scalene/scalene")) {
if (traceConfig->should_trace(filenameStr)) {
#if defined(PyPy_FatalError)
Expand Down
5 changes: 5 additions & 0 deletions src/source/traceconfig.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "traceconfig.hpp"

// Storage for TraceConfig's static data members. They are defined in this
// translation unit so that the header can be included from several files
// without producing duplicate definitions.
TraceConfig* TraceConfig::_instance = nullptr;  // no configuration installed yet
std::mutex TraceConfig::_instanceMutex{};
std::unordered_map<std::string, bool> TraceConfig::_memoize{};

0 comments on commit 87520b4

Please sign in to comment.