On the Feasibility of Cross-Language Detection of Malicious Packages in npm and PyPI   

## Install necessary libraries

In [1]:
!pip install --upgrade mergetb
!pip install joblib


[0m

## Import necessary libraries

In [2]:
import getpass
import os
import time, socket
from timeout_timer import timeout, TimeoutInterrupt

from mergetb.workspace import User, Experiment, Project
from mergetb.realize import Realization
from mergetb.materialize import Materialization
from mergetb.grpc_client import MergeGRPCError
from mergetb.types import StatusType

## Log in as user

In [3]:
# Credentials must never be stored in the notebook itself: the notebook (and
# its saved outputs) is routinely shared and committed.
# - MERGE_USERNAME overrides the default account name.
# - MERGE_PASSWORD supplies the password non-interactively; when it is unset
#   (or empty) the user is prompted and the input is not echoed.
username = os.environ.get('MERGE_USERNAME', 'alix')
passw = os.environ.get('MERGE_PASSWORD') or getpass.getpass(f"Password for {username}: ")

# Log in and report the outcome; a login failure is printed rather than raised.
try:
    u = User(username, passw)
    u.login()
except MergeGRPCError as e:
    print(f"Error logging in: {e}")
else:
    print("Logged in as user", u.username)

Logged in as user alix


## Find or create experiment


In [4]:
# Make sure the user's personal project contains an experiment named 'npm',
# creating it when it is missing.
#
# The names used here are deliberately distinct from `exp` (the Experiment
# handle that later cells rely on) and from the exception alias: iterating
# with `exp` as the loop variable would rebind that global to a plain string
# whenever this cell is re-run, breaking later `exp.<method>(...)` calls.
try:
    # load the user's personal project
    p = Project(username)
    resp = p.get()

    # look for the experiment, create if it does not exist
    if any(existing_name == 'npm' for existing_name in resp.project.experiments):
        print('experiment npm already exists')
    else:
        new_experiment = Experiment('npm', username, description='Replicating npm artifact')
        new_experiment.create()
        print('experiment npm created')

except MergeGRPCError as err:
    print(err)

experiment npm already exists


## Push model, allocate resources and attach the experiment to the XDC

In [7]:
RLZ_TIMEOUT=15    # seconds allowed for the resource reservation to succeed
MTZ_TIMEOUT=60    # seconds allowed for the activation to succeed
POLL_INTERVAL=1   # seconds to pause between two status requests


def _wait_for_success(fetch_status, limit, failure_message):
    """Poll ``fetch_status`` until it reports ``StatusType.Success``.

    Args:
        fetch_status: zero-argument callable whose result exposes
            ``.status.highest_status`` (``rlz.get`` / ``mtz.get_status`` below).
        limit: maximum number of seconds to keep polling.
        failure_message: ``%``-format string with one ``%d`` placeholder that
            receives ``limit`` when the wait times out.

    Raises:
        TimeoutError: if success is not reported within ``limit`` seconds.
            ``TimeoutError`` is a subclass of ``Exception``, so code that
            caught the generic exception keeps working.
    """
    try:
        with timeout(limit):
            while True:
                if fetch_status().status.highest_status == StatusType.Success:
                    return
                # Pause between requests instead of busy-looping on the API.
                time.sleep(POLL_INTERVAL)
    except TimeoutInterrupt:
        raise TimeoutError(failure_message % limit)


try:
    exp = Experiment('npm', username)
    rev = exp.push_model('1726077500691.py').revision
    print("Pushed experiment revision", rev)
    # NOTE(review): fixed pause kept from the original; presumably it gives the
    # service time to register the pushed revision - confirm.
    time.sleep(5)

    # reserve resources through a lease named 'v1' (name is arbitrary)
    exp.reserve('v1', revision=rev)

    # wait up to RLZ_TIMEOUT seconds for the reservation to succeed
    rlz = Realization('v1', exp.name, exp.project)
    _wait_for_success(rlz.get, RLZ_TIMEOUT,
                      'reservation did not succeed within %d seconds')
    print('leased resources')

    # activate experiment on this lease
    exp.activate('v1')

    # wait up to MTZ_TIMEOUT seconds for the activation to succeed
    mtz = Materialization('v1', exp.name, exp.project)
    _wait_for_success(mtz.get_status, MTZ_TIMEOUT,
                      'activation did not succeed within %d seconds')
    print('started experiment')

    # The XDC name is the local host name truncated at the first '-' and '.'.
    hostname = socket.gethostname().split('-')[0].split('.')[0]
    exp.attach_xdc('v1', hostname, username)
    # NOTE(review): fixed pause kept from the original; presumably it lets the
    # attachment settle before commands are run - confirm.
    time.sleep(15)
    print("Attached experiment is now running")
except MergeGRPCError as e:
    print(e)

Pushed experiment revision 80d4e96950fd07a596229c204f74e3d1bc5f08f3
leased resources
started experiment
Attached experiment is now running


## Run commands in the experiment to clone the artifact, install prerequisites, apply fixes, and execute the artifact's training scripts

In [8]:
NODE = "nodemis"                    # experiment node on which every command runs
NEW_FILES_MARKER = "New_file .pkl"  # prefix of the line on which the shell reports new .pkl files

# Training/evaluation scripts of the artifact, in execution order.
TRAINING_SCRIPTS = (
    "Crosslanguage_XGBoost_train_test.py",
    "Crosslanguage_DT_train_test.py",
    "Crosslanguage_RF_train_test.py",
    "JS_monolanguage_RF_train_test.py",
    "JS_monolanguage_DT_train_test.py",
    "JS_monolanguage_XGBoost_train_test.py",
    "Py_monolanguage_XGBoost_train_test.py",
    "Py_monolanguage_DT_train_test.py",
    "Py_monolanguage_RF_train_test.py",
)


def _run_on_node(command):
    """Run ``command`` on NODE, print each output line stripped, return both streams.

    Returns the ``(lines, elines)`` pair produced by ``exp.exec_on_node``.
    NOTE(review): ``elines`` is presumably the command's stderr; as in the
    original cell it is returned but not printed - confirm whether it should be.
    """
    (out_lines, err_lines) = exp.exec_on_node(username, NODE, command)
    for out_line in out_lines:
        print(out_line.strip())
    return out_lines, err_lines


def _parse_new_files(output_lines):
    """Extract the names of newly created .pkl files from the command output.

    The shell prints ``NEW_FILES_MARKER : <names>``. When several files are
    new, the quoted shell variable spans several lines, so every token after
    the marker - on the marker line and on all following lines - is collected.
    """
    found = []
    marker_seen = False
    for raw in output_lines:
        text = raw.strip()
        if marker_seen:
            found.extend(text.split())
        elif NEW_FILES_MARKER in text:
            marker_seen = True
            found.extend(text.partition(":")[2].split())
    return found


installpackages="sudo apt update && sudo apt install git python3-venv python3 python3-pip python-is-python3 -y"
clonerepo="git clone https://github.com/SAP-samples/cross-language-detection-artifacts.git"
installpip="pip3 install psutil"
fixartifact="cd cross-language-detection-artifacts/; pip install -r ./scripts/requirements.txt"

print("Installing Linux packages, this step may take a while ...")
_run_on_node(installpackages)
print("Cloning artifact repository")
_run_on_node(clonerepo)
print("Installing Python packages")
_run_on_node(installpip)
print("Applying artifact fixes")
_run_on_node(fixartifact)

print("Will now execute the code for training, it can take a few hours, the 'end' will be announced")
# Record the .pkl files present before and after the run, then report the
# difference on a line that starts with NEW_FILES_MARKER.
runcode = "; ".join(
    ["cd cross-language-detection-artifacts/",
     "existing_pkl_files=`ls *.pkl 2>/dev/null`"]
    + [f"python3 ./scripts/{script}" for script in TRAINING_SCRIPTS]
    + ["new_pkl_files=`ls *.pkl 2>/dev/null`",
       'new_files=`comm -13 <(echo "$existing_pkl_files") <(echo "$new_pkl_files")`',
       f'echo "{NEW_FILES_MARKER} : $new_files"']
)
(lines, elines) = _run_on_node(runcode)
# The marker is matched with the exact spelling the shell command echoes; the
# previous lower-case search string never matched, leaving the list empty.
new_files_list = _parse_new_files(lines)
print("end")



Installing Linux packages, this step may take a while ...
Get:1 http://deb.debian.org/debian bullseye InRelease [116 kB]
Get:2 http://security.debian.org/debian-security bullseye-security InRelease [27.2 kB]
Get:3 http://deb.debian.org/debian bullseye-updates InRelease [44.1 kB]
Get:4 http://security.debian.org/debian-security bullseye-security/main Sources [211 kB]
Get:5 http://security.debian.org/debian-security bullseye-security/main amd64 Packages [305 kB]
Get:6 http://security.debian.org/debian-security bullseye-security/main Translation-en [196 kB]
Get:7 http://deb.debian.org/debian bullseye/main Sources [8,500 kB]
Get:8 http://deb.debian.org/debian bullseye/non-free Sources [81.0 kB]
Get:9 http://deb.debian.org/debian bullseye/contrib Sources [43.2 kB]
Get:10 http://deb.debian.org/debian bullseye/main amd64 Packages [8,066 kB]
Get:11 http://deb.debian.org/debian bullseye/main Translation-en [6,235 kB]
Get:12 http://deb.debian.org/debian bullseye/non-free amd64 Packages [96.4 kB]

## Return resources and, if you wish, delete the experiment

In [None]:
## Cleanup cell, intentionally left commented out so that it does nothing when run.
## Uncomment the lines below to call exp.relinquish('v1') for the lease named 'v1'
## and print either the error or a confirmation message.
##try:
##    exp.relinquish('v1')
##except MergeGRPCError as e:
##    print(e)
##else:
##    print("experiment relinquished")