# Use this notebook for developing mlframe prototype 2

## Object-oriented approach. Testing on DL with benchmarks.

In [2]:
import multiprocessing
from Queue import Queue, Empty
import subprocess
import os
import time
import re
import sys

In [3]:
def printObj(obj):
    for key,val in obj.__dict__.iteritems():
        print key,"=",val

In [4]:
class BackgroundExecutor(multiprocessing.Process):
    """Run one command in a background process and record its output.

    The command and its results live in the dict-like object ``d``, which is
    expected to be a multiprocessing.Manager().dict() so that the creating
    process can read what run() stores.  ``d`` must contain:

      "command"  - string with the command and its arguments separated by
                   single spaces (it is split on " ", nothing is quoted),
      "stdout"   - string that collected output is appended to,
      "exitcode" - "" until the command has finished, then its exit code.

    Normally created through the BashExecutor class.  Usage sample:
      b = BashExecutor(command)
      b.start()

    NOTE(review): the original header said that exec_remote.sh must print its
    exit code as "exitcode=N"; exitcode_pat matches that form but nothing in
    this class uses it yet.
    """

    def __init__(self, d=None, debug=False, hostname="", callback=None, callback_params=None):
        """Store the shared dict and split the command into an argument list.

        d               - shared dict described in the class docstring;
                          a missing "command" key raises KeyError.
        debug           - print tracing information when True.
        hostname        - stored as self.hostname; not used by this class.
        callback        - optional callable invoked as
                          callback(callback_params) by poll() once the
                          command has an exit code.
        callback_params - the single argument handed to callback.
        """
        super(BackgroundExecutor, self).__init__()
        if d is None:
            # A fresh dict per instance instead of one shared default object.
            d = {}
        self.debug = debug
        self.hostname = hostname
        self.exitcode_pat = re.compile(r"^exitcode=(\d+)")
        self.d = d
        self.command = d["command"].split(" ")
        self.callback = callback
        self.callback_params = callback_params
        # Set by run(); None until the command has been started.
        self.proc = None
        if debug:
            print("In  %s  command=' %s '" % (self.name, self.command))
            if callback is not None:
                print("Callback: %s %s" % (callback, callback_params))

    def poll(self):
        """Poll the command's exit code; store it and fire the callback if set.

        Returns the exit code, or None when the command has not been started
        or is still running.
        """
        proc = getattr(self, "proc", None)
        if proc is None:
            return None
        exitcode = proc.poll()
        if exitcode is not None:
            self.setExitCode(exitcode)
            if self.callback is not None:
                if self.debug:
                    print("Calling callback %s %s" % (str(self.callback), str(self.callback_params)))
                self.callback(self.callback_params)
        return exitcode

    def setExitCode(self, ec):
        """Store ec in the shared dict unless an exit code is already there.

        Called from poll().
        """
        if self.d["exitcode"] == "":
            if self.debug:
                print("exit code: %s" % ec)
            self.d["exitcode"] = ec

    def run(self):
        """Start the command, collect its output line by line, store exit code.

        stderr is merged into stdout.  Every line is appended to
        d["stdout"] and echoed to this process's stdout.
        """
        if self.debug:
            print("In {}. Calling {}".format(self.name, self.command))

        proc = subprocess.Popen(self.command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, shell=False)
        self.proc = proc
        if self.debug:
            print("%s process started" % self.name)
        try:
            # readline() blocks until a line is available, so no sleep is
            # needed between reads; sleeping here only delayed the output.
            for line in iter(proc.stdout.readline, b''):
                self.d["stdout"] = self.d["stdout"] + line
                sys.stdout.write(line)
        finally:
            proc.stdout.close()
            # End of output does not mean the child has been reaped yet;
            # wait so that the poll() below always sees the exit code.
            proc.wait()
        self.poll()

    def nameYourself(self):
        """Print name, parent pid and identity of the current process."""
        cp = multiprocessing.current_process()
        print("name %s" % cp.name)
        print("parent pid %s" % cp._parent_pid)
        print("id %s" % (cp._identity,))

    def getExitcode(self):
        """Return the stored exit code ("" while none has been stored)."""
        return self.d["exitcode"]
            

In [5]:
class BashExecutor(object):
    """Front end for running one command through BackgroundExecutor.

    Creates a multiprocessing.Manager dict holding the command string and,
    after execution, its output and exit code, and hands that dict to a
    BackgroundExecutor stored in self.BE.
    """

    def __init__(self, command, debug=False, callback=None, callback_params=None):
        """Prepare the shared dict and the background executor.

        command         - command and arguments separated by single spaces.
        debug           - print tracing information when True.
        callback        - optional callable passed on to BackgroundExecutor.
        callback_params - argument passed on together with callback.
        """
        manager = multiprocessing.Manager()
        self.d = manager.dict()
        self.d["command"] = command
        if debug:
            print("In BashExecutor:")
            print("Command set to ' %s '" % (self.d["command"],))
            print("command type: %s" % (type(self.d["command"]),))
            print("Callback: %s %s" % (callback, callback_params))
        # "" means "not finished yet" for the exit code.
        self.d["stdout"] = ""
        self.d["exitcode"] = ""
        self.debug = debug
        self.BE = BackgroundExecutor(self.d, self.debug, callback=callback, callback_params=callback_params)

    def start(self):
        """Start the background process."""
        self.BE.start()

    def getExitcode(self):
        """Return the exit code, or "" while none has been stored."""
        return self.d["exitcode"]

    def getStdout(self):
        """Return the output collected so far."""
        return self.d["stdout"]

    def getCommand(self):
        """Return the command string."""
        return self.d["command"]

    def __str__(self):
        """Return the command, followed by " (exitcode)" once it is known."""
        text = self.d["command"]
        exitcode = self.d["exitcode"]
        if exitcode != "":
            text += " (" + str(exitcode) + ")"
        return text

    # Lists of executors are printed with repr(); show the same text there.
    __repr__ = __str__
        

In [6]:
class Host(object):
    """Host-related data (hostname, address, user, access key) and actions.

    Builds the ssh/scp command prefixes for the host and keeps ``commands``,
    the list of BashExecutor objects started for it.  Methods that start a
    background command return its index in that list.

    Commands are assembled as space-separated strings and later split on
    single spaces, so paths and options that contain spaces are not
    supported.
    """

    def __init__(self, hostname, address="localhost", user="", key="", debug=False, ssh_options="", scp_options=""):
        """Store the connection details and derive the ssh/scp prefixes.

        hostname    - name used as the ssh/scp target.
        address     - address used by ping().
        user        - optional login name, prepended as "user@".
        key         - optional private key path; a leading "~" is expanded.
        debug       - print tracing information when True.
        ssh_options - optional value appended to ssh as "-o <value>".
        scp_options - optional value appended to scp as "-o <value>".
        """
        self.hostname = hostname
        self.address = address
        self.user = user
        # expanduser only expands a leading "~" and does not need $HOME.
        key = os.path.expanduser(key)
        self.key = key
        self.debug = debug
        self.ssh_command = "ssh"
        self.scp_command = "scp"
        if key != "":
            self.ssh_command += " -i " + key
            self.scp_command += " -i " + key
        if ssh_options != "":
            self.ssh_command += " -o " + ssh_options
        if scp_options != "":
            self.scp_command += " -o " + scp_options

        self.host = ""
        if user != "":
            self.host += user + "@"
        self.host += hostname
        self.commands = []
        if debug:
            print("hostname,address,user,key: %s %s %s %s" % (self.hostname, self.address, self.user, self.key))
            print("ssh command: %s host: %s" % (self.ssh_command.replace(' ', '.'), self.host))

    def _launch(self, comm):
        """Append comm to self.commands, start it and return its index."""
        index = len(self.commands)
        self.commands.append(comm)
        comm.start()
        return index

    def _remove_command(self, script_file, options=""):
        """Return the ssh command line that deletes script_file on the host."""
        parts = [self.ssh_command, options, self.host, "rm", script_file]
        return " ".join(part for part in parts if part)

    def _copy_command(self, script_file, options=""):
        """Return the scp command line that copies script_file to the host."""
        parts = [self.scp_command, "-o ConnectTimeout=5", options, script_file, self.host + ":"]
        return " ".join(part for part in parts if part)

    def _run_blocking(self, command, failure_message):
        """Run command, wait for it, report a non-zero exit; return exit code."""
        proc = subprocess.Popen(command.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False)
        std, stderr = proc.communicate()
        exitcode = proc.returncode
        if exitcode != 0:
            print("%s %s" % (failure_message, exitcode))
            print(std)
            print("! %s" % (stderr,))
        return exitcode

    def ping(self, N=5):
        """Ping self.address N times in the background; return command index."""
        comm = BashExecutor("ping -c " + str(N) + " " + self.address, debug=self.debug)
        return self._launch(comm)

    def execute(self, command, ssh_options=""):
        """Execute command on the host over ssh in the background.

        If the first word of command names a file in the mlframe/scripts
        directory (looked up next to the parent of the current working
        directory), that file is first copied to the host, run as
        "./<command>", and removed again through a callback when it ends.

        Returns the index of the new entry in self.commands, or -1 when the
        script file could not be copied.
        """
        # Decide if command is a script name or just a command.
        package_directory = os.path.dirname(os.getcwd())
        scripts_location = os.path.realpath(os.path.join(package_directory, "mlframe", "scripts"))
        command_file = command.split(" ")[0]
        command_script_path = os.path.join(scripts_location, command_file)
        callback = None
        callback_params = None
        if os.path.isfile(command_script_path):
            if self.debug:
                print("script file exists: %s" % (command_script_path,))
            # Run the copied script from the remote login directory.
            command = "./" + command
            if self.copyScriptFile(command_script_path) != 0:
                # Error copying script file
                return -1
            callback = self.removeScriptFile
            callback_params = command_file

        if self.debug:
            print("Command: %s" % (command,))

        if ssh_options != "":
            ssh_options = " " + ssh_options
        command = self.ssh_command + ssh_options + " " + self.host + " " + command

        if self.debug:
            print("Executing: %s" % (command.replace(' ', '.'),))
            if callback is not None:
                print("Callback: %s %s" % (callback, callback_params))
        comm = BashExecutor(command, debug=self.debug, callback=callback, callback_params=callback_params)
        return self._launch(comm)

    def removeScriptFile(self, script_file, options=""):
        """Delete script_file on the host (blocking); return ssh's exit code.

        options - extra ssh arguments inserted before the host name.
        """
        command = self._remove_command(script_file, options)
        if self.debug:
            print("Remove command: %s" % (command,))
        return self._run_blocking(command, "Error deleting %s on %s" % (script_file, self.hostname))

    def copyScriptFile(self, script_file, options=""):
        """Copy script_file to the host (blocking); return scp's exit code.

        Called every time before remote script execution.
        options - extra scp arguments inserted before the file name.
        """
        command = self._copy_command(script_file, options)
        if self.debug:
            print("Copy command: %s" % (command,))
        return self._run_blocking(command, "Error copying %s to %s" % (script_file, self.hostname))

    def connect_test(self):
        """Run "hostname" on the host with a 5 second connect timeout."""
        options = "-o ConnectTimeout=5"
        return self.execute("hostname", ssh_options=options)
        

In [10]:
# Register the DL server (address 192.168.12.116) with debug tracing on.
# No user or key is given, so the plain "ssh DL" form is used.
dl_serv = Host("DL","192.168.12.116",debug=True)

hostname,address,user,key: DL 192.168.12.116  
ssh command: ssh host: DL


In [13]:
# Run the local script test.sh on DL with arguments "ab cs".  The text after
# ";" is passed to ssh as further arguments, so the echo also runs remotely.
dl_serv.execute("test.sh ab cs; echo echo test")

script file exists: /Users/peterbryzgalov/work/ML/mlframework/mlframe/scripts/test.sh
Copy command: scp -o ConnectTimeout=5 /Users/peterbryzgalov/work/ML/mlframework/mlframe/scripts/test.sh DL:
Command: ./test.sh ab cs; echo echo test
Executing: ssh.DL../test.sh.ab.cs;.echo.echo.test
Callback: <bound method Host.removeScriptFile of <__main__.Host object at 0x10f55c6d0>> test.sh
In BashExecutor:
Command set to ' ssh DL ./test.sh ab cs; echo echo test '
command type: <type 'str'>
Callback: <bound method Host.removeScriptFile of <__main__.Host object at 0x10f55c6d0>> test.sh
In  BackgroundExecutor-10  command=' ['ssh', 'DL', './test.sh', 'ab', 'cs;', 'echo', 'echo', 'test'] '
Callback: <bound method Host.removeScriptFile of <__main__.Host object at 0x10f55c6d0>> test.sh


2

In BackgroundExecutor-10. Calling ['ssh', 'DL', './test.sh', 'ab', 'cs;', 'echo', 'echo', 'test']
BackgroundExecutor-10 process started
Running test command on DL-Server pars: ab cs
DL-Server(ab):1
(ab):1
DL-Server(ab) err:1
DL-Server(ab):2
(ab):2
DL-Server(ab) err:2
DL-Server(ab):3
(ab):3
DL-Server(ab) err:3
DL-Server(ab):4
(ab):4
DL-Server(ab) err:4
DL-Server(ab):5
(ab):5
DL-Server(ab) err:5
echo test
exit code: 0 Calling callback <bound method Host.removeScriptFile of <__main__.Host object at 0x10f55c6d0>> test.sh
Remove command: ssh DL rm test.sh


In [12]:
# Ping the DL address in the background (5 packets by default); the value
# returned is the index of the command in dl_serv.commands.
dl_serv.ping()

In BashExecutor:
Command set to ' ping -c 5 192.168.12.116 '
command type: <type 'str'>
Callback: None None
In  BackgroundExecutor-8  command=' ['ping', '-c', '5', '192.168.12.116'] '


1

In BackgroundExecutor-8. Calling ['ping', '-c', '5', '192.168.12.116']
BackgroundExecutor-8 process started
PING 192.168.12.116 (192.168.12.116): 56 data bytes
Request timeout for icmp_seq 0
Request timeout for icmp_seq 1
Request timeout for icmp_seq 2
Request timeout for icmp_seq 3

--- 192.168.12.116 ping statistics ---
5 packets transmitted, 0 packets received, 100.0% packet loss
exit code: 2

In [14]:
# Host reached with an explicit user name and private key; the "~" in the
# key path is replaced with the home directory by Host.
key = "~/.ssh/id_rsa_com"
mouse = Host("mouse","mouse.local",user="peter",key=key,debug=True)

hostname,address,user,key: mouse mouse.local peter /Users/peterbryzgalov/.ssh/id_rsa_com
ssh command: ssh.-i./Users/peterbryzgalov/.ssh/id_rsa_com host: peter@mouse


In [31]:
# Run a single remote command; the number returned is its index in
# mouse.commands.
mouse.execute("pwd")

17

/home/peter


In [16]:
# Connection check: per the recorded output below, runs "hostname" over ssh
# with "-o ConnectTimeout=5".
mouse.connect_test()

Command: hostname
Executing: ssh.-i./Users/peterbryzgalov/.ssh/id_rsa_com.-o.ConnectTimeout=5.peter@mouse.hostname
In BashExecutor:
Command set to ' ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com -o ConnectTimeout=5 peter@mouse hostname '
command type: <type 'str'>
Callback: None None
In  BackgroundExecutor-14  command=' ['ssh', '-i', '/Users/peterbryzgalov/.ssh/id_rsa_com', '-o', 'ConnectTimeout=5', 'peter@mouse', 'hostname'] '


1

In BackgroundExecutor-14. Calling ['ssh', '-i', '/Users/peterbryzgalov/.ssh/id_rsa_com', '-o', 'ConnectTimeout=5', 'peter@mouse', 'hostname']
BackgroundExecutor-14 process started
mouse
exit code: 0

In [17]:
# Ping mouse.local in the background with the default of 5 packets.
mouse.ping()

In BashExecutor:
Command set to ' ping -c 5 mouse.local '
command type: <type 'str'>
Callback: None None
In  BackgroundExecutor-16  command=' ['ping', '-c', '5', 'mouse.local'] '


2

In BackgroundExecutor-16. Calling ['ping', '-c', '5', 'mouse.local']
BackgroundExecutor-16 process started
PING mouse.local (192.168.83.30): 56 data bytes
64 bytes from 192.168.83.30: icmp_seq=0 ttl=64 time=4.180 ms
64 bytes from 192.168.83.30: icmp_seq=1 ttl=64 time=0.623 ms
64 bytes from 192.168.83.30: icmp_seq=2 ttl=64 time=3.543 ms
64 bytes from 192.168.83.30: icmp_seq=3 ttl=64 time=0.709 ms
64 bytes from 192.168.83.30: icmp_seq=4 ttl=64 time=0.768 ms

--- mouse.local ping statistics ---
5 packets transmitted, 5 packets received, 0.0% packet loss
round-trip min/avg/max/stddev = 0.623/1.965/4.180/1.563 ms
exit code: 0

In [30]:
# Run nvidia-smi in dmon mode on the remote host; the recorded output below
# shows two sample rows with sm/mem/enc/dec percentages.
mouse.execute("nvidia-smi dmon -c 2 -s u")

16

# gpu    sm   mem   enc   dec
# Idx     %     %     %     %
    0    36    34     0     0
    0    54    53     0     0


In [25]:
# Start four background commands without debug tracing and remember the
# index that each call returns, in launch order.
mouse.debug = False
test = [
    mouse.ping(1),
    mouse.connect_test(),
    mouse.execute("hostname && date"),
    mouse.execute("test.sh"),
]

PING mouse.local (192.168.83.30): 56 data bytes
64 bytes from 192.168.83.30: icmp_seq=0 ttl=64 time=0.658 ms
mouse
mouse

Running test command on mouse pars: 
Fri Apr 27 19:27:51 JST 2018
--- mouse.local ping statistics ---
mouse():1
1 packets transmitted, 1 packets received, 0.0% packet loss
():1
round-trip min/avg/max/stddev = 0.658/0.658/0.658/0.000 ms
mouse() err:1
mouse():2
():2
mouse() err:2
mouse():3
():3
mouse() err:3
mouse():4
():4
mouse() err:4
mouse():5
():5
mouse() err:5


In [24]:
for tst in test:
    print tst, mouse.commands[tst].getCommand()
    print mouse.commands[tst].getStdout(),
    print "exit",mouse.commands[tst].getExitcode()
    print

7 ping -c 1 mouse.local
PING mouse.local (192.168.83.30): 56 data bytes
64 bytes from 192.168.83.30: icmp_seq=0 ttl=64 time=0.755 ms

--- mouse.local ping statistics ---
1 packets transmitted, 1 packets received, 0.0% packet loss
round-trip min/avg/max/stddev = 0.755/0.755/0.755/0.000 ms
exit 0

8 ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com -o ConnectTimeout=5 peter@mouse hostname
mouse
exit 0

9 ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com peter@mouse hostname && date
mouse
Fri Apr 27 19:22:12 JST 2018
exit 0

10 ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com peter@mouse ./test.sh
Running test command on mouse pars: 
mouse():1
():1
mouse() err:1
mouse():2
():2
mouse() err:2
mouse():3
():3
mouse() err:3
mouse():4
():4
mouse() err:4
mouse():5
():5
mouse() err:5
exit 125



In [22]:
# List every command started on this host; each entry is shown as its command
# line followed by the exit code in parentheses once it is known.
print mouse.commands

[ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com peter@mouse ./test.sh (125), ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com -o ConnectTimeout=5 peter@mouse hostname (0), ping -c 5 mouse.local (0), ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com peter@mouse nvidia-smi dmon -c 2 -s u (0), ping -c 1 mouse.local (0), ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com -o ConnectTimeout=5 peter@mouse hostname (0), ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com peter@mouse hostname && date (0), ping -c 1 mouse.local (0), ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com -o ConnectTimeout=5 peter@mouse hostname (0), ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com peter@mouse hostname && date (0), ssh -i /Users/peterbryzgalov/.ssh/id_rsa_com peter@mouse ./test.sh (125)]


In [26]:
# A third host with user and key, created without debug tracing.
reedbush = Host("reedbush","reedbush.cc.u-tokyo.ac.jp",key="~/.ssh/id_rsa_com",user="i96005")

In [27]:
# Try to run the local script test.sh remotely; execute() returns -1 when the
# script cannot be copied to the host, as in the recorded output below.
reedbush.execute("test.sh")

Error copying /Users/peterbryzgalov/work/ML/mlframework/mlframe/scripts/test.sh to reedbush 1

Connection closed by 130.69.241.11 port 22
lost connection



-1

In [28]:
# Ping the host address with 3 packets in the background.
reedbush.ping(3)

0

PING reedbush.cc.u-tokyo.ac.jp (130.69.241.11): 56 data bytes
64 bytes from 130.69.241.11: icmp_seq=0 ttl=54 time=3.306 ms
64 bytes from 130.69.241.11: icmp_seq=1 ttl=54 time=6.005 ms
64 bytes from 130.69.241.11: icmp_seq=2 ttl=54 time=6.008 ms

--- reedbush.cc.u-tokyo.ac.jp ping statistics ---
3 packets transmitted, 3 packets received, 0.0% packet loss
round-trip min/avg/max/stddev = 3.306/5.106/6.008/1.273 ms
