## Timeout function

In [12]:
import pandas as pd 
import os 
import subprocess
import signal

class timeout:
    """Context manager that interrupts the enclosed block after a time limit.

    Adapted from StackOverflow: https://stackoverflow.com/a/22348885

    On entry a ``SIGALRM`` handler is installed and ``signal.alarm`` is armed
    with ``seconds``.  If the alarm fires while the block is still running,
    ``TimeoutError(error_message)`` is raised inside the block.  On exit the
    alarm is cancelled and the ``SIGALRM`` handler that was active before
    entry is put back, so using this class does not permanently replace a
    handler installed by other code.

    Because it is built on ``signal.SIGALRM`` it is only usable where that
    signal exists and where ``signal.signal`` may be called (see the
    ``signal`` module documentation).

    Args:
        seconds: Delay handed to ``signal.alarm``.
        error_message: Message given to the raised ``TimeoutError``.
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message
        # Handler that was registered for SIGALRM before __enter__ ran.
        self._previous_handler = None

    def handle_timeout(self, signum, frame):
        """Signal handler: turn the alarm into a ``TimeoutError``."""
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # signal.signal returns the handler it replaces; keep it for __exit__.
        self._previous_handler = signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.seconds)
        return self

    def __exit__(self, type, value, traceback):
        # Cancel any pending alarm first so it cannot fire during cleanup.
        signal.alarm(0)
        # A previous handler of None means it was not installed from Python
        # and cannot be re-registered through signal.signal.
        if self._previous_handler is not None:
            signal.signal(signal.SIGALRM, self._previous_handler)


class TimeoutError(Exception):
    """Exception raised by ``timeout`` when the time limit is exceeded.

    This class has the same name as Python's built-in ``TimeoutError`` and
    therefore shadows it for all code in this notebook.

    Args:
        mess: Human-readable description of the timeout; it is forwarded to
            ``Exception`` so that ``str(exc)`` and ``exc.args`` carry it.
    """

    def __init__(self, mess):
        # Forward the message; without it the exception would print as ''.
        super().__init__(mess)

## Read the gists that have an ImportError from the CSV


In [13]:
# Load the initial evaluation results; the 'id' and 'initial-eval' columns are used below.
DF = pd.read_csv("../../results/proprocess/all-gists-results_2500.csv")
print(DF.shape)

# Keep only the ids of the gists whose initial evaluation was an ImportError.
has_import_error = DF['initial-eval'] == 'ImportError'
importError_gists = list(DF.loc[has_import_error, 'id'])
print(len(importError_gists))

(2499, 2)
1140


## Read all possible types of errors

In [3]:
# Load the known error-type names, one per line of the text file.
# Blank lines are skipped on purpose: an empty string is a substring of every
# string, so an empty entry would match any command output in a later
# `t in output` check and mislabel the gist with an empty type.
type_list = []
with open('../../error_type.txt','r') as type_file:
    for line in type_file:
        # strip() removes the line terminator and any surrounding whitespace.
        name = line.strip()
        if name:
            type_list.append(name)

## dockerizeme command to generate dockerfile

### Read the IDs of all gists that have an ImportError (the gist ID, not `snippet.py`)


In [4]:
# Folder that holds one sub-directory per gist (later cells read
# <dir>/<gist id>/snippet.py).
# NOTE: `dir` shadows the built-in of the same name; the name is kept because
# later cells refer to it.
base_dir = os.path.abspath('')
dir = os.path.join(base_dir, '../../datasets/', 'dockerizeme-evaluated-gists')

# Ids of the gists that have an ImportError
importError_gists

['3797445',
 '4345741',
 '7d2fa751b0b7af8c90e2e15a38d64c4c',
 '8512102',
 'b21e142b529a958ab6413e92bca39e17',
 '3965052',
 '9749639',
 '8dddf5b924588184b0568ca98f827b8b',
 '3907846',
 '4902819f17218340d11f',
 '7beb9dd4829844177cda5d529ab4c687',
 '3880042',
 '2db123b4e1bd8dcca5c4',
 '7d9f439a6eb16dee7fee',
 'ad764996ef26a5b1eea7',
 'a9d0767b1b262923f85027105a5eb968',
 '5641933',
 '6356555',
 '4061368',
 '8ee1922ac2e77f5b0c03',
 '8faaa57891318aa438db4bff10b347df',
 '7fba03376d75c83bbb55',
 '10017416',
 '7319733',
 '11189694',
 '7430052',
 'bee7007252a72f52751d',
 'b41dd8e544bf0e3980c971b0d015f5f6',
 'ab4de1d15d9f8b2b461bbd05c8c035ea',
 '4550560',
 '7885684',
 '9082229',
 '7622401',
 '4659409',
 '7525439',
 'c4d4632afb8176321385',
 '5041434',
 '92653913a53baf9dd1a8',
 '6684739',
 '5363149',
 '4644918',
 '7783958',
 '5892603',
 '8593929ed02435aed107',
 '5049438',
 '7595164',
 '1bf1d58f30e66639ff44',
 '3497868',
 '03e309b89fd9a74af1dd41344b7c5c70',
 'c3d8b8f44d5a95aae1d0',
 '5468929',
 'b0d

### Generate Dockerfile

In [None]:
# For every ImportError gist, run DockerizeMe on its snippet.py and store the
# Dockerfile it prints in the gist's own folder.
for g in importError_gists:
    gist_dir = dir + '/' + g
    print(gist_dir)
    output = subprocess.getoutput('npm run dockerizeme {0}/snippet.py'.format(gist_dir))
    print(output)

    # The Dockerfile is taken to start at the first 'FROM'; everything the
    # command printed before that is discarded.
    idx = output.find('FROM')
    if idx == -1:
        # No Dockerfile in the output (for example the command failed).
        # Skip this gist instead of aborting the whole run with a ValueError.
        print('no Dockerfile generated for ' + g)
        print()
        continue

    dockerfile_filename = gist_dir + '/' + 'Dockerfile'
    with open(dockerfile_filename, 'w') as dockerfile:
        dockerfile.write(output[idx:])

    print()


## RQ2: Evaluating the ImportError gists with the Gistable command line, using the Dockerfiles created by DockerizeMe

In [6]:
import sys

# Move the working directory to the gistable folder. The path is relative to
# the current directory, so running this cell a second time moves again.
gistable_dir = '../../gistable'
os.chdir(gistable_dir)
# Optional one-time setup, left disabled: os.system('npm link')
current_dir = os.getcwd()
print(f"\nos.getcwd()  = {current_dir}")






os.getcwd()  = /Users/qqrewquu/Desktop/CISC834/Project/gistable


In [7]:
# Run `gistable run <id>` for every ImportError gist and record which known
# error type (if any) appears in its output.
results = []
TIMEOUT_SECONDS = 60
# The command output is lower-cased before matching, so lower-case the known
# error names once up front instead of once per gist.
known_types = [t.lower() for t in type_list]

for i, g in enumerate(importError_gists):
    if i % 100 == 0:
        print('################################',i,'################################')
    print(g)

    # Decide the outcome first and append exactly once at the bottom, so a
    # timeout that fires late in the timed block can never leave two rows
    # for the same gist.
    try:
        with timeout(seconds=TIMEOUT_SECONDS):
            output = subprocess.getoutput('gistable run {0}'.format(g)).lower()
            # The first entry of the error-type list found in the output wins.
            matched = next((t for t in known_types if t in output), None)
    except TimeoutError:
        print('time out error')
        final_eval = 'timeouterror'
    else:
        if matched is None:
            print('output type = other error')
            final_eval = 'other error'
        else:
            print('output type = ',matched)
            final_eval = matched

    results.append({
        'id': g,
        'initial-eval':'importerror',
        'final-eval': final_eval
    })


################################ 0 ################################
3797445
output type =  keyerror
4345741
output type =  success
7d2fa751b0b7af8c90e2e15a38d64c4c
output type =  success
8512102


KeyboardInterrupt: 

## Change working directory back to dockerizeme folder

In [8]:
# dir = os.path.join(os.path.abspath(''))
# Go back to the DockerizeMe code folder, resolved from the current working
# directory.
dockerizeme_code_dir = os.path.join(os.path.abspath(''), '../dockerizeme/code')
os.chdir(dockerizeme_code_dir)
current_dir = os.getcwd()
print(f"\nos.getcwd()  = {current_dir}")


os.getcwd()  = /Users/qqrewquu/Desktop/CISC834/Project/dockerizeme/code


## Save the results into csv

In [18]:
import csv 

# Write one CSV row per collected result. The file name carries the number of
# ImportError gists.
results_path = '../../results/RQ2/RQ2_dockerizeme_results_{0}.csv'.format(len(importError_gists))

# newline='' lets the csv module control the line endings itself, as its
# documentation requires for files passed to a writer.
with open(results_path, 'w', newline='') as results_file:
    fieldnames = ['id', 'initial-eval','final-eval']
    writer = csv.DictWriter(results_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(results)

# Report success only once the file has been closed.
print('file has been saved')

file has been saved


## Results Evaluation

### RQ2: 

Number of gists whose ImportError has been removed

Number of gists that can run without error

In [11]:
# Reload the saved RQ2 results and summarise them.
DF_new = pd.read_csv("../../results/RQ2/RQ2_dockerizeme_results_{0}.csv".format(len(importError_gists)))

final_evals = DF_new['final-eval']
# Use the number of rows actually present in the file as the denominator for
# both the printed fraction and the percentage, so the two always agree.
total = len(DF_new)

# Gists whose ImportError is gone: every row that is neither still an
# 'importerror' nor an unclassified 'other error'.
# NOTE(review): 'timeouterror' rows are therefore counted as "no importError"
# - confirm that this is intended.
noimportError_count = int((~final_evals.isin(['importerror', 'other error'])).sum())

# Gists that ran without any error.
success_count = int((final_evals == 'success').sum())

# Guard against an empty results file, which would otherwise divide by zero.
noimportError_ratio = noimportError_count / total * 100 if total else 0.0
success_ratio = success_count / total * 100 if total else 0.0

print('{0}/{1} of files has no importError. The ratio = {2:.2f}% '.format(noimportError_count, total, noimportError_ratio))
print('{0}/{1} of files can run without error. The ratio = {2:.2f}% '.format(success_count, total, success_ratio))


772/1140 of files has no importError. The ratio = 67.72% 
228/1140 of files can run without error. The ratio = 20.00% 
