## Timeout function

In [26]:
import pandas as pd 
import os 
import subprocess
import signal

class timeout:
    """Timeout class.
    Taken from StackOverflow: https://stackoverflow.com/a/22348885
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message

    def handle_timeout(self, signum, frame):
        # logger.info('Timeout encountered')
        # print('Timeout encountered')
        raise TimeoutError(self.error_message)

    def __enter__(self):
        signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.seconds)

    def __exit__(self, type, value, traceback):
        signal.alarm(0)


class TimeoutError(Exception):
    def __init__(self,mess): 
        super(TimeoutError, self).__init__()
        pass

## Read gists have importError from csv 



In [28]:
# DF = pd.read_csv("all-gists-results.csv")
DF = pd.read_csv("../../results/proprocess/all-gists-results_2500.csv")
print(DF.shape)
importError_gists = DF.loc[DF['initial-eval'] == 'ImportError','id']
# print(importError_gists)
importError_gists = list(importError_gists)
print(len(importError_gists))

(2499, 2)
1140


## Read all possible types of errors

In [4]:
type_list = []
with open('../../error_type.txt','r') as file:
    for line in file:
        line = line.rstrip('\r\n')
        line = line.strip()
        # print(line)
        type_list.append(line)

In [5]:
len(importError_gists),len(type_list)

(1140, 93)

## RQ1: Evaluating importError gists using Gistable command line

In [None]:
results = []
# timeout variable
TIMEOUT_SECONDS = 60
for i,g in enumerate(importError_gists):
    if i % 100 == 0:
        print('####################################',i,'####################################')
    found_regular_error = False 
    print(g)
    try:
        with timeout(seconds=TIMEOUT_SECONDS):
            output = subprocess.getoutput('gistable run {0}'.format(g)).lower()
            # print(output)
            for t in type_list:
                t = t.lower()
                # print(t)
                if t in output: 
                    print('output type = ',t)
                    found_regular_error = True 
                    results.append({
                    'id': g,
                    'initial-eval':'importerror',
                    'final-eval': t
                    })
            
                    break
            if not found_regular_error:
                print('output type = other error')
                results.append({
                    'id': g,
                    'initial-eval':'importerror',
                    'final-eval':'other error'
            })
    except TimeoutError:
            print('time out error')
            results.append({
                'id': g,
                'initial-eval':'importerror',
                'final-eval':'timeouterror'
            })
            pass


## Save the results into csv

In [13]:
import csv 
# with open('importErrors-gists-results.csv', 'w') as results_file:
# with open('importErrors-gists-results_1000.csv', 'w') as results_file:with open('importErrors-gists-results.csv', 'w') as results_file:
with open('../../results/RQ1/RQ1_gistable_results_{0}.csv'.format(len(importError_gists)), 'w') as results_file:


    # Row fieldnames
    fieldnames = ['id', 'initial-eval','final-eval']

    # Create dict writer
    writer = csv.DictWriter(results_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(results)        
    print('file has been saved')

file has been saved


## Results Evaluation

### Num of total files has importErrors

In [22]:
res = len(importError_gists)/DF.shape[0]

print('In 2500 file, {0} of file has importError. The ratio = {1:.2f}%'.format(len(importError_gists),res*100))



In 2500 file, 1140 of file has importError. The ratio = 45.62%


### RQ1 Results

num of file has importError has been removed

num of file can run

In [23]:
DF_new = pd.read_csv("../../results/RQ1/RQ1_gistable_results_{0}.csv".format(len(importError_gists)))

noimportError_count=0
success_count = 0

# count num of file has no error, and can run without error
for error in list(DF_new['final-eval']):
    if error != 'importerror':
        noimportError_count+=1
        
    if error == 'success':
        success_count+=1

#get rid of # of other error from noimportError file
    if error == 'other error':
        noimportError_count-=1
        
print('{0}/{1} of files has no importError. The ratio = {2:.2f}% '.format(noimportError_count,len(importError_gists),noimportError_count/DF_new.shape[0]*100))
print('{0}/{1} of files can run without error. The ratio = {2:.2f}% '.format(success_count,len(importError_gists),success_count/DF_new.shape[0]*100))


829/1140 of files has no importError. The ratio = 72.72% 
227/1140 of files can run without error. The ratio = 19.91% 
