In [4]:
from utils import bq_utils as bqu
from utils import text_normalizer as tn
import pandas as pd
import numpy as np
import json
import arrow
import gc
import re
import os

In [5]:
# Register the service-account key file in the process environment before the
# BigQuery helper is created (presumably bq_utils relies on it to authenticate
# -- TODO confirm against utils/bq_utils).
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../../auth/bq_key.json'

# Helper bound to the `githubarchive` project and its `day` dataset.
bq_helper_kwargs = {'active_project': "githubarchive", 'dataset_name': "day"}
gh_archive = bqu.BigQueryHelper(**bq_helper_kwargs)
print('Setting up BQ Client:', gh_archive)

Setting up BQ Client: <utils.bq_utils.BigQueryHelper object at 0x7f84f1341668>


In [6]:
# Read the tracked-repository links, one per line. The context manager closes
# the file handle as soon as the lines are consumed instead of leaking it.
with open('../../data/go-cves/golang-repo-list.txt') as repo_list_file:
    gh_repo_links = np.array([line.strip() for line in repo_list_file])

# Capture everything that follows "github.com/" in a link (case-insensitive).
# The dot is escaped so that it only matches a literal '.'.
pattern = re.compile(r'.*?github\.com/(.*)', re.I)

# Run the regex once per link; lines that do not match, or whose captured
# part is empty, are skipped.
repo_matches = (pattern.search(link) for link in gh_repo_links)
REPO_NAMES = np.array([match.group(1) for match in repo_matches
                       if match and match.group(1)])
print('Total Repos to Track:', len(REPO_NAMES))

Total Repos to Track: 845


In [7]:
def add_query_params(query, params_dict):
    """Fill a query template by plain string substitution.

    Every key of ``params_dict`` is treated as a literal placeholder; each
    occurrence of it in ``query`` is replaced with the mapped value. The
    substitutions are applied one after another in the dict's iteration order.

    Args:
        query: Template string containing the placeholders.
        params_dict: Mapping of placeholder text to replacement text
            (both strings).

    Returns:
        The query string with all placeholders substituted.
    """
    rendered = query
    for placeholder in params_dict:
        rendered = rendered.replace(placeholder, params_dict[placeholder])
    return rendered

In [8]:
# Reference instant for the whole run -- don't change this.
PRESENT_TIME = arrow.now()

# Number of days to fetch, counting backwards and ending at D - 1, where D is
# the present day. E.g. if today is 20190528 and DURATION_DAYS = 2, the days
# queried in BQ are 20190526 and 20190527.
# Day D itself is skipped because its data is still incomplete on the same
# day (it can be included if really wanted, see END_TIME below).
DURATION_DAYS = 2  # go back two days, ending yesterday

# First day of the window.
START_TIME = PRESENT_TIME.shift(days=-DURATION_DAYS)

# Last day of the window: present_time - 1, i.e. yesterday.
# Removing the -1 would include the present day, which is not advised
# because that day's data will be incomplete.
END_TIME = PRESENT_TIME.shift(days=-1)

# One 'YYYYMMDD' string per day from START_TIME through END_TIME.
window_days = arrow.Arrow.range('day', START_TIME, END_TIME)
LAST_N_DAYS = [day.format('YYYYMMDD') for day in window_days]
print('Last N days:', LAST_N_DAYS)

Last N days: ['20190527', '20190528']


In [9]:
# Wildcard used in the table name; the part of the table name it stands for
# is compared against _TABLE_SUFFIX in the queries.
YEAR_PREFIX = '20*'
# Drop the leading two characters ("20") from each YYYYMMDD day so that the
# remainder lines up with what the wildcard leaves as the table suffix.
DAY_LIST = [day[2:] for day in LAST_N_DAYS]

# Comma-separated, single-quoted value lists for the SQL IN (...) clauses.
quoted_days = ', '.join("'{}'".format(day) for day in DAY_LIST)
quoted_repos = ', '.join("'{}'".format(repo) for repo in REPO_NAMES)

# Placeholder -> replacement text, consumed by add_query_params().
QUERY_PARAMS = {
    '{year_prefix_wildcard}': YEAR_PREFIX,
    '{year_suffix_month_day}': '(' + quoted_days + ')',
    '{repo_names}': '(' + quoted_repos + ')',
}

In [10]:
# Sanity-check query: number of PullRequestEvent / IssuesEvent rows for the
# tracked repos over the selected days. count(*) has no alias, so the result
# column gets an auto-generated name (shown as `f0_` in the recorded output).
query = """
SELECT  type, count(*)
        FROM `githubarchive.day.{year_prefix_wildcard}`
        WHERE _TABLE_SUFFIX IN {year_suffix_month_day}
        AND repo.name in {repo_names}
        AND type in ('PullRequestEvent', 'IssuesEvent')
        GROUP BY type
"""
# Substitute the placeholders, then ask the helper for a size estimate before
# actually running the query (units of the estimate are not visible here --
# TODO confirm against utils/bq_utils).
query = add_query_params(query, QUERY_PARAMS)
gh_archive.estimate_query_size(query)

0.1296424763277173

In [11]:
# Run the count query and display the per-event-type totals.
# NOTE(review): the name `df` is rebound further down to the combined
# issues/PR dataset, so this frame is only meaningful for this display.
df = gh_archive.query_to_pandas(query)
df

Unnamed: 0,type,f0_
0,PullRequestEvent,615
1,IssuesEvent,434


In [14]:
# Template for fetching IssuesEvent rows of the tracked repos.
# Scalar fields are pulled out of the JSON `payload` column; title and body
# are flattened to one line (line breaks -> space, runs of 2+ whitespace
# characters -> single space, then trimmed).
# Every regex backslash is doubled because this is a regular (non-raw) Python
# string: Python turns `\\r`, `\\n` and `\\s` into `\r`, `\n` and `\s`, which
# is what the SQL raw-string regex literals need to receive. A single `\s`
# would be an invalid Python escape sequence and triggers a warning.
ISSUE_QUERY = """
SELECT 
    repo.name as repo_name, 
    type as event_type, 
    'golang' as ecosystem,
    JSON_EXTRACT_SCALAR(payload, '$.action') as status,
    JSON_EXTRACT_SCALAR(payload, '$.issue.id') as id,
    JSON_EXTRACT_SCALAR(payload, '$.issue.number') as number,
    JSON_EXTRACT_SCALAR(payload, '$.issue.url') as api_url,
    JSON_EXTRACT_SCALAR(payload, '$.issue.html_url') as url,
    JSON_EXTRACT_SCALAR(payload, '$.issue.user.login') as creator_name,
    JSON_EXTRACT_SCALAR(payload, '$.issue.user.html_url') as creator_url,
    JSON_EXTRACT_SCALAR(payload, '$.issue.created_at') as created_at,
    JSON_EXTRACT_SCALAR(payload, '$.issue.updated_at') as updated_at,
    JSON_EXTRACT_SCALAR(payload, '$.issue.closed_at') as closed_at,
    TRIM(REGEXP_REPLACE(
             REGEXP_REPLACE(
                 JSON_EXTRACT_SCALAR(payload, '$.issue.title'), 
                 r'\\r\\n|\\r|\\n', 
                 ' '),
             r'\\s{2,}', 
             ' ')) as title,
    TRIM(REGEXP_REPLACE(
             REGEXP_REPLACE(
                 JSON_EXTRACT_SCALAR(payload, '$.issue.body'), 
                 r'\\r\\n|\\r|\\n', 
                 ' '),
             r'\\s{2,}', 
             ' ')) as body
        
FROM `githubarchive.day.{year_prefix_wildcard}`
    WHERE _TABLE_SUFFIX IN {year_suffix_month_day}
    AND repo.name in {repo_names}
    AND type = 'IssuesEvent'
    """

# Substitute the placeholders, then display the helper's size estimate for the
# final query before running it.
ISSUE_QUERY = add_query_params(ISSUE_QUERY, QUERY_PARAMS)
gh_archive.estimate_query_size(ISSUE_QUERY)

9.224743023514748

In [15]:
# Execute the issue query and load the result into a DataFrame; display its
# (rows, columns) shape.
issues_df = gh_archive.query_to_pandas(ISSUE_QUERY)
issues_df.shape

(434, 15)

In [16]:
# Preview the first rows of the raw issue events.
issues_df.head()

Unnamed: 0,repo_name,event_type,ecosystem,status,id,number,api_url,url,creator_name,creator_url,created_at,updated_at,closed_at,title,body
0,google/go-cmp,IssuesEvent,golang,opened,449409868,143,https://api.github.com/repos/google/go-cmp/iss...,https://github.com/google/go-cmp/issues/143,mrkagelui,https://github.com/mrkagelui,2019-05-28T18:17:33Z,2019-05-28T18:17:33Z,,cmpopts to ignore fields within slices?,"Hi all, My struct looks like this: ```go type ..."
1,golang/go,IssuesEvent,golang,closed,433886750,31488,https://api.github.com/repos/golang/go/issues/...,https://github.com/golang/go/issues/31488,bcmills,https://github.com/bcmills,2019-04-16T16:55:12Z,2019-05-28T18:03:21Z,2019-05-28T18:03:21Z,x/build/cmd/gopherbot: test issue (ignore this),Pay no attention to the man behind the curtain.
2,golang/go,IssuesEvent,golang,closed,398648020,29718,https://api.github.com/repos/golang/go/issues/...,https://github.com/golang/go/issues/29718,tonifirnandes,https://github.com/tonifirnandes,2019-01-13T11:28:03Z,2019-05-28T18:04:25Z,2019-05-28T18:04:25Z,question: how to get go code from .aar file,"Dear team, I have built .aar file from gomobil..."
3,golang/go,IssuesEvent,golang,closed,435557563,31605,https://api.github.com/repos/golang/go/issues/...,https://github.com/golang/go/issues/31605,arnottcr,https://github.com/arnottcr,2019-04-21T23:23:48Z,2019-05-28T18:29:38Z,2019-05-28T18:29:38Z,cmd/go: error loading subpackage with pathMajo...,<!-- Please answer these questions before subm...
4,influxdata/influxdb,IssuesEvent,golang,opened,449411669,13998,https://api.github.com/repos/influxdata/influx...,https://github.com/influxdata/influxdb/issues/...,nathanielc,https://github.com/nathanielc,2019-05-28T18:22:11Z,2019-05-28T18:22:11Z,,Parse Flux int he browser,We need the ability to parse Flux source code ...


In [17]:
# Parse the timestamp strings returned by the query into pandas datetimes.
for ts_col in ('created_at', 'updated_at', 'closed_at'):
    issues_df[ts_col] = pd.to_datetime(issues_df[ts_col])

# The same issue can appear in several events within the window: keep, per
# issue url, only the row carrying the greatest updated_at.
latest_issue_rows = issues_df.groupby('url').updated_at.idxmax(skipna=False)
issues_df = issues_df.loc[latest_issue_rows].reset_index(drop=True)
issues_df.shape

(375, 15)

In [18]:
# Template for fetching PullRequestEvent rows of the tracked repos; it yields
# the same output columns as the issue query, read from `$.pull_request.*`.
# Title and body are flattened to one line (line breaks -> space, runs of 2+
# whitespace characters -> single space, then trimmed).
# Every regex backslash is doubled because this is a regular (non-raw) Python
# string: Python turns `\\r`, `\\n` and `\\s` into `\r`, `\n` and `\s`, which
# is what the SQL raw-string regex literals need to receive. A single `\s`
# would be an invalid Python escape sequence and triggers a warning.
PR_QUERY = """
SELECT 
    repo.name as repo_name, 
    type as event_type, 
    'golang' as ecosystem,
    JSON_EXTRACT_SCALAR(payload, '$.action') as status,
    JSON_EXTRACT_SCALAR(payload, '$.pull_request.id') as id,
    JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') as number,
    JSON_EXTRACT_SCALAR(payload, '$.pull_request.url') as api_url,
    JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
    JSON_EXTRACT_SCALAR(payload, '$.pull_request.user.login') as creator_name,
    JSON_EXTRACT_SCALAR(payload, '$.pull_request.user.html_url') as creator_url,
    JSON_EXTRACT_SCALAR(payload, '$.pull_request.created_at') as created_at,
    JSON_EXTRACT_SCALAR(payload, '$.pull_request.updated_at') as updated_at,
    JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') as closed_at,   
    TRIM(REGEXP_REPLACE(
             REGEXP_REPLACE(
                 JSON_EXTRACT_SCALAR(payload, '$.pull_request.title'), 
                 r'\\r\\n|\\r|\\n', 
                 ' '),
             r'\\s{2,}', 
             ' ')) as title,
    TRIM(REGEXP_REPLACE(
             REGEXP_REPLACE(
                 JSON_EXTRACT_SCALAR(payload, '$.pull_request.body'), 
                 r'\\r\\n|\\r|\\n', 
                 ' '),
             r'\\s{2,}', 
             ' ')) as body
        
FROM `githubarchive.day.{year_prefix_wildcard}`
    WHERE _TABLE_SUFFIX IN {year_suffix_month_day}
    AND repo.name in {repo_names}
    AND type = 'PullRequestEvent'
"""

# Substitute the placeholders, then display the helper's size estimate for the
# final query before running it.
PR_QUERY = add_query_params(PR_QUERY, QUERY_PARAMS)
gh_archive.estimate_query_size(PR_QUERY)

9.224743023514748

In [19]:
# Execute the pull-request query and load the result into a DataFrame;
# display its (rows, columns) shape.
prs_df = gh_archive.query_to_pandas(PR_QUERY)
prs_df.shape

(615, 15)

In [20]:
# Parse the timestamp strings returned by the query into pandas datetimes.
for ts_col in ('created_at', 'updated_at', 'closed_at'):
    prs_df[ts_col] = pd.to_datetime(prs_df[ts_col])

# The same pull request can appear in several events within the window: keep,
# per PR url, only the row carrying the greatest updated_at.
latest_pr_rows = prs_df.groupby('url').updated_at.idxmax(skipna=False)
prs_df = prs_df.loc[latest_pr_rows].reset_index(drop=True)
prs_df.shape

(475, 15)

In [21]:
# Preview the first rows of the de-duplicated pull-request events.
prs_df.head()

Unnamed: 0,repo_name,event_type,ecosystem,status,id,number,api_url,url,creator_name,creator_url,created_at,updated_at,closed_at,title,body
0,Azure/azure-sdk-for-go,PullRequestEvent,golang,closed,273727170,4668,https://api.github.com/repos/Azure/azure-sdk-f...,https://github.com/Azure/azure-sdk-for-go/pull...,AutorestCI,https://github.com/AutorestCI,2019-04-26 00:46:34+00:00,2019-05-28 17:01:24+00:00,2019-05-28 17:01:24+00:00,[AutoPR resources/resource-manager] Add deploy...,Created to sync https://github.com/Azure/azure...
1,Azure/azure-sdk-for-go,PullRequestEvent,golang,closed,275325719,4730,https://api.github.com/repos/Azure/azure-sdk-f...,https://github.com/Azure/azure-sdk-for-go/pull...,AutorestCI,https://github.com/AutorestCI,2019-05-02 11:30:02+00:00,2019-05-28 16:47:31+00:00,2019-05-28 16:47:31+00:00,[AutoPR security/resource-manager] Add playboo...,Created to sync https://github.com/Azure/azure...
2,Azure/azure-sdk-for-go,PullRequestEvent,golang,closed,277198569,4763,https://api.github.com/repos/Azure/azure-sdk-f...,https://github.com/Azure/azure-sdk-for-go/pull...,AutorestCI,https://github.com/AutorestCI,2019-05-08 23:34:33+00:00,2019-05-28 17:00:59+00:00,2019-05-28 17:00:59+00:00,[AutoPR resources/resource-manager] Add jit re...,Created to sync https://github.com/Azure/azure...
3,Azure/azure-sdk-for-go,PullRequestEvent,golang,closed,277198579,4764,https://api.github.com/repos/Azure/azure-sdk-f...,https://github.com/Azure/azure-sdk-for-go/pull...,AutorestCI,https://github.com/AutorestCI,2019-05-08 23:34:36+00:00,2019-05-28 17:00:40+00:00,2019-05-28 17:00:40+00:00,[AutoPR] resources/resource-manager,Created to accumulate context: resources/resou...
4,Azure/azure-sdk-for-go,PullRequestEvent,golang,closed,277938747,4776,https://api.github.com/repos/Azure/azure-sdk-f...,https://github.com/Azure/azure-sdk-for-go/pull...,AutorestCI,https://github.com/AutorestCI,2019-05-10 23:53:39+00:00,2019-05-28 16:43:29+00:00,2019-05-28 16:43:29+00:00,[AutoPR] cdn/resource-manager,Created to accumulate context: cdn/resource-ma...


In [22]:
# Stack issue and PR events into a single frame with a fresh 0..n-1 index,
# keeping the issue frame's column order.
cols = issues_df.columns
df = pd.concat([issues_df, prs_df], axis=0, sort=False, ignore_index=True)
df = df[cols]

# Build one text field per event: "<title> <body>", with a missing title or
# body treated as an empty string.
title_text = df['title'].fillna(value='').map(str)
body_text = df['body'].fillna(value='')
df['description'] = title_text + ' ' + body_text

# The separate text columns are not needed once `description` exists.
columns = ['title', 'body']
df = df.drop(columns=columns)
df.shape

(850, 14)

In [23]:
# Normalise every description with the project's text normaliser and keep
# only the normalised text column.
raw_descriptions = df['description'].values
df['norm_description'] = tn.pre_process_documents_parallel(documents=raw_descriptions)
df = df.drop(columns=['description'])
df.shape

2019-05-29 12:13:10,162 [21605] INFO     utils.text_normalizer: Text Pre-processing: starting
2019-05-29 12:13:10,165 [21605] INFO     utils.text_normalizer: ThreadPoolExecutor-0_0: working on doc num: 0
2019-05-29 12:13:12,257 [21605] INFO     utils.text_normalizer: ThreadPoolExecutor-0_13: working on doc num: 849


(850, 14)

In [24]:
# Start every row with both model flags at 0; the prediction cells overwrite
# the flag for the rows they score.
for flag_col in ('security_model_flag', 'cve_model_flag'):
    df[flag_col] = 0
df.head()

Unnamed: 0,repo_name,event_type,ecosystem,status,id,number,api_url,url,creator_name,creator_url,created_at,updated_at,closed_at,norm_description,security_model_flag,cve_model_flag
0,AlecAivazis/survey,IssuesEvent,golang,closed,391535316,168,https://api.github.com/repos/AlecAivazis/surve...,https://github.com/AlecAivazis/survey/issues/168,singhrasster,https://github.com/singhrasster,2018-12-17 02:24:40+00:00,2019-05-28 10:44:20+00:00,2019-05-28 10:44:20+00:00,issue with executing test suggested in readme ...,0,0
1,AlecAivazis/survey,IssuesEvent,golang,closed,423174952,183,https://api.github.com/repos/AlecAivazis/surve...,https://github.com/AlecAivazis/survey/issues/183,shlao,https://github.com/shlao,2019-03-20 10:27:47+00:00,2019-05-28 20:23:23+00:00,2019-05-28 20:23:23+00:00,go test is suspended when i send the string r ...,0,0
2,AlecAivazis/survey,IssuesEvent,golang,closed,436441636,188,https://api.github.com/repos/AlecAivazis/surve...,https://github.com/AlecAivazis/survey/issues/188,zanven42,https://github.com/zanven42,2019-04-24 00:12:06+00:00,2019-05-28 10:43:17+00:00,2019-05-28 10:43:16+00:00,dynamic question asking binary tree hi i had f...,0,0
3,AlecAivazis/survey,IssuesEvent,golang,closed,440853697,193,https://api.github.com/repos/AlecAivazis/surve...,https://github.com/AlecAivazis/survey/issues/193,ahmetb,https://github.com/ahmetb,2019-05-06 19:38:15+00:00,2019-05-28 20:10:53+00:00,2019-05-28 20:10:53+00:00,allow customization of questionicon color it a...,0,0
4,AlecAivazis/survey,IssuesEvent,golang,closed,449192830,198,https://api.github.com/repos/AlecAivazis/surve...,https://github.com/AlecAivazis/survey/issues/198,turtleDev,https://github.com/turtleDev,2019-05-28 10:34:17+00:00,2019-05-28 20:04:35+00:00,2019-05-28 20:04:35+00:00,feature request survey select filtermessage al...,0,0


In [103]:
from models import security_dl_classifier as sdc

# Directory holding the tokenizer vocabulary and saved weights for this run.
SECURITY_ASSETS_DIR = 'models/model_assets/gokube-phase1-jun19'

# Constructor arguments for the security classifier.
security_classifier_config = dict(
    embedding_size=300,
    max_length=1000,
    max_features=800000,
    tokenizer_path=SECURITY_ASSETS_DIR + '/embeddings/security_tokenizer_word2idx_fulldata.pkl',
    model_weights_path=SECURITY_ASSETS_DIR + '/saved_models/security_model_train99-jun19_weights.h5',
)

# Build the architecture, load the trained weights, then grab the model
# object used for prediction.
sc = sdc.SecurityClassifier(**security_classifier_config)
sc.build_model_architecture()
sc.load_model_weights()
sc_model = sc.get_model()

Loading Tokenizer Vocabulary
Building Model Architecture
Loading Model Weights


In [104]:
# Encode the normalised descriptions for the security model (one encoded row
# per document, in df row order).
norm_descriptions = df['norm_description'].tolist()
security_encoded_docs = sc.prepare_inference_data(norm_descriptions)
print('Total Security Docs Encoded:', security_encoded_docs.shape)

# Number of non-zero entries in each encoded row.
sec_doc_lengths = np.array([np.nonzero(encoded_doc)[0].size
                            for encoded_doc in security_encoded_docs])
print('Removing bad docs with low tokens')

# Row positions of the documents that have at least 5 non-zero entries.
has_enough_sec_tokens = sec_doc_lengths >= 5
sec_doc_idx = np.argwhere(has_enough_sec_tokens).ravel()
filtered_security_encoded_docs = security_encoded_docs[sec_doc_idx]
print('Filtered Security Docs Encoded:', filtered_security_encoded_docs.shape)

Total Security Docs Encoded: (441, 1000)
Removing bad docs with low tokens
Filtered Security Docs Encoded: (439, 1000)


In [105]:
print('Making predictions for probable security issues')
sec_pred_probs = sc_model.predict(filtered_security_encoded_docs, batch_size=2048, verbose=0)
sec_pred_probsr = sec_pred_probs.ravel()

# A document is flagged as a probable security issue when its predicted
# probability is strictly above 0.4.
sec_pred_labels = [1 if prob > 0.4 else 0 for prob in sec_pred_probsr]

print('Storing Security Model predictions in dataset')
# sec_doc_idx holds row *positions* (it indexes the encoded array built from
# df in row order), so the flags are written positionally with iloc. Matching
# those positions against index labels would only be correct while df carries
# a default 0..n-1 index.
security_flag_col_pos = df.columns.get_loc('security_model_flag')
df.iloc[sec_doc_idx, security_flag_col_pos] = sec_pred_labels

Making predictions for probable security issues
Storing Security Model predictions in dataset


In [106]:
# Drop both references to the security classifier and force a garbage
# collection pass before the next model is loaded.
del sc, sc_model
gc.collect()

6021

In [113]:
from models import cve_dl_classifier as cdc

# Directory holding the tokenizer vocabulary and saved weights for this run.
CVE_ASSETS_DIR = 'models/model_assets/gokube-phase1-jun19'

# Constructor arguments for the CVE classifier.
cve_classifier_config = dict(
    embedding_size=300,
    max_length=1000,
    max_features=600000,
    tokenizer_path=CVE_ASSETS_DIR + '/embeddings/cve_tokenizer_word2idx_fulldata.pkl',
    model_weights_path=CVE_ASSETS_DIR + '/saved_models/cve_model_train99-jun19_weights.h5',
)

# Build the architecture, load the trained weights, then grab the model
# object used for prediction.
cc = cdc.CVEClassifier(**cve_classifier_config)
cc.build_model_architecture()
cc.load_model_weights()
cc_model = cc.get_model()

Loading CVE Model Tokenizer Vocabulary
Building CVE Model Architecture
Loading CVE Model Weights


In [125]:
# Only rows flagged by the security model are passed on to the CVE model.
is_probable_security = df['security_model_flag'] == 1
subset_df = df[is_probable_security]

# Index labels (in df) of those rows, used later to write CVE flags back.
prob_security_df_rowidx = np.array(subset_df.index)
prob_security_df_rowidx

array([ 20,  71,  73,  74,  86, 113, 119, 130, 133, 138, 139, 144, 149,
       152, 158, 161, 162, 164, 170, 174, 178, 190, 194, 196, 208, 209,
       210, 213, 215, 217, 227, 228, 229, 237, 246, 247, 259, 264, 265,
       290, 294, 300, 304, 326, 337, 345, 352, 356, 372, 374, 380, 383,
       388, 390, 407, 414, 415, 428, 429, 430])

In [126]:
# Encode the probable-security descriptions for the CVE model (one encoded
# row per document, in subset_df row order).
security_descriptions = subset_df['norm_description'].tolist()
cve_encoded_docs = cc.prepare_inference_data(security_descriptions)
print('Total CVE Docs Encoded:', cve_encoded_docs.shape)

# Number of non-zero entries in each encoded row.
cve_doc_lengths = np.array([np.nonzero(encoded_doc)[0].size
                            for encoded_doc in cve_encoded_docs])
print('Removing bad docs with low tokens')

# Row positions of the documents that have at least 10 non-zero entries.
has_enough_cve_tokens = cve_doc_lengths >= 10
cve_doc_idx = np.argwhere(has_enough_cve_tokens).ravel()
filtered_cve_encoded_docs = cve_encoded_docs[cve_doc_idx]
print('Filtered CVE Docs Encoded:', filtered_cve_encoded_docs.shape)

Total CVE Docs Encoded: (60, 1000)
Removing bad docs with low tokens
Filtered CVE Docs Encoded: (58, 1000)


In [128]:
print('Making predictions for probable CVE issues')
cve_pred_probs = cc_model.predict(filtered_cve_encoded_docs, batch_size=2048, verbose=0)
cve_pred_probsr = cve_pred_probs.ravel()

# A document is flagged as a probable CVE when its predicted probability is
# strictly above 0.3.
cve_pred_labels = [int(prob > 0.3) for prob in cve_pred_probsr]

print('Storing CVE Model predictions in dataset')
# cve_doc_idx holds positions within subset_df; translate them to the df
# index labels of the scored rows, then write the labels onto those rows.
prob_cve_idxs = prob_security_df_rowidx[cve_doc_idx]
cve_scored_rows = df.index.isin(prob_cve_idxs)
df.loc[cve_scored_rows, 'cve_model_flag'] = cve_pred_labels

Making predictions for probable CVE issues
Storing CVE Model predictions in dataset


In [153]:
# Preview the dataset after both model flags have been written.
df.head()

Unnamed: 0,repo_name,event_type,ecosystem,status,id,number,api_url,url,creator_name,creator_url,created_at,updated_at,closed_at,norm_description,security_model_flag,cve_model_flag
0,Unknwon/goconfig,IssuesEvent,golang,opened,448638715,44,https://api.github.com/repos/Unknwon/goconfig/...,https://github.com/Unknwon/goconfig/issues/44,wencycool,https://github.com/wencycool,2019-05-27 02:22:38+00:00,2019-05-27 02:22:38+00:00,NaT,key keyvalue nocache port my cnf,0,99
1,astaxie/beego,IssuesEvent,golang,opened,448805111,3663,https://api.github.com/repos/astaxie/beego/iss...,https://github.com/astaxie/beego/issues/3663,chaclee,https://github.com/chaclee,2019-05-27 11:28:40+00:00,2019-05-27 11:28:40+00:00,NaT,httplib post file cannot specify file name hel...,0,99
2,aws/aws-sdk-go,IssuesEvent,golang,opened,448528837,2623,https://api.github.com/repos/aws/aws-sdk-go/is...,https://github.com/aws/aws-sdk-go/issues/2623,kishaningithub,https://github.com/kishaningithub,2019-05-26 05:02:44+00:00,2019-05-26 05:02:44+00:00,NaT,concurrent put please fill out the sections be...,0,99
3,aws/aws-sdk-go,IssuesEvent,golang,opened,448580609,2624,https://api.github.com/repos/aws/aws-sdk-go/is...,https://github.com/aws/aws-sdk-go/issues/2624,kishaningithub,https://github.com/kishaningithub,2019-05-26 15:53:55+00:00,2019-05-26 15:53:55+00:00,NaT,first call to dynamodb is very slow please fil...,0,99
4,bazelbuild/bazel-gazelle,IssuesEvent,golang,opened,448562916,536,https://api.github.com/repos/bazelbuild/bazel-...,https://github.com/bazelbuild/bazel-gazelle/is...,Globegitter,https://github.com/Globegitter,2019-05-26 12:38:33+00:00,2019-05-26 12:38:33+00:00,NaT,extra args lost when running in a sub director...,0,99


In [155]:
# Output layout: ./triaged_datasets/<start>-<end>/<dataset>_<start>-<end>.csv
BASE_TRIAGE_DIR = './triaged_datasets'
NEW_TRIAGE_SUBDIR = '-'.join([START_TIME.format('YYYYMMDD'), END_TIME.format('YYYYMMDD')])
NEW_DIR_PATH = os.path.join(BASE_TRIAGE_DIR, NEW_TRIAGE_SUBDIR)

MODEL_INFERENCE_DATASET = os.path.join(NEW_DIR_PATH, 'model_inference_full_output_'+NEW_TRIAGE_SUBDIR+'.csv')
PROBABLE_SEC_CVE_DATASET = os.path.join(NEW_DIR_PATH, 'probable_security_and_cves_'+NEW_TRIAGE_SUBDIR+'.csv')
PROBABLE_CVE_DATASET = os.path.join(NEW_DIR_PATH, 'probable_cves_'+NEW_TRIAGE_SUBDIR+'.csv')
print('Creating New Model Inference Directory:', NEW_DIR_PATH)
# exist_ok=True lets the cell be re-run for the same date window instead of
# failing because the directory was already created.
os.makedirs(NEW_DIR_PATH, exist_ok=True)

# The normalised text is not exported. errors='ignore' keeps a re-run of this
# cell from failing once the column has already been removed.
df = df.drop(columns=['norm_description'], errors='ignore')

# Empty columns for the manual triage step.
df['triage_is_security'] = 0
df['triage_is_cve'] = 0
df['triage_feedback_comments'] = ''

# Full model output for every event.
print('Saving Model Inference dataset:', MODEL_INFERENCE_DATASET)
df.to_csv(MODEL_INFERENCE_DATASET, index=False)

# Rows flagged by the security model (without the CVE triage column).
print('Saving Probable Security dataset:', PROBABLE_SEC_CVE_DATASET)
df[df.security_model_flag==1].drop(['triage_is_cve'], axis=1).to_csv(PROBABLE_SEC_CVE_DATASET, index=False)

# Rows flagged by the CVE model (without the security triage column).
print('Saving Probable CVE dataset:', PROBABLE_CVE_DATASET)
df[df.cve_model_flag==1].drop(['triage_is_security'], axis=1).to_csv(PROBABLE_CVE_DATASET, index=False)

Creating New Model Inference Directory: ./triaged_datasets/20190526-20190527
Saving Model Inference dataset: ./triaged_datasets/20190526-20190527/model_inference_full_output_20190526-20190527.csv
Saving Probable Security dataset: ./triaged_datasets/20190526-20190527/probable_security_and_cves_20190526-20190527.csv
Saving Probable CVE dataset: ./triaged_datasets/20190526-20190527/probable_cves_20190526-20190527.csv


In [157]:
from utils import aws_utils as aws
# Imported under its full name: the short alias `cc` is already bound to the
# CVE classifier instance earlier in this notebook, and rebinding it to a
# module would break any re-run of the CVE encoding cell.
from utils import cloud_constants


# Handle on the S3 bucket that receives the triage datasets.
s3_obj = aws.S3_OBJ
bucket_name = cloud_constants.S3_BUCKET_NAME
s3_bucket = s3_obj.Bucket(bucket_name)

In [160]:
# Upload the triage directory written for this date window. The message names
# what is actually uploaded: the triage datasets, not model assets.
print('Uploading triaged datasets to S3 Bucket')
aws.s3_upload_folder(folder_path=NEW_DIR_PATH,
                     s3_bucket_obj=s3_bucket, prefix='triaged_datasets')

Uploading Saved Model Assets to S3 Bucket
Uploading: triaged_datasets/20190526-20190527/probable_cves_20190526-20190527.csv
Uploading: triaged_datasets/20190526-20190527/model_inference_full_output_20190526-20190527.csv
Uploading: triaged_datasets/20190526-20190527/probable_security_and_cves_20190526-20190527.csv
