In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import json
import glob

import boto3
from botocore.exceptions import NoCredentialsError

# Raise pandas' display limits (row count, column width, column count) to 99999
# and render floats with five decimal places.
pd.options.display.max_rows = 99999
pd.options.display.max_colwidth = 99999
pd.options.display.max_columns = 99999
pd.options.display.float_format = lambda value: '%.5f' % value

In [2]:
# Credentials and settings are read from a local JSON file instead of being
# hardcoded in the notebook.
with open('config.json') as config_f:
    data = json.load(config_f)

# AWS key pair passed to boto3 by the S3 helper functions.
ACCESS_KEY, SECRET_KEY = data['aws_access_key_id'], data['aws_secret_access_key']
# Also required to be present in the config; not referenced by the cells visible here.
region_name, WANDB_API_KEY = data['region_name'], data['WANDB_API_KEY']

In [3]:
def upload_to_aws(local_file, bucket, s3_file):
    """Upload a local file to an S3 bucket.

    Args:
        local_file: Path of the file on disk to upload.
        bucket: Name of the destination S3 bucket.
        s3_file: Object key to store the file under.

    Returns:
        True when the upload call completes; False when it raises
        FileNotFoundError or NoCredentialsError (a message is printed in
        every case). Any other exception propagates to the caller.
    """
    client = boto3.client(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )
    succeeded = False
    try:
        client.upload_file(local_file, bucket, s3_file)
    except FileNotFoundError:
        print("The file was not found")
    except NoCredentialsError:
        print("Credentials not available")
    else:
        print("Upload Successful")
        succeeded = True
    return succeeded

def download_from_aws(s3_file, bucket,local_file):
    """Download an object from an S3 bucket to a local path.

    Args:
        s3_file: Key of the object inside the bucket.
        bucket: Name of the S3 bucket to read from.
        local_file: Local path the object is written to.

    Returns:
        True when the download completes. False when the object does not
        exist in the bucket, when FileNotFoundError is raised, or when no
        AWS credentials are available. A message is printed in every case.

    Raises:
        botocore.exceptions.ClientError: For any S3 error other than a
            missing object (e.g. access denied); these are re-raised unchanged.
    """
    # Local import: the notebook's import cell only brings in NoCredentialsError.
    from botocore.exceptions import ClientError

    s3 = boto3.client('s3', aws_access_key_id=ACCESS_KEY,
                      aws_secret_access_key=SECRET_KEY)
    try:
        s3.download_file(bucket, s3_file, local_file)
        print("Download Successful")
        return True
    except FileNotFoundError:
        # Presumably raised for a bad local destination path; a missing
        # remote object is reported through ClientError below.
        print("The file was not found")
        return False
    except NoCredentialsError:
        print("Credentials not available")
        return False
    except ClientError as err:
        # S3 reports a missing key as a 404 ClientError rather than
        # FileNotFoundError, so translate it to the same "not found" result.
        error_code = err.response.get('Error', {}).get('Code')
        if error_code in ('404', 'NoSuchKey'):
            print("The file was not found")
            return False
        # Anything else (permissions, throttling, ...) is not a "not found"
        # condition; keep the original behaviour of letting it propagate.
        raise
    
def list_files_from_aws(bucket):
    """Return the key of every object in an S3 bucket.

    Args:
        bucket: Name of the S3 bucket to enumerate.

    Returns:
        A list with one string per object, in the order the bucket's
        ``objects.all()`` collection yields them.
    """
    resource = boto3.resource(
        's3',
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )
    return [str(entry.key) for entry in resource.Bucket(bucket).objects.all()]

def filter_list(input_list,contains):
    """Keep only the items that contain a given substring.

    Args:
        input_list: Iterable of strings to examine.
        contains: Substring an item must include to be kept.

    Returns:
        A new list, in the original order, of the items in which
        ``contains`` occurs.
    """
    return [entry for entry in input_list if entry.find(contains) != -1]

def remove_from_list(input_list,contains):
    """Drop every item that contains a given substring.

    Args:
        input_list: Iterable of strings to examine.
        contains: Substring that disqualifies an item.

    Returns:
        A new list, in the original order, of the items in which
        ``contains`` does not occur.
    """
    return [entry for entry in input_list if entry.find(contains) == -1]

# Upload Files to Bucket Folder

In [None]:
# Glob fragment selecting which local files to upload; it is wrapped in '*'
# on both sides before matching.
filter_str = '**Depth_12**joblib'
# Alternative fragment: filter_str = 'xgb'
# NOTE(review): '**' is not a standalone path component in this pattern, so
# recursive=True presumably has no effect and only the current directory is
# searched -- confirm that this is intended.
fileList = sorted(glob.glob('*{}*'.format(filter_str), recursive=True))
fileList

In [None]:
# Optional manual narrowing of the upload list: uncomment a line below to drop
# every entry whose name contains the given substring, then re-display the list.
#fileList = remove_from_list(fileList,'50')
#fileList = remove_from_list(fileList,'10_')
fileList    

In [None]:
# Destination bucket and key prefix for the uploaded files.
bucket = 'pparkitn-public'
folder = 'final_models/'

# upload_to_aws reports failure through its return value; collect the files
# that failed so they are not lost in the per-file output of a long batch.
failed_uploads = []

for filename in fileList:
    print("LocalFilename=",filename)
    # The remote name is the local name with this fixed substring removed.
    remote_file = filename.replace('_Est_3000_Drop_Cols_1_Date_01-01-2021_xgb','')
    print("RemoteFilename=",remote_file)
    if not upload_to_aws(filename, bucket, folder + remote_file):
        failed_uploads.append(filename)

if failed_uploads:
    print("Failed uploads:", failed_uploads)

# List Files in Bucket

In [4]:
# Fetch the key of every object in the bucket. This rebinds `fileList`, which
# the upload cells used for local file names; from here on it holds S3 keys.
my_bucket = 'pparkitn-public'
fileList = list_files_from_aws(my_bucket)
fileList

['final_models/',
 'final_models/Final_AssetID_000000.parquet.gzip',
 'final_models/Final_AssetID_000001.parquet.gzip',
 'final_models/Final_AssetID_000002.parquet.gzip',
 'final_models/Final_AssetID_000003.parquet.gzip',
 'final_models/Final_AssetID_000004.parquet.gzip',
 'final_models/Final_AssetID_000005.parquet.gzip',
 'final_models/Final_AssetID_000006.parquet.gzip',
 'final_models/Final_AssetID_000007.parquet.gzip',
 'final_models/Final_AssetID_000008.parquet.gzip',
 'final_models/Final_AssetID_000009.parquet.gzip',
 'final_models/Final_AssetID_000010.parquet.gzip',
 'final_models/Final_AssetID_000011.parquet.gzip',
 'final_models/Final_AssetID_000012.parquet.gzip',
 'final_models/Final_AssetID_000013.parquet.gzip',
 'final_models/Model_Final_AssetID_000000.joblib',
 'final_models/Model_Final_AssetID_000000_Est_1000_Drop_Cols_1_Date_01-01-2021_xgb.joblib',
 'final_models/Model_Final_AssetID_000000_Est_1000_Max_Depth_10_xgb.joblib',
 'final_models/Model_Final_AssetID_000000_Est_10

# Filter List

In [5]:
# Keep the keys that mention 'BW_3', then discard any of those that also
# mention 'joblib'.
fileList = remove_from_list(
    filter_list(fileList, 'BW_3'),
    'joblib',
)
fileList

['final_models/model_input_BW_3.parquet.gzip']

# Download File 

In [6]:
my_bucket = 'pparkitn-public'
for s3_file in fileList:
    # The local name is the S3 key with the 'final_models/' folder text removed,
    # so the file lands in the current working directory.
    local_file = s3_file.replace('final_models/', '')
    print(local_file, s3_file, sep='\n')
    download_from_aws(s3_file, my_bucket, local_file)

model_input_BW_3.parquet.gzip

final_models/model_input_BW_3.parquet.gzip

Download Successful
