In [1]:
'''
Bitport MP4 Grabber.

In late August someone tried to hack Bitport, so they increased security, which made regular scraping
very difficult.  Instead, we use their API here to provide an easy way to download batches of MP4 files.
'''

import requests
import json
from pprint import pprint
import os
import re
# Set to True to get verbose diagnostic prints from the helper functions.
debug = False

# Directory that downloaded files are written into.  Set the BITPORT_DOWNLOAD_DIR
# environment variable to override it per machine instead of editing the notebook.
download_path_prefix = os.environ.get(
    'BITPORT_DOWNLOAD_DIR',
    '/Volumes/iMac_External_Drive/ORIGINAL/BITPORT_DOWNLOAD',
)


class BitportBlob(object):
    '''
    Top-level container for one Bitport session.

    Holds the auth token, the account info and the list of BitportDir objects
    that the rest of the notebook fills in after construction.
    '''
    # Names of every BitportBlob created so far (one list shared by all instances).
    instance_names = []

    def __init__(self, name):
        self.name = name
        BitportBlob.instance_names.append(name)

        # Everything starts out empty; callers populate these attributes later.
        self.obj_list = list()
        self.auth_tok = str()
        self.me_info = dict()
        self.all_bpf_objects = dict()
        self.dirs = list()
        self.files_to_download = list()
        
        
class BitportDir(object):
    '''
    One Bitport directory (folder).

    figure_out_dirs() constructs these with the folder's code as 'name' and
    stores the folder summary in 'dictoid'; ui() later fills 'flist'.
    '''
    # Names of every BitportDir created so far (one list shared by all instances).
    instance_names = []

    def __init__(self, name):
        self.name = name
        BitportDir.instance_names.append(name)

        self.dictoid = dict()   # folder summary: name / size / files_count / code
        self.flist = list()     # per-file dicts for this folder
        self.fact = str()       # not used anywhere in the visible code

        

def get_secrets_from_file(bitport_dot_file = ''):
    '''
    Keep secret stuff in a .bitport file rather than embedded in the code.

    Reads the JSON file and returns its content as a dict with the keys
    'client_id', 'client_secret' and 'code'.

    bitport_dot_file: optional path to the secrets file; defaults to ~/.bitport

    Raises SystemExit when:
      * the file does not exist (a template with placeholder values is written
        first, so there is something to go and edit),
      * the file cannot be read or is not valid JSON,
      * any of the three values is missing or still holds its <..._GOES_HERE> placeholder.
    '''

    placeholder_marker = '_GOES_HERE>'

    # File should contain what dummy_file_content contains (but with real info instead of <..GOES_HERE> stuff)
    dummy_file_content = {
      "client_id":"<CLIENT_ID_GOES_HERE>",
      "client_secret": "<CLIENT_SECRET_GOES_HERE>",
      "code": "<CODE_GOES_HERE>"
    }

    if bitport_dot_file == '':
        bitport_dot_file = os.path.join(os.path.expanduser('~'), '.bitport')

    if not os.path.isfile(bitport_dot_file):
        print ('File {} not found.   Creating....'.format(bitport_dot_file))
        # Write the template BEFORE bailing out, otherwise there is nothing to edit.
        with open(bitport_dot_file, 'w') as fh:
            json.dump(dummy_file_content, fh, indent=2)
        print ('You must go edit the file: {}'.format(bitport_dot_file))
        raise SystemExit('Game over!')

    if debug: print ('File {} already exists, reading...'.format(bitport_dot_file))

    try:
        with open(bitport_dot_file,  'r') as fh:
            mystuff = json.load(fh)
    except (OSError, ValueError) as err:   # json.JSONDecodeError is a ValueError
        print('UHHHhhhh, problem with file ', bitport_dot_file, ' game over.')
        raise SystemExit('Could not read {}: {}'.format(bitport_dot_file, err))

    if not isinstance(mystuff, dict):
        print('UHHHhhhh, problem with file ', bitport_dot_file, ' game over.')
        raise SystemExit('{} does not contain a JSON object'.format(bitport_dot_file))

    # Only the key names are echoed: the values are secrets and notebook output gets saved.
    if debug: print("FOUND FILE KEYS: [[[{}]]]".format(sorted(mystuff)))

    for key in dummy_file_content:
        value = mystuff.get(key)
        still_placeholder = isinstance(value, str) and placeholder_marker in value
        if value is None or still_placeholder:
            print('Did you forget to edit the file {} ?\n Exiting!'.format(bitport_dot_file))
            raise SystemExit('Game over')

    return mystuff
            
        
        
def find_object(field, object_list):
    '''
    Return the first element of 'object_list' whose 'name' attribute equals 'field',
    or None when no element matches.
    '''
    matching = (candidate for candidate in object_list if candidate.name == field)
    return next(matching, None)



def get_auth_tok(url='https://api.bitport.io/v2/oauth2/access-token'):
    '''
    Exchange the stored 'code' (a.k.a. USER_CODE) for an access_token by POSTing
    client_id / client_secret / code to 'url'.

    The 'code' itself is obtained by a human at http://bitport.io/get-access; per the
    Bitport doc, "thanks to which you can get access_token" [sic].  Beware that the 'code'
    seems to change occasionally, but the old one still works.  So, it may be that a human
    needs to be logged in to a browser and hit the ./get-access site to get a new one occasionally.

    The POST should yield a blob containing 3 items;
    token_type":"Bearer","expires_in":157766399,"access_token":"eyJ0eXAiOiJKV1QiLCJhbGciO...."

    We only care about (and therefore only return) the 'access_token' item.  Once we have the
    access_token we can make API calls to do useful stuff on the Bitport site.

    Returns the access_token string, or False when the secrets are unavailable, the POST does
    not come back with status 200, or the reply carries no 'access_token'.

    For more information see the (very terse) Bitport documentation available here: https://bitport.io/api
    '''

    dict1 = get_secrets_from_file()
    if not dict1:
        print('No secrets available, cannot request an access token.')
        return False

    # The secrets are read from a file for a reason: they are needed in the payload,
    # but we keep them out of the distro / git repo.
    payload = {'type': 'application/x-www-form-urlencoded',
               'client_id': dict1['client_id'],
               'client_secret': dict1['client_secret'],
               'grant_type': 'code',
               'code': dict1['code']}

    if debug:
        print('Payload is \n')
        pprint(payload)

    # Without a timeout a stalled connection would hang the notebook forever.
    r = requests.post(url, data=payload, timeout=30)
    if r.status_code != 200:
        print("\n\nPOST was a FAILURE!!   status_code = : ", r.status_code, "\n\n")
        return False

    resp = json.loads(r.text)
    if 'access_token' not in resp:
        print("\n\nPOST succeeded but the reply has no 'access_token'\n\n")
        return False
    return resp['access_token']


    
def jsonify_req_obj(robj):
    '''
    Decode the JSON document held in robj.text and return the resulting Python structure.

    robj: any object with a 'text' attribute (in this notebook, a requests response).
    '''
    return json.loads(robj.text)


def bitport_api_cloud(tok):
    '''
    GET https://api.bitport.io/v2/cloud (the top level look at your Bitport cloud content),
    sending 'tok' as the Authorization header.

    Returns the requests response object as-is; decoding is left to the caller.
    '''
    return requests.get('https://api.bitport.io/v2/cloud',
                        headers={'Authorization': tok})


def get_a_files_download_url(tok, fcode):

    '''
    Hit url/website to get more file details (only one we care about is download url)
    Take the 'fileCode', which looks like this: wzskcf8bls
    use that in the https://api.bitport.io/v2/files/<....>/stream.m3u8 url, and
    get a result that looks like this;

    ['#EXTM3U',
     '#EXT-X-VERSION:4',
     '#EXT-X-TARGETDURATION:3556',
     '#EXT-X-MEDIA-SEQUENCE:0',
     '#EXTINF:3556',
     'https://s04.bitport.io/download/uGDWG96J1QRpEFQnntfzsp49vbkK1oba/1',
     '#EXT-X-ENDLIST']

    Then parse out and return just the https (url) piece.

    tok:   access token, sent as the Authorization header.
    fcode: the file's code.

    Returns the first line of the reply that starts with 'http', stripped of
    surrounding whitespace (so both LF and CRLF line endings are handled).
    Raises ValueError when the reply contains no such line (bad token, unknown
    file code, error page, ...).
    '''

    api_url = 'https://api.bitport.io/v2/files/' + fcode + '/stream.m3u8'

    header = {'Authorization': tok }

    # Without a timeout a stalled connection would hang the notebook forever.
    rr = requests.get(api_url, headers = header, timeout=30)

    # In the sample reply above every other line is a '#...' tag.
    for line in rr.text.splitlines():
        candidate = line.strip()
        if candidate.startswith('http'):
            if debug: print('download url=[{}]'.format(candidate))
            return candidate

    raise ValueError(
        'No download url found for file code {} (HTTP status {})'.format(fcode, rr.status_code))

        

    
def get_me_info_from_api(tok):
    '''
    GET https://api.bitport.io/v2/me with 'tok' as the Authorization header and
    return the 'data' member of the decoded JSON reply.
    '''
    response = requests.get('https://api.bitport.io/v2/me',
                            headers={'Authorization': tok})
    return jsonify_req_obj(response)['data']




def bitport_api_get_folder(tok, dir_code):
    '''
    GET https://api.bitport.io/v2/cloud/<dir_code> with 'tok' as the Authorization header.

    Returns the requests response object as-is; decoding is left to the caller.
    '''
    folder_url = 'https://api.bitport.io/v2/cloud/' + dir_code
    return requests.get(folder_url, headers={'Authorization': tok})






def munge_dir_req_obj(bitport_api_get_folder_results):
    '''
    Boil a folder reply down to the list of files worth downloading.

    The reply is decoded with jsonify_req_obj(); only its first 'data' entry is used.
    The folder's date, name and file count are printed.  A file is skipped when its
    name starts with "sample." and it is smaller than min_size, or when it has no
    screenshots.

    Returns a list of dicts with the keys
    code / parent_folder_code / size / name / date / url, one per kept file.
    '''
    min_size = 10000000  # sometimes a 'sample' file is included.  We want to skip it.  Size is in bytes?
    file_url_prefix = 'https://bitport.io/my-files/file/'

    folder = jsonify_req_obj(bitport_api_get_folder_results)['data'][0]

    print("date=", folder['created_at']['date'])
    print("name=", folder['name'])
    print("count=", folder['files_count'])

    good_video_file_urls = []
    for entry in folder['files']:
        if re.search(r'^sample[.]', entry['name']) and entry['size'] < min_size:
            if debug: print(entry['name'], 'is a small file that starts with "sample.", skipping...')
            continue
        if entry['screenshots'] == []:
            if debug: print(entry['name'], 'has no screenshots, skipping...')
            continue

        if debug: print('\tWINNER: ', entry['code'], entry['name'], entry['size'])
        good_video_file_urls.append({
            'code': entry['code'],
            'parent_folder_code': entry['parent_folder_code'],
            'size': entry['size'],
            'name': entry['name'],
            'date': entry['created_at']['date'],
            'url': file_url_prefix + entry['code'],
        })

    print('munge_dir_req_obj() returning with {} items: {}'.format(
        len(good_video_file_urls), good_video_file_urls))
    return good_video_file_urls




def paint_h():
    '''
    Print the two header lines of a listing: the column titles
    (Row / Code / Name / Size(MB)) and a rule of '=' characters beneath them.
    '''
    layout = '{:>3} {:<12} {:<90} {:>5}'
    titles = ('Row', 'Code', 'Name', 'Size(MB)')
    rule_widths = (3, 12, 82, 5)

    print(layout.format(*titles))
    print(layout.format(*['=' * width for width in rule_widths]))
    
    
    
def paint_r(row, dictx):
    '''
    Print one listing row: the row number, then the 'code' and 'name' of dictx,
    then its 'size' divided by 1024*1024, shown with two decimals.
    '''
    debug = False   # local switch; shadows the module-level flag on purpose
    if debug: print('I am "paint_r", called with [{}] and [{}]'.format(row, dictx))

    bytes_per_mb = 1024 * 1024
    code = dictx['code']
    name = dictx['name']
    size_mb = dictx['size'] / bytes_per_mb

    row_layout = '{:>3} {:<12} {:<90} {:<5.2f}'
    print(row_layout.format(row, code, name, size_mb))
    
    
def figure_out_dirs(the_cloud_api_requests_object):
    '''
    Build one BitportDir per non-empty folder found in a cloud reply.

    The reply is decoded with jsonify_req_obj(); the folders are read from the
    first 'data' entry.  Folders whose 'files_count' is not above zero are ignored.
    Each BitportDir is named after the folder's code and gets a 'dictoid' with the
    folder's name, size, files_count and code.

    Returns the list of BitportDir objects.
    '''
    summary_keys = ('name', 'size', 'files_count', 'code')
    cloud = jsonify_req_obj(the_cloud_api_requests_object)

    found_dirs = []
    for folder in cloud['data'][0]['folders']:
        if not folder['files_count'] > 0:
            continue
        bp_dir = BitportDir(folder['code'])
        bp_dir.dictoid = {key: folder[key] for key in summary_keys}
        found_dirs.append(bp_dir)
    return found_dirs




def print_top_level_choice(bpobj):
    '''
    Print the header, then one numbered row per entry of bpobj.dirs
    (using each entry's 'dictoid').
    '''
    paint_h()
    for row_number, bp_dir in enumerate(bpobj.dirs):
        paint_r(row_number, bp_dir.dictoid)


        
        
def paint_dirs_files(directory_objects_file_list):
    '''
    Print the header, then one numbered row per file dict in
    directory_objects_file_list.
    '''
    paint_h()

    debug = False   # local switch; shadows the module-level flag on purpose
    if debug: pprint(directory_objects_file_list)

    for item_num, file_entry in enumerate(directory_objects_file_list):
        paint_r(item_num, file_entry)

    
    
    

    
    

def get_all_dem_file_download_urls(tok, dobj):
    '''
    Look up the download url of every file in dobj.flist.

    tok:  access token, handed to get_a_files_download_url() for each file.
    dobj: directory object with a 'name' and an 'flist' of file dicts (each with a 'code').

    Each file dict gains two keys: 'download_url' and 'download_url_sm', the
    latter being the same url with a trailing '/1' replaced by '/2'.

    Returns a tuple (all_urls, all_sm_urls) of two lists in flist order.
    '''
    all_urls = []
    all_sm_urls = []

    dir_url = 'https://bitport.io/my-files/' + dobj.name
    print('Getting file download urls for dir: {}'.format(dir_url))

    for ff in dobj.flist:
        print("Grabbing download url for file code {}".format(ff['code']))

        # Use the token we were handed, not whatever the global BiBo happens to hold.
        download_url = get_a_files_download_url(tok, ff['code'])
        download_url_sm = re.sub(r'/1$', '/2', download_url)   # 'cuz small is known to be just the ../2

        ff['download_url'] = download_url
        ff['download_url_sm'] = download_url_sm

        all_urls.append(download_url)
        all_sm_urls.append(download_url_sm)

    return all_urls, all_sm_urls





def download_file(download_url, file_path):
    '''
    Take a url and filename, use the requests lib in stream mode to download the file.

    download_url: where to fetch the file from.
    file_path:    where to write it; one trailing '?' is removed from the path.

    Prints a progress dot per chunk received.  If the server does not answer with
    status 200 a failure message is printed and nothing is written, so an error page
    is never saved under the video's name.  The connection is always released.

    Returns None.
    '''
    file_path = re.sub(r'[?]$', '', file_path)  # removing trailing '?'

    print('Downloading file {}'.format(file_path), end='', flush=True)

    # The timeout bounds connecting and each wait for more data, not the whole download.
    with requests.get(download_url, stream=True, timeout=60) as rr:
        if rr.status_code != 200:
            print('\nDownload FAILED, status_code = {} (nothing written to {})'.format(
                rr.status_code, file_path))
            return

        with open(file_path, 'wb') as fd:
            for chunk in rr.iter_content(chunk_size=2**24):  # 2^24 = 16MB
                if not chunk:
                    continue
                fd.write(chunk)
                # flush so the dots show up while the download is still running
                print('.', end='', flush=True)

    print('\nDone.')
        
        






In [2]:
import re

def _ui2_download_all(flist, url_key, avi_replacement):
    '''Download every file dict in flist from its url_key url into download_path_prefix.'''
    for entry in flist:
        # if the name ends with .AVI (any case), swap that ending for avi_replacement
        file_name = re.sub(r'[.][aA][vV][iI]$', avi_replacement, entry['name'])
        download_file(entry[url_key], download_path_prefix + '/' + file_name)


def _ui2_confirm_and_download(urls, flist, url_key, avi_replacement):
    '''Show the urls, ask for confirmation, and download them all on a yes.'''
    for item in urls:
        print(item)
    y_or_n = input('Are you sure you want all of these?:')
    if y_or_n in ('Y', 'y'):
        _ui2_download_all(flist, url_key, avi_replacement)
        print('All done!')
    else:
        print('Good.  You\'re saving bandwidth and disk space!')


def ui2(file_obj, auth_tok=None):
    '''
    Second-level prompt: let the user pick one file of a directory, or all of them.

    file_obj: directory object whose 'flist' holds the file dicts on offer.
    auth_tok: access token for the API calls; defaults to BiBo.auth_tok.

    Picking a number paints that file's row, looks up its download url, stores it in
    the file dict under 'download_url' and prints it.  Picking "A" looks up the urls of
    every file and then offers to download the Small or the Large variants into
    download_path_prefix.  Anything else re-prompts.

    Returns None.
    '''
    if auth_tok is None:
        auth_tok = BiBo.auth_tok

    max_range = len(file_obj.flist)
    if max_range == 0:
        print('This directory has no downloadable files.')
        return

    while True:
        user_pick_f = input('Pick one by number, or \"A\" for All:').strip()
        if debug: print('Youpicked[{}], of type: {}'.format(user_pick_f, type(user_pick_f)))

        # USER WANTS ONE FILE;
        # the whole answer must be digits ('1x' is not a number) and a valid list index.
        if re.fullmatch(r'[0-9]+', user_pick_f) and int(user_pick_f) < max_range:
            row = int(user_pick_f)
            chosen = file_obj.flist[row]
            paint_h()
            paint_r(row, chosen)
            chosen['download_url'] = get_a_files_download_url(auth_tok, chosen['code'])
            print(chosen['download_url'])
            return

        # USER WANTS ALL FILES:
        if user_pick_f in ('A', 'a'):
            print('Getting all...')
            (durls, durls_sm) = get_all_dem_file_download_urls(auth_tok, file_obj)
            pick = input('Do you want to download Small, Large, or None?:')

            if pick in ('S', 's'):      # SMALL
                _ui2_confirm_and_download(durls_sm, file_obj.flist, 'download_url_sm', '_SM.MP4')
            elif pick in ('L', 'l'):    # LARGE
                _ui2_confirm_and_download(durls, file_obj.flist, 'download_url', '.MP4')
            else:                       # USER WANTS NOTHING!
                print('Caution is a virtue')
            return

        print('Invalid Response, Pick a number between 0 and {}, or \"A\" for all'.format(max_range - 1))


In [3]:
# Create the session container and fetch an access token for it.
BiBo = BitportBlob('mmm')
BiBo.auth_tok = get_auth_tok()

# Never echo the token itself: cell output is saved inside the notebook file and
# ends up wherever the notebook is committed or shared.
if BiBo.auth_tok:
    print('Access token obtained.')
else:
    print('Could NOT obtain an access token.')

<access token redacted -- do not keep credentials in saved notebook output>


In [4]:
def ui():
    '''
    Top-level interactive flow.

    Refreshes BiBo.me_info and BiBo.dirs from the API, lists the directories, asks the
    user to pick one by row number (re-prompting until the answer is a valid row),
    loads that directory's file list into its 'flist', shows the files and hands
    over to ui2().

    Uses the global BiBo (its auth_tok must already be set).  Returns None; returns
    early when there are no directories to choose from.
    '''
    BiBo.me_info = get_me_info_from_api(BiBo.auth_tok)
    BiBo.dirs = figure_out_dirs(bitport_api_cloud(BiBo.auth_tok))

    dir_count = len(BiBo.dirs)
    if dir_count == 0:
        # Without this guard the prompt below could never be satisfied.
        print('No non-empty directories found in your Bitport cloud.')
        return

    print_top_level_choice(BiBo)

    # None (rather than a magic number) marks "no valid pick yet", so no sentinel
    # value can ever be mistaken for a real row.
    user_input = None
    while user_input is None:
        answer = input('Pick a number: ').strip()
        if re.fullmatch(r'[0-9]+', answer) and int(answer) < dir_count:
            user_input = int(answer)
        else:
            print('Invalid Response, Pick a number between 0 and {}'.format(dir_count - 1))

    chosen_dir = BiBo.dirs[user_input]
    dir_code = chosen_dir.dictoid['code']
    print("DIRCODE IS {}".format(dir_code))
    print('Files from choice {}, the {} dir;\n'.format(user_input, dir_code ))
    paint_h()
    paint_r(user_input, chosen_dir.dictoid)

    # Call API on folder -- get a requests object;
    one_dirs_requests_obj = bitport_api_get_folder(BiBo.auth_tok, dir_code)

    # Populate that dir object's .flist
    chosen_dir.flist = munge_dir_req_obj(one_dirs_requests_obj)

    paint_dirs_files(chosen_dir.flist)

    ui2(chosen_dir)


In [None]:
# Start the interactive picker.  It prompts via input() and reads the global BiBo,
# so the cell that creates BiBo and sets BiBo.auth_tok must have been run first.
ui()

Row Code         Name                                                                                       Size(MB)
  0 8yzfuvl00s   Pinky and the Brain                                                                        17405.20
  1 7yd1vjxluk   Rick.and.Morty.S03E08.720p.HDTV.x264-BATV[rarbg]                                           486.32
  2 sjyi4ww4ry   The.Red.Turtle.2016.BRRip.XviD.AC3-EVO                                                     1220.78
  3 7yx1y5wu7y   Inception [2010] 1080p BluRay AAC x264-ETRG                                                3992.07


In [None]:
# Diagnostic dump of everything held for each directory in BiBo.dirs:
# a separator line, the summary dict, the object's name, and its file list
# (flist stays empty for directories that ui() has not loaded).
for xx in BiBo.dirs:
    print('%' * 90)
    pprint(xx.dictoid)
    print('\n',xx.name, end='')
    pprint(xx.flist)