In [1]:
import os
import pandas as pd
import re
import requests

In [4]:
df = pd.read_excel('assignment1/response.xlsx')
df['id'] = df['Thư điện tử'].str[:8]
df['folder'] = df.agg('{0[id]}_{0[Họ và đệm]} {0[Tên]}'.format, axis=1)
df = df.sort_values(by=['Được hoàn thành'], ascending=False)

In [5]:
def extract_id(response):
    if type(response) != str:
        return None
    pos = response.find('open?id=')
    if pos == -1:
        pos = response.find('/file/d/')
    if pos != -1:
        pos += 8
        response = response[pos:]
        result = re.search(r"[\w-]+", response)
        return result.group(0)
    else:
        return None

def download_file_from_google_drive(id, destination, replace=False):
    if not replace and os.path.isfile(destination):
        print("Destination file", destination, "exists, download aborted")
        return
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params = { 'id' : id }, stream = True)
    token = get_confirm_token(response)

    if token:
        params = { 'id' : id, 'confirm' : token }
        response = session.get(URL, params = params, stream = True)

    save_response_content(response, destination)    

def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)

def download_response(row):
    folder = row['folder']
    response = row['Response 1']    
    data_file_id = extract_id(response)
    if data_file_id is not None:
        destination_dir = os.path.join('assignment1', folder)
        destination = os.path.join(destination_dir, 'data.zip')
        os.makedirs(destination_dir, exist_ok=True)
        print("Downloading", data_file_id, '...')
        download_file_from_google_drive(data_file_id, destination)
        print("Done.")
    else:
        print("Cannot find drive file ID, check response")

def download_by_student_id(student_id):
    for r in df.iterrows():
        r = r[1]
        if r['id'] == student_id:
            print(r['folder'], r['Được hoàn thành'], r['Response 1'])
            download_response(r)
            print('--------------------------------------------------------')
        # break # comment this line to process all assignments

In [6]:
df['id']

235    17020709
234    17021311
233    16022403
232    16022403
148    17021311
         ...   
259    17021203
260    15020971
261    17021246
262    16020237
285    17021019
Name: id, Length: 286, dtype: object

In [None]:
for i in df['id']:
    download_by_student_id(i)

17020709_Dương Thu Hà 8 Tháng tư 2020  1:54 AM Code: https://github.com/duongghaa/speechProcessing/tree/master/Tuan1_thudulieu

Data: https://drive.google.com/file/d/1lQxeVW4j2PfV4hnML_DskEGFJlk2Jq3k/view?usp=sharing
Downloading 1lQxeVW4j2PfV4hnML_DskEGFJlk2Jq3k ...
Destination file assignment1/17020709_Dương Thu Hà/data.zip exists, download aborted
Done.
--------------------------------------------------------
17020709_Dương Thu Hà 6 Tháng tư 2020  12:14 PM Code: https://github.com/duongghaa/speechProcessing/tree/master/Tuan1_thudulieu

Data: https://drive.google.com/file/d/1lQxeVW4j2PfV4hnML_DskEGFJlk2Jq3k/view?usp=sharing
Downloading 1lQxeVW4j2PfV4hnML_DskEGFJlk2Jq3k ...
Destination file assignment1/17020709_Dương Thu Hà/data.zip exists, download aborted
Done.
--------------------------------------------------------
17020709_Dương Thu Hà 30 Tháng ba 2020  10:32 PM Code: https://github.com/duongghaa/speechProcessing/tree/master/Tuan1_thudulieu

Data: https://drive.google.com/file/d/1

Done.
--------------------------------------------------------
17021288_Khổng Thị Mai Loan 4 Tháng tư 2020  9:42 AM https://drive.google.com/open?id=1rekZLMpeEbxYrXTyUzm7M5bYbHKz9gwB

https://github.com/KhongMaiLoan/voice-processing
Downloading 1rekZLMpeEbxYrXTyUzm7M5bYbHKz9gwB ...
Destination file assignment1/17021288_Khổng Thị Mai Loan/data.zip exists, download aborted
Done.
--------------------------------------------------------
17021288_Khổng Thị Mai Loan 4 Tháng tư 2020  9:58 AM https://drive.google.com/file/d/1G05hHuF7yWvZ31xXnMRA8Sdj4VtOC5Ec/view

https://github.com/KhongMaiLoan/voice-processing
Downloading 1G05hHuF7yWvZ31xXnMRA8Sdj4VtOC5Ec ...
Destination file assignment1/17021288_Khổng Thị Mai Loan/data.zip exists, download aborted
Done.
--------------------------------------------------------
17021288_Khổng Thị Mai Loan 4 Tháng tư 2020  9:42 AM https://drive.google.com/open?id=1rekZLMpeEbxYrXTyUzm7M5bYbHKz9gwB

https://github.com/KhongMaiLoan/voice-processing
Downloading 1re

Done.
--------------------------------------------------------
17021231_Đỗ Thành Đạt 4 Tháng tư 2020  4:42 PM [Link source code: ]
https://github.com/username31299/RecordStream

[Data]

https://drive.google.com/file/d/11NRYuyyi9p3cHlXEDkfBw2KH3R_oTDgv

[Video demo record]

https://drive.google.com/file/d/19Xlt8zIzzP4PNAK707DqIlEt-kHTqERW
Downloading 11NRYuyyi9p3cHlXEDkfBw2KH3R_oTDgv ...
Destination file assignment1/17021231_Đỗ Thành Đạt/data.zip exists, download aborted
Done.
--------------------------------------------------------
17021231_Đỗ Thành Đạt 4 Tháng tư 2020  4:40 PM [Link source code: ]
https://github.com/username31299/RecordStream

[Data]

https://drive.google.com/file/d/11NRYuyyi9p3cHlXEDkfBw2KH3R_oTDgv

[Video demo record]

https://drive.google.com/file/d/19Xlt8zIzzP4PNAK707DqIlEt-kHTqERW
Downloading 11NRYuyyi9p3cHlXEDkfBw2KH3R_oTDgv ...
Destination file assignment1/17021231_Đỗ Thành Đạt/data.zip exists, download aborted
Done.
--------------------------------------------

Done.
--------------------------------------------------------
17021089_Trần Đức Trung 2 Tháng tư 2020  9:49 AM Mã nguồn:

https://github.com/niits/INT3411

Dữ liệu:

https://drive.google.com/file/d/1nt9XYeYhZFdeyLhMSVyL4wgmU1KY2zbd/view?usp=sharing
Downloading 1nt9XYeYhZFdeyLhMSVyL4wgmU1KY2zbd ...
Destination file assignment1/17021089_Trần Đức Trung/data.zip exists, download aborted
Done.
--------------------------------------------------------
17021089_Trần Đức Trung 1 Tháng tư 2020  8:45 AM Mã nguồn:

https://github.com/niits/INT3411

Dữ liệu:

https://drive.google.com/file/d/1PQQwl1n7xAcLLtyxCIbnz4MekJtnJDJN/view?usp=sharing
Downloading 1PQQwl1n7xAcLLtyxCIbnz4MekJtnJDJN ...
Destination file assignment1/17021089_Trần Đức Trung/data.zip exists, download aborted
Done.
--------------------------------------------------------
17021089_Trần Đức Trung 1 Tháng tư 2020  5:34 PM Mã nguồn:

https://github.com/niits/INT3411

Dữ liệu:

https://drive.google.com/file/d/1xIzz42t54m6y8U7gSebfc343CB

Downloading 1OL2ZF6S7Wcv5fPxgSij04fOzL7ap-gLk ...
Done.
--------------------------------------------------------
17021195_Nguyễn Văn Linh 25 Tháng ba 2020  9:46 AM https://github.com/nguyenlinh-uet/voice-project
Cannot find drive file ID, check response
--------------------------------------------------------
17021195_Nguyễn Văn Linh 25 Tháng ba 2020  8:34 PM https://github.com/nguyenlinh-uet/voice-project
Cannot find drive file ID, check response
--------------------------------------------------------
17021195_Nguyễn Văn Linh - -
Cannot find drive file ID, check response
--------------------------------------------------------
16021399_Đỗ Huy Linh 31 Tháng ba 2020  1:02 AM https://github.com/Hantor28/Voice-Processing

https://drive.google.com/file/d/1x8ke32-pYAn6_l2KddfPJq3joSFe062I/view?usp=sharing
Downloading 1x8ke32-pYAn6_l2KddfPJq3joSFe062I ...
