# Update database with manually uploaded clips
This script populates the Koster lab database with information about the clips that have been manually uploaded.

## Download subjects information from Zooniverse
We first download information about the subjects that were manually uploaded to the Koster lab project (#9747) using the [Python SDK for Panoptes](https://github.com/zooniverse/panoptes-python-client).
Note: only Zooniverse project collaborators can retrieve subject information from the Koster lab Zooniverse project.

### Import required packages

In [None]:
import csv
import os
import time
from datetime import datetime

import requests
from panoptes_client import Project, Panoptes

### Specify project-specific info

In [None]:
# Where the csv files are written: the complete subject export as downloaded,
# and the filtered file holding only the selected subjects.
dstn_subj = "../all_subjects.csv"
out_location_subj = "../manual_uploaded_subjects.csv"

# Last date on which subjects were uploaded manually
last_date = "2020-01-10 00:00:00 UTC"

# Log in to Zooniverse (fill in your own username and password)
Panoptes.connect(username="", password="")

# Koster lab project, identified by its Zooniverse project number
project = Project(9747)

### Define the functions to download the subject information

In [None]:
def download_file(url, dstn, timeout=60):
    """Stream the resource at ``url`` into the local file ``dstn``.

    Args:
        url: Address of the file to download.
        dstn: Path of the local file to write; an existing file is
            overwritten.
        timeout: Seconds to wait for the server to connect or to send more
            data before giving up. Passed unchanged to ``requests.get``.

    Returns:
        ``dstn``, so the caller can report which file was written.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    # The context manager releases the connection even when writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly rather than saving an HTTP error page as if it were
        # the requested data.
        response.raise_for_status()
        with open(dstn, 'wb') as dstn_f:
            for chunk in response.iter_content(chunk_size=4096):
                dstn_f.write(chunk)
    return dstn


def download_exports(projt, dstn_cl, dstn_sb=None):
    """Download the project's current subject export to a local file.

    Args:
        projt: Object offering ``describe_export`` (a
            ``panoptes_client.Project`` in this notebook).
        dstn_cl: Ignored when ``dstn_sb`` is given; kept so existing
            three-argument calls keep working. When ``dstn_sb`` is omitted
            this value is used as the subject destination instead.
        dstn_sb: Path the subject export is written to.

    Returns:
        True when the export was downloaded, False when any step failed
        (the reason is printed).
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    # Support the two-argument form ``download_exports(project, path)``.
    if dstn_sb is None:
        dstn_sb = dstn_cl

    try:
        media = projt.describe_export('subjects')['media'][0]
        # Keep 'YYYY-MM-DDTHH:MM:SS' only; anything after the seconds is
        # dropped before parsing.
        generated = datetime.strptime(media['updated_at'][0:19],
                                      '%Y-%m-%dT%H:%M:%S')
        # NOTE(review): assumes Panoptes reports `updated_at` in UTC - confirm.
        generated = generated.replace(tzinfo=timezone.utc)
        age_hours = (datetime.now(timezone.utc) - generated).total_seconds() / 3600
        print(str(datetime.now())[0:19] +
              '  Subject export {:.1f} hours old'.format(age_hours))
        file_subj = download_file(media['src'], dstn_sb)
        print(str(datetime.now())[0:19] + '  ' + file_subj + ' downloaded')
    except Exception as err:
        # Best-effort step: report why it failed and let the caller decide,
        # without swallowing KeyboardInterrupt/SystemExit.
        print(str(datetime.now())[0:19] +
              '  Subjects download did not complete: ' + repr(err))
        return False
    return True


def include_subj(subj_record, earliest='2000-00-10 00:00:00 UTC', latest=None):
    """Tell whether a subject record was created inside the wanted period.

    The timestamps are compared as plain strings, so the result is only
    meaningful when all three share one fixed-width layout.
    NOTE(review): assumes ``created_at`` uses the same
    'YYYY-MM-DD HH:MM:SS UTC' layout as ``last_date`` - confirm against the
    export file.

    Args:
        subj_record: Mapping with a ``'created_at'`` entry, e.g. one row of
            the subject export.
        earliest: Inclusive lower bound of the period.
        latest: Inclusive upper bound of the period. When omitted, the
            module-level ``last_date`` is used.

    Returns:
        True if the record falls inside the period, otherwise False.
    """
    if latest is None:
        latest = last_date
    return earliest <= subj_record['created_at'] <= latest


def slice_exports(dstn_cl, out_location_cl, dstn_sb=None, out_location_sb=None):
    """Copy the selected rows of the subject export into a smaller csv file.

    Every row of the input is passed to ``include_subj``; rows it accepts
    are written to the output with a fixed set of columns.

    Args:
        dstn_cl: Ignored when the subject paths are given; kept so existing
            four-argument calls keep working.
        out_location_cl: Ignored when the subject paths are given.
        dstn_sb: Path of the downloaded subject export to read.
        out_location_sb: Path of the filtered csv file to write.
            When both ``dstn_sb`` and ``out_location_sb`` are omitted, the
            first two arguments are used as input and output path instead.

    Returns:
        True once the file has been written.

    Raises:
        KeyError: If the export lacks one of the copied columns.
    """
    # Support the two-argument form ``slice_exports(in_path, out_path)``.
    if dstn_sb is None and out_location_sb is None:
        dstn_sb, out_location_sb = dstn_cl, out_location_cl

    fieldnames = ['subject_id',
                  'project_id',
                  'workflow_id',
                  'subject_set_id',
                  'metadata',
                  'created_at']
    rows_read = 0
    rows_kept = 0

    # The input is opened first so a missing export does not leave behind a
    # freshly truncated output file. newline='' lets the csv module handle
    # line endings inside quoted fields itself.
    # NOTE(review): assumes the export is UTF-8 encoded - confirm.
    with open(dstn_sb, newline='', encoding='utf-8') as src, \
            open(out_location_sb, 'w', newline='', encoding='utf-8') as out:
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        for row in csv.DictReader(src):
            rows_read += 1
            if include_subj(row):
                rows_kept += 1
                # Copy only the listed columns, in the header's order.
                writer.writerow({name: row[name] for name in fieldnames})

    print(str(datetime.now())[0:19] + '  Subjects file:' +
          ' ' + str(rows_read) + ' lines read and inspected' +
          ' ' + str(rows_kept) + ' records selected and copied')
    return True


### Download the subject information

In [None]:
if __name__ == '__main__':
    # Both helpers take the (unused here) classification paths first, so
    # those slots are filled with None and the subject paths follow.
    downloaded = download_exports(project, None, dstn_subj)
    print(downloaded)
    # Only filter when a fresh export is on disk; slicing after a failed
    # download would read a missing or incomplete file.
    if downloaded:
        print(slice_exports(None, None, dstn_subj, out_location_subj))

## Populate the koster lab database