Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hackdna/update file storage #3269

Merged
merged 6 commits into from
Mar 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ codekit-config.json
/refinery/ui/cypress/screenshots/
/refinery/ui/cypress/videos/
/import/
/media/
/static/
/transfer/
*.conflict
Expand Down
24 changes: 22 additions & 2 deletions deployment/puppet/refinery/manifests/django.pp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
$deployment_platform = $refinery::params::deployment_platform,
$app_user = $refinery::params::app_user,
$app_group = $refinery::params::app_group,
$media_root = $refinery::params::media_root,
$project_root = $refinery::params::project_root,
$django_root = $refinery::params::django_root,
$django_settings_module = $refinery::params::django_settings_module,
Expand All @@ -10,15 +11,34 @@
$site_url = $refinery::params::site_url,
$virtualenv = $refinery::params::virtualenv,
) inherits refinery::params {
file { [$media_root, $file_store_root]:
ensure => directory,
owner => $app_user,
group => $app_group,
mode => '0755',
require => $deployment_platform ? {
'aws' => Mount[$data_dir],
default => File[$data_dir],
},
}

if $deployment_platform == 'vagrant' {
file { "${project_root}/import":
file { $import_dir:
ensure => directory,
owner => $app_user,
group => $app_group,
}

exec { 'activate_guest_user':
command => "${virtualenv}/bin/python ${django_root}/manage.py activate_user guest",
environment => ["DJANGO_SETTINGS_MODULE=${django_settings_module}"],
user => $app_user,
group => $app_group,
require => Exec['create_guest_user'],
}
}

file_line { "django_settings_module":
file_line { 'django_settings_module':
path => "/home/${app_user}/.profile",
line => "export DJANGO_SETTINGS_MODULE=${django_settings_module}",
}
Expand Down
90 changes: 20 additions & 70 deletions deployment/puppet/refinery/manifests/init.pp
Original file line number Diff line number Diff line change
@@ -1,31 +1,18 @@
class refinery (
$deployment_platform = $refinery::params::deployment_platform,
$app_user = $refinery::params::app_user,
$app_group = $refinery::params::app_group,
$project_root = $refinery::params::project_root,
$django_root = $refinery::params::django_root,
$django_settings_module = $refinery::params::django_settings_module,
$virtualenv = $refinery::params::virtualenv,
$solr_data_set_manager_data = $refinery::params::solr_data_set_manager_data,
$solr_core_data = $refinery::params::solr_core_data,
$docker_host = $refinery::params::docker_host,
$app_user = $refinery::params::app_user,
$app_group = $refinery::params::app_group,
$project_root = $refinery::params::project_root,
$django_root = $refinery::params::django_root,
$django_settings_module = $refinery::params::django_settings_module,
$virtualenv = $refinery::params::virtualenv,
$docker_host = $refinery::params::docker_host,
) inherits refinery::params {
sysctl { 'vm.swappiness': value => '10' } # for better performance

class { 'timezone': # to make logs easier to read
timezone => 'America/New_York',
}

if $deployment_platform == 'vagrant' {
exec { 'activate_guest_user':
command => "${virtualenv}/bin/python ${django_root}/manage.py activate_user guest",
environment => ["DJANGO_SETTINGS_MODULE=${django_settings_module}"],
user => $app_user,
group => $app_group,
require => Exec['create_guest_user'],
}
}

user { $app_user: ensure => present, }

file { "/home/${app_user}/.ssh/config":
Expand All @@ -35,10 +22,6 @@
group => $app_group,
}

file { "/opt":
ensure => directory,
}

# workaround for CloudFront error 523 Origin Unreachable for https://www.rabbitmq.com/rabbitmq-release-signing-key.asc
class { '::rabbitmq':
package_gpg_key => 'https://github.com/rabbitmq/signing-keys/releases/download/2.0/rabbitmq-release-signing-key.asc',
Expand All @@ -50,63 +33,30 @@
ensure => running,
}

file { $data_dir:
ensure => directory,
}

if $deployment_platform == 'aws' {
# Ensure formatted filesystem
# https://forge.puppetlabs.com/puppetlabs/lvm
# http://docs.puppetlabs.com/puppet/4.3/reference/types/mount.html
$fstype = 'ext3'
# configure an EBS volume to store indexing data
$file_system_type = 'ext3'
# This is the block device for the external data.
# It must match the attachment point for the EC2 EBS volume.
$block_device = '/dev/xvdr'

# https://forge.puppetlabs.com/puppetlabs/lvm
filesystem { $block_device:
ensure => present,
fs_type => $fstype,
fs_type => $file_system_type,
before => File[$data_dir],
}
->
file { '/data':
ensure => directory,
}
->
mount { 'data_volume':
name => '/data',

mount { $data_dir:
ensure => mounted,
device => $block_device,
fstype => $fstype,
fstype => $file_system_type,
options => 'defaults',
}

file { '/data/media':
ensure => directory,
owner => $app_user,
group => $app_group,
mode => '0755',
require => Mount['data_volume'],
}

file { '/data/solr':
ensure => directory,
owner => $app_user,
group => $app_group,
mode => '0755',
before => Exec['solr_install'],
require => Mount['data_volume'],
}

file { $solr_data_set_manager_data:
ensure => directory,
owner => $app_user,
group => $app_group,
mode => '0755',
require => Mount['data_volume'],
}

file { $solr_core_data:
ensure => directory,
owner => $app_user,
group => $app_group,
mode => '0755',
require => Mount['data_volume'],
require => File[$data_dir],
}
}

Expand Down
21 changes: 15 additions & 6 deletions deployment/puppet/refinery/manifests/params.pp
Original file line number Diff line number Diff line change
Expand Up @@ -72,21 +72,30 @@

$data_dir = $deployment_platform ? {
'aws' => '/data',
default => undef,
default => "${project_root}",
}

$import_dir = $deployment_platform ? {
'aws' => undef,
default => "${data_dir}/import",
}

$media_root = $deployment_platform ? {
'aws' => "${data_dir}/media",
default => "${project_root}/media",
$media_root = "${data_dir}/media"

$file_store_root = "${media_root}/file_store"

$solr_data_dir = $deployment_platform ? {
'aws' => "${data_dir}/solr",
default => undef,
}

$solr_data_set_manager_data = $deployment_platform ? {
'aws' => "${data_dir}/solr/data_set_manager",
'aws' => "${solr_data_dir}/data_set_manager",
default => undef,
}

$solr_core_data = $deployment_platform ? {
'aws' => "${data_dir}/solr/core",
'aws' => "${solr_data_dir}/core",
default => undef,
}

Expand Down
33 changes: 26 additions & 7 deletions deployment/puppet/refinery/manifests/solr.pp
Original file line number Diff line number Diff line change
@@ -1,18 +1,37 @@
class refinery::solr (
$deployment_platform = $refinery::params::deployment_platform,
$app_user = $refinery::params::app_user,
$django_root = $refinery::params::django_root,
$solr_lib_dir = $refinery::params::solr_lib_dir,
$deployment_platform = $refinery::params::deployment_platform,
$app_user = $refinery::params::app_user,
$django_root = $refinery::params::django_root,
$solr_data_dir = $refinery::params::solr_data_dir,
$solr_core_data = $refinery::params::solr_core_data,
$solr_data_set_manager_data = $refinery::params::solr_data_set_manager_data,
$solr_lib_dir = $refinery::params::solr_lib_dir,
$data_dir = $refinery::params::data_dir
) inherits refinery::params {
$solr_version = '5.3.1'
$solr_archive = "solr-${solr_version}.tgz"
$solr_version = '5.3.1'
$solr_archive = "solr-${solr_version}.tgz"
$download_path = "/tmp/${solr_archive}"
$solr_url = "http://archive.apache.org/dist/lucene/solr/${solr_version}/${solr_archive}"
$solr_url = "http://archive.apache.org/dist/lucene/solr/${solr_version}/${solr_archive}"

package { 'java':
name => 'openjdk-7-jdk',
}

file { '/opt':
ensure => directory,
}

if $deployment_platform == 'aws' {
file { [ $solr_data_dir, $solr_core_data, $solr_data_set_manager_data ]:
ensure => directory,
owner => $app_user,
group => $app_group,
mode => '0755',
before => Exec['solr_install'],
require => Mount[$data_dir],
}
}

archive { 'solr_download':
path => "/tmp/${solr_archive}",
source => "${solr_url}",
Expand Down
4 changes: 2 additions & 2 deletions deployment/terraform/live/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,12 @@ variable "refinery_user_files_columns" {

variable "data_volume_size" {
description = "Size of the EBS data volume in GB"
default = 500
default = 1
}

variable "data_volume_type" {
description = "Type of the EBS data volume"
default = "st1"
default = "gp2"
}

variable "data_volume_snapshot_id" {
Expand Down
4 changes: 0 additions & 4 deletions media/.gitignore

This file was deleted.

2 changes: 2 additions & 0 deletions refinery/config/config.json.erb
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@
"REFINERY_DATA_IMPORT_DIR": "<%= @import_dir || "/import" %>",
"REFINERY_DOCKER_HOST": "<%= @docker_host || "" %>",
"REFINERY_FILE_SOURCE_MAP": {},
"REFINERY_FILE_STORE_ROOT": "<%= @file_store_root %>",
"REFINERY_FILE_STORE_URL": "/media/file_store/",
"REFINERY_GALAXY_ANALYSIS_CLEANUP": "on_success",
"REFINERY_GOOGLE_ANALYTICS_ID": "<%= @refinery_google_analytics_id || "" %>",
"REFINERY_GOOGLE_RECAPTCHA_SITE_KEY": "<%= @refinery_google_recaptcha_site_key || "6LeIxAcTAAAAAJcZVRqyHh71UMIEGNQ_MXjiZKhI" %>",
Expand Down
13 changes: 4 additions & 9 deletions refinery/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import logging
import os
import subprocess
import urlparse

from django.core.exceptions import ImproperlyConfigured
from django.core.files.storage import FileSystemStorage
Expand Down Expand Up @@ -88,8 +87,6 @@ def get_setting(name, settings=local_settings, default=None):
# Absolute filesystem path to the directory that will hold user-uploaded files.
# Example: "/home/media/media.lawrence.com/media/"
MEDIA_ROOT = get_setting("MEDIA_ROOT")
if not os.path.isabs(MEDIA_ROOT):
MEDIA_ROOT = os.path.join(BASE_DIR, "media")

# URL that handles the media served from MEDIA_ROOT. Make sure to use a
# trailing slash.
Expand Down Expand Up @@ -354,12 +351,11 @@ def get_setting(name, settings=local_settings, default=None):
# DO NOT CHANGE THIS after initialization of your Refinery instance
REFINERY_PUBLIC_GROUP_ID = 100

# relative to MEDIA_ROOT
FILE_STORE_DIR = get_setting('FILE_STORE_DIR', default='file_store')
# absolute path to the file store root dir
FILE_STORE_BASE_DIR = os.path.join(MEDIA_ROOT, FILE_STORE_DIR)
REFINERY_FILE_STORE_ROOT = get_setting('REFINERY_FILE_STORE_ROOT')
# for SymlinkedFileSystemStorage (http://stackoverflow.com/q/4832626)
FILE_STORE_BASE_URL = urlparse.urljoin(MEDIA_URL, FILE_STORE_DIR) + '/'
REFINERY_FILE_STORE_URL = get_setting('REFINERY_FILE_STORE_URL')

# always keep uploaded files on disk
FILE_UPLOAD_MAX_MEMORY_SIZE = get_setting('FILE_UPLOAD_MAX_MEMORY_SIZE',
default=0)
Expand All @@ -369,8 +365,7 @@ def get_setting(name, settings=local_settings, default=None):
# format: {'pattern': 'replacement'} - may contain more than one key-value pair
REFINERY_FILE_SOURCE_MAP = get_setting("REFINERY_FILE_SOURCE_MAP")

# data file import directory; it should be located on the same partition as
# FILE_STORE_DIR and MEDIA_ROOT to make import operations fast
# data file import directory
REFINERY_DATA_IMPORT_DIR = get_setting("REFINERY_DATA_IMPORT_DIR")

# location of the Solr server (must be accessible from the web browser)
Expand Down
12 changes: 2 additions & 10 deletions refinery/file_store/models.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
"""
* Manages all data files
* Downloads files from external repositories (by URL)
* Downloads files from external sources (by URL)

Requirements:

FILE_STORE_DIR setting - main file store directory
* must be a subdirectory of MEDIA_ROOT
* must be writeable by the Django server
"""

import logging
Expand All @@ -25,13 +20,10 @@
import constants
import core
from .utils import (S3MediaStorage, SymlinkedFileSystemStorage, copy_s3_object,
delete_s3_object, make_dir, move_file)
delete_s3_object, move_file)

logger = logging.getLogger(__name__)

# create data storage directories
make_dir(settings.FILE_STORE_BASE_DIR)


def _map_source(source):
"""Convert URLs to file system paths by applying file source map"""
Expand Down
6 changes: 3 additions & 3 deletions refinery/file_store/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def run(self, item_uuid):

def import_path_to_path(self, source_path, symlink=True):
"""Import file from an absolute file system path into
FILE_STORE_BASE_DIR
REFINERY_FILE_STORE_ROOT
"""
storage = SymlinkedFileSystemStorage()
file_store_name = storage.get_name(os.path.basename(source_path))
Expand Down Expand Up @@ -148,7 +148,7 @@ def import_path_to_s3(self, source_path):
return file_store_name

def import_s3_to_path(self, source_url):
"""Import S3 object from s3:// URL into FILE_STORE_BASE_DIR"""
"""Import S3 object from s3:// URL into REFINERY_FILE_STORE_ROOT"""
source_bucket, source_key = parse_s3_url(source_url)
storage = SymlinkedFileSystemStorage()
file_store_name = storage.get_name(os.path.basename(source_key))
Expand Down Expand Up @@ -210,7 +210,7 @@ def import_s3_to_s3(self, source_url):
return file_store_name

def import_url_to_path(self, source_url):
"""Import file from URL into FILE_STORE_BASE_DIR"""
"""Import file from URL into REFINERY_FILE_STORE_ROOT"""
# move the file from temp dir into file store dir
storage = SymlinkedFileSystemStorage()
# remove query string from URL before extracting file name
Expand Down