#!/usr/bin/env bash
# Copyright (C) 2015-2018 Toshinori Sato (@overlast)
#
# https://github.com/neologd/mecab-ipadic-neologd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on any unhandled command failure and on any use of an unset variable.
set -e
set -u

# Absolute path of the directory that contains this script.
# Every expansion is quoted so a checkout path containing spaces or glob
# characters still resolves, "&&" stops pwd from reporting an unrelated
# directory when the cd fails, and CDPATH is cleared for the cd so that it
# cannot print a directory name into the captured output.
BASEDIR=$(CDPATH= cd "$(dirname "$0")" && pwd)

# Name used in usage text and error messages.
PROGRAM_NAME=install-mecab-ipadic-NEologd
# Prefix put in front of every progress message.
ECHO_PREFIX="[${PROGRAM_NAME}] :"
# String printed by -v / --version.
VERSION="install-mecab-ipadic-NEologd version 0.0.1"
#######################################
# Print the option reference of this installer, then terminate the script.
# Globals:   PROGRAM_NAME (read) - shown in the first line of the text
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   never returns; always exits with status 1
#######################################
usage() {
  # Unquoted delimiter: $PROGRAM_NAME below is expanded, everything else is
  # emitted literally (the glob in "neologd-*.xz" is not expanded here).
  cat <<USAGE_TEXT
Usage: $PROGRAM_NAME [OPTIONS]
 This script is the installer of mecab-ipadic-NEologd

Options:
 -h, --help

 -v, --version

 -p, --prefix /PATH/TO/INSTALL/DIRECTORY
 Set any directory path where you want to install

 -y, --forceyes
 If you want to install regardless of the result of test

 -u, --asuser
 If you want to install to the user directory as an user

 -n, --newest
 If you want to update mecab-ipadic-NEologd

 -a, --install_all_seed_files
 If you want to install all seed files(seed/neologd-*.xz)

 -c, --install_only_patched_ipadic
 If you want to install only patched IPADIC

 --as_sudoer
 If you want to install to an other user's directory as sudoer

 --create_user_dic
 If you want to create an user dictionary using seed data of mecab-ipadic-NEologd

 --eliminate-redundant-entry
 If you want to eliminate the redundant entries

 --extend-column URL_1,...,URL_N
 If you want to use an arbitrary column extension repository
 You should set CSV value of URL(s)

 --min_surface_length INT_VALUE
 If you want to delete the entries whose length of surface is shorter than INT_VALUE

 --max_surface_length INT_VALUE
 If you want to delete the entries whose length of surface is longer than INT_VALUE

 --min_baseform_length INT_VALUE
 If you want to delete the entries whose length of base form is shorter than INT_VALUE

 --max_baseform_length INT_VALUE
 If you want to delete the entries whose length of base form is longer than INT_VALUE

 --ignore_adverb
 If you would not like to install entries of orthographic variant of an adverbial as seed data of mecab-ipadic-NEologd

 --ignore_interject
 If you would not like to install entries of orthographic variant of an interjection as seed data of mecab-ipadic-NEologd

 --ignore_noun_ortho
 If you would not like to install entries of orthographic variant of a common noun as seed data of mecab-ipadic-NEologd

 --ignore_noun_sahen_conn_ortho
 If you would not like to install entries of orthographic variant of a noun used as verb form(Sahen-setsuzoku) as seed data of mecab-ipadic-NEologd

 --ignore_adjective_std
 If you would not like to install entries of frequent orthographic variant of an adjective as seed data of mecab-ipadic-NEologd

 --install_adjective_exp
 If you would like to install entries of infrequent orthographic variant of an adjective as seed data of mecab-ipadic-NEologd

 --ignore_adjective_verb
 If you would not like to install entries of orthographic variant of an adjective verb as seed data of mecab-ipadic-NEologd

 --install_infreq_datetime
 If you would like to install infrequent entries of datetime representation type of named entity as seed data of mecab-ipadic-NEologd

 --install_infreq_quantity
 If you would like to install infrequent entries of quantity representation type of named entity as seed data of mecab-ipadic-NEologd

 --ignore_ill_formed_words
 If you would not like to install entries of ill formed words as seed data of mecab-ipadic-NEologd

USAGE_TEXT
  exit 1
}
# ---------------------------------------------------------------------------
# Default value of every command-line setting. Switches are 0 (off) until the
# matching option turns them to 1; the length limits are 0 until an INT_VALUE
# is supplied; the two string settings start out empty.
# ---------------------------------------------------------------------------

# String-valued settings (-p/--prefix and --extend-column).
INSTALL_PATH_PREFIX=""
COLUMN_EXTENSIONS_URLS=""

# How the installer itself runs (-y, -u, --as_sudoer, -n).
IS_FORCE_YES=0 IS_AS_USER=0 IS_AS_SUDOER=0
WANNA_UPDATE=0

# Length limits (--min/--max_surface_length, --min/--max_baseform_length).
MIN_SURFACE_LEN=0 MAX_SURFACE_LEN=0
MIN_BASEFORM_LEN=0 MAX_BASEFORM_LEN=0

# What gets built (--create_user_dic, --eliminate-redundant-entry, -c, -a).
WANNA_CREATE_USER_DIC=0
WANNA_ELIMINATE_REDUNDANT_ENTRY=0
WANNA_INSTALL_ONLY_PATCHED_IPADIC=0
WANNA_INSRALL_ALL_SEED_FILES=0

# Seed categories to leave out (--ignore_* options).
WANNA_IGNORE_ADVERB=0 WANNA_IGNORE_INTERJECT=0
WANNA_IGNORE_NOUN_ORTHO=0 WANNA_IGNORE_NOUN_SAHEN_CONN_ORTHO=0
WANNA_IGNORE_ADJECTIVE_STD=0 WANNA_IGNORE_ADJECTIVE_VERB=0
WANNA_IGNORE_ILL_FORMED_WORDS=0

# Seed categories to add (--install_* options).
WANNA_INSTALL_ADJECTIVE_EXP=0
WANNA_INSTALL_INFREQ_DATETIME=0 WANNA_INSTALL_INFREQ_QUANTITY=0
#######################################
# Abort with a usage error unless the option in $1 is followed by a value.
# The value counts as missing when there is no next word, when it is empty,
# or when it starts with '-' (it would be the next option).
# Globals:   PROGRAM_NAME (read)
# Arguments: $@ - the remaining command-line words, option name first
# Outputs:   one error line on stderr, then whatever usage prints
# Returns:   0 when a value is present; otherwise exits with status 1
#######################################
_require_option_value() {
  if [[ $# -lt 2 || -z "${2-}" || "${2-}" == -* ]]; then
    echo "${PROGRAM_NAME}: option requires an argument -- $1" 1>&2
    usage
    exit 1
  fi
}

#######################################
# Parse the installer's command line into the option variables.
#
# The words are consumed strictly left to right with "$1"/shift, so the word
# being classified and the word being read can never drift apart (the former
# loop walked a frozen copy of "$@" while shifting the live one).
# All expansions are safe under "set -u", so nounset is not toggled here.
#
# Globals:   written - WANNA_UPDATE, INSTALL_PATH_PREFIX, IS_FORCE_YES,
#            IS_AS_USER, IS_AS_SUDOER, COLUMN_EXTENSIONS_URLS, the four
#            MIN_/MAX_ length variables, every WANNA_* switch, and the array
#            "param" (non-empty words that are not options, in order)
#            read    - PROGRAM_NAME, VERSION
# Arguments: $@ - the command-line words
# Outputs:   --version prints VERSION on stdout; errors go to stderr
# Returns:   0 on success; exits with status 1 for --help, --version,
#            an unknown option, or an option that lacks its value
#######################################
parse_args() {
  local stripped
  param=()
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -n | --newest)
        WANNA_UPDATE=1
        shift
        ;;
      -h | --help)
        usage
        exit 1
        ;;
      -v | --version)
        printf '%s\n' "$VERSION"
        exit 1
        ;;
      -p | --prefix)
        _require_option_value "$@"
        INSTALL_PATH_PREFIX="$2"
        shift 2
        ;;
      -y | --forceyes)
        IS_FORCE_YES=1
        shift
        ;;
      -u | --asuser)
        IS_AS_USER=1
        shift
        ;;
      --as_sudoer)
        IS_AS_SUDOER=1
        shift
        ;;
      -c | --install_only_patched_ipadic)
        WANNA_INSTALL_ONLY_PATCHED_IPADIC=1
        shift
        ;;
      -a | --install_all_seed_files)
        WANNA_INSRALL_ALL_SEED_FILES=1
        shift
        ;;
      --create_user_dic)
        WANNA_CREATE_USER_DIC=1
        shift
        ;;
      --eliminate-redundant-entry)
        WANNA_ELIMINATE_REDUNDANT_ENTRY=1
        shift
        ;;
      --extend-column)
        _require_option_value "$@"
        COLUMN_EXTENSIONS_URLS="$2"
        shift 2
        ;;
      --ignore_noun_ortho)
        WANNA_IGNORE_NOUN_ORTHO=1
        shift
        ;;
      --ignore_noun_sahen_conn_ortho)
        WANNA_IGNORE_NOUN_SAHEN_CONN_ORTHO=1
        shift
        ;;
      --ignore_adverb)
        WANNA_IGNORE_ADVERB=1
        shift
        ;;
      --ignore_interject)
        WANNA_IGNORE_INTERJECT=1
        shift
        ;;
      --ignore_adjective_std)
        WANNA_IGNORE_ADJECTIVE_STD=1
        shift
        ;;
      --install_adjective_exp)
        WANNA_INSTALL_ADJECTIVE_EXP=1
        shift
        ;;
      --ignore_adjective_verb)
        WANNA_IGNORE_ADJECTIVE_VERB=1
        shift
        ;;
      --install_infreq_datetime)
        WANNA_INSTALL_INFREQ_DATETIME=1
        shift
        ;;
      --install_infreq_quantity)
        WANNA_INSTALL_INFREQ_QUANTITY=1
        shift
        ;;
      --ignore_ill_formed_words)
        WANNA_IGNORE_ILL_FORMED_WORDS=1
        shift
        ;;
      --min_surface_length)
        _require_option_value "$@"
        MIN_SURFACE_LEN="$2"
        shift 2
        ;;
      --max_surface_length)
        _require_option_value "$@"
        MAX_SURFACE_LEN="$2"
        shift 2
        ;;
      --min_baseform_length)
        _require_option_value "$@"
        MIN_BASEFORM_LEN="$2"
        shift 2
        ;;
      --max_baseform_length)
        _require_option_value "$@"
        MAX_BASEFORM_LEN="$2"
        shift 2
        ;;
      -*)
        # Strip the leading dashes with parameter expansion; piping the word
        # through echo would swallow names such as "-e" or "-n".
        stripped=$1
        while [[ "${stripped}" == -* ]]; do
          stripped=${stripped#-}
        done
        echo "${PROGRAM_NAME}: illegal option -- '${stripped}'" 1>&2
        usage
        exit 1
        ;;
      *)
        # Keep non-empty positional words; an empty word is dropped. The
        # shift is unconditional so that the loop always makes progress.
        if [[ -n "$1" ]]; then
          param+=("$1")
        fi
        shift
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# Reject option combinations and values the installer cannot honour.
#
# Checks, in order:
#   1. -a and -c are mutually exclusive.
#   2. -c needs an explicit install directory (-p).
#   3. The four length limits are non-negative decimal integers. Without
#      this check a non-numeric value made the numeric comparisons below
#      fail with an error, which the shell treats as "false", so the bad
#      value slipped through validation unnoticed.
#   4. When both a minimum and a maximum are given (> 0), min < max.
#
# Globals:   read - WANNA_INSTALL_ONLY_PATCHED_IPADIC,
#            WANNA_INSRALL_ALL_SEED_FILES, INSTALL_PATH_PREFIX,
#            MIN_SURFACE_LEN, MAX_SURFACE_LEN, MIN_BASEFORM_LEN,
#            MAX_BASEFORM_LEN, ECHO_PREFIX, PROGRAM_NAME
# Arguments: none
# Outputs:   explanation on stdout (stderr for the integer check)
# Returns:   0 when everything is consistent; otherwise exits with status 1
#######################################
validate_options() {
  # An unset or empty setting is read as 0 / empty, i.e. "not given".
  local only_patched=${WANNA_INSTALL_ONLY_PATCHED_IPADIC:-0}
  local all_seeds=${WANNA_INSRALL_ALL_SEED_FILES:-0}
  local prefix=${INSTALL_PATH_PREFIX:-}
  local limit_name limit_value

  if [ "${only_patched}" -gt 0 ] && [ "${all_seeds}" -gt 0 ]; then
    echo "${ECHO_PREFIX} '-a option' and '-c option' can't be specified at the same time !!"
    echo "${ECHO_PREFIX} Please check usage using '--help option'"
    exit 1
  fi

  if [ "${only_patched}" -gt 0 ] && [ -z "${prefix}" ]; then
    echo "${ECHO_PREFIX} When you set '-c option', please set '-p option' as install directory path"
    echo "${ECHO_PREFIX} Example:"
    echo "${ECHO_PREFIX} Linux: -c -p /usr/local/lib/mecab/dic/ipadic_neologd_patched"
    echo "${ECHO_PREFIX} macOS: -c -p /usr/local/Cellar/mecab/0.996/lib/mecab/dic/ipadic_neologd_patched"
    exit 1
  fi

  for limit_name in MIN_SURFACE_LEN MAX_SURFACE_LEN MIN_BASEFORM_LEN MAX_BASEFORM_LEN; do
    limit_value=${!limit_name:-0}
    if [[ ! "${limit_value}" =~ ^[0-9]+$ ]]; then
      echo "${PROGRAM_NAME}: ${limit_name} must be a non-negative integer -- '${limit_value}'" 1>&2
      usage
      exit 1
    fi
  done

  # "[" compares in base 10, so a value such as 08 is accepted; "[[" and
  # "(( ))" would try to read it as octal and fail.
  local min_surface=${MIN_SURFACE_LEN:-0} max_surface=${MAX_SURFACE_LEN:-0}
  if [ "${min_surface}" -gt 0 ] && [ "${max_surface}" -gt 0 ]; then
    if [ "${min_surface}" -ge "${max_surface}" ]; then
      echo "${ECHO_PREFIX} Can't set inconsistent values: MIN_SURFACE_LEN(${min_surface}) >= MAX_SURFACE_LEN(${max_surface})"
      usage
      exit 1
    fi
  fi

  local min_baseform=${MIN_BASEFORM_LEN:-0} max_baseform=${MAX_BASEFORM_LEN:-0}
  if [ "${min_baseform}" -gt 0 ] && [ "${max_baseform}" -gt 0 ]; then
    if [ "${min_baseform}" -ge "${max_baseform}" ]; then
      echo "${ECHO_PREFIX} Can't set inconsistent values: MIN_BASEFORM_LEN(${min_baseform}) >= MAX_BASEFORM_LEN(${max_baseform})"
      usage
      exit 1
    fi
  fi
}

validate_options
echo "$ECHO_PREFIX Start.."
echo "$ECHO_PREFIX Check the existance of libraries"

# Every external program that has to be available before the installation
# starts. The list is checked in this order and the first missing entry
# stops the script with status 1.
COMMANDS=(find sort head cut egrep mecab mecab-config make curl sed cat diff tar unxz xargs grep iconv patch which file openssl awk)
for COMMAND in "${COMMANDS[@]}"; do
  # "command -v" is a shell builtin, so the lookup does not depend on
  # "which" being installed and its output is never word-split inside "[".
  if ! command -v "${COMMAND}" >/dev/null 2>&1; then
    echo "$ECHO_PREFIX ${COMMAND} is not found."
    exit 1
  fi
  echo "$ECHO_PREFIX ${COMMAND} => ok"
done
# With -n/--newest: make sure GitHub is reachable, then bring the local
# checkout up to origin/master. Without it: only print a hint about -n.
if [[ "${WANNA_UPDATE}" == 1 ]]; then
  # curl prints the final HTTP status code ("000" when no response arrived).
  # "|| true" keeps errexit from aborting on a failed transfer, so that the
  # explanation below is actually shown when the network is down.
  # NOTE(review): --insecure disables TLS certificate verification for this
  # probe - confirm that it is still required.
  STATUS_CODE=$(curl --insecure -IL https://github.com -s -w '%{http_code}\n' -o /dev/null) || true
  if [[ "${STATUS_CODE}" == 200 ]]; then
    IS_NETWORK_ONLINE=1
  else
    echo "$ECHO_PREFIX Unable to access https://github.com/"
    echo "$ECHO_PREFIX Status code : ${STATUS_CODE}"
    echo "$ECHO_PREFIX Update error, please retry after re-connecting to network"
    echo "$ECHO_PREFIX Or you can try to install without updating option(-n)"
    echo "$ECHO_PREFIX \$ ./bin/install-mecab-ipadic-neologd"
    exit 1
  fi
  echo
  cd "${BASEDIR}/../"

  # Commit hash of the local master branch and of origin's master branch.
  local_head=$(git log refs/heads/master --pretty=%H | head -1)
  remote_head=$(git ls-remote origin -h refs/heads/master | cut -f1)

  # An empty local hash (git could not read the branch) never counts as
  # "up to date"; that case falls through to the fetch below, as before.
  if [[ -n "${local_head}" && "${local_head}" == "${remote_head}" ]]; then
    echo "$ECHO_PREFIX mecab-ipadic-NEologd is already up-to-date"
  else
    # When the first line of /proc/version mentions "Microsoft" the update
    # is not run automatically; the commands are printed instead.
    # grep -q yields a plain true/false, so hosts without a match no longer
    # trigger a "[" syntax error on stderr.
    if [[ -e /proc/version ]] && head -n 1 /proc/version | grep -q Microsoft; then
      echo "$ECHO_PREFIX Please execute following commands to install newest version of mecab-ipadic-NEologd"
      echo
      echo " $ git fetch origin; git reset --hard origin/master"
      echo
      exit 0
    else
      echo "$ECHO_PREFIX Get the newest updated information using git"
      git fetch origin
      git reset --hard origin/master
    fi
  fi
else
  echo "$ECHO_PREFIX You should execute the following command if you want to install newest version of mecab-ipadic-NEologd"
  echo
  echo " $ bin/install-mecab-ipadic-neologd -n"
  echo
  # Pause so that the hint can be read before the build output starts.
  sleep 3
fi
# Without -p the dictionary goes below the directory reported by
# "mecab-config --dicdir".
if [[ -z "${INSTALL_PATH_PREFIX}" ]]; then
  INSTALL_PATH_PREFIX="$(mecab-config --dicdir)/mecab-ipadic-neologd"
fi
echo ""
echo "$ECHO_PREFIX mecab-ipadic-NEologd will be install to ${INSTALL_PATH_PREFIX}"
echo ""
echo "$ECHO_PREFIX Make mecab-ipadic-NEologd"

# Arguments for the build script, one quoted word per value so that an
# install path containing spaces reaches it intact.
MAKE_ARGS=(
  -p "${INSTALL_PATH_PREFIX}"
  -s "${MIN_SURFACE_LEN}"
  -l "${MAX_SURFACE_LEN}"
  -S "${MIN_BASEFORM_LEN}"
  -L "${MAX_BASEFORM_LEN}"
  -u "${WANNA_CREATE_USER_DIC}"
  -B "${WANNA_IGNORE_ADVERB}"
  -J "${WANNA_IGNORE_INTERJECT}"
  -O "${WANNA_IGNORE_NOUN_ORTHO}"
  -H "${WANNA_IGNORE_NOUN_SAHEN_CONN_ORTHO}"
  -t "${WANNA_IGNORE_ADJECTIVE_STD}"
  -T "${WANNA_INSTALL_ADJECTIVE_EXP}"
  -j "${WANNA_IGNORE_ADJECTIVE_VERB}"
  -E "${WANNA_ELIMINATE_REDUNDANT_ENTRY}"
  -D "${WANNA_INSTALL_INFREQ_DATETIME}"
  -Q "${WANNA_INSTALL_INFREQ_QUANTITY}"
  -I "${WANNA_IGNORE_ILL_FORMED_WORDS}"
  -c "${WANNA_INSTALL_ONLY_PATCHED_IPADIC}"
  -a "${WANNA_INSRALL_ALL_SEED_FILES}"
  -G
)
# -G stays a bare trailing flag when no --extend-column URL list was given,
# which is the command line this script has always produced in that case.
# NOTE(review): confirm the build script expects "-G" without a value.
if [[ -n "${COLUMN_EXTENSIONS_URLS}" ]]; then
  MAKE_ARGS+=("${COLUMN_EXTENSIONS_URLS}")
fi

# The command is tested directly: with errexit active, a separate "$?" check
# on the next line would never run after a failure.
if ! "${BASEDIR}/../libexec/make-mecab-ipadic-neologd.sh" "${MAKE_ARGS[@]}"; then
  echo ""
  echo "$ECHO_PREFIX Failed to make mecab-ipadic-NEologd"
  exit 1
fi
#######################################
# Ask on the terminal whether the installation should go ahead and keep
# asking until the answer is exactly "yes" or "no".
# Globals:   ECHO_PREFIX (read)
# Arguments: none
# Outputs:   the prompt and any complaint on stdout; an error on stderr
#            when standard input ends before an answer was given
# Returns:   0 after "yes". "no" exits the script with status 0.
#            End of input exits the script with status 1 instead of
#            repeating the prompt forever.
#######################################
function wanna_install(){
  local answer
  while true; do
    echo ""
    echo "$ECHO_PREFIX Please check the list of differences in the upper part."
    echo ""
    echo "$ECHO_PREFIX Do you want to install mecab-ipadic-NEologd? Type yes or no."
    # read fails at end of input. A last line without a trailing newline is
    # still delivered in "answer", so only an empty result means "no input".
    # -r keeps backslashes in the answer untouched.
    if ! read -r answer && [[ -z "${answer}" ]]; then
      echo "$ECHO_PREFIX No answer could be read from standard input" 1>&2
      exit 1
    fi
    case "${answer}" in
      yes)
        return 0
        ;;
      no)
        echo "$ECHO_PREFIX Quit from installing process"
        echo "$ECHO_PREFIX Finish.."
        exit 0
        ;;
      *)
        # printf prints the answer verbatim; escape sequences typed by the
        # user are not interpreted.
        printf 'cannot understand %s.\n\n' "${answer}"
        ;;
    esac
  done
}
# Unless -y/--forceyes was given, run the tokenize test and ask for
# confirmation before anything is installed.
if [ "${IS_FORCE_YES}" -eq 0 ]; then
  echo "$ECHO_PREFIX Get results of tokenize test"
  # The command is tested directly: with errexit active, a separate "$?"
  # check on the next line would never run after a failure.
  if ! "${BASEDIR}/../libexec/test-mecab-ipadic-neologd.sh"; then
    echo ""
    echo "$ECHO_PREFIX Failed to test mecab-ipadic-NEologd"
    echo "$ECHO_PREFIX If you want to skip the test, you can use '-y' or '--forceyes' option"
    usage
    exit 1
  fi
  wanna_install
fi

echo "$ECHO_PREFIX OK. Let's install mecab-ipadic-NEologd."
# Values are quoted so that an install path containing spaces is passed to
# the install script as a single argument.
if ! "${BASEDIR}/../libexec/install-mecab-ipadic-neologd.sh" -p "${INSTALL_PATH_PREFIX}" -u "${IS_AS_USER}" -s "${IS_AS_SUDOER}"; then
  echo ""
  echo "$ECHO_PREFIX Failed to install mecab-ipadic-NEologd"
  exit 1
fi
echo "$ECHO_PREFIX Finish.."