In [None]:
# for file in *.deb; do   dpkg-deb -x "$file" /home/cdsw/inst; done

In [None]:
import pyodbc

In [None]:
# Copy every .deb currently in the apt archive cache into the pandoc package folder.
cp /var/cache/apt/archives/*.deb /home/cdsw/ubuntu_packages/pandoc

In [None]:
#! /bin/bash
#
# Copyright 2019 Team KoNLPy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set mecab related variable(s)
# Directory checked further down to decide whether mecab-ko-dic still has to
# be built and installed.
mecab_dicdir="/usr/local/lib/mecab/dic/mecab-ko-dic"

# Exit as soon as we fail
set -e

# Determine OS: only Linux and macOS (Darwin) are handled by this script.
os=$(uname)
if [[ "$os" != "Linux" && "$os" != "Darwin" ]]; then
    echo "This script does not support this OS." >&2
    echo "Try consulting https://github.com/konlpy/konlpy/blob/master/scripts/mecab.sh" >&2
    # Nothing was installed, so report failure rather than a misleading success.
    exit 1
fi

# Determine sudo: privileged steps are prefixed with $sudo, which stays empty
# when the sudo command is not available.
if hash "sudo" &>/dev/null; then
    sudo="sudo"
else
    sudo=""
fi

# Determine python
# TODO: Prefer python3 and Respect pyenv
# Default to python3; when pyenv is on PATH, plain `python` is used instead.
python="python3"
if hash "pyenv" &>/dev/null; then
    python="python"
fi

# Report whether $python can write into one of its site-packages directories.
# Prints "1" when at least one existing site-packages directory is both
# writable and searchable, otherwise prints "0".
check_python_site_location_is_writable(){
    $python - <<'PYCODE'
import os
import site

needed = os.W_OK | os.X_OK
writable = any(
    os.path.isdir(path) and os.access(path, needed)
    for path in site.getsitepackages()
)
print(int(writable))
PYCODE
}
# Fall back to a per-user pip install when no site-packages directory is
# writable; otherwise pass no extra flag.
if [[ "$(check_python_site_location_is_writable)" == "0" ]]; then
    at_user_site="--user"
else
    at_user_site=""
fi

# Download, build, self-test and install mecab-ko 0.996-ko-0.9.2 from source.
# Uses the global $sudo (privilege prefix, may be empty). The shell is left in
# the extracted source directory under /tmp.
install_mecab_ko(){
    local release="mecab-0.996-ko-0.9.2"

    cd /tmp
    # -f makes curl fail on an HTTP error instead of saving the error page as
    # the tarball, which would otherwise only show up as a confusing tar error.
    curl -fLO "https://bitbucket.org/eunjeon/mecab-ko/downloads/${release}.tar.gz"
    tar zxfv "${release}.tar.gz"
    cd "$release"
    ./configure
    make
    make check
    $sudo make install
}

# Build and install one GNU source tarball (configure, make, make install with
# the default prefix) inside a throw-away staging directory.
#   $1  URL of the .tar.gz to download
_build_gnu_tarball(){
    local url="$1"
    local tarball="${url##*/}"
    local builddir
    builddir=$(mktemp -d)

    # Run in a subshell so the caller's working directory is untouched and is
    # never left pointing at the staging directory removed below.
    (
        cd "$builddir"
        curl -fLO "$url"
        tar -zxvf "$tarball"
        rm "$tarball"
        # Only the extracted source tree remains in the staging directory.
        cd ./*/
        ./configure
        make
        $sudo make install
    )

    # erase stage dir
    rm -rf "$builddir"
}

# Install automake using the platform's package manager, or from source on
# Linux distributions that are neither Debian-like nor Fedora/Red Hat-like.
# Uses the globals $os and $sudo.
install_automake(){
    ## install requirement automake1.11
    # TODO: if not [automake --version]
    if [ "$os" == "Linux" ]; then
        # -q: only the exit status matters; stderr is silenced in case no
        # /etc/*release file exists.
        if grep -Eqi 'debian|buntu|mint' /etc/*release 2>/dev/null; then
            $sudo apt-get update && $sudo apt-get install -y automake
        elif grep -Eqi 'fedora|redhat' /etc/*release 2>/dev/null; then
            $sudo yum install -y automake diffutils make
        else
            # Unknown distribution: build autoconf first, then automake 1.11.
            # Fetched over HTTPS because the result is installed with $sudo.
            _build_gnu_tarball https://ftpmirror.gnu.org/autoconf/autoconf-latest.tar.gz
            _build_gnu_tarball https://ftpmirror.gnu.org/automake/automake-1.11.tar.gz
        fi

    elif [ "$os" == "Darwin" ]; then
        if ! command -v brew &>/dev/null; then
            echo "This script require Homebrew!" >&2
            echo "Try https://brew.sh/" >&2
            # automake could not be installed: signal failure to the caller.
            exit 1
        fi
        brew install automake
    fi
}

# Download, build and install mecab-ko-dic 2.1.1-20180720 and point the
# system-wide mecabrc at it. Uses the globals $os, $sudo and $mecab_dicdir.
install_mecab_ko_dic(){
    local release="mecab-ko-dic-2.1.1-20180720"
    # Use the same location the script checks for an existing install; fall
    # back to the stock path if the variable is unset.
    local dicdir="${mecab_dicdir:-/usr/local/lib/mecab/dic/mecab-ko-dic}"

    echo "Install mecab-ko-dic"
    cd /tmp
    # -f: fail on HTTP errors instead of saving an error page as the tarball.
    curl -fLO "https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/${release}.tar.gz"
    tar -zxvf "${release}.tar.gz"
    cd "$release"
    ./autogen.sh
    ./configure
    if [[ $os == "Linux" ]]; then
        # Write mecab's library directory to the dynamic linker configuration
        # and refresh the linker cache.
        mecab-config --libs-only-L | $sudo tee /etc/ld.so.conf.d/mecab.conf  # XXX: Resolve #271, #182, #133
        $sudo ldconfig
    fi
    make
    $sudo sh -c "echo \"dicdir=${dicdir}\" > /usr/local/etc/mecabrc"
    $sudo make install
}

# Clone mecab-python-0.996 into /tmp (unless already present) and pip-install
# it with $python, adding $at_user_site when that is non-empty.
install_mecab_python(){
    local src_dir="mecab-python-0.996"

    pushd /tmp
    [[ -d "$src_dir" ]] || git clone https://bitbucket.org/eunjeon/mecab-python-0.996.git
    popd

    case "$os" in
        Darwin)
            CFLAGS=-stdlib=libc++ $python -m pip install $at_user_site /tmp/mecab-python-0.996
            ;;
        *)
            # gcc has no -stdlib command line option, so it is not passed
            # here. See discussion on #391.
            $python -m pip install $at_user_site /tmp/mecab-python-0.996
            ;;
    esac
}


# automake is a build dependency of mecab-ko; install it only when missing.
if ! hash "automake" &>/dev/null; then
    echo "Installing automake (A dependency for mecab-ko)"
    install_automake
fi

# Build mecab-ko unless a `mecab` command is already available.
if ! hash "mecab" &>/dev/null; then
    echo "Install mecab-ko"
    install_mecab_ko
else
    echo "mecab-ko is already installed"
fi

# Build the dictionary unless its directory already exists.
if [[ ! -d "$mecab_dicdir" ]]; then
    echo "Install mecab-ko-dic"
    install_mecab_ko_dic
else
    echo "mecab-ko-dic is already installed"
fi

# Check for the MeCab Python binding without importing it.
# importlib.util.find_spec replaces pkgutil.find_loader, which is deprecated
# since Python 3.12 and removed in Python 3.14.
if [[ $($python -c 'import importlib.util; print(1 if importlib.util.find_spec("MeCab") else 0)') == "1" ]]; then
    echo "mecab-python is already installed"
else
    echo "Install mecab-python"
    install_mecab_python
fi

echo "Done."

In [None]:
# Download the packages apt would install for tree, without installing them
# (--download-only), then move the cached .deb files into the tree folder.
apt-get install -y --no-install-recommends --download-only tree
mv /var/cache/apt/archives/*.deb /home/cdsw/packages/ubuntu_packages/tree

# Powerline

In [None]:
# List the dependency lines of the pandoc/texlive packages, take the package
# name (second field) of each, and pass those names to `apt-get download`.
apt-cache depends pandoc texlive-xetex texlive-fonts-recommended texlive-plain-generic | grep "Depends" | awk '{print $2}' | xargs apt-get download

In [None]:
# Download the packages apt would install for powerline, without installing
# them, then move the cached .deb files into the powerline folder.
apt-get install -y --no-install-recommends --download-only powerline
mv /var/cache/apt/archives/*.deb /home/cdsw/packages/ubuntu_packages/powerline

In [None]:
# Print the download URIs of everything an install of pandoc and the texlive
# packages would fetch (--print-uris), instead of downloading or installing.
apt-get  --print-uris --yes --no-install-recommends install \
    pandoc texlive-xetex texlive-fonts-recommended texlive-plain-generic

In [None]:
# Download the packages apt would install for vim, without installing them,
# then move the cached .deb files into the vim folder.
apt-get install -y --no-install-recommends --download-only vim
mv /var/cache/apt/archives/*.deb /home/cdsw/packages/ubuntu_packages/vim

In [None]:
# Download the packages apt would install for bash-completion, without
# installing them, then move the cached .deb files into its folder.
apt-get install -y --no-install-recommends --download-only bash-completion
mv /var/cache/apt/archives/*.deb /home/cdsw/packages/ubuntu_packages/bash-completion

In [None]:
# Download the packages apt would install for python-dev, without installing.
apt-get install -y --no-install-recommends --download-only python-dev
# Move rather than copy: .deb files left in the apt cache would be swept into
# the next package's folder by that cell's `mv *.deb`.
mv /var/cache/apt/archives/*.deb /home/cdsw/packages/ubuntu_packages/python-dev

In [None]:
# Download the packages apt would install for python3-dev, without installing
# them, then move the cached .deb files into the python3-dev folder.
apt-get install -y --no-install-recommends --download-only python3-dev
mv /var/cache/apt/archives/*.deb /home/cdsw/packages/ubuntu_packages/python3-dev

In [None]:
# Download the packages apt would install for pandoc and the texlive packages,
# without installing them, then move the cached .deb files into common/pandoc.
apt-get install -y --no-install-recommends --download-only pandoc texlive-xetex texlive-fonts-recommended texlive-plain-generic
mv /var/cache/apt/archives/*.deb /home/cdsw/packages/ubuntu_packages/common/pandoc

apt-get install -y --no-install-recommends --download-only pandoc texlive-xetex texlive-fonts-recommended texlive-plain-generic
cp /var/cache/apt/archives/*.deb /home/cdsw/ubuntu_packages/pandoc

apt-get install texlive-xetex texlive-fonts-recommended texlive-plain-generic

apt-get install -y --no-install-recommends pandoc texlive-xetex texlive-fonts-recommended texlive-plain-generic

import pyodbc

cd /var/cache/apt/archives/

apt-get update
apt-get install unixodbc-dev
apt-get install -y --no-install-recommends --download-only unixodbc-dev
cp /var/cache/apt/archives/*.deb /home/cdsw/ubuntu_packages
dpkg -i *.deb

! pip install catboost

# Basic

In [None]:
import boto3
import dateutil.relativedelta
import matplotlib
import matplotlib.font_manager
import matplotlib.pyplot
import numpy
import pandas
import requests
import six
import urllib3
import yaml
from bs4 import BeautifulSoup
from IPython import get_ipython
# from IPython.core.display import display => from IPython.display import display
from IPython.display import display
"""
/tmp/ipykernel_70/812137715.py:2: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython.display
  from IPython.core.display import display
"""  
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import IFrame
from numpy import ndarray
from pandas import DataFrame, Series
from pandas.compat.pickle_compat import _class_locations_map
from pandas.tseries.offsets import MonthEnd
from PIL import Image
from pytz import timezone, utc
from requests import get
from requests.packages.urllib3.exceptions import InsecureRequestWarning


In [None]:
from pyspark.ml import Pipeline, PipelineModel
from pyspark.ml.classification import GBTClassifier, GBTClassificationModel, RandomForestClassifier, RandomForestClassificationModel
from pyspark.ml.feature import MinMaxScaler, OneHotEncoder, StringIndexer, StringIndexerModel, Tokenizer, VectorAssembler
from pyspark.ml.fpm import FPGrowth, FPGrowthModel
from pyspark.sql import HiveContext, SparkSession, DataFrame, types, Window, functions

# 2. User Install

## 2-0. visualization

In [None]:
! pip install seaborn

import seaborn

## 2-1. Spark

In [None]:
import pyarrow
import pyarrow.compute
import pyarrow.parquet

## 2-2. Tensorflow

In [None]:
! pip install tensorflow[and-cuda]==2.17.1

In [None]:
import tensorflow
# from tensorflow.contrib.nccl.python.ops import nccl_ops
from tensorflow.python.ops import nccl_ops
"""No module named 'tensorflow.contrib'
=> from tensorflow.python.ops import nccl_ops
"""
from tensorflow.keras import Model, Sequential, layers

## 2-3. Torch

In [1]:
! pip download torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124

Looking in indexes: https://download.pytorch.org/whl/cu124
Collecting torch==2.6.0
  Downloading https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl.metadata (28 kB)
Collecting torchvision==0.21.0
  Downloading https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp311-cp311-linux_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio==2.6.0
  Downloading https://download.pytorch.org/whl/cu124/torchaudio-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl.metadata (6.6 kB)
Collecting filelock (from torch==2.6.0)
  Downloading https://download.pytorch.org/whl/filelock-3.13.1-py3-none-any.whl.metadata (2.8 kB)
Collecting networkx (from torch==2.6.0)
  Downloading https://download.pytorch.org/whl/networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0)
  Downloading https://download.pytorch.org/whl/cu124/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)
[2K     [90m━━━━━━

! pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/cu118

In [5]:
import torch
import torch.backends.cudnn
import torch.nn
import torch.utils.data
import torchvision.transforms

from torch import BoolTensor, Tensor, nn
from torch.autograd import Variable
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d, Parameter
from torch.nn import Embedding, Linear, ModuleList, ReLU, Sequential, functional
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import Dataset
from torchvision import models
# from torchvision.models.vgg import model_urls
"""cannot import name 'model_urls' from 'torchvision.models.vgg' (/usr/local/lib/python3.11/site-packages/torchvision/models/vgg.py)
=> 
from torchvision.models.resnet import ResNet50_Weights

checkpoint = load_state_dict_from_url(ResNet50_Weights.IMAGENET1K_V2.url)
"""

"cannot import name 'model_urls' from 'torchvision.models.vgg' (/usr/local/lib/python3.11/site-packages/torchvision/models/vgg.py)\n=> \nfrom torchvision.models.resnet import ResNet50_Weights\n\ncheckpoint = load_state_dict_from_url(ResNet50_Weights.IMAGENET1K_V2.url)\n"

## 2-4. scikit-learn or Etc

! pip install catboost lightgbm xgboost scikit-learn lightfm tslearn
! pip install dask[dataframe]

In [6]:
! pip download --no-deps catboost
! pip download --no-deps lightgbm
! pip download --no-deps xgboost
! pip download --no-deps scikit-learn
! pip download --no-deps skope-rules
! pip download --no-deps lightfm
! pip download --no-deps tslearn

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8
[0mCollecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl.metadata (17 kB)
Downloading lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: lightgbm
Successfully installed lightgbm-4.6.0
[0mCollecting xgboost
  Downloading xgboost-3.0.2-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.2-py3-none-manylinux_2_28_x86_64.whl (253.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━

In [16]:
! pip install --no-deps threadpoolctl
! pip install --no-deps numba

[0mCollecting numba
  Downloading numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.8 kB)
Downloading numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: numba
Successfully installed numba-0.61.2
[0m

In [17]:
import catboost
from catboost import CatBoostClassifier

import lightgbm
from lightgbm import LGBMClassifier, plot_importance

import xgboost
from xgboost import XGBClassifier

from sklearn import cluster
from sklearn import datasets, linear_model, preprocessing
from sklearn.base import clone
from sklearn.cluster import DBSCAN, KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, RandomForestRegressor
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, auc, roc_curve
from sklearn.metrics import confusion_matrix, roc_auc_score, precision_recall_curve, make_scorer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, classification_report
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, ShuffleSplit, train_test_split, cross_val_predict
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, label_binarize, StandardScaler
from sklearn.utils import gen_batches

import collections.abc
import six
import sklearn
collections.Iterable = collections.abc.Iterable
sklearn.externals.six = six
from skrules import SkopeRules
"""cannot import name 'Iterable' from 'collections' (/usr/local/lib/python3.11/collections/__init__.py)
=>
import collections.abc
import six
import sklearn
collections.Iterable = collections.abc.Iterable
sklearn.externals.six = six
from skrules import SkopeRules
"""

from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.data import Dataset
from lightfm.evaluation import auc_score, precision_at_k, recall_at_k

from tslearn.clustering import KShape, TimeSeriesKMeans
from tslearn.generators import random_walks
from tslearn.metrics import cdist_dtw
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.utils import to_time_series, to_time_series_dataset

ModuleNotFoundError: No module named 'llvmlite'

In [None]:
from fairseq import models, utils, hub_utils
from fairseq.criterions import FairseqCriterion, register_criterion
from fairseq.data import Dictionary
from fairseq.data.encoders import register_bpe
from fairseq.data.encoders.gpt2_bpe import get_encoder
from fairseq.models import FairseqEncoder
from fairseq.models.roberta import RobertaClassificationHead, RobertaLMHead, RobertaHubInterface, RobertaModel
from fairseq.models.roberta.hub_interface import RobertaHubInterface
from fairseq.models.transformer import TransformerModel
from fairseq.modules import TransformerSentenceEncoder
from fairseq.modules.transformer_sentence_encoder import init_bert_params
from fairseq.tasks import FairseqTask, register_task
from fairseq.tasks.audio_pretraining import AudioPretrainingTask

"""
ValueError: mutable default <class 'fairseq.dataclass.configs.CommonConfig'> for field common is not allowed: use default_factory
"""

In [9]:
! pip install --no-deps scipy

Collecting scipy
  Downloading scipy-1.16.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (61 kB)
Downloading scipy-1.16.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.3/35.3 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: scipy
Successfully installed scipy-1.16.0
[0m

In [10]:
import scipy
import scipy.stats

from scipy.cluster.hierarchy import linkage, fcluster, dendrogram, to_tree, cut_tree
from scipy.signal import lfilter
from scipy.sparse import coo_matrix, hstack, vstack
# from scipy.spatial import distance, rel_entr => (underline)
from scipy.spatial import distance
from scipy.special import rel_entr
"""
cannot import name 'rel_entr' from 'scipy.spatial' (/usr/local/lib/python3.11/site-packages/scipy/spatial/__init__.py)

"""

from scipy.stats import skew, zmap, zscore
from scipy import sparse, spatial, stats

In [None]:
! pip install --no-deps statsmodels


In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa import arima_model
from statsmodels.tsa.stattools import acf, pacf, adfuller

### Computer Vision

#### Image

In [None]:
! pip install --no-deps scikit-image

In [None]:
from skimage import io

In [None]:
! pip install --no-deps opencv-python

In [None]:
import cv2

#### Speech

In [None]:
! pip install --no-deps soundfile
! pip install --no-deps epitran
! pip install --no-deps librosa
! pip install --no-deps pydub
! pip install --no-deps panphon 

In [None]:
import soundfile

import epitran

import librosa
import librosa.effects
import librosa.feature

from pydub import AudioSegment

In [None]:
! pip install --no-deps wav2letter

from wav2letter.criterion import CpuViterbiPath, get_data_ptr_as_bytes
from wav2letter.decoder import CriterionType

"""
ERROR: Could not find a version that satisfies the requirement wav2letter (from versions: none)
ERROR: No matching distribution found for wav2letter
"""

### NLP

In [63]:
! pip install --no-deps g2p_en
! pip install --no-deps nltk
! pip install --no-deps g2pk
! pip install --no-deps kss
! pip install --no-deps konlpy
! pip install --no-deps ko_pron
! pip install --no-deps koparadigm
! pip install --no-deps fasttext
! pip install --no-deps word2word
! pip install --no-deps sentence_transformers

[0mLooking in indexes: gnore-installed
[0m

In [69]:
# english
from g2p_en import G2p

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize

# korea
from g2pk import G2p

from konlpy.tag import Mecab

from kss import split_sentences

# all
import fasttext

from word2word import Word2word

# tokenizers
from tokenizers.models import BPE, Unigram
from tokenizers.normalizers import NFKC
from tokenizers import Tokenizer, decoders, pre_tokenizers
from tokenizers.implementations import BaseTokenizer

# transformers
from transformers import AdamW, AlbertForSequenceClassification, AlbertModel, TFAlbertModel, BertConfig, BertModel, BertJapaneseTokenizer, BertTokenizer, InputExample, InputFeatures
from transformers import pipeline, AutoModelForSequenceClassification, PreTrainedTokenizer, RobertaModel, RobertaTokenizer
from transformers.optimization import get_linear_schedule_with_warmup
from sentence_transformers import SentenceTransformer, util

ModuleNotFoundError: No module named 'tossi'

In [22]:
! pip install transformers
! pip install tokenizers


Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Downloading huggingface_hub-0.33.4-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting tqdm>=4.27 (from transformers)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting hf-xet<2.0.0,>=1.1.2 (from huggingface-hub<1.0,>=0.30.0->transformers)
  Downloading hf_xet-1.1.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (879 bytes)
Downloading huggingface_hub-0.33.4-py3-none-any.whl (515 kB)
Downloading hf_xet-1.1.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m

In [28]:
# transformers
# from transformers import AdamW, AlbertForSequenceClassification, AlbertModel, TFAlbertModel, BertConfig, BertModel, BertJapaneseTokenizer, BertTokenizer, InputExample, InputFeatures
from transformers import AlbertForSequenceClassification, AlbertModel, TFAlbertModel, BertConfig, BertModel, BertJapaneseTokenizer, BertTokenizer, InputExample, InputFeatures
from torch.optim import AdamW
"""cannot import name 'AdamW' from 'transformers' 
=> from torch.optim import AdamW
"""


from transformers import pipeline, AutoModelForSequenceClassification, PreTrainedTokenizer, RobertaModel, RobertaTokenizer
from transformers.optimization import get_linear_schedule_with_warmup
from sentence_transformers import SentenceTransformer, util

In [56]:
! pip install --no-deps jamo
! pip install --no-deps kollocate
! pip install --no-deps jieba
! pip install --no-deps fugashi
! pip install --no-deps ipadic
! pip install --no-deps romkan
! pip install --no-deps g2pM
! pip install --no-deps mecab
! pip install --no-deps sentencepiece
! pip install --no-deps sacremoses
! pip install --no-deps python-crfsuite

[0m

In [57]:
# korea
from jamo import h2j, j2hcj, j2h
from ko_pron import romanise
from kollocate import Kollocate
# from koparadigm import Paradigm
"""XLRDError: Excel xlsx file; not supported

"""

# china
import jieba
import jieba.posseg

# japan
import fugashi
import ipadic
import romkan

# all
from g2pM import G2pM

import mecab

# tokenizers
import sentencepiece

from sacremoses import MosesDetokenizer, MosesTokenizer

# model
import pycrfsuite

#### Geo Analysis

In [59]:
! pip install --no-deps shapely
! pip install --no-deps geopandas

Collecting shapely
  Downloading shapely-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading shapely-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: shapely
Successfully installed shapely-2.1.1
[0mCollecting geopandas
  Downloading geopandas-1.1.1-py3-none-any.whl.metadata (2.3 kB)
Downloading geopandas-1.1.1-py3-none-any.whl (338 kB)
Installing collected packages: geopandas
Successfully installed geopandas-1.1.1
[0m

In [61]:
import shapely

from shapely import wkt
from shapely.geometry import Polygon, Point, LineString

import geopandas

#### Etc

In [36]:
! pip install --no-deps html_table_extractor
! pip install --no-deps joblib
! pip install --no-deps wget
! pip install --no-deps pyathena
! pip install --no-deps whoosh
! pip install --no-deps fastdtw
! pip install --no-deps natsort
! pip install --no-deps marisa_trie
! pip install --no-deps tqdm
! pip install --no-deps pyodbc
! pip install --no-deps lxml

[0mCollecting lxml
  Downloading lxml-6.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.6 kB)
Downloading lxml-6.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (5.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: lxml
Successfully installed lxml-6.0.0
[0m

In [37]:
from html_table_extractor.extractor import Extractor

import joblib

import wget

from lxml import etree

from pyathena import connect
from pyathena.pandas.util import to_sql

from whoosh import index
from whoosh.qparser import QueryParser

from fastdtw import fastdtw

from natsort import natsorted

from marisa_trie import RecordTrie, Trie

from tqdm import tqdm

import pyodbc
"""
libodbc.so.2: cannot open shared object file: No such file or directory
"""

'\nlibodbc.so.2: cannot open shared object file: No such file or directory\n'

In [32]:
! pip install --no-deps umap-learn
! pip install --no-deps gower
! pip install --no-deps shap
! pip install --no-deps pynndescent
! pip install --no-deps xverse

Collecting umap-learn
  Downloading umap_learn-0.5.9.post2-py3-none-any.whl.metadata (25 kB)
Downloading umap_learn-0.5.9.post2-py3-none-any.whl (90 kB)
Installing collected packages: umap-learn
Successfully installed umap-learn-0.5.9.post2
[0mCollecting gower
  Downloading gower-0.1.2-py3-none-any.whl.metadata (3.7 kB)
Downloading gower-0.1.2-py3-none-any.whl (5.2 kB)
Installing collected packages: gower
Successfully installed gower-0.1.2
[0mCollecting shap
  Downloading shap-0.48.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Downloading shap-0.48.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m-:--:--[0m
[?25hInstalling collected packages: shap
Successfully installed shap-0.48.0
[0mCollecting pynndescent
  Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)
Downloading pynndescent-0.5.13-py3-none-an

In [34]:
from umap import UMAP

import gower

import shap

import pynndescent

from xverse.transformer import WOE

ModuleNotFoundError: No module named 'llvmlite'