In [1]:
import os
import pandas as pd
from bertopic import BERTopic
from datetime import datetime, date
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import spacy
import torch
import warnings
import cudf

# Notebook-wide setup, executed before the project-local `src` imports below.

# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")

# Let pandas show up to 200 characters per cell before truncating.
pd.set_option("display.max_colwidth", 200)

# Step one directory up from the current working directory.
# NOTE(review): this is relative, so re-running the cell moves up again —
# restart the kernel before re-executing.
os.chdir("../")

from src.modeling import _BERTopic
from src.config import (
    umap_data,
    hdbscan_data,
    sent_transformers_data,
    tfidf_data,
    tokenizer_data,
    mmr_data,
    bertopic_data,
)
from src.data_preprocess import Preprocessing
from src.utils import (
    getClusteringModel,
    getDimReductionModel,
    getMaximalMarginalRelevance,
    getTfidfTransformers,
    getTokenizer,
    getEmbeddings,
    getFrequencyDictForText,
    plot_wordcloud,
    global_wordcloud,
    context_stopwords
)

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


# Configuration Parameters

In [2]:
# Load the context-specific stopwords: one entry per line, surrounding
# whitespace stripped. Blank lines are kept as empty strings.
# The encoding is pinned to UTF-8 so accented characters decode the same way
# on every platform instead of depending on the locale default
# (assumes the file is UTF-8 encoded — TODO confirm).
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('./data/test-context-stopwords.txt', encoding="utf-8") as f:
    list_context_sw = [line.strip() for line in f]

# Label for the document set being processed (its use is outside this cell).
docs_name = "tests"

# Language resources.
spacy_model = "fr_core_news_md"
language = "french"
# Model identifier; presumably the sentence-embedding model used later —
# TODO confirm against the cells that consume `transformer`.
transformer = "dangvantuan/sentence-camembert-large"

# Flag forwarded to `Preprocessing` as its fourth positional argument.
use_preprocessing = False

# Build the preprocessor from the settings above and the stopword list
# loaded earlier in this cell.
preprocessor = Preprocessing(
    spacy_model,
    language,
    list_context_sw,
    use_preprocessing,
)

In [3]:
# Print the properties of CUDA device 0 when CUDA is available.
# When it is not, nothing is printed and `device` is never assigned.
if torch.cuda.is_available():
    # NOTE(review): torch.cuda.device(0) looks like the device-selection context
    # manager rather than a torch.device; if later cells pass `device` to
    # `.to(...)`, torch.device("cuda:0") may be what is wanted — confirm.
    device = torch.cuda.device(0)
    print(torch.cuda.get_device_properties(device))

_CudaDeviceProperties(name='Tesla T4', major=7, minor=5, total_memory=14910MB, multi_processor_count=40)


In [4]:
# Shell escape: run `nvidia-smi` and show its report in the cell output.
!nvidia-smi

Tue Jul 18 09:24:33 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:86:00.0 Off |                    0 |
| N/A   69C    P0    67W /  70W |   3060MiB / 15360MiB |     97%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces