In [None]:
import os
import pandas as pd
from bertopic import BERTopic
from datetime import datetime, date
from nltk import FreqDist
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.datasets import fetch_20newsgroups
from stop_words import get_stop_words
import spacy
import torch
import warnings

# Move to the parent directory so the `src.*` imports and the `./data/...` paths
# used below resolve. Guarded on the presence of `src` so that re-running this
# cell does not climb one directory higher on every execution.
if not os.path.isdir("src"):
    os.chdir("../")

# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")
# Display up to 200 characters per DataFrame cell instead of the pandas default.
pd.options.display.max_colwidth = 200

from src.modeling import _BERTopic
from src.config import (
    umap_data,
    hdbscan_data,
    sent_transformers_data,
    tfidf_data,
    tokenizer_data,
    mmr_data,
    bertopic_data,
)
from src.data_preprocess import Preprocessing
from src.utils import (
    getClusteringModel,
    getDimReductionModel,
    getMaximalMarginalRelevance,
    getTfidfTransformers,
    getTokenizer,
    getEmbeddings,
    getFrequencyDictForText,
    plot_wordcloud,
    global_wordcloud,
    context_stopwords
)

# Configuration parameters

In [None]:
# Load the context-specific stop words, one entry per line.
# The encoding is given explicitly so accented characters decode the same way on
# every platform (assumes the file is stored as UTF-8 -- TODO confirm).
with open('./data/test-context-stopwords.txt', encoding='utf-8') as f:
    # Strip surrounding whitespace/newlines and skip blank lines so no empty
    # string ends up in the stop-word list. The `with` block closes the file.
    list_context_sw = [
        stripped for stripped in (line.strip() for line in f) if stripped
    ]

# Language / model selection for this run.
language = "french"
spacy_model = 'fr_core_news_md'
transformer = "dangvantuan/sentence-camembert-large"
# Flag forwarded to Preprocessing; presumably toggles the text-cleaning step -- verify in src.data_preprocess.
use_preprocessing = False
preprocessor = Preprocessing(spacy_model, language, list_context_sw, use_preprocessing)

# Label for the document set used in this notebook.
docs_name = "tests"

In [None]:
# Report the properties of CUDA device 0 when PyTorch can see a GPU.
if torch.cuda.is_available():
    # torch.cuda.device is PyTorch's device-selection context manager; here it is
    # only used as a handle for device index 0 in the property lookup.
    device = torch.cuda.device(0)
    print(torch.cuda.get_device_properties(device))
else:
    # Make the CPU-only case visible instead of printing nothing.
    print("CUDA is not available - running on CPU.")

In [None]:
# Shell escape: run the `nvidia-smi` command-line tool and show its output in the cell.
!nvidia-smi