# Тестирование модулей

## Модуль `dataset`

In [1]:
cd ..

C:\Users\kobel\учёба\recs\recs


In [2]:
import dataset

In [3]:
# Load the video-games dataset through the project's `dataset` module.
df_video_games = dataset.load_video_games()
# Bare last expression: the notebook renders the frame as the cell output.
df_video_games

Unnamed: 0,target
0,Wii Sports
1,Super Mario Bros.
2,Mario Kart Wii
3,Wii Sports Resort
4,Pokemon Red/Pokemon Blue
...,...
11557,15 Days
11558,Aiyoku no Eustia
11559,Woody Woodpecker in Crazy Castle 5
11560,LMA Manager 2007


## Модуль `preprocessing`

In [4]:
import preprocessing

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Build the text cleaner. remove_number=False presumably keeps digits in the
# text (the recorded cleaned samples still contain "5" and "2007") — confirm
# against preprocessing.BaseCleaner.
cleaner_base = preprocessing.BaseCleaner(remove_number=False)

In [6]:
# Run the cleaner over every game title in the `target` column.
clear_text = cleaner_base.transform(df_video_games["target"])
# Spot check: display the last five cleaned titles.
clear_text[-5:]

['15 days',
 'aiyoku no eustia',
 'woody woodpecker in crazy castle 5',
 'lma manager 2007',
 'haitaka no psychedelica']

## Модуль `models`

In [7]:
import models

#### TF-IDF

In [8]:
# Instantiate the project's TF-IDF wrapper with its default arguments.
model_tfidf_wrapper = models.TfidfWrapperModel()

In [9]:
%%time
# Fit the TF-IDF wrapper on the cleaned titles and embed them in a single call.
# (%%time must remain the first line of the cell, so comments go after it.)
embedding_basedata_tfidf = model_tfidf_wrapper.fit_transform(clear_text)

# Print the shape, then show the embedding array itself as the cell output.
print(embedding_basedata_tfidf.shape)
embedding_basedata_tfidf

(11562, 9169)
CPU times: total: 375 ms
Wall time: 377 ms


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [10]:
# model_tfidf_wrapper.save('testing')

In [11]:
# model_tfidf_wrapper.load('testing')

#### FastText

In [12]:
# Configure the project's FastText wrapper.
# NOTE(review): the keyword names match gensim's FastText parameters and are
# presumably forwarded to it — confirm in models.FastTextWrapperModel.
model_fasttext_wrapper = models.FastTextWrapperModel(
    min_count=3,      # presumably the minimum token frequency — confirm
    vector_size=100,  # matches the 100-column embedding shape recorded in this notebook
    window=3,         # presumably the context window size — confirm
    sg=1,             # presumably selects skip-gram training — confirm
)

In [13]:
%%time
# Fit the FastText wrapper on the cleaned titles and embed them in a single call.
# (%%time must remain the first line of the cell, so comments go after it.)
embedding_basedata_fasttext = model_fasttext_wrapper.fit_transform(clear_text)

# Print the shape, then show the embedding array itself as the cell output.
print(embedding_basedata_fasttext.shape)
embedding_basedata_fasttext

(11562, 100)
CPU times: total: 5.47 s
Wall time: 4.26 s


array([[-0.15230161, -0.02121007, -0.09091443, ..., -0.08356076,
         0.05686298,  0.05183981],
       [-0.15229307, -0.02348029, -0.08850324, ..., -0.08940327,
         0.05713321,  0.04942806],
       [-0.15165074, -0.02096771, -0.09120651, ..., -0.08512478,
         0.05741649,  0.05147989],
       ...,
       [-0.15105745, -0.02188818, -0.09104431, ..., -0.0852009 ,
         0.05585201,  0.04948147],
       [-0.15118353, -0.02251317, -0.09281135, ..., -0.08021916,
         0.05730164,  0.05155784],
       [-0.14995156, -0.02489554, -0.09146437, ..., -0.08269682,
         0.05654266,  0.05103846]], dtype=float32)

#### SentenceTransformers

In [14]:
# Instantiate the project's SentenceTransformer wrapper with its default arguments.
model_transformer_wrapper = models.SentenceTransformerWrapperModel()

In [17]:
# model_transformer_wrapper.fit(clear_text)

In [18]:
# Embed the cleaned titles with transform() only; the fit() call in the
# preceding cell is commented out, so no fit step runs first (presumably the
# wrapper relies on a pretrained model — confirm in models).
embedding_basedata_transformer = model_transformer_wrapper.transform(clear_text)

# Print the shape, then show the embedding array itself as the cell output.
print(embedding_basedata_transformer.shape)
embedding_basedata_transformer

(11562, 384)


array([[-0.03356819, -0.00994224, -0.38047782, ..., -0.3514836 ,
        -0.13663206, -0.2837611 ],
       [-0.10025516,  0.12757903, -0.04055376, ..., -0.15694027,
        -0.11605553,  0.16881616],
       [-0.12735258,  0.15968087, -0.1906877 , ..., -0.34117892,
        -0.13013414,  0.04280528],
       ...,
       [ 0.1761831 ,  0.07396939,  0.12333596, ..., -0.25326034,
        -0.03523049,  0.22894253],
       [ 0.00739819, -0.14365028, -0.0232647 , ..., -0.43694916,
        -0.5284057 ,  0.35563788],
       [-0.03762897,  0.03798281, -0.12057619, ..., -0.13760985,
        -0.17423956, -0.13552156]], dtype=float32)

## Модуль `similarity_search`

In [19]:
import similarity_search

#### ForCycleSearch

Для TF-IDF:

In [20]:
%%time
search_for_cycle = similarity_search.ForCycleSearch(model_tfidf_wrapper, embedding_basedata_tfidf, df_video_games.target.values)
search_for_cycle.search('mario 9', 5)

CPU times: total: 1.53 s
Wall time: 1.64 s


Unnamed: 0,name,similarity
479,Mario Party,0.724069
381,Mario Party 9,0.724069
531,Mario Party 2,0.724069
71,Mario Party 8,0.724069
965,Mario Party 7,0.724069


Для FastText:

In [21]:
%%time
search_for_cycle = similarity_search.ForCycleSearch(model_fasttext_wrapper, embedding_basedata_fasttext, df_video_games.target.values)
search_for_cycle.search('mario 9', 5)

CPU times: total: 297 ms
Wall time: 299 ms


Unnamed: 0,name,similarity
381,Mario Party 9,0.999814
4814,Wild 9,0.999797
9751,Medabots 9: Metabee / Rokusho,0.999728
9799,Medarot 9: Kabuto Ver. / Kuwagata Ver.,0.999695
6374,World Soccer Winning Eleven 9 Bonus Pack,0.999642


Для SentenceTransformer:

In [24]:
%%time
search_for_cycle = similarity_search.ForCycleSearch(model_transformer_wrapper, embedding_basedata_transformer, df_video_games.target.values)
search_for_cycle.search('mario 9', 5)

CPU times: total: 953 ms
Wall time: 246 ms


Unnamed: 0,name,similarity
596,Mario Bros.,0.833682
99,Mario Kart 8,0.830035
2924,Mario & Wario,0.822572
37,Mario Kart 7,0.792277
381,Mario Party 9,0.786914


## `TEMP` code: