In [1]:
import os
os.chdir('../../..')

In [2]:
import convokit
from convokit import TensorDecomposer, HyperConvo, Corpus
import numpy as np
import pandas as pd

In [50]:
# CORPUS_DIR = "convokit/thread_generator/fake-corpus-trajectory"
# CORPUS_DIR = "convokit/thread_generator/annotated-fake-trajectory"

CORPUS_DIR = "convokit/tensor_decomposer/experiments/reddit-trajectory-subset-annotated"

In [51]:
corpus = Corpus(CORPUS_DIR)

In [52]:
GROUP_SIZE = 500

## Standard TCA

In [53]:
hyperconv_range = range(3, 20+1)

In [54]:
td = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3
                     )

In [55]:
td.fit(corpus)

Constructing tensor...Done.
Decomposing tensor...Done.


<convokit.tensor_decomposer.tensorDecomposer.TensorDecomposer at 0x12fda1fd0>

In [56]:
# td.summarize(corpus, axis_names=["Comment idx", "Threads", "Features"], 
#              output_dir='convokit/tensor_decomposer/reports/25May_normal', 
#              report_title="25 May (normal)")

In [57]:
td.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

0.81

## Non-negative TCA (HALS)

In [80]:
hyperconv_range = range(2, 20+1)
td_nonneg_hals = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-hals',
                      anomaly_threshold=2,
                     )

In [81]:
td_nonneg_hals.fit(corpus)

Constructing tensor...Done.
Decomposing tensor...NCP_HALS: iteration 1, objective 0.5989193538284204, improvement inf.
NCP_HALS: iteration 2, objective 0.44457780035295025, improvement 0.15434155347547018.
NCP_HALS: iteration 3, objective 0.42317771647029634, improvement 0.02140008388265391.
NCP_HALS: iteration 4, objective 0.41876053959569015, improvement 0.004417176874606188.
NCP_HALS: iteration 5, objective 0.41607842370207127, improvement 0.002682115893618886.
NCP_HALS: iteration 6, objective 0.41443895630084165, improvement 0.0016394674012296107.
NCP_HALS: iteration 7, objective 0.41356969958418666, improvement 0.000869256716654998.
NCP_HALS: iteration 8, objective 0.41314749032271225, improvement 0.0004222092614744044.
NCP_HALS: iteration 9, objective 0.4129260796916498, improvement 0.0002214106310624575.
NCP_HALS: iteration 10, objective 0.4128044519700363, improvement 0.00012162772161350421.
NCP_HALS: iteration 11, objective 0.41273495028098894, improvement 6.950168904734655e-0

<convokit.tensor_decomposer.tensorDecomposer.TensorDecomposer at 0x12fe32710>

In [82]:
# td_nonneg_hals.summarize(corpus, axis_names=["Comment idx", "Threads", "Features"], 
#              output_dir='convokit/tensor_decomposer/reports/26May_nonneg_hals', 
#              report_title="26 May (non-negative, HALS)")

In [83]:
td_nonneg_hals.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

0.81

## Non-negative TCA (BCD)

In [84]:
td_nonneg_bcd = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-bcd',
                      anomaly_threshold=2,
                     )

In [85]:
td_nonneg_bcd.fit(corpus)

Constructing tensor...Done.
Decomposing tensor...NCP_BCD: iteration 1, objective 0.6718731318817349, improvement inf.
NCP_BCD: iteration 2, objective 0.6290455425119065, improvement 0.042827589369828445.
NCP_BCD: iteration 3, objective 0.6213398300026501, improvement 0.007705712509256335.
NCP_BCD: iteration 4, objective 0.6180424428925099, improvement 0.0032973871101402707.
NCP_BCD: iteration 5, objective 0.6142069851321517, improvement 0.0038354577603582074.
NCP_BCD: iteration 6, objective 0.6039927726383885, improvement 0.010214212493763197.
NCP_BCD: iteration 7, objective 0.576107310921172, improvement 0.02788546171721651.
NCP_BCD: iteration 8, objective 0.5325931972086955, improvement 0.04351411371247649.
NCP_BCD: iteration 9, objective 0.48055115380769464, improvement 0.052042043401000826.
NCP_BCD: iteration 10, objective 0.4541077692194844, improvement 0.026443384588210217.
NCP_BCD: iteration 11, objective 0.4436587357346576, improvement 0.010449033484826842.
NCP_BCD: iteration 1

<convokit.tensor_decomposer.tensorDecomposer.TensorDecomposer at 0x12fe4e450>

In [86]:
# td_nonneg_bcd.summarize(corpus, axis_names=["Comment idx", "Threads", "Features"], 
#              output_dir='convokit/tensor_decomposer/reports/26May_nonneg_bcd', 
#              report_title="26 May (non-negative, BCD)")

In [87]:
td_nonneg_bcd.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

0.8113333333333334

## Non-negative, standard-scaled

In [88]:
from sklearn.preprocessing import StandardScaler
def standard_scale(tensor):
    tensor = tensor.copy()
    for i in range(tensor.shape[2]):
        tensor[:, :, i] = StandardScaler().fit_transform(tensor[:, :, i])
    return tensor

### HALS, 3-20, standard-scaled

In [89]:
td_nonneg_hals_std = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-hals',
                      normalize_func=standard_scale
                     )
td_nonneg_hals_std.fit(corpus)
td_nonneg_hals_std.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_HALS: iteration 1, objective 0.9291441951149553, improvement inf.
NCP_HALS: iteration 2, objective 0.9218848155851838, improvement 0.007259379529771515.
NCP_HALS: iteration 3, objective 0.9209688539558395, improvement 0.0009159616293442641.
NCP_HALS: iteration 4, objective 0.9206312079458848, improvement 0.000337646009954673.
NCP_HALS: iteration 5, objective 0.9204099867544795, improvement 0.0002212211914053297.
NCP_HALS: iteration 6, objective 0.9202498694319202, improvement 0.00016011732255927225.
NCP_HALS: iteration 7, objective 0.9201567653926052, improvement 9.310403931506173e-05.
NCP_HALS: iteration 8, objective 0.9201096733398707, improvement 4.709205273445427e-05.
NCP_HALS: iteration 9, objective 0.9200866811195542, improvement 2.2992220316542245e-05.
NCP_HALS: iteration 10, objective 0.9200750465411732, improvement 1.1634578380936844e-05.
NCP_HALS: iteration 11, objective 0.9200682933037229, improvement 6.753237450363159e-06

0.8693333333333333

### HALS, 2-20, standard-scaled

In [90]:
hyperconv_range=range(2,20+1)
td_nonneg_hals_std = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-hals',
                      normalize_func=standard_scale
                     )
td_nonneg_hals_std.fit(corpus)
td_nonneg_hals_std.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_HALS: iteration 1, objective 0.9291441951149553, improvement inf.
NCP_HALS: iteration 2, objective 0.9218848155851838, improvement 0.007259379529771515.
NCP_HALS: iteration 3, objective 0.9209688539558395, improvement 0.0009159616293442641.
NCP_HALS: iteration 4, objective 0.9206312079458848, improvement 0.000337646009954673.
NCP_HALS: iteration 5, objective 0.9204099867544795, improvement 0.0002212211914053297.
NCP_HALS: iteration 6, objective 0.9202498694319202, improvement 0.00016011732255927225.
NCP_HALS: iteration 7, objective 0.9201567653926052, improvement 9.310403931506173e-05.
NCP_HALS: iteration 8, objective 0.9201096733398707, improvement 4.709205273445427e-05.
NCP_HALS: iteration 9, objective 0.9200866811195542, improvement 2.2992220316542245e-05.
NCP_HALS: iteration 10, objective 0.9200750465411732, improvement 1.1634578380936844e-05.
NCP_HALS: iteration 11, objective 0.9200682933037229, improvement 6.753237450363159e-06

0.8693333333333333

### BCD, 3-20, standard-scaled

In [91]:
hyperconv_range=range(3, 21)
td_nonneg_bcd_std = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-bcd',
                      normalize_func=standard_scale
                     )
td_nonneg_bcd_std.fit(corpus)
td_nonneg_bcd_std.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_BCD: iteration 1, objective 0.9489063627864627, improvement inf.
NCP_BCD: iteration 2, objective 0.9337036644871701, improvement 0.01520269829929255.
NCP_BCD: iteration 3, objective 0.9313614545177508, improvement 0.002342209969419362.
NCP_BCD: iteration 4, objective 0.9306337639103562, improvement 0.0007276906073945266.
NCP_BCD: iteration 5, objective 0.9300619858376821, improvement 0.0005717780726741672.
NCP_BCD: iteration 6, objective 0.9294940742371393, improvement 0.0005679116005428142.
NCP_BCD: iteration 7, objective 0.9288321407789624, improvement 0.0006619334581768888.
NCP_BCD: iteration 8, objective 0.9280630666207008, improvement 0.0007690741582615512.
NCP_BCD: iteration 9, objective 0.9272206166860036, improvement 0.0008424499346971759.
NCP_BCD: iteration 10, objective 0.9263611520204079, improvement 0.0008594646655957217.
NCP_BCD: iteration 11, objective 0.9254937282004757, improvement 0.0008674238199322115.
NCP_BCD: iter

0.894

### BCD, 2-20, standard-scaled

In [92]:
hyperconv_range=range(3, 21)
td_nonneg_bcd_std = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-bcd',
                      normalize_func=standard_scale
                     )
td_nonneg_bcd_std.fit(corpus)
td_nonneg_bcd_std.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_BCD: iteration 1, objective 0.9489063627864627, improvement inf.
NCP_BCD: iteration 2, objective 0.9337036644871701, improvement 0.01520269829929255.
NCP_BCD: iteration 3, objective 0.9313614545177508, improvement 0.002342209969419362.
NCP_BCD: iteration 4, objective 0.9306337639103562, improvement 0.0007276906073945266.
NCP_BCD: iteration 5, objective 0.9300619858376821, improvement 0.0005717780726741672.
NCP_BCD: iteration 6, objective 0.9294940742371393, improvement 0.0005679116005428142.
NCP_BCD: iteration 7, objective 0.9288321407789624, improvement 0.0006619334581768888.
NCP_BCD: iteration 8, objective 0.9280630666207008, improvement 0.0007690741582615512.
NCP_BCD: iteration 9, objective 0.9272206166860036, improvement 0.0008424499346971759.
NCP_BCD: iteration 10, objective 0.9263611520204079, improvement 0.0008594646655957217.
NCP_BCD: iteration 11, objective 0.9254937282004757, improvement 0.0008674238199322115.
NCP_BCD: iter

0.894

## Min-max normalization

In [93]:
import numpy as np
def _min_max_scale(mat):
    max_ = np.max(mat)
    min_ = np.min(mat)
    return (mat - min_) / (max_ - min_)

def minmax_normalize(tensor):
    tensor = tensor.copy()
    for i in range(tensor.shape[2]):
        tensor[:, :, i] = _min_max_scale(tensor[:, :, i])
    return tensor

### 3-20, MM, HALS

In [94]:
hyperconv_range=range(3, 21)
td_nonneg_hals_mm = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-hals',
                      normalize_func=minmax_normalize
                     )
td_nonneg_hals_mm.fit(corpus)
td_nonneg_hals_mm.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_HALS: iteration 1, objective 0.43097866009800284, improvement inf.
NCP_HALS: iteration 2, objective 0.35359202974748694, improvement 0.0773866303505159.
NCP_HALS: iteration 3, objective 0.3271803830432561, improvement 0.02641164670423085.
NCP_HALS: iteration 4, objective 0.31621588168125875, improvement 0.010964501361997336.
NCP_HALS: iteration 5, objective 0.3141716690128713, improvement 0.002044212668387446.
NCP_HALS: iteration 6, objective 0.31348575360841086, improvement 0.0006859154044604465.
NCP_HALS: iteration 7, objective 0.313106175352715, improvement 0.0003795782556958449.
NCP_HALS: iteration 8, objective 0.31286504650148755, improvement 0.00024112885122745675.
NCP_HALS: iteration 9, objective 0.3127079991827363, improvement 0.00015704731875126887.
NCP_HALS: iteration 10, objective 0.31260464472772254, improvement 0.00010335445501374885.
NCP_HALS: iteration 11, objective 0.3125366971913282, improvement 6.794753639433049e-05

0.6666666666666666

### 2-20, MM, HALS

In [95]:
hyperconv_range=range(2, 21)
td_nonneg_hals_mm = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-hals',
                      normalize_func=minmax_normalize
                     )
td_nonneg_hals_mm.fit(corpus)
td_nonneg_hals_mm.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_HALS: iteration 1, objective 0.4360863894644641, improvement inf.
NCP_HALS: iteration 2, objective 0.3680191860531257, improvement 0.06806720341133837.
NCP_HALS: iteration 3, objective 0.3357523369033273, improvement 0.03226684914979844.
NCP_HALS: iteration 4, objective 0.3292217484600573, improvement 0.00653058844326998.
NCP_HALS: iteration 5, objective 0.3270945182781233, improvement 0.0021272301819340034.
NCP_HALS: iteration 6, objective 0.32527747383786343, improvement 0.001817044440259874.
NCP_HALS: iteration 7, objective 0.32375280620305324, improvement 0.001524667634810184.
NCP_HALS: iteration 8, objective 0.322575239220092, improvement 0.0011775669829612312.
NCP_HALS: iteration 9, objective 0.32170035810534214, improvement 0.0008748811147498703.
NCP_HALS: iteration 10, objective 0.321089029382533, improvement 0.000611328722809168.
NCP_HALS: iteration 11, objective 0.32068837407665357, improvement 0.0004006553058794049.
NCP_HA

0.6666666666666666

### 3-20, MM, BCD

In [96]:
hyperconv_range=range(3, 21)
td_nonneg_bcd_mm = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-bcd',
                      normalize_func=minmax_normalize
                     )
td_nonneg_bcd_mm.fit(corpus)
td_nonneg_bcd_mm.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_BCD: iteration 1, objective 0.4485789912305747, improvement inf.
NCP_BCD: iteration 2, objective 0.43307986021828954, improvement 0.015499131012285172.
NCP_BCD: iteration 3, objective 0.42553363552302353, improvement 0.007546224695266002.
NCP_BCD: iteration 4, objective 0.4206423128390538, improvement 0.0048913226839697455.
NCP_BCD: iteration 5, objective 0.41671001998315693, improvement 0.003932292855896857.
NCP_BCD: iteration 6, objective 0.41233343023466235, improvement 0.004376589748494586.
NCP_BCD: iteration 7, objective 0.40614210510167664, improvement 0.006191325132985703.
NCP_BCD: iteration 8, objective 0.3970576438033903, improvement 0.009084461298286328.
NCP_BCD: iteration 9, objective 0.38550081069934305, improvement 0.01155683310404726.
NCP_BCD: iteration 10, objective 0.374178253207438, improvement 0.011322557491905028.
NCP_BCD: iteration 11, objective 0.36241934785538393, improvement 0.011758905352054094.
NCP_BCD: itera

0.6666666666666666

### 2-20, MM, BCD

In [97]:
hyperconv_range=range(2, 21)
td_nonneg_bcd_mm = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-bcd',
                      normalize_func=minmax_normalize
                     )
td_nonneg_bcd_mm.fit(corpus)
td_nonneg_bcd_mm.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_BCD: iteration 1, objective 0.45640583542384155, improvement inf.
NCP_BCD: iteration 2, objective 0.4393436290630484, improvement 0.017062206360793142.
NCP_BCD: iteration 3, objective 0.43145226187361707, improvement 0.007891367189431342.
NCP_BCD: iteration 4, objective 0.42585108678790123, improvement 0.005601175085715837.
NCP_BCD: iteration 5, objective 0.42045631439226394, improvement 0.005394772395637293.
NCP_BCD: iteration 6, objective 0.4135471750852902, improvement 0.006909139306973733.
NCP_BCD: iteration 7, objective 0.4039661503871584, improvement 0.009581024698131824.
NCP_BCD: iteration 8, objective 0.39227148574411397, improvement 0.011694664643044417.
NCP_BCD: iteration 9, objective 0.38091839445953923, improvement 0.011353091284574734.
NCP_BCD: iteration 10, objective 0.3696155507174774, improvement 0.011302843742061808.
NCP_BCD: iteration 11, objective 0.3552893262560573, improvement 0.014326224461420123.
NCP_BCD: itera

0.6666666666666666

## Non-negative, standard-scaled by comment index

In [75]:
from sklearn.preprocessing import scale
def scale_by_comment_idx(tensor):
    tensor = tensor.copy()
    for i in range(tensor.shape[0]):
        tensor[0, :, :] = scale(tensor[0, :, :])
    return tensor

### HALS, 3-20

In [98]:
hyperconv_range=range(3, 21)
td_nonneg_hals_scale_idx = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-hals',
                      normalize_func=scale_by_comment_idx
                     )
td_nonneg_hals_scale_idx.fit(corpus)
td_nonneg_hals_scale_idx.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_HALS: iteration 1, objective 0.5847326014861733, improvement inf.
NCP_HALS: iteration 2, objective 0.44724996002037426, improvement 0.13748264146579908.
NCP_HALS: iteration 3, objective 0.43885342749020045, improvement 0.008396532530173806.
NCP_HALS: iteration 4, objective 0.43211150710389135, improvement 0.006741920386309108.
NCP_HALS: iteration 5, objective 0.42670926754888, improvement 0.0054022395550113456.




NCP_HALS: iteration 6, objective 0.42265197335998017, improvement 0.004057294188899829.
NCP_HALS: iteration 7, objective 0.4195631345135251, improvement 0.0030888388464550554.
NCP_HALS: iteration 8, objective 0.4173283461894587, improvement 0.002234788324066428.
NCP_HALS: iteration 9, objective 0.41583728700307493, improvement 0.0014910591863837586.
NCP_HALS: iteration 10, objective 0.41491614565860163, improvement 0.0009211413444732974.
NCP_HALS: iteration 11, objective 0.4143817158114916, improvement 0.0005344298471100517.
NCP_HALS: iteration 12, objective 0.4140797703328296, improvement 0.0003019454786619691.
NCP_HALS: iteration 13, objective 0.41390603889440963, improvement 0.0001737314384199773.
NCP_HALS: iteration 14, objective 0.41380367595443057, improvement 0.00010236293997906154.
NCP_HALS: iteration 15, objective 0.4137434028907967, improvement 6.027306363387552e-05.
NCP_HALS: iteration 16, objective 0.4137070222883468, improvement 3.638060244987029e-05.
NCP_HALS: iteration 1

0.8186666666666667

### HALS, 2-20

In [99]:
hyperconv_range=range(2, 21)
td_nonneg_hals_scale_idx = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-hals',
                      normalize_func=scale_by_comment_idx
                     )
td_nonneg_hals_scale_idx.fit(corpus)
td_nonneg_hals_scale_idx.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_HALS: iteration 1, objective 0.5979506808723881, improvement inf.
NCP_HALS: iteration 2, objective 0.44302441273215015, improvement 0.15492626814023797.
NCP_HALS: iteration 3, objective 0.42146575467759845, improvement 0.021558658054551705.
NCP_HALS: iteration 4, objective 0.4169959071875714, improvement 0.004469847490027046.
NCP_HALS: iteration 5, objective 0.4142934700167866, improvement 0.002702437170784777.
NCP_HALS: iteration 6, objective 0.4126497443283105, improvement 0.0016437256884761497.
NCP_HALS: iteration 7, objective 0.4117819178506981, improvement 0.0008678264776123545.
NCP_HALS: iteration 8, objective 0.4113615242702575, improvement 0.0004203935804406034.
NCP_HALS: iteration 9, objective 0.4111409835806552, improvement 0.000220540689602311.
NCP_HALS: iteration 10, objective 0.41101968409064155, improvement 0.00012129949001365992.
NCP_HALS: iteration 11, objective 0.41095028814584833, improvement 6.939594479321576e-05.


0.81

### BCD, 3-20

In [100]:
hyperconv_range=range(3, 21)
td_nonneg_bcd_scale_idx = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-bcd',
                      normalize_func=scale_by_comment_idx
                     )
td_nonneg_bcd_scale_idx.fit(corpus)
td_nonneg_bcd_scale_idx.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_BCD: iteration 1, objective 0.662097527351528, improvement inf.
NCP_BCD: iteration 2, objective 0.6216978376147345, improvement 0.04039968973679353.
NCP_BCD: iteration 3, objective 0.6084393579975929, improvement 0.01325847961714166.




NCP_BCD: iteration 4, objective 0.5890455946104586, improvement 0.019393763387134277.
NCP_BCD: iteration 5, objective 0.5522170685936694, improvement 0.03682852601678921.
NCP_BCD: iteration 6, objective 0.5152629784162073, improvement 0.036954090177462096.
NCP_BCD: iteration 7, objective 0.49056767294626935, improvement 0.024695305469937923.
NCP_BCD: iteration 8, objective 0.4763008872114972, improvement 0.014266785734772136.
NCP_BCD: iteration 9, objective 0.4686417792080012, improvement 0.007659108003496007.
NCP_BCD: iteration 10, objective 0.4645485686820743, improvement 0.004093210525926927.
NCP_BCD: iteration 11, objective 0.46154855666926825, improvement 0.0030000120128060304.
NCP_BCD: iteration 12, objective 0.45848240913859867, improvement 0.003066147530669583.
NCP_BCD: iteration 13, objective 0.4551189274086325, improvement 0.0033634817299661446.
NCP_BCD: iteration 14, objective 0.4517036795755256, improvement 0.003415247833106927.
NCP_BCD: iteration 15, objective 0.4489980755

0.8093333333333333

### BCD, 2-20

In [101]:
hyperconv_range=range(2, 21)
td_nonneg_bcd_scale_idx = TensorDecomposer(obj_type="conversation",
                      feature_set=["hyperconvo-{}".format(i) for i in hyperconv_range],
                      group_func=lambda convo: convo.get_utterance(convo.id).meta['subreddit'],
                      rank=3, tensor_func='tensortools-ncp-bcd',
                      normalize_func=scale_by_comment_idx
                     )
td_nonneg_bcd_scale_idx.fit(corpus)
td_nonneg_bcd_scale_idx.purity(n_clusters=3, actual_num_clusters=3, group_size=GROUP_SIZE)

Constructing tensor...Done.
Decomposing tensor...NCP_BCD: iteration 1, objective 0.6718556712278926, improvement inf.
NCP_BCD: iteration 2, objective 0.6283926699812932, improvement 0.04346300124659941.
NCP_BCD: iteration 3, objective 0.6205182067144807, improvement 0.007874463266812515.
NCP_BCD: iteration 4, objective 0.6171166723188156, improvement 0.00340153439566504.
NCP_BCD: iteration 5, objective 0.6131055102869242, improvement 0.0040111620318914465.
NCP_BCD: iteration 6, objective 0.6023442843823225, improvement 0.010761225904601712.
NCP_BCD: iteration 7, objective 0.573539896758957, improvement 0.028804387623365457.
NCP_BCD: iteration 8, objective 0.5293641674692354, improvement 0.04417572928972158.
NCP_BCD: iteration 9, objective 0.47776094905175676, improvement 0.05160321841747867.
NCP_BCD: iteration 10, objective 0.4520579731898742, improvement 0.025702975861882582.
NCP_BCD: iteration 11, objective 0.4415776803149377, improvement 0.010480292874936459.
NCP_BCD: iteration 12, 

0.8113333333333334