In [1]:
from __future__ import print_function
from pyspark.ml.feature import HashingTF, IDF, Tokenizer, StopWordsRemover
from pyspark import SparkContext, SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import array, struct, split, explode, udf
from pyspark.sql import functions as f

In [2]:
conf = SparkConf().setMaster("local[*]").setAppName('haha')
sc = SparkContext(conf=conf)
spark = SparkSession(sc)

In [6]:
spark.read.json('reviews_Books_5.json').printSchema()

root
 |-- asin: string (nullable = true)
 |-- helpful: array (nullable = true)
 |    |-- element: long (containsNull = true)
 |-- overall: double (nullable = true)
 |-- reviewText: string (nullable = true)
 |-- reviewTime: string (nullable = true)
 |-- reviewerID: string (nullable = true)
 |-- reviewerName: string (nullable = true)
 |-- summary: string (nullable = true)
 |-- unixReviewTime: long (nullable = true)



In [3]:
review_sp = spark.read.json('reviews_Books_5.json').select("asin","reviewText")
review_sp.show(5)

+----------+--------------------+
|      asin|          reviewText|
+----------+--------------------+
|000100039X|Spiritually and m...|
|000100039X|This is one my mu...|
|000100039X|This book provide...|
|000100039X|I first read THE ...|
|000100039X|A timeless classi...|
+----------+--------------------+
only showing top 5 rows



In [8]:
# group by asin, all reviewText will be in the same row
reviews = review_sp.groupBy("asin").agg(f.concat_ws(" ", f.collect_list('reviewText')).alias('reviewText'))
reviews.show(5)

+----------+--------------------+
|      asin|          reviewText|
+----------+--------------------+
|0001837192|At least it was f...|
|0001845357|Last year I track...|
|0002216973|And to me, there'...|
|000255111X|This is what Joyc...|
|0004708075|Seller's contact ...|
+----------+--------------------+
only showing top 5 rows



In [9]:
tokenizer = Tokenizer(inputCol="reviewText", outputCol="words")
reviews_w_words = tokenizer.transform(reviews)
remover = StopWordsRemover(inputCol="words", outputCol="mf_words")
reviews_w_mfwords = remover.transform(reviews_w_words)
reviews_w_mfwords.show(3)

+----------+--------------------+--------------------+--------------------+
|      asin|          reviewText|               words|            mf_words|
+----------+--------------------+--------------------+--------------------+
|0001837192|At least it was f...|[at, least, it, w...|[least, first, re...|
|0001845357|Last year I track...|[last, year, i, t...|[last, year, trac...|
|0002216973|And to me, there'...|[and, to, me,, th...|[me,, big, differ...|
+----------+--------------------+--------------------+--------------------+
only showing top 3 rows



In [10]:
hashingTF = HashingTF(inputCol="mf_words", outputCol="tf", numFeatures=20)
tf = hashingTF.transform(reviews_w_mfwords)
idf = IDF(inputCol="tf", outputCol="tfidf").fit(tf)
tfidf = idf.transform(tf)
for item in tfidf.select('tfidf').collect():
    print(item)

Row(tfidf=SparseVector(20, {0: 0.3468, 1: 0.0821, 2: 0.102, 3: 0.39, 5: 0.2963, 6: 0.4629, 7: 0.3738, 8: 0.4346, 10: 0.1704, 11: 0.1775, 12: 0.1267, 13: 0.1834, 14: 0.1719, 15: 0.1323, 16: 0.0872, 17: 0.3081, 18: 0.3184, 19: 0.3562}))
Row(tfidf=SparseVector(20, {0: 1.4863, 1: 2.3804, 2: 1.9373, 3: 1.8719, 4: 1.179, 5: 1.7776, 6: 1.9289, 7: 1.6822, 8: 2.0489, 9: 1.8532, 10: 1.8174, 11: 1.5978, 12: 1.1826, 13: 0.8252, 14: 2.2343, 15: 1.9183, 16: 2.4422, 17: 1.4173, 18: 1.5124, 19: 1.7097}))
Row(tfidf=SparseVector(20, {0: 4.657, 1: 4.6788, 2: 6.9337, 3: 5.2256, 4: 4.7161, 5: 4.7403, 6: 7.0984, 7: 4.0186, 8: 6.3329, 9: 5.0791, 10: 5.793, 11: 7.19, 12: 5.8287, 13: 2.5978, 14: 7.2184, 15: 4.9612, 16: 6.1054, 17: 4.4368, 18: 4.4575, 19: 7.0527}))
Row(tfidf=SparseVector(20, {0: 0.4459, 1: 0.3283, 2: 0.102, 3: 1.2479, 4: 0.7205, 5: 0.3703, 6: 0.6173, 7: 0.3738, 8: 0.8071, 9: 0.3432, 10: 0.7383, 11: 0.7989, 12: 0.9715, 13: 0.3362, 14: 0.7734, 15: 0.7276, 16: 0.6105, 17: 0.6778, 18: 0.398, 19: 0.

Row(tfidf=SparseVector(20, {0: 1.5853, 1: 1.3954, 2: 1.4275, 3: 1.2479, 4: 1.3755, 5: 1.3332, 6: 2.0832, 7: 1.2149, 8: 2.4214, 9: 1.0295, 10: 1.0791, 11: 1.4203, 12: 1.4783, 13: 0.7335, 14: 1.7187, 15: 1.2568, 16: 0.9594, 17: 1.0476, 18: 1.5124, 19: 1.8522}))
Row(tfidf=SparseVector(20, {0: 0.0495, 1: 0.0821, 4: 0.0655, 5: 0.0741, 7: 0.2804, 8: 0.0621, 10: 0.0568, 13: 0.0306, 17: 0.0616, 18: 0.0796, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 1.2881, 1: 2.0521, 2: 1.8354, 3: 1.6379, 4: 1.965, 5: 1.9998, 6: 1.7746, 7: 1.4018, 8: 1.0555, 9: 1.8532, 10: 1.363, 11: 0.8877, 12: 2.0696, 13: 0.9169, 14: 1.0312, 15: 1.9845, 16: 1.8316, 17: 1.7254, 18: 1.194, 19: 1.2111}))
Row(tfidf=SparseVector(20, {0: 0.0991, 2: 0.4079, 3: 0.234, 4: 0.3275, 6: 0.1543, 7: 0.3738, 8: 0.2484, 9: 0.2745, 10: 0.1704, 11: 0.3551, 13: 0.2139, 14: 0.1719, 15: 0.1984, 16: 0.3489, 17: 0.0616, 18: 0.2388, 19: 0.5699}))
Row(tfidf=SparseVector(20, {0: 0.2973, 1: 0.2463, 2: 0.2039, 3: 0.546, 4: 0.262, 5: 0.4444, 6: 0.1543,

Row(tfidf=SparseVector(20, {0: 1.9321, 1: 2.7909, 2: 3.1609, 3: 2.4958, 4: 1.834, 5: 1.8517, 6: 3.3949, 7: 2.7102, 8: 1.9247, 9: 1.5786, 10: 2.215, 11: 2.4854, 12: 1.1404, 13: 1.2225, 14: 2.6639, 15: 2.3814, 16: 2.6166, 17: 2.7113, 18: 2.0696, 19: 2.2084}))
Row(tfidf=SparseVector(20, {0: 27.5454, 1: 34.0649, 2: 38.4411, 3: 36.2674, 4: 41.1345, 5: 34.515, 6: 39.2727, 7: 38.4099, 8: 30.7955, 9: 30.8178, 10: 31.5773, 11: 31.2455, 12: 35.8172, 13: 17.7871, 14: 34.3733, 15: 32.6113, 16: 59.7455, 17: 37.4043, 18: 34.6256, 19: 32.4851}))
Row(tfidf=SparseVector(20, {0: 0.1486, 3: 0.234, 4: 0.262, 5: 0.0741, 6: 0.0772, 8: 0.3725, 9: 0.1373, 10: 0.3408, 11: 0.0888, 12: 0.0422, 14: 0.0859, 15: 0.1323, 16: 0.3489, 17: 0.1849, 18: 0.1592, 19: 0.3562}))
Row(tfidf=SparseVector(20, {0: 3.3193, 1: 6.2384, 2: 6.2199, 3: 5.6936, 4: 7.0741, 5: 4.8143, 6: 6.2497, 7: 6.2615, 8: 5.0912, 9: 5.6968, 10: 8.3487, 11: 4.6158, 12: 6.1244, 13: 3.5146, 14: 5.156, 15: 7.8056, 16: 7.7626, 17: 4.56, 18: 6.6863, 19: 6.9

Row(tfidf=SparseVector(20, {0: 17.4388, 1: 27.4981, 2: 34.0565, 3: 30.4178, 4: 28.0998, 5: 22.9606, 6: 29.3195, 7: 28.4102, 8: 24.5246, 9: 25.3269, 10: 27.261, 11: 33.2871, 12: 23.7796, 13: 13.9363, 14: 35.2327, 15: 27.6502, 16: 20.9327, 17: 24.094, 18: 23.8797, 19: 24.5775}))
Row(tfidf=SparseVector(20, {0: 19.371, 1: 21.2598, 2: 21.9226, 3: 23.7883, 4: 26.1348, 5: 22.4421, 6: 33.6403, 7: 23.1768, 8: 23.1587, 9: 23.5424, 10: 23.1718, 11: 24.233, 12: 15.1632, 13: 11.6136, 14: 21.4833, 15: 27.7825, 16: 20.8455, 17: 21.6292, 18: 19.3426, 19: 21.443}))
Row(tfidf=SparseVector(20, {0: 0.4954, 1: 0.4104, 2: 0.4079, 3: 0.8579, 4: 0.3275, 5: 0.0741, 6: 0.7716, 7: 0.4673, 8: 0.2484, 9: 0.3432, 10: 0.7383, 11: 0.3551, 12: 0.1689, 13: 0.2751, 14: 0.1719, 15: 0.1323, 16: 0.2617, 17: 0.7395, 18: 0.4776, 19: 0.4274}))
Row(tfidf=SparseVector(20, {0: 0.0991, 5: 0.1481, 7: 0.1869, 10: 0.0568, 11: 0.0888, 12: 0.0422, 13: 0.0611, 19: 0.1425}))
Row(tfidf=SparseVector(20, {0: 1.189, 1: 3.8579, 2: 2.1413, 3:

Row(tfidf=SparseVector(20, {0: 7.0845, 1: 5.828, 2: 8.0553, 3: 5.6156, 4: 7.9256, 5: 7.1104, 6: 9.1816, 7: 6.7287, 8: 6.4571, 9: 6.795, 10: 8.9734, 11: 6.5687, 12: 8.2785, 13: 5.7151, 14: 7.1325, 15: 10.1869, 16: 8.8092, 17: 6.84, 18: 7.2435, 19: 8.9761}))
Row(tfidf=SparseVector(20, {0: 14.5158, 1: 13.1334, 2: 14.989, 3: 15.8329, 4: 16.1132, 5: 15.7762, 6: 19.9835, 7: 15.7004, 8: 13.5351, 9: 13.59, 10: 15.959, 11: 18.6408, 12: 11.1929, 13: 10.3911, 14: 17.2726, 15: 17.5956, 16: 14.8274, 17: 12.8789, 18: 14.5666, 19: 16.1001}))
Row(tfidf=SparseVector(20, {0: 0.644, 1: 1.8058, 2: 2.0393, 3: 1.4819, 4: 1.2445, 5: 2.222, 6: 1.3117, 7: 3.1775, 8: 1.1797, 9: 1.51, 10: 1.2495, 11: 1.4203, 12: 0.5913, 13: 0.7641, 14: 1.8905, 15: 1.0584, 16: 1.1339, 17: 0.9243, 18: 1.194, 19: 1.3535}))
Row(tfidf=SparseVector(20, {0: 6.1928, 1: 5.5817, 2: 6.2199, 3: 7.7994, 4: 6.5501, 5: 4.7403, 6: 10.4161, 7: 4.5793, 8: 8.8785, 9: 5.4909, 10: 9.9389, 11: 6.6574, 12: 7.7717, 13: 3.1785, 14: 5.7575, 15: 6.681, 16

Row(tfidf=SparseVector(20, {0: 0.545, 1: 0.3283, 2: 0.4079, 3: 0.39, 4: 0.655, 5: 1.0369, 6: 0.6173, 7: 0.3738, 8: 0.8071, 9: 0.9609, 10: 0.9655, 11: 0.7989, 12: 0.5491, 13: 0.1834, 14: 0.8593, 15: 0.463, 16: 0.2617, 17: 0.4314, 18: 1.2736, 19: 0.9261}))
Row(tfidf=SparseVector(20, {0: 0.5945, 1: 0.4925, 2: 1.7334, 3: 0.7019, 4: 0.786, 5: 0.3703, 6: 0.5401, 7: 0.4673, 8: 0.7451, 9: 0.5491, 10: 0.6815, 11: 1.154, 12: 0.8025, 13: 0.5807, 14: 0.1719, 15: 0.7276, 16: 0.2617, 17: 0.493, 18: 0.4776, 19: 0.7124}))
Row(tfidf=SparseVector(20, {0: 0.3468, 1: 0.3283, 2: 0.3059, 4: 0.4585, 6: 0.3086, 7: 0.4673, 8: 0.4346, 9: 0.2745, 10: 0.5679, 12: 0.0422, 13: 0.0917, 14: 0.2578, 15: 0.3307, 16: 0.1744, 17: 0.1232, 18: 0.398, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.0991, 1: 0.2463, 2: 0.2039, 4: 0.131, 6: 0.1543, 7: 0.1869, 8: 0.1863, 9: 0.3432, 10: 0.1136, 11: 0.1775, 12: 0.2112, 15: 0.2646, 17: 0.0616, 18: 0.1592, 19: 0.4274}))
Row(tfidf=SparseVector(20, {0: 0.9413, 1: 1.2313, 2: 0.9177, 3

Row(tfidf=SparseVector(20, {0: 4.112, 1: 8.2905, 2: 5.5061, 3: 5.7716, 4: 5.4366, 5: 7.1844, 6: 5.5553, 7: 6.4484, 8: 6.0225, 9: 5.6282, 10: 6.0201, 11: 5.681, 12: 7.9828, 13: 3.8508, 14: 5.3279, 15: 4.8289, 16: 5.146, 17: 6.9016, 18: 6.6863, 19: 4.9867}))
Row(tfidf=SparseVector(20, {0: 5.2019, 1: 7.798, 2: 6.016, 3: 5.0696, 4: 4.5851, 5: 6.8882, 6: 6.7898, 7: 8.224, 8: 5.3395, 9: 3.9809, 10: 6.0769, 11: 6.5687, 12: 4.8573, 13: 2.8117, 14: 4.9841, 15: 5.0273, 16: 6.5415, 17: 5.5459, 18: 5.8107, 19: 7.0527}))
Row(tfidf=SparseVector(20, {0: 0.3963, 1: 0.4925, 2: 0.102, 3: 0.234, 4: 0.1965, 5: 0.2222, 6: 0.3858, 7: 0.1869, 8: 0.3725, 9: 0.0686, 10: 0.2272, 11: 0.2663, 12: 0.3801, 13: 0.1834, 15: 0.5292, 16: 0.5233, 17: 0.1232, 18: 0.398, 19: 0.2137}))
Row(tfidf=SparseVector(20, {0: 0.0495, 1: 0.1642, 3: 0.234, 10: 0.1136, 13: 0.0306, 18: 0.0796}))
Row(tfidf=SparseVector(20, {0: 1.8331, 1: 1.4775, 2: 1.9373, 3: 1.6379, 4: 1.1135, 5: 1.4073, 6: 1.3117, 7: 2.2429, 8: 0.8692, 9: 0.9609, 10: 1

Row(tfidf=SparseVector(20, {0: 0.5945, 1: 0.4925, 2: 0.102, 3: 1.0919, 4: 0.9825, 5: 0.6666, 6: 0.8487, 7: 0.4673, 8: 0.7451, 9: 0.6177, 10: 0.7383, 11: 0.9764, 12: 1.3516, 13: 0.6112, 14: 0.0859, 15: 0.7276, 16: 0.785, 17: 0.3081, 18: 0.6368, 19: 0.7124}))
Row(tfidf=SparseVector(20, {0: 0.1486, 1: 0.0821, 3: 0.078, 5: 0.0741, 7: 0.1869, 8: 0.1242, 9: 0.3432, 10: 0.2272, 11: 0.0888, 12: 0.0845, 13: 0.0917, 14: 0.1719, 15: 0.1323, 16: 0.0872, 17: 0.1232, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.6936, 1: 0.7388, 2: 0.5098, 3: 0.468, 4: 0.786, 5: 0.2222, 6: 1.3888, 7: 1.1215, 8: 0.8071, 9: 0.4118, 10: 0.3976, 11: 0.7101, 12: 0.6336, 13: 0.489, 14: 0.3437, 15: 0.8599, 16: 0.6978, 17: 0.6778, 18: 0.3184, 19: 0.7836}))
Row(tfidf=SparseVector(20, {0: 0.1486, 3: 0.078, 5: 0.0741, 6: 0.0772, 7: 0.2804, 10: 0.0568, 11: 0.0888, 12: 0.0422, 13: 0.0917, 14: 0.0859, 15: 0.2646, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.4954, 1: 0.985, 2: 1.1216, 3: 0.468, 4: 0.393, 5: 0.2963, 6: 1.1573, 7

Row(tfidf=SparseVector(20, {0: 0.7927, 1: 0.4104, 2: 1.3256, 3: 1.6379, 4: 0.786, 5: 0.5185, 6: 0.6944, 7: 0.5607, 8: 0.8692, 9: 0.755, 10: 1.1359, 11: 0.9764, 12: 1.774, 13: 0.7641, 14: 1.289, 15: 0.6615, 16: 1.1339, 17: 1.4789, 18: 0.8756, 19: 1.2111}))
Row(tfidf=SparseVector(20, {0: 21.5508, 1: 22.9014, 2: 26.2052, 3: 24.9582, 4: 26.8553, 5: 28.0712, 6: 31.0941, 7: 40.3724, 8: 25.518, 9: 20.1792, 10: 29.9871, 11: 26.0083, 12: 34.4656, 13: 13.1417, 14: 23.4598, 15: 27.9148, 16: 32.4458, 17: 25.3265, 18: 25.3125, 19: 28.2108}))
Row(tfidf=SparseVector(20, {0: 11.6424, 1: 12.0664, 2: 16.0086, 3: 13.649, 4: 14.4757, 5: 13.9986, 6: 15.7399, 7: 14.0182, 8: 15.3357, 9: 12.6978, 10: 15.9022, 11: 16.8655, 12: 12.7979, 13: 8.5574, 14: 14.8665, 15: 12.1052, 16: 15.6123, 17: 17.9319, 18: 12.895, 19: 15.2452}))
Row(tfidf=SparseVector(20, {0: 16.448, 1: 31.8486, 2: 20.699, 3: 23.0863, 4: 21.2222, 5: 19.3314, 6: 26.619, 7: 18.8778, 8: 18.1296, 9: 16.5414, 10: 25.046, 11: 22.1914, 12: 18.331, 13: 11

Row(tfidf=SparseVector(20, {0: 0.3468, 1: 0.8208, 2: 0.4079, 3: 0.7019, 4: 0.131, 5: 0.8147, 6: 0.6173, 7: 0.8411, 8: 0.7451, 9: 0.755, 10: 0.7951, 11: 1.154, 12: 0.4224, 13: 0.4279, 14: 0.8593, 15: 0.6615, 16: 0.6105, 17: 0.9243, 18: 0.8756, 19: 0.6412}))
Row(tfidf=SparseVector(20, {0: 0.1982, 1: 0.1642, 2: 0.3059, 3: 0.078, 4: 0.262, 5: 0.1481, 6: 0.2315, 7: 0.0935, 8: 0.0621, 9: 0.0686, 12: 0.1267, 13: 0.0611, 15: 0.1323, 16: 0.0872, 17: 0.0616, 18: 0.0796, 19: 0.2137}))
Row(tfidf=SparseVector(20, {0: 0.1486, 2: 0.3059, 3: 0.156, 4: 0.524, 5: 0.1481, 6: 0.0772, 7: 0.4673, 8: 0.2484, 9: 0.2745, 10: 0.1704, 11: 0.1775, 12: 0.2112, 13: 0.1528, 14: 0.1719, 15: 0.2646, 16: 0.5233, 17: 0.1849, 18: 0.2388, 19: 0.2137}))
Row(tfidf=SparseVector(20, {1: 0.0821, 2: 0.102, 4: 0.0655, 5: 0.0741, 6: 0.0772, 7: 0.3738, 9: 0.2059, 10: 0.1136, 11: 0.1775, 12: 0.3379, 13: 0.0611, 15: 0.0661, 16: 0.1744, 17: 0.0616, 18: 0.0796}))
Row(tfidf=SparseVector(20, {1: 0.2463, 2: 0.102, 4: 0.131, 6: 0.2315, 7:

Row(tfidf=SparseVector(20, {0: 7.7286, 1: 7.1413, 2: 13.4595, 3: 10.8412, 4: 9.5631, 5: 8.5917, 6: 13.5024, 7: 14.7658, 8: 9.189, 9: 9.815, 10: 9.087, 11: 12.0721, 12: 7.5182, 13: 4.9816, 14: 9.7105, 15: 9.5254, 16: 7.8498, 17: 8.9351, 18: 10.1091, 19: 10.8284}))
Row(tfidf=SparseVector(20, {0: 24.0279, 1: 38.2512, 2: 42.0099, 3: 38.2952, 4: 36.8769, 5: 37.6998, 6: 43.7478, 7: 52.5215, 8: 31.168, 9: 33.8378, 10: 36.5183, 11: 37.0153, 12: 33.1985, 13: 17.726, 14: 40.4746, 15: 38.4986, 16: 35.4112, 17: 40.9167, 18: 39.3219, 19: 39.1816}))
Row(tfidf=SparseVector(20, {0: 7.0845, 1: 7.8801, 2: 12.4398, 3: 9.3593, 4: 9.0391, 5: 10.1471, 6: 9.4903, 7: 10.7473, 8: 7.6368, 9: 8.8541, 10: 10.5068, 11: 11.0069, 12: 7.8561, 13: 7.4266, 14: 7.8199, 15: 9.8562, 16: 10.5536, 17: 11.4616, 18: 10.5071, 19: 10.1872}))
Row(tfidf=SparseVector(20, {0: 0.9908, 1: 0.6567, 2: 0.4079, 3: 1.0919, 4: 0.524, 5: 0.9629, 6: 1.2345, 7: 1.2149, 8: 0.683, 9: 1.0295, 10: 0.4543, 11: 1.3315, 12: 0.718, 13: 0.4279, 14: 1.

Row(tfidf=SparseVector(20, {0: 0.1486, 1: 0.1642, 2: 0.2039, 3: 0.234, 4: 0.131, 5: 0.5925, 6: 0.4629, 7: 0.4673, 8: 0.1863, 9: 0.2059, 10: 0.0568, 11: 0.1775, 12: 0.2112, 13: 0.1222, 14: 0.2578, 15: 0.1984, 16: 0.1744, 17: 0.0616, 18: 0.1592, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 2.0312, 1: 3.2834, 2: 3.3649, 3: 3.5097, 4: 3.537, 5: 3.6293, 6: 2.7776, 7: 3.9251, 8: 3.0423, 9: 2.1964, 10: 3.4076, 11: 3.018, 12: 2.9566, 13: 1.467, 14: 3.9529, 15: 2.3152, 16: 3.3144, 17: 3.4508, 18: 4.2187, 19: 2.6359}))
Row(tfidf=SparseVector(20, {0: 0.2973, 1: 0.4104, 2: 0.3059, 3: 0.078, 4: 0.1965, 5: 0.2222, 6: 0.3086, 7: 0.1869, 8: 0.1242, 9: 0.2059, 10: 0.284, 11: 0.1775, 12: 0.2112, 13: 0.1222, 15: 0.1323, 16: 0.1744, 17: 0.4314, 18: 0.9552, 19: 0.3562}))
Row(tfidf=SparseVector(20, {1: 0.0821, 3: 0.078, 6: 0.0772, 9: 0.2059, 12: 0.0845, 13: 0.0611, 15: 0.1323, 16: 0.0872}))
Row(tfidf=SparseVector(20, {0: 1.2386, 1: 1.1492, 2: 1.4275, 3: 1.0139, 4: 1.441, 5: 1.9257, 6: 1.2345, 7: 1.7756, 8: 

Row(tfidf=SparseVector(20, {0: 5.7964, 1: 6.7309, 2: 9.4828, 3: 7.5655, 4: 6.4846, 5: 8.888, 6: 9.5674, 7: 9.0651, 8: 6.2709, 9: 9.4032, 10: 8.803, 11: 9.7642, 12: 7.1803, 13: 4.8594, 14: 8.5074, 15: 10.5176, 16: 8.4603, 17: 7.4562, 18: 8.8355, 19: 9.4036}))
Row(tfidf=SparseVector(20, {0: 21.4517, 1: 28.8936, 2: 37.9312, 3: 30.1838, 4: 30.5233, 5: 42.366, 6: 30.4768, 7: 26.8215, 8: 24.9593, 9: 25.121, 10: 25.2164, 11: 27.5173, 12: 32.5227, 13: 18.8262, 14: 27.3268, 15: 31.8175, 16: 30.6141, 17: 29.0238, 18: 35.1828, 19: 35.0497}))
Row(tfidf=SparseVector(20, {0: 6.4405, 1: 10.3426, 2: 8.6671, 3: 8.9693, 4: 13.5587, 5: 9.999, 6: 10.8019, 7: 12.9902, 8: 9.3752, 9: 8.2364, 10: 9.7117, 11: 11.4508, 12: 11.2773, 13: 6.2652, 14: 12.7181, 15: 13.8912, 16: 8.6348, 17: 8.2573, 18: 9.4723, 19: 10.7571}))
Row(tfidf=SparseVector(20, {0: 5.7469, 1: 6.7309, 2: 11.2162, 3: 14.6629, 4: 11.3971, 5: 10.7396, 6: 8.9502, 7: 9.1586, 8: 9.8719, 9: 7.6873, 10: 8.6894, 11: 9.3204, 12: 8.9543, 13: 3.7591, 14: 9

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Row(tfidf=SparseVector(20, {0: 27.0995, 1: 37.4303, 2: 31.5074, 3: 27.922, 4: 29.6718, 5: 39.1812, 6: 39.5813, 7: 29.9055, 8: 27.8774, 9: 26.9056, 10: 32.5996, 11: 34.5298, 12: 31.8469, 13: 21.3018, 14: 35.3186, 15: 28.6424, 16: 36.5451, 17: 38.6367, 18: 35.6604, 19: 34.2661}))
Row(tfidf=SparseVector(20, {0: 4.657, 1: 4.5146, 2: 7.0356, 3: 4.2897, 4: 5.4366, 5: 5.6291, 6: 6.1725, 7: 6.2615, 8: 6.0846, 9: 4.3241, 10: 4.8843, 11: 5.1484, 12: 4.3082, 13: 3.6674, 14: 4.3826, 15: 6.2841, 16: 5.5821, 17: 5.7308, 18: 6.8455, 19: 5.4142}))
Row(tfidf=SparseVector(20, {0: 10.5029, 1: 20.7673, 2: 19.0676, 3: 18.0947, 4: 18.6022, 5: 18.6648, 6: 17.2831, 7: 17.5695, 8: 17.8813, 9: 13.1782, 10: 13.8577, 11: 18.3745, 12: 18.9223, 13: 8.9241, 14: 20.4521, 15: 17.5294, 16: 16.9206, 17: 15.7751, 18: 23.0837, 19: 18.9496}))
Row(tfidf=SparseVector(20, {0: 0.0991, 1: 0.2463, 2: 0.102, 3: 0.156, 4: 0.3275, 5: 0.0741, 6: 0.2315, 7: 0.2804, 8: 0.1863, 9: 0.2745, 10: 0.5679, 11: 0.4438, 12: 0.1267, 13: 0.0611

Row(tfidf=SparseVector(20, {0: 0.0495, 2: 0.102, 3: 0.078, 4: 0.131, 5: 0.1481, 6: 0.0772, 9: 0.1373, 10: 0.0568, 13: 0.0306, 14: 0.0859, 16: 0.0872}))
Row(tfidf=SparseVector(20, {0: 0.2477, 2: 0.2039, 3: 0.39, 4: 0.0655, 5: 0.1481, 6: 0.0772, 7: 0.0935, 8: 0.3104, 9: 0.2745, 10: 0.0568, 11: 0.0888, 12: 0.4224, 13: 0.1834, 14: 0.2578, 15: 0.1323, 16: 0.2617, 17: 0.3081, 18: 0.1592, 19: 0.3562}))
Row(tfidf=SparseVector(20, {0: 0.1486, 1: 0.3283, 2: 0.4079, 3: 0.234, 4: 0.131, 5: 0.2222, 6: 0.3858, 7: 0.2804, 8: 0.3104, 9: 0.4118, 10: 0.2272, 11: 0.3551, 12: 0.2112, 13: 0.2751, 14: 0.2578, 15: 0.1984, 17: 0.3697, 18: 0.1592, 19: 0.2137}))
Row(tfidf=SparseVector(20, {0: 0.1982, 1: 0.7388, 2: 0.2039, 3: 0.39, 4: 0.3275, 5: 0.6666, 6: 0.8487, 7: 0.4673, 8: 0.3725, 9: 0.2059, 10: 0.3408, 11: 0.3551, 12: 0.4224, 13: 0.2139, 14: 0.7734, 15: 0.3307, 16: 0.0872, 17: 0.6162, 18: 0.4776, 19: 0.5699}))
Row(tfidf=SparseVector(20, {0: 0.0495, 1: 0.0821, 2: 0.2039, 3: 0.078, 4: 0.0655, 5: 0.0741, 6: 0

Row(tfidf=SparseVector(20, {0: 0.3963, 1: 0.3283, 2: 0.3059, 3: 0.39, 4: 0.7205, 5: 0.5185, 6: 0.5401, 7: 0.2804, 8: 0.4967, 9: 0.4118, 10: 0.5111, 11: 0.0888, 12: 0.3379, 13: 0.3362, 14: 0.5156, 15: 0.6615, 16: 0.2617, 17: 0.5546, 18: 0.4776, 19: 0.5699}))
Row(tfidf=SparseVector(20, {0: 5.2515, 1: 9.6859, 2: 6.8317, 3: 5.8496, 4: 8.1221, 5: 7.4066, 6: 8.6415, 7: 9.1586, 8: 11.7346, 9: 7.7559, 10: 9.371, 11: 7.5451, 12: 7.8561, 13: 4.7677, 14: 9.3667, 15: 10.1208, 16: 6.4543, 17: 8.9351, 18: 7.6415, 19: 9.7598}))
Row(tfidf=SparseVector(20, {0: 0.644, 1: 0.4925, 2: 0.8157, 3: 0.624, 4: 0.7205, 5: 1.0369, 6: 1.003, 7: 2.2429, 8: 1.1176, 9: 0.5491, 10: 0.5111, 11: 0.6214, 12: 1.3516, 13: 0.6418, 14: 1.0312, 15: 0.8599, 16: 1.3083, 17: 0.6162, 18: 0.8756, 19: 0.7836}))
Row(tfidf=SparseVector(20, {0: 36.8593, 1: 50.3996, 2: 73.0074, 3: 46.4846, 4: 52.4661, 5: 54.2907, 6: 55.3213, 7: 55.7924, 8: 70.6558, 9: 55.0466, 10: 54.2379, 11: 54.7684, 12: 65.3833, 13: 33.2821, 14: 62.5595, 15: 59.4016

Row(tfidf=SparseVector(20, {0: 2.6257, 1: 4.843, 2: 6.3219, 3: 2.5738, 4: 4.2575, 5: 3.2589, 6: 3.472, 7: 3.1775, 8: 3.4148, 9: 3.02, 10: 2.9533, 11: 3.6394, 12: 3.379, 13: 2.2005, 14: 3.867, 15: 3.9689, 16: 3.4888, 17: 3.3892, 18: 3.98, 19: 5.5567}))
Row(tfidf=SparseVector(20, {0: 5.2515, 1: 7.0592, 2: 6.3219, 3: 8.1114, 4: 6.1571, 5: 6.9622, 6: 7.0212, 7: 8.0371, 8: 6.6434, 9: 6.1087, 10: 6.4745, 11: 6.3911, 12: 6.4623, 13: 3.423, 14: 6.445, 15: 5.358, 16: 7.3265, 17: 6.0389, 18: 5.3331, 19: 7.4801}))
Row(tfidf=SparseVector(20, {0: 2.7248, 1: 3.1192, 2: 4.3845, 3: 3.5877, 4: 3.8645, 5: 4.2959, 6: 4.8609, 7: 2.6167, 8: 2.6077, 9: 3.02, 10: 2.8965, 11: 3.1956, 12: 2.6187, 13: 1.8643, 14: 3.6092, 15: 4.1674, 16: 4.7099, 17: 3.3276, 18: 3.0248, 19: 2.992}))
Row(tfidf=SparseVector(20, {0: 0.644, 1: 0.1642, 2: 0.9177, 3: 1.1699, 4: 0.7205, 5: 0.8888, 6: 0.8487, 7: 0.5607, 8: 0.0621, 9: 0.755, 10: 0.3408, 11: 0.8877, 12: 1.2249, 13: 0.489, 14: 0.5156, 15: 0.7276, 16: 1.1339, 17: 0.6162, 18:

Row(tfidf=SparseVector(20, {0: 0.0991, 1: 0.0821, 2: 0.102, 7: 0.0935, 10: 0.1136, 13: 0.0306, 14: 0.0859, 17: 0.0616, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.3468, 1: 0.5746, 2: 0.5098, 3: 0.468, 4: 0.1965, 5: 0.3703, 6: 0.3086, 7: 0.4673, 8: 0.3104, 9: 0.0686, 10: 0.2272, 11: 0.1775, 12: 0.6758, 13: 0.3362, 14: 0.1719, 15: 0.3307, 16: 0.6105, 17: 0.3081, 18: 0.398, 19: 0.8549}))
Row(tfidf=SparseVector(20, {0: 3.3689, 1: 3.1192, 2: 3.1609, 3: 3.5097, 4: 3.406, 5: 3.1108, 6: 3.6264, 7: 2.6167, 8: 2.2352, 9: 2.1964, 10: 2.8965, 11: 2.8405, 12: 4.815, 13: 1.9254, 14: 2.7499, 15: 3.9028, 16: 3.8377, 17: 3.0811, 18: 2.5472, 19: 2.992}))
Row(tfidf=SparseVector(20, {0: 0.545, 1: 0.4104, 2: 0.5098, 3: 0.7019, 4: 0.393, 5: 0.5185, 6: 0.3858, 7: 0.2804, 8: 0.1242, 9: 0.6177, 10: 0.4543, 11: 0.6214, 12: 0.1267, 13: 0.3973, 14: 0.3437, 15: 0.463, 16: 0.8722, 17: 0.6778, 18: 0.398, 19: 0.5699}))
Row(tfidf=SparseVector(20, {0: 2.6257, 1: 3.94, 2: 4.5885, 3: 6.3175, 4: 4.5851, 5: 4.8143, 6: 6

Row(tfidf=SparseVector(20, {0: 1.0899, 1: 1.6417, 2: 1.2236, 3: 1.8719, 4: 2.489, 5: 1.9257, 6: 2.3147, 7: 1.3084, 8: 1.4901, 9: 1.2355, 10: 1.9878, 11: 1.9528, 12: 2.7032, 13: 0.8557, 14: 1.9765, 15: 1.786, 16: 3.4016, 17: 2.0335, 18: 2.2288, 19: 1.9235}))
Row(tfidf=SparseVector(20, {0: 0.0991, 1: 0.1642, 2: 0.2039, 3: 0.234, 4: 0.262, 5: 0.2963, 6: 0.1543, 8: 0.3104, 9: 0.0686, 10: 0.2272, 11: 0.2663, 12: 0.718, 13: 0.1834, 15: 0.3969, 16: 0.3489, 17: 0.2465, 18: 0.3184, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.1982, 1: 0.1642, 2: 0.3059, 3: 0.156, 4: 0.131, 5: 0.0741, 6: 0.0772, 7: 0.0935, 8: 0.1863, 9: 0.0686, 10: 0.1136, 11: 0.2663, 12: 0.1267, 13: 0.1222, 14: 0.3437, 15: 0.1323, 16: 0.4361, 17: 0.1849, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.1982, 1: 0.2463, 2: 0.4079, 3: 0.312, 4: 0.655, 5: 0.2963, 6: 0.4629, 7: 0.2804, 8: 0.3725, 9: 0.6177, 10: 0.1704, 11: 0.4438, 12: 0.0422, 13: 0.0306, 14: 0.4297, 15: 0.2646, 16: 0.3489, 17: 0.4314, 18: 0.8756, 19: 0.5699}))
Row(

Row(tfidf=SparseVector(20, {0: 0.0495, 1: 0.4104, 2: 0.2039, 3: 0.156, 4: 0.1965, 5: 0.0741, 6: 0.1543, 7: 0.0935, 8: 0.0621, 9: 0.3432, 10: 0.1136, 11: 0.0888, 12: 0.2957, 13: 0.2139, 15: 0.1984, 18: 0.2388, 19: 0.0712}))
Row(tfidf=SparseVector(20, {4: 0.0655, 8: 0.1863, 9: 0.0686, 10: 0.1704, 11: 0.0888, 12: 0.0422, 13: 0.0306, 14: 0.0859, 16: 0.2617, 17: 0.1849, 18: 0.0796}))
Row(tfidf=SparseVector(20, {0: 0.0495, 6: 0.0772, 8: 0.0621, 9: 0.1373, 12: 0.0422, 13: 0.0611, 14: 0.0859}))
Row(tfidf=SparseVector(20, {0: 0.1486, 1: 0.2463, 2: 0.5098, 3: 0.312, 4: 0.0655, 5: 0.2963, 6: 0.3858, 7: 0.4673, 8: 0.1242, 9: 0.4118, 10: 0.284, 11: 0.3551, 12: 0.0422, 13: 0.2139, 14: 0.3437, 15: 0.0661, 16: 0.4361, 17: 0.2465, 18: 0.3184, 19: 0.5699}))
Row(tfidf=SparseVector(20, {0: 0.1982, 1: 0.2463, 2: 0.3059, 3: 0.39, 4: 0.1965, 5: 0.2963, 6: 0.2315, 7: 0.4673, 8: 0.683, 9: 0.2059, 10: 0.0568, 11: 0.2663, 12: 0.3801, 13: 0.0917, 14: 0.3437, 15: 0.3307, 16: 1.0466, 17: 0.3081, 18: 0.3184, 19: 0.2

Row(tfidf=SparseVector(20, {0: 1.6844, 1: 2.1342, 2: 2.3452, 3: 2.3398, 4: 2.358, 5: 2.222, 6: 2.0061, 7: 2.7102, 8: 1.428, 9: 3.5005, 10: 1.7606, 11: 2.4854, 12: 1.7317, 13: 1.1308, 14: 2.3202, 15: 2.0506, 16: 3.0527, 17: 1.7254, 18: 1.99, 19: 3.277}))
Row(tfidf=SparseVector(20, {0: 10.5525, 1: 17.4839, 2: 14.3772, 3: 15.2089, 4: 16.8992, 5: 16.2206, 6: 17.2059, 7: 15.8873, 8: 14.2181, 9: 15.0314, 10: 15.5047, 11: 18.552, 12: 13.9383, 13: 6.8153, 14: 18.9053, 15: 15.0819, 16: 25.0321, 17: 14.7276, 18: 17.5914, 19: 18.166}))
Row(tfidf=SparseVector(20, {0: 8.3231, 1: 15.5139, 2: 17.6401, 3: 19.7326, 4: 15.6547, 5: 12.4432, 6: 16.5887, 7: 12.7098, 8: 11.3, 9: 14.6196, 10: 12.381, 11: 13.3148, 12: 15.2054, 13: 8.1906, 14: 11.2573, 15: 14.0235, 16: 14.304, 17: 9.4281, 18: 14.7258, 19: 15.8864}))
Row(tfidf=SparseVector(20, {0: 0.1982, 1: 0.8208, 2: 0.7138, 3: 0.546, 4: 0.655, 5: 0.6666, 6: 0.5401, 7: 0.8411, 8: 0.683, 9: 0.6864, 10: 0.5111, 11: 0.7101, 12: 0.718, 13: 0.2751, 14: 0.6015, 15:

Row(tfidf=SparseVector(20, {0: 3.1212, 1: 4.5967, 2: 4.6904, 3: 5.7716, 4: 4.9126, 5: 4.2218, 6: 7.407, 7: 7.1026, 8: 3.9115, 9: 3.7064, 10: 5.509, 11: 6.2136, 12: 4.097, 13: 2.8728, 14: 5.4997, 15: 8.004, 16: 5.5821, 17: 5.3611, 18: 4.0595, 19: 4.3456}))
Row(tfidf=SparseVector(20, {0: 0.6936, 1: 0.9029, 2: 1.3256, 3: 0.9359, 4: 1.048, 5: 1.0369, 6: 1.1573, 7: 0.8411, 8: 0.683, 9: 1.0982, 10: 0.6815, 11: 1.4203, 12: 1.0137, 13: 0.8252, 14: 1.3749, 15: 0.3969, 16: 0.4361, 17: 0.9243, 18: 0.796, 19: 0.7836}))
Row(tfidf=SparseVector(20, {0: 0.3963, 1: 0.6567, 2: 0.7138, 3: 0.39, 4: 0.4585, 5: 0.7407, 6: 0.4629, 7: 1.028, 8: 0.8692, 9: 0.755, 10: 0.5111, 11: 0.7989, 12: 0.4646, 13: 0.2751, 14: 0.3437, 15: 0.3969, 16: 1.57, 17: 0.3697, 18: 0.7164, 19: 0.8549}))
Row(tfidf=SparseVector(20, {0: 0.6936, 1: 0.9029, 2: 2.3452, 3: 0.468, 4: 1.572, 5: 0.9629, 6: 1.6974, 7: 1.5887, 8: 0.7451, 9: 0.6177, 10: 1.1927, 11: 1.509, 12: 0.887, 13: 1.0391, 14: 1.4609, 15: 1.0584, 16: 1.3083, 17: 1.3557, 18:

Row(tfidf=SparseVector(20, {0: 0.1486, 1: 0.2463, 3: 0.078, 4: 0.0655, 5: 0.1481, 6: 0.1543, 7: 0.0935, 8: 0.1242, 9: 0.1373, 10: 0.1704, 11: 0.0888, 12: 0.0422, 13: 0.1222, 14: 0.2578, 15: 0.1984, 16: 0.1744, 17: 0.0616, 18: 0.1592, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 2.4276, 1: 3.7759, 2: 2.855, 3: 3.6657, 4: 2.0305, 5: 2.222, 6: 3.3177, 7: 4.2989, 8: 2.7939, 9: 2.8827, 10: 3.6916, 11: 3.1956, 12: 3.2945, 13: 1.8337, 14: 2.6639, 15: 3.9028, 16: 3.576, 17: 3.9438, 18: 3.9004, 19: 2.992}))
Row(tfidf=SparseVector(20, {0: 19.8168, 1: 24.1327, 2: 37.8293, 3: 27.766, 4: 35.3704, 5: 27.7009, 6: 35.0291, 7: 28.2233, 8: 26.0147, 9: 21.0028, 10: 25.5004, 11: 30.0028, 12: 28.8903, 13: 14.6087, 14: 27.069, 15: 31.0238, 16: 27.9975, 17: 26.867, 18: 35.2624, 19: 28.4245}))
Row(tfidf=SparseVector(20, {0: 0.0991, 3: 0.312, 4: 0.3275, 5: 0.1481, 6: 0.0772, 8: 0.0621, 11: 0.2663, 13: 0.1222, 14: 0.5156, 15: 0.1984, 16: 0.1744, 17: 0.3081, 18: 0.1592, 19: 0.0712}))
Row(tfidf=SparseVector(20, {

Row(tfidf=SparseVector(20, {0: 0.3468, 1: 0.0821, 2: 0.3059, 3: 0.078, 4: 0.131, 5: 0.2222, 6: 0.1543, 7: 0.1869, 8: 0.1242, 9: 0.1373, 11: 0.0888, 12: 0.0845, 13: 0.1222, 14: 0.0859, 15: 0.1984, 17: 0.1232, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.0495, 3: 0.312, 4: 0.131, 5: 0.1481, 11: 0.0888, 13: 0.0306, 15: 0.0661, 16: 0.0872, 17: 0.1849, 18: 0.1592}))
Row(tfidf=SparseVector(20, {0: 0.0495, 1: 0.1642, 3: 0.078, 4: 0.1965, 5: 0.0741, 6: 0.1543, 7: 0.0935, 9: 0.2059, 11: 0.0888, 13: 0.0611, 14: 0.1719, 15: 0.0661, 17: 0.0616, 18: 0.1592, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.0495, 1: 0.1642, 3: 0.234, 5: 0.1481, 7: 0.0935, 8: 0.0621, 9: 0.0686, 11: 0.0888, 12: 0.1267, 13: 0.0306, 15: 0.0661, 16: 0.0872, 17: 0.1849, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.7431, 1: 0.6567, 2: 0.5098, 3: 1.0139, 4: 1.2445, 5: 0.9629, 6: 0.9259, 7: 1.028, 8: 0.8071, 9: 0.755, 10: 0.5679, 11: 0.8877, 12: 1.0559, 13: 0.9474, 14: 1.0312, 15: 0.6615, 16: 1.0466, 17: 0.5546, 18: 0.7164,

Row(tfidf=SparseVector(20, {0: 0.2973, 1: 0.9029, 2: 0.5098, 3: 0.546, 4: 0.3275, 5: 0.2963, 6: 0.3086, 7: 0.4673, 8: 0.8071, 9: 0.0686, 10: 0.3976, 11: 0.0888, 12: 0.6758, 13: 0.1528, 14: 0.4297, 15: 0.1984, 16: 0.2617, 17: 1.2941, 18: 0.6368, 19: 0.7836}))
Row(tfidf=SparseVector(20, {0: 2.7248, 1: 3.6938, 2: 3.059, 3: 4.1337, 4: 2.0305, 5: 2.5183, 6: 3.2406, 7: 2.6167, 8: 3.2286, 9: 2.6768, 10: 3.9756, 11: 2.9293, 12: 3.3367, 13: 2.2005, 14: 2.4921, 15: 2.8444, 16: 2.5294, 17: 3.6357, 18: 3.0248, 19: 2.4221}))
Row(tfidf=SparseVector(20, {0: 0.8918, 1: 1.4775, 2: 1.8354, 3: 1.6379, 4: 1.5065, 5: 0.6666, 6: 1.7746, 7: 1.3084, 8: 1.3659, 9: 0.9609, 10: 2.0446, 11: 1.2427, 12: 0.887, 13: 0.5807, 14: 1.1171, 15: 1.1245, 16: 1.1339, 17: 0.9859, 18: 1.5124, 19: 1.6385}))
Row(tfidf=SparseVector(20, {0: 0.3468, 1: 0.1642, 2: 0.2039, 3: 0.234, 4: 0.1965, 5: 0.1481, 6: 0.1543, 7: 0.2804, 9: 0.3432, 10: 0.2272, 11: 0.3551, 12: 0.3379, 13: 0.0611, 14: 0.0859, 15: 0.0661, 16: 0.1744, 17: 0.3081, 1

Row(tfidf=SparseVector(20, {0: 2.5266, 1: 5.3355, 2: 5.5061, 3: 3.1198, 4: 2.62, 5: 3.333, 6: 3.8578, 7: 5.6073, 8: 3.3527, 9: 3.1573, 10: 2.7829, 11: 4.9709, 12: 2.5342, 13: 1.6504, 14: 3.0077, 15: 2.7782, 16: 4.4482, 17: 3.8205, 18: 3.1044, 19: 4.1319}))
Row(tfidf=SparseVector(20, {0: 2.6753, 1: 3.6938, 2: 4.9963, 3: 3.8217, 4: 3.668, 5: 4.1477, 6: 4.0121, 7: 4.4858, 8: 4.1599, 9: 2.7455, 10: 4.3163, 11: 5.1484, 12: 2.4498, 13: 2.8423, 14: 3.9529, 15: 4.3658, 16: 4.7971, 17: 3.8822, 18: 5.0943, 19: 6.0553}))
Row(tfidf=SparseVector(20, {0: 14.7635, 1: 20.2748, 2: 20.801, 3: 17.0808, 4: 18.4057, 5: 18.7388, 6: 20.4465, 7: 17.0088, 8: 15.5219, 9: 17.0905, 10: 19.1963, 11: 17.8419, 12: 15.5433, 13: 9.9327, 14: 15.6399, 15: 19.7123, 16: 20.1478, 17: 17.5005, 18: 18.1486, 19: 19.0209}))
Row(tfidf=SparseVector(20, {0: 0.8918, 1: 0.8208, 2: 1.0197, 3: 0.39, 4: 0.786, 5: 0.8147, 6: 0.8487, 7: 0.8411, 8: 0.6209, 9: 0.6864, 10: 0.8519, 11: 0.9764, 12: 0.8025, 13: 0.3362, 14: 0.8593, 15: 1.3891,

Row(tfidf=SparseVector(20, {0: 3.1707, 1: 3.2834, 2: 4.4865, 3: 3.0418, 4: 4.192, 5: 4.3699, 6: 4.6294, 7: 4.3924, 8: 2.4835, 9: 3.5005, 10: 4.4299, 11: 5.9473, 12: 3.9281, 13: 2.1699, 14: 3.7811, 15: 4.2335, 16: 3.4888, 17: 3.5124, 18: 3.4228, 19: 4.8443}))
Row(tfidf=SparseVector(20, {0: 2.378, 1: 2.2163, 2: 3.4668, 3: 4.2897, 4: 2.882, 5: 3.7774, 6: 4.6294, 7: 2.5233, 8: 2.7319, 9: 2.5396, 10: 3.9188, 11: 3.1956, 12: 4.8573, 13: 1.6504, 14: 3.7811, 15: 2.6459, 16: 4.8843, 17: 2.8962, 18: 2.7064, 19: 3.7044}))
Row(tfidf=SparseVector(20, {0: 17.092, 1: 19.536, 2: 21.3108, 3: 21.0585, 4: 25.0868, 5: 29.1822, 6: 22.4526, 7: 29.6251, 8: 19.7439, 9: 20.1792, 10: 26.5794, 11: 30.5354, 12: 20.7385, 13: 14.9449, 14: 20.0225, 15: 23.4828, 16: 18.665, 17: 25.3265, 18: 27.6209, 19: 25.005}))
Row(tfidf=SparseVector(20, {0: 3.8147, 1: 3.7759, 2: 3.8747, 3: 3.0418, 4: 2.5545, 5: 3.7774, 6: 5.3238, 7: 3.6447, 8: 2.9181, 9: 3.2259, 10: 4.0323, 11: 4.172, 12: 3.8858, 13: 2.445, 14: 3.5233, 15: 3.4397,

Row(tfidf=SparseVector(20, {0: 0.644, 1: 1.3133, 2: 0.8157, 3: 0.156, 4: 0.524, 5: 0.8147, 6: 0.7716, 7: 0.3738, 8: 0.4967, 9: 0.5491, 10: 0.284, 11: 0.7989, 12: 1.3094, 13: 0.3056, 14: 0.2578, 15: 0.5292, 16: 0.6105, 17: 0.5546, 18: 0.398, 19: 0.4274}))
Row(tfidf=SparseVector(20, {0: 0.0495, 1: 0.0821, 2: 0.2039, 3: 0.234, 4: 0.1965, 5: 0.2963, 6: 0.1543, 7: 0.0935, 11: 0.1775, 12: 0.1267, 13: 0.0917, 15: 0.1323, 17: 0.1232, 18: 0.0796, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 1.4863, 1: 1.3133, 2: 1.3256, 3: 1.4819, 4: 1.441, 5: 1.4813, 6: 1.6203, 7: 1.4953, 8: 1.4901, 9: 1.5786, 10: 1.8174, 11: 1.6865, 12: 0.5913, 13: 0.6112, 14: 1.8905, 15: 1.4553, 16: 2.5294, 17: 1.6022, 18: 1.7512, 19: 1.2111}))
Row(tfidf=SparseVector(20, {0: 0.2973, 1: 0.1642, 2: 0.2039, 3: 0.468, 4: 0.3275, 5: 0.3703, 6: 0.5401, 7: 0.5607, 8: 0.3725, 9: 0.4805, 10: 0.3976, 11: 0.2663, 12: 1.0982, 13: 0.4584, 14: 0.7734, 15: 0.5953, 16: 0.3489, 17: 0.4314, 18: 0.9552, 19: 0.4987}))
Row(tfidf=SparseVector(20,

Row(tfidf=SparseVector(20, {0: 0.6936, 1: 0.4925, 2: 0.4079, 3: 0.7019, 4: 0.8515, 5: 0.7407, 6: 1.003, 7: 0.9345, 8: 0.8071, 9: 0.6177, 10: 0.3408, 11: 0.7101, 12: 0.6758, 13: 0.5807, 14: 1.1171, 15: 0.6615, 16: 1.6572, 17: 1.1092, 18: 0.398, 19: 0.7124}))
Row(tfidf=SparseVector(20, {3: 0.156, 4: 0.0655, 6: 0.0772, 9: 0.1373, 10: 0.0568, 12: 0.1267, 13: 0.0611, 15: 0.1323, 16: 0.0872, 17: 0.1849, 18: 0.0796, 19: 0.0712}))
Row(tfidf=SparseVector(20, {0: 0.3963, 1: 0.6567, 2: 0.2039, 3: 0.39, 4: 0.4585, 5: 0.2963, 6: 0.3086, 7: 0.6542, 8: 0.2484, 9: 0.4805, 10: 0.6247, 11: 0.4438, 12: 0.887, 13: 0.2445, 14: 0.4297, 15: 0.6615, 16: 0.785, 17: 0.1849, 18: 0.0796, 19: 0.2137}))
Row(tfidf=SparseVector(20, {0: 0.4954, 1: 0.4104, 2: 0.9177, 3: 0.9359, 4: 0.393, 5: 0.9629, 6: 0.9259, 7: 0.5607, 8: 0.683, 9: 0.3432, 10: 0.5679, 11: 0.7101, 12: 0.718, 13: 0.5807, 14: 0.9453, 15: 0.6615, 16: 1.6572, 17: 0.4314, 18: 0.5572, 19: 0.7124}))
Row(tfidf=SparseVector(20, {0: 0.4954, 1: 0.6567, 2: 0.3059,

Row(tfidf=SparseVector(20, {0: 0.0495, 2: 0.102, 3: 0.078, 4: 0.0655, 5: 0.0741, 6: 0.0772, 7: 0.3738, 8: 0.0621, 9: 0.2745, 10: 0.1704, 12: 0.0422, 13: 0.1834, 14: 0.0859, 15: 0.0661, 16: 0.0872, 17: 0.1232, 18: 0.1592, 19: 0.1425}))
Row(tfidf=SparseVector(20, {0: 0.0991, 1: 0.1642, 2: 0.102, 3: 0.078, 4: 0.131, 6: 0.1543, 7: 0.2804, 8: 0.0621, 9: 0.2745, 10: 0.1704, 11: 0.0888, 12: 0.1689, 13: 0.2445, 14: 0.3437, 15: 0.0661, 17: 0.1849, 18: 0.1592, 19: 0.2137}))
Row(tfidf=SparseVector(20, {0: 0.5945, 1: 0.5746, 2: 0.2039, 3: 0.156, 4: 0.5895, 5: 0.5925, 6: 0.3858, 7: 0.9345, 8: 0.8692, 9: 0.6177, 10: 0.3408, 11: 0.5326, 12: 0.1689, 13: 0.4279, 14: 0.8593, 15: 0.1984, 16: 0.4361, 17: 0.1849, 18: 0.398, 19: 0.7836}))
Row(tfidf=SparseVector(20, {0: 0.0991, 4: 0.0655, 5: 0.0741, 6: 0.0772, 9: 0.0686, 13: 0.1222, 14: 0.2578, 15: 0.0661, 16: 0.0872, 18: 0.0796}))
Row(tfidf=SparseVector(20, {0: 0.0991, 2: 0.102, 5: 0.0741, 7: 0.0935, 8: 0.0621, 9: 0.0686, 10: 0.0568, 12: 0.0845, 13: 0.0611,

Row(tfidf=SparseVector(20, {2: 0.2039, 3: 0.234, 4: 0.0655, 13: 0.0611, 14: 0.0859, 15: 0.1323, 17: 0.0616, 18: 0.0796}))
Row(tfidf=SparseVector(20, {0: 8.1249, 1: 14.4468, 2: 13.6634, 3: 11.9331, 4: 16.1132, 5: 14.517, 6: 15.6628, 7: 16.1677, 8: 11.7346, 9: 11.6682, 10: 12.0403, 11: 13.5811, 12: 10.4326, 13: 7.61, 14: 14.0931, 15: 16.4049, 16: 16.5718, 17: 14.6043, 18: 12.6562, 19: 16.1001}))
Row(tfidf=SparseVector(20, {0: 0.5945, 1: 0.4104, 2: 0.7138, 3: 0.8579, 4: 1.31, 5: 0.8888, 6: 0.7716, 7: 1.1215, 8: 0.683, 9: 0.8236, 10: 0.7383, 11: 0.7101, 12: 0.3801, 13: 0.4584, 14: 1.1171, 15: 1.1245, 16: 0.9594, 17: 0.5546, 18: 1.3532, 19: 0.5699}))
Row(tfidf=SparseVector(20, {0: 1.4367, 1: 0.985, 2: 0.8157, 3: 1.0139, 4: 0.9825, 5: 1.0369, 6: 1.5431, 7: 0.8411, 8: 0.9934, 9: 0.9609, 10: 1.4198, 11: 2.3079, 12: 1.0982, 13: 0.6112, 14: 1.0312, 15: 1.1245, 16: 1.2211, 17: 1.6638, 18: 1.6716, 19: 1.7097}))
Row(tfidf=SparseVector(20, {0: 2.6753, 1: 3.0371, 2: 3.2629, 3: 4.5237, 4: 2.751, 5: 2.

Row(tfidf=SparseVector(20, {0: 9.958, 1: 12.1484, 2: 13.5614, 3: 13.103, 4: 13.8862, 5: 13.332, 6: 15.4313, 7: 13.2706, 8: 9.1269, 9: 11.7369, 10: 11.9835, 11: 16.1553, 12: 9.2077, 13: 6.3263, 14: 17.1867, 15: 15.6772, 16: 13.3446, 17: 17.8086, 18: 16.3178, 19: 13.108}))
Row(tfidf=SparseVector(20, {0: 24.4243, 1: 29.3861, 2: 30.8956, 3: 43.2089, 4: 29.0823, 5: 38.8849, 6: 29.7824, 7: 36.167, 8: 42.5922, 9: 32.1219, 10: 31.2933, 11: 35.5063, 12: 35.9439, 13: 25.9166, 14: 27.7565, 15: 43.0628, 16: 31.1375, 17: 37.8973, 18: 53.0925, 19: 34.4798}))
Row(tfidf=SparseVector(20, {0: 3.0221, 1: 1.8879, 2: 3.5688, 3: 4.0557, 4: 3.144, 5: 3.8515, 6: 3.0091, 7: 4.3924, 8: 2.2352, 9: 2.8141, 10: 3.2372, 11: 3.8169, 12: 2.9566, 13: 1.9254, 14: 4.5545, 15: 2.9105, 16: 3.6632, 17: 4.1286, 18: 3.9004, 19: 3.4907}))
Row(tfidf=SparseVector(20, {0: 36.9584, 1: 45.8029, 2: 64.8502, 3: 52.7242, 4: 49.9771, 5: 51.5503, 6: 55.5527, 7: 65.0445, 8: 54.4509, 9: 55.4584, 10: 82.8052, 11: 52.638, 12: 48.8685, 13: 