### Generate a recommendation model based on content filtering in python
### At least use the description field. You can decide whether to combine the decription with other fields. You decide.
### Save this model in a .sav file

In [1]:
import pandas as pd

articles_df = pd.read_csv("data/shared_articles.csv")

display(articles_df.head())

Unnamed: 0,timestamp,eventType,contentId,authorPersonId,authorSessionId,authorUserAgent,authorRegion,authorCountry,contentType,url,title,text,lang
0,1459192779,CONTENT REMOVED,-6451309518266745024,4340306774493623681,8940341205206233829,,,,HTML,http://www.nytimes.com/2016/03/28/business/dea...,"Ethereum, a Virtual Currency, Enables Transact...",All of this work is still very early. The firs...,en
1,1459193988,CONTENT SHARED,-4110354420726924665,4340306774493623681,8940341205206233829,,,,HTML,http://www.nytimes.com/2016/03/28/business/dea...,"Ethereum, a Virtual Currency, Enables Transact...",All of this work is still very early. The firs...,en
2,1459194146,CONTENT SHARED,-7292285110016212249,4340306774493623681,8940341205206233829,,,,HTML,http://cointelegraph.com/news/bitcoin-future-w...,Bitcoin Future: When GBPcoin of Branson Wins O...,The alarm clock wakes me at 8:00 with stream o...,en
3,1459194474,CONTENT SHARED,-6151852268067518688,3891637997717104548,-1457532940883382585,,,,HTML,https://cloudplatform.googleblog.com/2016/03/G...,Google Data Center 360° Tour,We're excited to share the Google Data Center ...,en
4,1459194497,CONTENT SHARED,2448026894306402386,4340306774493623681,8940341205206233829,,,,HTML,https://bitcoinmagazine.com/articles/ibm-wants...,"IBM Wants to ""Evolve the Internet"" With Blockc...",The Aite Group projects the blockchain market ...,en


In [2]:
articles_df.isna().sum()

timestamp             0
eventType             0
contentId             0
authorPersonId        0
authorSessionId       0
authorUserAgent    2442
authorRegion       2442
authorCountry      2442
contentType           0
url                   0
title                 0
text                  0
lang                  0
dtype: int64

In [3]:
articles_df.dropna(inplace=True)
articles_df = articles_df[articles_df['lang'] == 'en']

articles_df.isna().sum()

timestamp          0
eventType          0
contentId          0
authorPersonId     0
authorSessionId    0
authorUserAgent    0
authorRegion       0
authorCountry      0
contentType        0
url                0
title              0
text               0
lang               0
dtype: int64

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 10)

#Step 1: Import/build your stop word dictionary

#Step 2: Create a TfidfVectorizer and remove stop words
tfidf = TfidfVectorizer(stop_words='english')

articles_df['combined'] = articles_df['title'] + " " + articles_df['text'] #combine features for better accuracy

#Step 3: Fit and transform the data to a tfidf matrix
tfidf_matrix = tfidf.fit_transform(articles_df['combined'])

print(tfidf_matrix)

#to help us interpret the results, view it in a DataFrame
df_tfidf = pd.DataFrame(tfidf_matrix.T.todense(), index=tfidf.get_feature_names_out(), columns=articles_df['title'])


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 136919 stored elements and shape (473, 19470)>
  Coords	Values
  (0, 9274)	0.009554934467554729
  (0, 5503)	0.4155979893551253
  (0, 19156)	0.6926633155918757
  (0, 15609)	0.21224278343607936
  (0, 256)	0.03528170872174988
  (0, 2336)	0.023983049355927315
  (0, 17692)	0.01078797255248374
  (0, 11031)	0.11964713779648416
  (0, 1289)	0.011836389558532352
  (0, 7535)	0.007285044294131887
  (0, 1815)	0.011157045936076741
  (0, 8643)	0.016004858638758793
  (0, 3926)	0.009554934467554729
  (0, 1662)	0.015386428366421175
  (0, 5120)	0.05051581775651578
  (0, 13712)	0.10772326783847641
  (0, 6450)	0.020841327992781038
  (0, 11703)	0.014736607367102528
  (0, 6814)	0.007534330962593222
  (0, 4099)	0.20082682684938136
  (0, 13315)	0.010611874004709108
  (0, 13273)	0.013539668462971721
  (0, 5090)	0.016305522469861677
  (0, 17341)	0.008364424811094293
  (0, 9043)	0.011994801008154084
  :	:
  (471, 4575)	0.043466842279494856
  (471, 1468

In [5]:
from sklearn.metrics.pairwise import linear_kernel

cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

df_results = pd.DataFrame(cosine_sim, index=articles_df['title'], columns=articles_df['title'])

df_results


#store df_results as a csv file. open it and sort it by the show you're interested in and the next 5 rows are the most closely related shows.
# df_results.sort_values(by=['title'], ascending=False)

title,Introducing Docker for Windows Server 2016 - Docker Blog,Microsoft and Bank of America Merrill Lynch Collaborate to Transform Trade Finance Transacting With Azure Blockchain as a Service,Digitization and The Loss of Iconography - Posted by SYPartners,"Google saves Allo conversations, a win for machine learning but a loss for privacy - Tech2","A Neural Network for Machine Translation, at Production Scale","Onion Creates a $5 Linux Computer with Wi-Fi, Designed for IoT Applications",​Kubernetes 1.4: One DevOps tool to rule all the containers | ZDNet,Today in Apple history: 'Here's to the crazy ones...' | Cult of Mac,Baidu Research grills AI models on deep learning,Your health insurance might score you an Apple Watch,Bitcoin Accepted! German Energy Giant Enables Payments - CCN: Financial Bitcoin & Cryptocurrency News,Not Your Father's Insurance Industry - Insurance Thought Leadership,Don't Waste Your Time on Networking Events,Digital development with online check-in and mobile room keys,Blockchain Will Be Used By 15% of Big Banks By 2017,How CIOs Leverage Business Intelligence to Drive Smarter Decision-Making,Six iconic retailers and their digital transformation journeys,The future of traditional retail is digital,Large CPGs are under attack by startups... and consumers are winning,Breaking: Apple's Search Ads go LIVE!,"Use Docker to build, test and push your Artifacts",IBM Watson: Talking To An Ad,Bringing Pokémon GO to life on Google Cloud,Google's Cloud Machine Learning service is now in public beta,All Together Now. Introducing G Suite.,Save time with Quick Access in Drive,Google Cloud Platform sets a course for new horizons,New Early Adopter Programs: Team Drives and a new Hangouts video meetings experience,"Explore in Docs, Sheets and Slides makes work a breeze - and makes you look good, too",Tracking Multiple Categories in Google Analytics for Content Pages - E-Nor | Google Analytics Blog,Pury - New Way to Profile Your Android Application,"Facebook, Amazon, Google, IBM and Microsoft come together to create historic Partnership on AI","Largest botnet attack in history peaks at over 1 terabits per second, aided by Internet of Things",The new tech talent you need to succeed in digital,Blog | Niantic,Innovation is in all the wrong places,Introducing the Open Images Dataset,The Best Advice From Quora on 'How to Learn Machine Learning',So you want to sell to banks?,Most experts say AI isn't as much of a threat as you might think,Engineers Shouldn't Write ETL: A Guide to Building a High Functioning Data Science Department,"Meerkat, star app of 2015, is officially dead",Advanced Android Espresso,JPMorgan is Quietly Developing a Private Ethereum Blockchain - CoinDesk,"Simple rolls out shared bank accounts that work for anyone, including roommates",New APIs Expand Line's Third Party Integration and Chatbot Capabilities,Use Poka-Yoke Technique to Improve Software Quality,Future Of Consulting: Consultancies Adapt To Digital Change,How Robots Can Acquire New Skills from Their Shared Experience,New Trend Briefing from TrendWatching | AUTONOMANIA!,...,Centrally manage all your Google Cloud resources with Cloud Resource Manager,Expanding our IDE support with a new Eclipse plugin for App Engine,Guest post: building IoT applications with MQTT and Google Cloud Pub/Sub,Behind the Scam: What Does It Take to Be a 'Best-Selling Author'? $3 and 5 Minutes. - The Mission,BACK FROM THE DEAD - TrendWatching,Speed Up a WordPress Website in 8 Steps,The Complete Google Analytics Audit Checklist | Optimization Up,Java 8 Streams - A Deeper Approach About Performance Improvement,Tom Brady: Business Guru. Who Knew?,How to get the most from your agency relationships in 2017,Introducing Google Cloud Search: Bringing the power of Google Search to G Suite customers,Gmail API: New endpoints for settings,Here's how Evernote moved 3 petabytes of data to Google's cloud,Sharing innovation with your competitors - Dries Buytaert,Python - It's metaclasses all the way down,What Is a Good Retention Rate for Online Software,How do you sell SaaS software to enterprise businesses?,How Mobile Health Apps and Wearables Could Actually Make People Sicker,Introducing Google Cloud Search: Bringing the power of Google Search to G Suite customers,Salesforce adds some artificial intelligence to customer service products,Banking Needs a Customer Experience Wake-Up Call,"Razorfish, US digital revenues, drag down Publicis","Google Cloud Endpoints now generally available: a fast, scalable API gateway",Now sites can fingerprint you online even when you use multiple browsers,Introducing Cloud Spanner: a global database service for mission-critical applications,Dries Distributions remain a growing opportunity for Drupal - Buytaert,Don't document your code. Code your documentation.,"What do you mean by ""Event-Driven""?","Spanner, the Google Database That Mastered Time, Is Now Open to Everyone",Lean and banking: the pot of gold waiting to be discovered | Blog post,IBM wants to bring machine learning to the mainframe,Former Google career coach shares a visual trick for figuring out what to do with your life,When to make a Git Commit,"Amazon looking to buy Capital One? "" Banking Technology",3 Big Blockchain Ideas MIT is Working on Right Now - CoinDesk,How to build cross-platform mobile apps using nothing more than a JSON markup,Amazon EBS Update - New Elastic Volumes Change Everything | AWS Blog,My experience with Google's Associate Android Developer Certification,Concrete things you can do about your technical debt,Optimizing the Performance of Vector Drawables - upday devs,Top 10 Insurtech Trends for 2017 - Insurance Thought Leadership,Life Beyond Email: Chatbot Marketing,Another option for file sharing,Gartner Reprint,Command-line utilities,Swift Top 10 Articles For The Past Year (v.2017),Amazon takes on Skype and GoToMeeting with its Chime video conferencing app,Code.org 2016 Annual Report,"JPMorgan Software Does in Seconds What Took Lawyers 360,000 Hours",The 2017 Acquia Partners of the Year
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
Introducing Docker for Windows Server 2016 - Docker Blog,1.000000,0.039629,0.011760,0.010833,0.016796,0.043153,0.138733,0.008398,0.015812,0.004686,0.008508,0.010789,0.012906,0.003162,0.011779,0.016893,0.019479,0.011509,0.014094,0.037825,0.395282,0.004301,0.043046,0.023125,0.030339,0.012983,0.050984,0.013735,0.023030,0.009565,0.024384,0.023318,0.014000,0.038118,0.006111,0.021814,0.051254,0.009254,0.015021,0.007109,0.019548,0.010007,0.019266,0.015608,0.019596,0.024682,0.010247,0.000432,0.018772,0.003222,...,0.040825,0.026688,0.015943,0.009191,0.013539,0.050289,0.012825,0.026864,0.008790,0.014889,0.017645,0.024823,0.023537,0.015049,0.013256,0.007529,0.014967,0.018659,0.017645,0.015713,0.009663,0.008419,0.051065,0.017071,0.025499,0.018169,0.008848,0.018993,0.018003,0.023243,0.018193,0.006884,0.015825,0.011346,0.012460,0.043599,0.008216,0.011251,0.012735,0.038041,0.019009,0.019145,0.028905,0.006468,0.032860,0.012320,0.061094,0.017990,0.020613,0.004823
Microsoft and Bank of America Merrill Lynch Collaborate to Transform Trade Finance Transacting With Azure Blockchain as a Service,0.039629,1.000000,0.006639,0.011124,0.013881,0.007533,0.023408,0.009170,0.011039,0.018210,0.053726,0.033044,0.024956,0.016901,0.264978,0.061319,0.047793,0.048417,0.049483,0.017171,0.014339,0.012754,0.025484,0.055774,0.036663,0.024757,0.061803,0.032665,0.012660,0.007596,0.015671,0.070132,0.019395,0.089417,0.016985,0.037592,0.012833,0.008578,0.152895,0.024984,0.024641,0.010128,0.015356,0.102368,0.097798,0.019222,0.016491,0.008107,0.009503,0.008743,...,0.029214,0.019945,0.025881,0.019544,0.009638,0.015175,0.024408,0.010752,0.014102,0.036066,0.033519,0.009377,0.043386,0.026208,0.005845,0.020949,0.047591,0.030875,0.033519,0.022700,0.160437,0.044091,0.031607,0.022551,0.047199,0.019710,0.006274,0.015391,0.073842,0.071464,0.022986,0.007265,0.006626,0.171032,0.114284,0.012651,0.008240,0.025887,0.009454,0.008108,0.087682,0.026910,0.026233,0.008499,0.011897,0.020786,0.035197,0.049371,0.141245,0.012929
Digitization and The Loss of Iconography - Posted by SYPartners,0.011760,0.006639,1.000000,0.036262,0.015882,0.027383,0.014724,0.037558,0.009365,0.005867,0.014729,0.011861,0.031963,0.000000,0.017769,0.012935,0.028545,0.024084,0.038179,0.024893,0.038186,0.004061,0.066146,0.015884,0.038317,0.024064,0.016940,0.027105,0.042184,0.011989,0.030514,0.018668,0.023569,0.036474,0.040238,0.050568,0.033314,0.012051,0.023028,0.022144,0.032736,0.015464,0.028466,0.008311,0.014893,0.011422,0.016900,0.016492,0.014530,0.003790,...,0.013454,0.009925,0.006513,0.034396,0.033197,0.024970,0.027925,0.026310,0.053985,0.025232,0.017923,0.008833,0.020796,0.016430,0.015171,0.025342,0.026086,0.028649,0.017923,0.031224,0.015023,0.024921,0.009264,0.018577,0.007952,0.013905,0.015512,0.031397,0.034428,0.032099,0.036111,0.033488,0.032347,0.014207,0.018749,0.022288,0.011292,0.021173,0.015008,0.040009,0.023949,0.029559,0.017273,0.005550,0.034441,0.009001,0.007636,0.045450,0.031906,0.000000
"Google saves Allo conversations, a win for machine learning but a loss for privacy - Tech2",0.010833,0.011124,0.036262,1.000000,0.116335,0.011539,0.042788,0.016874,0.076222,0.021953,0.006264,0.011960,0.012335,0.007889,0.020380,0.023107,0.039962,0.044469,0.020538,0.070407,0.019137,0.014973,0.122507,0.323409,0.069931,0.059485,0.182491,0.060794,0.070379,0.064608,0.021736,0.044785,0.027567,0.058231,0.039083,0.017528,0.054462,0.077242,0.034937,0.028920,0.053911,0.032705,0.037295,0.028583,0.016984,0.098042,0.018557,0.011531,0.042534,0.005594,...,0.021111,0.060762,0.071883,0.013233,0.014334,0.052663,0.121021,0.043107,0.025675,0.064010,0.124896,0.039234,0.134861,0.017226,0.004694,0.030957,0.034545,0.018242,0.124896,0.024291,0.041301,0.021043,0.100506,0.042425,0.069966,0.010367,0.006206,0.015146,0.206338,0.028170,0.071576,0.042034,0.004596,0.030462,0.043940,0.019578,0.000970,0.076681,0.003127,0.022380,0.039803,0.104779,0.035452,0.008096,0.024818,0.013367,0.037187,0.024828,0.039143,0.007194
"A Neural Network for Machine Translation, at Production Scale",0.016796,0.013881,0.015882,0.116335,1.000000,0.005704,0.024557,0.014672,0.115912,0.008067,0.009634,0.014539,0.014745,0.006407,0.015950,0.023708,0.027306,0.026178,0.017467,0.032208,0.013245,0.006137,0.055141,0.181001,0.031227,0.039137,0.106164,0.049054,0.049614,0.030865,0.014780,0.037631,0.017360,0.061912,0.017232,0.015658,0.069832,0.105532,0.014998,0.039575,0.042800,0.020810,0.018112,0.020871,0.013169,0.017244,0.019309,0.014718,0.048000,0.006440,...,0.009894,0.023663,0.036412,0.014785,0.020478,0.027174,0.051041,0.049774,0.014139,0.026992,0.088515,0.018950,0.065721,0.026105,0.016054,0.021587,0.024538,0.021105,0.088515,0.023511,0.032102,0.010047,0.053416,0.031429,0.029111,0.024745,0.012402,0.024072,0.074611,0.028138,0.094560,0.019889,0.021377,0.010570,0.025044,0.019259,0.007112,0.028455,0.015996,0.027589,0.032586,0.028362,0.018488,0.006019,0.015950,0.039714,0.014261,0.024304,0.044707,0.006076
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Swift Top 10 Articles For The Past Year (v.2017),0.012320,0.020786,0.009001,0.013367,0.039714,0.018528,0.020952,0.046023,0.023754,0.019062,0.003926,0.014065,0.008667,0.002982,0.017972,0.019377,0.034744,0.016389,0.028668,0.034922,0.004231,0.003168,0.021703,0.031014,0.010077,0.010582,0.042542,0.014677,0.035074,0.047210,0.014065,0.017716,0.013810,0.054969,0.012918,0.022558,0.036728,0.041527,0.013861,0.025031,0.052344,0.010061,0.016447,0.027650,0.046711,0.022643,0.014379,0.000000,0.030366,0.009474,...,0.003194,0.014445,0.018092,0.028763,0.042916,0.021960,0.031789,0.036787,0.014577,0.032874,0.028199,0.010933,0.022267,0.065734,0.008393,0.012389,0.020499,0.017370,0.028199,0.010519,0.037577,0.015206,0.014170,0.014087,0.015169,0.015169,0.006844,0.029715,0.028805,0.017126,0.068519,0.036843,0.015226,0.015699,0.007439,0.034131,0.001744,0.016186,0.019210,0.006821,0.054785,0.038023,0.014078,0.022456,0.016897,1.000000,0.024469,0.030348,0.024876,0.046336
Amazon takes on Skype and GoToMeeting with its Chime video conferencing app,0.061094,0.035197,0.007636,0.037187,0.014261,0.021926,0.030135,0.020264,0.012827,0.011461,0.026569,0.021194,0.030269,0.023146,0.022537,0.046369,0.061487,0.020258,0.040453,0.042330,0.011086,0.018003,0.013869,0.047395,0.050126,0.053676,0.031542,0.090254,0.036238,0.010795,0.019591,0.055287,0.025595,0.051124,0.006145,0.053164,0.007449,0.003309,0.039189,0.016704,0.024904,0.049104,0.028264,0.022059,0.050489,0.058662,0.016314,0.002982,0.035768,0.005040,...,0.014950,0.028192,0.016129,0.072513,0.018341,0.053400,0.047324,0.025157,0.016689,0.030287,0.028872,0.014395,0.030824,0.015551,0.014314,0.045835,0.051649,0.034750,0.028872,0.015915,0.021928,0.017239,0.040261,0.032787,0.021740,0.011279,0.007959,0.013212,0.036394,0.034534,0.021291,0.029626,0.017653,0.077980,0.019862,0.055479,0.034465,0.037406,0.013431,0.016540,0.041048,0.068314,0.045662,0.006378,0.030692,0.024469,1.000000,0.029675,0.034028,0.005829
Code.org 2016 Annual Report,0.017990,0.049371,0.045450,0.024828,0.024304,0.051310,0.020238,0.034460,0.029380,0.013922,0.031096,0.029401,0.027305,0.013097,0.023258,0.021730,0.042256,0.014465,0.036235,0.042151,0.015467,0.012769,0.026690,0.052987,0.023296,0.020839,0.047138,0.039192,0.026804,0.031509,0.023942,0.030559,0.013035,0.059073,0.018526,0.029775,0.031998,0.089787,0.030137,0.071174,0.091510,0.021803,0.024212,0.031949,0.032060,0.023529,0.012164,0.011953,0.036629,0.017541,...,0.013637,0.022537,0.018307,0.038161,0.054409,0.027732,0.037224,0.065941,0.036257,0.045377,0.020644,0.014470,0.030953,0.046554,0.041938,0.035033,0.026006,0.019431,0.020644,0.015262,0.042572,0.025228,0.017856,0.044827,0.039876,0.031644,0.113159,0.032737,0.041656,0.039958,0.036662,0.049665,0.022764,0.021859,0.018091,0.020479,0.013728,0.040533,0.047601,0.016991,0.039305,0.040323,0.019503,0.022679,0.023725,0.030348,0.029675,1.000000,0.059248,0.081730
"JPMorgan Software Does in Seconds What Took Lawyers 360,000 Hours",0.020613,0.141245,0.031906,0.039143,0.044707,0.014316,0.036103,0.037244,0.069779,0.026300,0.033571,0.058961,0.070718,0.012313,0.099677,0.081865,0.068705,0.064710,0.061821,0.036526,0.025449,0.018819,0.036513,0.125304,0.061875,0.048920,0.121357,0.068129,0.037863,0.016710,0.033316,0.076680,0.039928,0.128488,0.022325,0.067290,0.033579,0.066256,0.149234,0.042495,0.061685,0.028811,0.028632,0.302615,0.077202,0.048957,0.035537,0.031240,0.040932,0.025987,...,0.032894,0.047457,0.038562,0.039469,0.035580,0.033797,0.049785,0.036275,0.039660,0.064084,0.104824,0.026386,0.089581,0.032235,0.022636,0.042688,0.060950,0.040063,0.104824,0.043058,0.090672,0.049985,0.051949,0.038606,0.072704,0.026908,0.014874,0.046122,0.060902,0.062641,0.109506,0.038918,0.031801,0.098510,0.085429,0.029043,0.019509,0.027842,0.017779,0.020766,0.086931,0.055501,0.039256,0.012557,0.026306,0.024876,0.034028,0.059248,1.000000,0.042435


In [6]:
df_results.to_csv("../IS 455/content_filtering.csv", index=False)