In [None]:
import os
import re
import string

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
import gensim
from gensim import corpora, models, similarities

import nltk
from nltk.corpus import stopwords

In [None]:
import scipy
from scipy.spatial.distance import cdist
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from scipy.linalg import svd

In [None]:
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import silhouette_score, silhouette_samples
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel
from sklearn.model_selection import train_test_split

# Reading data sets

1. news_articles - Raw article metadata only, with no user-interaction data
2. user_interest - Articles joined with per-user interaction data (session, article rank, click, time spent), using Article_id as the foreign key

## news_articles

In [None]:
# Load the raw articles table (articles only, no user-interaction columns).
# The GitHub raw-content access token is a credential, so it is read from the
# NEWS_ARTICLES_TOKEN environment variable instead of being committed with the
# notebook. Leave the variable unset if the file is publicly readable.
news_articles_url = 'https://raw.githubusercontent.com/VividhPandey003/newsRecomm/main/data/0_news_articles.csv'
news_articles_token = os.environ.get('NEWS_ARTICLES_TOKEN')
if news_articles_token:
    news_articles_url += '?token=' + news_articles_token

data = pd.read_csv(news_articles_url)
data.head()

Unnamed: 0,Article_id,Title,Description,Date,Category,URL
0,0,Fire at Vaishno Devi shrine complex; cash coun...,"No one was injured in the fire, which broke ou...","June 8, 2021 7:28:32 pm",India,https://indianexpress.com/article/india/vaishn...
1,1,"Had not gone to meet Nawaz Sharif, says Uddhav...",Uddhav Thackeray led a delegation of his cabin...,"June 8, 2021 6:56:40 pm",India,https://indianexpress.com/article/india/had-no...
2,2,Corruption case: Former Haryana I-T deputy com...,It was in 2016 that the CBI had arrested Nitin...,"June 8, 2021 6:25:24 pm",India,https://indianexpress.com/article/india/corrup...
3,3,Kannur MP K Sudhakaran appointed chief of Cong...,Sudhakaran will replace Ramachandran who had a...,"June 8, 2021 5:04:40 pm",India,https://indianexpress.com/article/india/sudhak...
4,4,"Kerala girl of Class 5 writes to CJI, lauds SC...",Chief Justice N V Ramana responded to the Clas...,"June 8, 2021 4:43:10 pm",India,https://indianexpress.com/article/india/kerala...


## user_interest

In [None]:
# Load the user-interaction table (articles joined with per-user activity).
# The GitHub raw-content access token is a credential, so it is read from the
# USER_INTEREST_TOKEN environment variable instead of being committed with the
# notebook. Leave the variable unset if the file is publicly readable.
user_interest_url = 'https://raw.githubusercontent.com/VividhPandey003/newsRecomm/main/data/2_user_interest.csv'
user_interest_token = os.environ.get('USER_INTEREST_TOKEN')
if user_interest_token:
    user_interest_url += '?token=' + user_interest_token

user = pd.read_csv(user_interest_url)
print(user.shape)  # shape as read, i.e. before the first column is removed

# Remove the first column by position (presumably the index column saved in the
# CSV - TODO confirm). `columns=` already selects the column axis, so no
# `axis=` argument is needed, and rebinding avoids in-place mutation.
user = user.drop(columns=user.columns[0])
user.head()


(1187, 11)


Unnamed: 0,Article_id,Title,Description,Date,URL,UserId,SessionId,Article Rank,Click,Time Spent (seconds)
0,0,Fire at Vaishno Devi shrine complex; cash coun...,"No one was injured in the fire, which broke ou...","June 8, 2021 7:28:32 pm",https://indianexpress.com/article/india/vaishn...,1,1,1,True,81
1,2,Corruption case: Former Haryana I-T deputy com...,It was in 2016 that the CBI had arrested Nitin...,"June 8, 2021 6:25:24 pm",https://indianexpress.com/article/india/corrup...,1,1,3,True,49
2,5,Madhya Pradesh govt gets HC notice on communal...,Hearing a petition on the matter filed by form...,"June 8, 2021 4:10:12 pm",https://indianexpress.com/article/india/mp-hc-...,1,1,6,True,19
3,6,Uddhav Thackeray meets PM Modi; discusses Mara...,Deputy Chief Minister and senior NCP leader Aj...,"June 8, 2021 2:25:23 pm",https://indianexpress.com/article/india/mahara...,1,1,7,True,33
4,7,"New Covid-19 vaccination guidelines out, alloc...","As per the revised guidelines, the Centre will...","June 8, 2021 4:27:29 pm",https://indianexpress.com/article/india/govt-r...,1,1,8,True,71


## Content Based

In [None]:
# Represent every title in `user` as a TF-IDF vector built from word tokens.
vectorizer = TfidfVectorizer(analyzer = 'word')
# Learn the vocabulary from the titles and vectorise them in a single pass;
# the result has one row per row of `user`.
tfidf_matrix = vectorizer.fit_transform(user['Title'])
# Display the matrix dimensions: (number of titles, vocabulary size).
tfidf_matrix.shape

(1187, 4589)

In [None]:
# Row labels of `user`, wrapped in a Series so that `recommendation` can map
# the key it is given to a row of the similarity matrix.
indices = pd.Series(user['Title'].index)

# Pairwise dot products between all title vectors. TfidfVectorizer L2-normalises
# its rows by default, so these dot products are cosine similarities.
# NOTE(review): this rebinds the name `cosine_similarity`, hiding the sklearn
# function of the same name imported earlier in the notebook.
cosine_similarity = linear_kernel(tfidf_matrix, tfidf_matrix)

In [None]:
# making predictions

def recommendation(index, method, top_n=10):
    """Return the titles of the articles most similar to one article.

    Parameters
    ----------
    index : int
        Key into the notebook-level ``indices`` Series; the value stored
        there is used as the row position of the query article.
    method : array-like
        Pairwise similarity scores, indexable by row position:
        ``method[i]`` is the sequence of scores of row ``i`` against every
        row of ``user``.
    top_n : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` entries of ``user['Title']``, ordered from most to
        least similar. The query row itself is never part of the result.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    query_pos = indices[index]
    scores = np.asarray(method[query_pos])

    # Stable descending order: rows with equal scores stay in ascending row
    # order, matching what sorted(..., reverse=True) produces.
    ranked = np.argsort(-scores, kind='stable')

    # Exclude the query row by position instead of assuming it is ranked
    # first. With tied scores (duplicate titles, or a title whose TF-IDF
    # vector is all zeros) another row can sort ahead of the query, and
    # skipping "the first hit" would then recommend the article to itself.
    ranked = ranked[ranked != query_pos][:top_n]

    # Return the most similar article titles, keeping their row labels.
    return user['Title'].iloc[ranked]

In [None]:
# Draw a single row of `user` at random to serve as the query article.
# NOTE(review): the name `input` hides the Python builtin of the same name for
# the rest of the notebook, and no random_state is given, so a different row is
# drawn on every run.
input = user.sample(n=1)
input

Unnamed: 0,Article_id,Title,Description,Date,URL,UserId,SessionId,Article Rank,Click,Time Spent (seconds)
290,552,Abduction by ‘fake CBI officers’: Kolkata Poli...,“Three persons have been arrested. There are m...,"May 27, 2021 7:20:22 am",https://indianexpress.com/article/india/abduct...,542,537,3,True,97


In [None]:
# Pass the sampled row to the recommender: `input` is the one-row frame drawn
# from `user`, so its single index label identifies the query article. Using
# the label keeps the recommendations tied to the article displayed above
# instead of to a fixed, unrelated row number.
recommendation(input.index[0], cosine_similarity)

291     Congress says new IT rules dire, drastic, drac...
1004    Four months after giving birth, Ashley Graham ...
215     Take action against institutions giving Covid ...
64      Aligarh hooch tragedy: Congress slams BJP govt...
271            Kerala Congress unit chief wants to resign
333     ‘Toolkit’ probe: Police notice to its leaders,...
317     Coronavirus India Highlights: Noting stabilisa...
337     ‘Manipulated Media’ tag, Delhi Police’s visit:...
582     Naseem Shah set to join bio-secure bubble for ...
623     ‘Moin Khan scolded me as well’: Younis Khan re...
Name: Title, dtype: object