In [55]:
import pandas as pd
df = pd.read_csv('recommendation.csv')

In [56]:
df.dropna(subset = ['title_summary','date'], inplace = True)
print(df.isna().sum())

Unnamed: 0.1     0
Unnamed: 0       0
link             0
text             0
title            0
date             0
keywords         0
summary          0
title_summary    0
dtype: int64


In [57]:
df['tags'] = df['text'] + df['title'] + df['keywords'] + df['summary']

In [68]:
import spacy
nlp=spacy.load('en_core_web_sm')

In [69]:
def lemmatiz(text):
    l=[]
    doc=nlp(text)
    for i in doc:
        l.append(i.lemma_)

    return " ".join(l)

In [70]:
df['tags']=df['tags'].apply(lemmatiz)

In [80]:
df['tags']=df['tags'].apply(lambda x:x.lower())
df.head(1)
df['tags'][10]

'update : the oracle -tiktok tie - up be confirm as a concept . oracle put out a release early today say that be party to an agreement in which it would partner with tiktok , and that the proposal have be submit to the us government . here ’ oracle ’s statement in full : \n\n oracle confirm secretary mnuchin ’s statement that it be part of the proposal submit by bytedance to the treasury department over the weekend in which oracle will serve as the trust technology provider . oracle have a 40 - year track record provide secure , highly performant technology solution . \n\n original article : enterprise provider oracle be say to have win the bidding war for the u.s . operation of tiktok , a chase in which microsoft be boot from early today . \n\n a tiktok spokesperson say the company " [ do not ] comment on rumor or speculation . " oracle do not immediately respond to techcrunch for comment . \n\n the wall street journal write that oracle , a rare ally of the trump administration in sil

In [81]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# TF-IDF vectorization for content-based recommendations
tfidf_vectorizer = TfidfVectorizer(max_features=6000, stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['tags'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)



In [82]:
tfidf_matrix.toarray()[0]

array([0.        , 0.01281591, 0.        , ..., 0.        , 0.        ,
       0.        ])

In [83]:
cosine_sim.shape

(1689, 1689)

In [84]:
tfidf_vectorizer.get_feature_names_out()[1000:1015]

array(['cancel', 'cancellation', 'cancer', 'candidate', 'canoo', 'cap',
       'capability', 'capable', 'capacity', 'capital', 'capitalist',
       'capitalization', 'capitalize', 'capri', 'captain'], dtype=object)

In [85]:
def recommend_articles(user_activity, num_recommendations=10):
    # Calculate user's profile based on their activity (e.g., articles read)
    user_profile = tfidf_vectorizer.transform([user_activity]).toarray()

    # Calculate similarity between user profile and all articles
    cosine_scores = cosine_similarity(user_profile, tfidf_matrix)

    # Get indices of articles sorted by similarity score
    article_indices = cosine_scores.argsort()[0][::-1]

    # Recommend top num_recommendations articles
    recommended_articles = df.iloc[article_indices[:num_recommendations]]

    # Create a DataFrame with titles and links
    recommended_df = recommended_articles[['title', 'link']]
    
    return recommended_df


In [86]:
# Example usage to recommend 5 articles
recommended_df = recommend_articles('Musk is the CEO and founder of Tesla, SpaceX, Boring Co., and X.AI, among other ventures. In 2022, he purchased Twitter for $44 billion. In an exclusive excerpt from his new biography', num_recommendations=5)

# Display the recommended articles with titles and links
for i, row in recommended_df.iterrows():
    print(f"{i + 1}. Title: {row['title']}")
    print(f"   Link: {row['link']}\n")


1113. Title: Elon Musk is now richer than Mark Zuckerberg
   Link: https://mashable.com/article/elon-musk-third-richest-person/

1840. Title: Tesla shares rebound from steep rout
   Link: https://www.reuters.com/article/us-tesla-stocks-idUSKBN261334

1839. Title: Tesla shares rebound from steep rout
   Link: https://ca.reuters.com/article/us-tesla-stocks-idCAKBN261334

1979. Title: Tesla's Musk discusses vaccine project, car plant with German politicians
   Link: https://www.reuters.com/article/us-tesla-musk-germany-idUSKBN25T12I

1506. Title: Elon Musk demonstrates Neuralink’s tech live using pigs with surgically implanted brain-monitoring devices – TechCrunch
   Link: http://techcrunch.com/2020/08/28/elon-musk-demonstrates-neuralinks-tech-live-using-pigs-with-surgically-implanted-brain-monitoring-devices/

