In [37]:
import json

import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objects as go
import requests
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Load the API key from the config.json file so the secret is not
# hard-coded in this script.
with open('config.json', encoding='utf-8') as f:
    config = json.load(f)

# Query settings for the NewsAPI top-headlines endpoint.
api_key = config['API_KEY']
topic = "technology"
num_articles = 10
url = "https://newsapi.org/v2/top-headlines"
params = {
    "apiKey": api_key,
    "q": topic,
    "pageSize": num_articles,
}

# Send one GET request to NewsAPI (the request used to be issued twice,
# which spent API quota for no benefit). The timeout keeps a stalled
# connection from hanging the script, and raise_for_status() surfaces an
# HTTP error here instead of as a confusing KeyError further down.
response = requests.get(url, params=params, timeout=10)
response.raise_for_status()
data = response.json()
# Build one "<title> <description>" string per article, skipping entries
# whose title or description is empty or None.
articles = []
for entry in data["articles"]:
    headline, summary = entry["title"], entry["description"]
    if not (headline and summary):
        continue
    articles.append(" ".join((headline, summary)))

# Data preprocessing: English stopword set and WordNet lemmatizer shared by
# every article.
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()

# For each article: lowercase and tokenize it, keep only alphanumeric tokens
# that are not stopwords, lemmatize what is left, and join the result back
# into a single space-separated string.
preprocessed_articles = [
    " ".join(
        lemmatizer.lemmatize(token)
        for token in word_tokenize(text.lower())
        if token.isalnum() and token not in stop_words
    )
    for text in articles
]

# TF-IDF calculation: fit the vectorizer on the cleaned articles; each row
# of the resulting sparse matrix corresponds to one article.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(preprocessed_articles)
feature_names = vectorizer.get_feature_names_out()

# Print the TF-IDF score of every term present in the first article.
# The sparse row stores its column indices and values in parallel arrays,
# so they are walked in lockstep.
first_article_tfidf = tfidf_matrix[0]
for column, weight in zip(first_article_tfidf.indices, first_article_tfidf.data):
    print(f"Term: {feature_names[column]}, TF-IDF Score: {weight}")


# User input: run the query through the same lowercase / tokenize / filter /
# lemmatize steps as the articles, then project it into the fitted TF-IDF
# vocabulary.
user_input = "Technology"
user_keywords = [
    lemmatizer.lemmatize(token)
    for token in word_tokenize(user_input.lower())
    if token.isalnum() and token not in stop_words
]
user_query = " ".join(user_keywords)
user_tfidf = vectorizer.transform([user_query])

# Recommendation algorithm: cosine similarity between the query vector and
# every article vector, flattened to one score per article.
similarity_scores = cosine_similarity(user_tfidf, tfidf_matrix).flatten()

# Top article recommendations: reverse the ascending argsort so the highest
# scores come first, then keep at most five indices.
descending_order = similarity_scores.argsort()[::-1]
top_indices = descending_order[:5]
top_articles = [articles[idx] for idx in top_indices]

# Print the recommendations as a 1-based numbered list.
print("Top Article Recommendations:")
for rank, text in enumerate(top_articles, start=1):
    print(f"{rank}. {text}")


# Layout for the similarity chart: titled axes, no legend.
layout = go.Layout(
    title="Similarity Scores of Articles",
    xaxis={"title": "Article Index"},
    yaxis={"title": "Similarity Score"},
    showlegend=False,
)

# Bar trace with one bar per article: x is the article's position in the
# score array (materialised as a list), y is its similarity score.
article_positions = list(range(len(similarity_scores)))
bar_plot = go.Bar(
    x=article_positions,
    y=similarity_scores,
    marker={"color": "rgb(158,202,225)"},
)

# Create the dashboard app
app = dash.Dash(__name__)

# Page content: a heading, the recommended articles as a bullet list, and
# the similarity bar chart.
recommendation_items = [html.Li(text) for text in top_articles]
similarity_figure = go.Figure(data=[bar_plot], layout=layout)

app.layout = html.Div([
    html.H1("Article Recommendation Dashboard"),
    html.H2("Top Article Recommendations:"),
    html.Ul(recommendation_items),
    dcc.Graph(figure=similarity_figure),
])

# Start the development server only when executed as a script.
if __name__ == '__main__':
    app.run_server(debug=True)



The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html
The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc


{'status': 'ok',
 'totalResults': 2,
 'articles': [{'source': {'id': 'the-verge', 'name': 'The Verge'},
   'author': None,
   'title': 'The Verge',
   'description': 'The Verge is about technology and how it makes us feel. Founded in 2011, we offer our audience everything from breaking news to reviews to award-winning features and investigations, on our site, in video, and in podcasts.',
   'url': 'http://www.theverge.com/',
   'urlToImage': 'https://cdn.vox-cdn.com/uploads/chorus_asset/file/23966628/the_verge_social_share.png',
   'publishedAt': '2023-06-26T01:52:17.4158176Z',
   'content': 'Elon Musk thinks his fight with Mark Zuckerberg might actually happen.\r\nIn a Twitter Spaces session with Bloomberg reporter Ashlee Vance, at the 6:20 mark of the recording, Musk, who was at a friends… [+696 chars]'},
  {'source': {'id': 'newsweek', 'name': 'Newsweek'},
   'author': 'Mark Davis',
   'title': 'Newsweek',
   'description': 'Newsweek provides in-depth analysis, news and opinion abou

Term: podcasts, TF-IDF Score: 0.22934037812132071
Term: video, TF-IDF Score: 0.22934037812132071
Term: site, TF-IDF Score: 0.22934037812132071
Term: investigation, TF-IDF Score: 0.22934037812132071
Term: feature, TF-IDF Score: 0.22934037812132071
Term: review, TF-IDF Score: 0.22934037812132071
Term: news, TF-IDF Score: 0.1631775679084811
Term: breaking, TF-IDF Score: 0.22934037812132071
Term: everything, TF-IDF Score: 0.22934037812132071
Term: audience, TF-IDF Score: 0.22934037812132071
Term: offer, TF-IDF Score: 0.22934037812132071
Term: 2011, TF-IDF Score: 0.22934037812132071
Term: founded, TF-IDF Score: 0.22934037812132071
Term: feel, TF-IDF Score: 0.22934037812132071
Term: make, TF-IDF Score: 0.22934037812132071
Term: technology, TF-IDF Score: 0.1631775679084811
Term: verge, TF-IDF Score: 0.45868075624264143
Top Article Recommendations:
1. Newsweek Newsweek provides in-depth analysis, news and opinion about international issues, technology, business, culture and politics.
2. The Ve