In [None]:
! pip install dash
!pip install pyngrok
!pip install dash-bootstrap-components

Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m70.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Downloadi

In [None]:
import dash
from dash import dcc, html, Input, Output, State
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json
from sklearn.preprocessing import MinMaxScaler
from scipy.sparse import hstack

# Loading dataset
df = pd.read_csv("/content/dataset_with_sentiment_scores.csv")

# Combining text columns for better feature representation.
# Missing values are replaced with '' first: str + NaN evaluates to NaN, so a
# single missing column would otherwise wipe out the whole row's text, and
# TfidfVectorizer refuses NaN documents.
df['text_features'] = (
    df['main_headline_clean'].fillna('')
    + ' ' + df['abstract'].fillna('')
    + ' ' + df['keywords'].fillna('')
)

# Function returning the most similar articles for a given article
def display_recommendations_stylish(article_id, top_n=10):
    """
    Return the articles most similar to ``article_id``, best match first.

    Similarities are read from the module-level ``cosine_sim`` matrix and the
    headline/URL of each candidate from the module-level ``df``.

    Parameters:
        article_id (int): Positional row index of the article in the dataset.
        top_n (int): Maximum number of similar articles to return.

    Returns:
        list[tuple]: Up to ``top_n`` unique ``(main_headline_clean, web_url)``
        pairs ordered by decreasing similarity. The query article itself, and
        rows carrying the same headline/URL as the query, are never returned.
    """
    if top_n <= 0:
        return []

    scores = cosine_sim[article_id]
    # Stable sort: rows with equal similarity keep their dataset order.
    ranked_rows = sorted(range(len(scores)), key=lambda row: scores[row], reverse=True)

    headlines = df['main_headline_clean']
    urls = df['web_url']

    # Seed with the query article so exact duplicates of it are skipped too.
    seen = {(headlines.iat[article_id], urls.iat[article_id])}
    results = []
    for row in ranked_rows:
        # Exclude the query by identity, not by assuming it is ranked first:
        # a duplicate row can tie with it at similarity 1.0.
        if row == article_id:
            continue
        pair = (headlines.iat[row], urls.iat[row])
        if pair in seen:
            continue
        seen.add(pair)
        results.append(pair)
        # De-duplicating while walking the ranking keeps the order and still
        # fills the list up to top_n when duplicates are dropped.
        if len(results) == top_n:
            break
    return results


# Precomputing features for recommendation.
# TF-IDF over the combined text column, with English stop words removed and
# the vocabulary capped at 5000 terms.
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix = tfidf.fit_transform(df['text_features'])

# Normalization of sentiment scores: the four BERT sentiment columns are
# rescaled column-wise with MinMaxScaler (default range [0, 1]).
sentiment_features = df[['bert_positive_score', 'bert_neutral_score', 'bert_negative_score', 'bert_compound_score']]
scaler = MinMaxScaler()
scaled_sentiment = scaler.fit_transform(sentiment_features)

# Combining text and sentiment features: the scaled sentiment columns are
# appended to the right of the TF-IDF columns, one row per article.
combined_features = hstack([tfidf_matrix, scaled_sentiment])

# Computing cosine similarity of every article against every other article.
# Row i (cosine_sim[i]) is what display_recommendations_stylish reads.
# NOTE(review): the result has one entry per article pair, so memory grows
# quadratically with the number of rows - confirm this fits for the full dataset.
cosine_sim = cosine_similarity(combined_features, combined_features)

# Initializing Dash app
app = dash.Dash(__name__)
app.title = "News Article Recommender"

# Page layout. The component ids below are the contract with the callback:
#   keyword-input / search-btn  - search box and its trigger button
#   search-results              - container the callback fills with article links
#   clicked-article-url         - in-memory store holding the last clicked URL
# NOTE(review): the "recommendations" div is not written to by the callback
# defined in this notebook section - confirm whether it is still needed.
app.layout = html.Div([
    html.H1("News Article Recommender"),
    dcc.Input(id="keyword-input", type="text", placeholder="Enter a keyword to search"),
    html.Button("Search", id="search-btn", n_clicks=0),
    html.H3("Recommendations"),
    html.Div(id="search-results", children=[]),
    html.Hr(),
    html.Div(id="recommendations", children=[]),
    dcc.Store(id="clicked-article-url", storage_type='memory'),
])

@app.callback(
    [
        Output("search-results", "children"),
        Output("clicked-article-url", "data"),
        Output({"type": "article-link", "index": dash.dependencies.ALL}, "n_clicks"),
    ],
    [
        Input("search-btn", "n_clicks"),
        Input({"type": "article-link", "index": dash.dependencies.ALL}, "n_clicks"),
    ],
    [
        State("keyword-input", "value"),
        State({"type": "article-link", "index": dash.dependencies.ALL}, "href"),
        State("clicked-article-url", "data"),
    ]
)
def update_articles_and_recommendations(search_clicks, article_clicks, keyword, href_list, last_clicked_url):
    """
    Handle both page interactions: keyword search and clicks on article links.

    Parameters:
        search_clicks (int): Click counter of the "Search" button.
        article_clicks (list): ``n_clicks`` of every rendered article link.
        keyword (str): Current content of the search box.
        href_list (list): ``href`` of every rendered article link; indexed in
            parallel with ``article_clicks``.
        last_clicked_url: Value currently held by the ``clicked-article-url``
            store (received but not used).

    Returns:
        tuple: ``(children for "search-results", value for the
        "clicked-article-url" store, n_clicks reset list)``. The reset list
        holds one ``0`` per rendered link so the next click is the only
        non-zero counter.
    """
    ctx = dash.callback_context
    # The ALL output needs exactly one value per currently rendered link.
    reset_clicks = [0] * len(href_list)

    if not ctx.triggered:
        return dash.no_update, dash.no_update, reset_clicks

    trigger_id = ctx.triggered[0]["prop_id"]

    # Handling search button click
    if "search-btn" in trigger_id:
        keyword = (keyword or "").strip()
        if search_clicks and keyword:
            # regex=False: the keyword is user input and must be matched
            # literally; characters such as "(" or "+" would otherwise be
            # interpreted as a regular expression and can raise.
            matches = df[df['text_features'].str.contains(keyword, case=False, na=False, regex=False)]
            if matches.empty:
                return [html.Div("No articles found.")], dash.no_update, reset_clicks

            return [
                html.Div([
                    html.A(row['main_headline_clean'], href=row['web_url'], target="_blank", id={"type": "article-link", "index": idx})
                ]) for idx, row in matches.iterrows()
            ], dash.no_update, reset_clicks

    # Handling article link click
    elif "article-link" in trigger_id:
        # Counters are reset to 0 after every callback, so the clicked link is
        # the only one with a truthy n_clicks. Freshly rendered links report
        # None and fall through to the "nothing clicked" return.
        clicked_index = next(
            (i for i, n_clicks in enumerate(article_clicks) if n_clicks),
            None
        )

        if clicked_index is None:
            return dash.no_update, dash.no_update, reset_clicks

        clicked_url = href_list[clicked_index]

        # clicked_index is only the link's position among the rendered links;
        # the recommender needs the article's row position in df, so resolve it
        # from the URL (first matching row).
        row_positions = (df['web_url'] == clicked_url).to_numpy().nonzero()[0]
        if row_positions.size == 0:
            return [html.Div("Clicked article not found.")], dash.no_update, reset_clicks

        recommended_articles = display_recommendations_stylish(int(row_positions[0]))
        if not recommended_articles:
            return [html.Div("No recommendations found.")], clicked_url, reset_clicks

        return [
            html.Div([
                html.A(headline, href=web_url, target="_blank", id={"type": "article-link", "index": idx})
            ]) for idx, (headline, web_url) in enumerate(recommended_articles)
        ], clicked_url, reset_clicks

    return dash.no_update, dash.no_update, reset_clicks


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Port shared by the ngrok tunnel and the Dash server.
DASH_PORT = 8050

# The auth token is a credential and must not live in the notebook. Read it
# from the environment, or prompt for it without echoing.
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)

# Open the public tunnel first so the URL is printed before the server starts.
public_url = ngrok.connect(DASH_PORT)
print("App URL:", public_url)
# app.run is the supported entry point; run_server is deprecated and no longer
# available in Dash 3.
app.run(port=DASH_PORT)

App URL: NgrokTunnel: "https://28d5-34-125-51-133.ngrok-free.app" -> "http://localhost:8050"


<IPython.core.display.Javascript object>

In [None]:
# Close all ngrok tunnels by stopping the ngrok process. This does not stop
# the Dash server itself; it only removes the public URL.
ngrok.kill()

print("Ngrok tunnels closed; the Dash app is no longer publicly reachable.")

Ngrok tunnel closed and Streamlit app terminated.
