In [15]:
%pip install numpy pandas matplotlib plotly statsmodels scikit-learn \
    tensorflow ipywidgets nbformat vaderSentiment


Note: you may need to restart the kernel to use updated packages.


In [16]:
# ============================================================
# SPRINT 3 – Sentiment + Time Series Dashboard (DJIA Example)
# Data source: Kaggle (Combined_News_DJIA, DJIA prices)
# Models: Simple Sentiment + Visual Time Series (no TensorFlow)
# ============================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import plotly.graph_objects as go
import plotly.express as px

from sklearn.metrics import accuracy_score, f1_score
from textblob import TextBlob

import ipywidgets as widgets
from IPython.display import display, clear_output

# ------------------------------------------------------------
# 1) LOAD DATA (KAGGLE FILES)
# ------------------------------------------------------------

# Adjust these file names if yours are different
news_path = "Combined_News_DJIA.csv"      # daily headlines + 'Label'
djia_path = "upload_DJIA_table.csv"       # DJIA price data (Date, Open, Close etc.)

news_df = pd.read_csv(news_path)
djia_df = pd.read_csv(djia_path)

print("News data:", news_df.shape)
print("DJIA data:", djia_df.shape)

display(news_df.head())
display(djia_df.head())

# ------------------------------------------------------------
# 2) BASIC CLEANING + MERGING
# ------------------------------------------------------------

# In Combined_News_DJIA, date column is usually named 'Date'
news_df['Date'] = pd.to_datetime(news_df['Date'])

# Create one combined headline string per day
headline_cols = [c for c in news_df.columns if c.startswith("Top")]
news_df['combined_headlines'] = news_df[headline_cols].fillna("").agg(". ".join, axis=1)

# Keep only Date, Label, and combined_headlines
news_simple = news_df[['Date', 'Label', 'combined_headlines']].copy()

# Prepare DJIA price data; adjust date column name if needed
# Common names: 'Date', 'Datetime', 'DATE'
if 'Date' in djia_df.columns:
    djia_df['Date'] = pd.to_datetime(djia_df['Date'])
elif 'Datetime' in djia_df.columns:
    djia_df['Date'] = pd.to_datetime(djia_df['Datetime']).dt.date
    djia_df['Date'] = pd.to_datetime(djia_df['Date'])
else:
    raise ValueError("Cannot find a date column in DJIA table. Please rename it to 'Date'.")

# For simplicity keep Date and Close
if 'Close' not in djia_df.columns:
    raise ValueError("DJIA table must contain a 'Close' column.")

djia_simple = djia_df[['Date', 'Close']].copy()

# Merge on Date (inner join = only dates present in both)
merged = pd.merge(news_simple, djia_simple, on='Date', how='inner').sort_values('Date')
merged.reset_index(drop=True, inplace=True)

print("Merged dataset:", merged.shape)
display(merged.head())

# Label explanation:
# Label = 1  -> market went up
# Label = 0  -> market went down or stayed flat

# ------------------------------------------------------------
# 3) SENTIMENT ANALYSIS WITH TEXTBLOB
# ------------------------------------------------------------

def get_sentiment_score(text):
    """
    Return TextBlob polarity score in range [-1, 1].
    Positive -> positive sentiment, negative -> negative sentiment.
    """
    try:
        return TextBlob(str(text)).sentiment.polarity
    except Exception:
        return 0.0

print("Calculating sentiment scores... (this can take some seconds)")
merged['sentiment_score'] = merged['combined_headlines'].apply(get_sentiment_score)

# Convert sentiment score to a simple binary prediction:
# If sentiment >= 0 => predict market up (1), else down (0)
merged['sentiment_pred'] = (merged['sentiment_score'] >= 0).astype(int)

# Ground truth target is 'Label' from Kaggle dataset
y_true = merged['Label'].values
y_pred = merged['sentiment_pred'].values

sentiment_accuracy = accuracy_score(y_true, y_pred)
sentiment_f1 = f1_score(y_true, y_pred)

print(f"Sentiment Model Accuracy: {sentiment_accuracy:.4f}")
print(f"Sentiment Model F1 Score: {sentiment_f1:.4f}")

# ------------------------------------------------------------
# 4) TIME SERIES VIEW OF DJIA + SENTIMENT
# ------------------------------------------------------------

# Create a time-series friendly dataframe
ts_df = merged[['Date', 'Close', 'sentiment_score', 'Label', 'sentiment_pred']].copy()
ts_df = ts_df.sort_values('Date')

# Compute daily returns from Close price
ts_df['return'] = ts_df['Close'].pct_change()

# ------------------------------------------------------------
# 5) RESULTS SUMMARY TABLE (FOR DASHBOARD)
# ------------------------------------------------------------

results_summary = pd.DataFrame({
    "Model": ["Sentiment (TextBlob)"],
    "Accuracy": [sentiment_accuracy],
    "F1_Score": [sentiment_f1]
})

display(results_summary)

# ------------------------------------------------------------
# 6) INTERACTIVE DASHBOARD (ipywidgets + Plotly)
# ------------------------------------------------------------

model_dropdown = widgets.Dropdown(
    options=["Sentiment vs Market"],
    value="Sentiment vs Market",
    description="View:"
)

metric_dropdown = widgets.Dropdown(
    options=["Accuracy", "F1_Score"],
    value="Accuracy",
    description="Metric:"
)

upload_widget = widgets.FileUpload(
    accept=".csv",
    multiple=False,
    description="Upload News CSV"
)

output_plot = widgets.Output()
output_metrics = widgets.Output()
output_text = widgets.Output()

def update_plot(change=None):
    with output_plot:
        clear_output(wait=True)
        
        # Plot Close price + sentiment score
        fig = make_sentiment_price_figure(ts_df)
        fig.show()
    
    with output_metrics:
        clear_output(wait=True)
        metric = metric_dropdown.value
        value = results_summary.loc[0, metric]
        print(f"Sentiment model {metric}: {value:.4f}")

    with output_text:
        clear_output(wait=True)
        print("Explanation:")
        print("- We use Kaggle's Combined_News_DJIA dataset for daily news headlines.")
        print("- We calculate a sentiment score for each day using TextBlob.")
        print("- Positive sentiment generally corresponds to index up moves; negative to down moves.")
        print("- The table above shows classification performance vs the original Kaggle Label.")

def make_sentiment_price_figure(df):
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df['Date'],
        y=df['Close'],
        mode='lines',
        name='DJIA Close'
    ))

    # Secondary axis via normalizing sentiment to price scale for visualization
    sent_norm = (df['sentiment_score'] - df['sentiment_score'].min()) / (
        df['sentiment_score'].max() - df['sentiment_score'].min() + 1e-9
    )
    sent_norm = sent_norm * (df['Close'].max() - df['Close'].min()) + df['Close'].min()

    fig.add_trace(go.Scatter(
        x=df['Date'],
        y=sent_norm,
        mode='lines',
        name='Sentiment (scaled)'
    ))

    fig.update_layout(
        title="DJIA Close vs News Sentiment (Kaggle Combined_News_DJIA)",
        xaxis_title="Date",
        yaxis_title="Index / Scaled Sentiment",
        hovermode='x unified'
    )

    return fig

def handle_upload(change):
    """
    Simple demonstration for 'upload data' requirement.
    In a real system, we would re-run sentiment pipeline on uploaded data.
    """
    if upload_widget.value:
        uploaded = list(upload_widget.value.values())[0]
        content = uploaded['content']
        import io
        user_df = pd.read_csv(io.BytesIO(content))
        print("User file uploaded with shape:", user_df.shape)
        print("Columns:", list(user_df.columns))
        print("For the sprint explanation, you can say:")
        print("- The dashboard supports uploading external news CSV files for analysis.")
        print("- Pipeline can be extended to compute sentiment on custom data.")

# Link callbacks
model_dropdown.observe(update_plot, names='value')
metric_dropdown.observe(update_plot, names='value')
upload_widget.observe(handle_upload, names='value')

# Initial display
update_plot()

dashboard = widgets.VBox([
    widgets.HBox([model_dropdown, metric_dropdown]),
    output_metrics,
    output_plot,
    widgets.HTML("<b>Upload your own news CSV (optional, same structure as Kaggle headlines):</b>"),
    upload_widget,
    output_text
])

display(dashboard)


News data: (1989, 27)
DJIA data: (1989, 7)


Unnamed: 0,Date,Label,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,...,Top16,Top17,Top18,Top19,Top20,Top21,Top22,Top23,Top24,Top25
0,2008-08-08,0,"b""Georgia 'downs two Russian warplanes' as cou...",b'BREAKING: Musharraf to be impeached.',b'Russia Today: Columns of troops roll into So...,b'Russian tanks are moving towards the capital...,"b""Afghan children raped with 'impunity,' U.N. ...",b'150 Russian tanks have entered South Ossetia...,"b""Breaking: Georgia invades South Ossetia, Rus...","b""The 'enemy combatent' trials are nothing but...",...,b'Georgia Invades South Ossetia - if Russia ge...,b'Al-Qaeda Faces Islamist Backlash',"b'Condoleezza Rice: ""The US would not act to p...",b'This is a busy day: The European Union has ...,"b""Georgia will withdraw 1,000 soldiers from Ir...",b'Why the Pentagon Thinks Attacking Iran is a ...,b'Caucasus in crisis: Georgia invades South Os...,b'Indian shoe manufactory - And again in a se...,b'Visitors Suffering from Mental Illnesses Ban...,"b""No Help for Mexico's Kidnapping Surge"""
1,2008-08-11,1,b'Why wont America and Nato help us? If they w...,b'Bush puts foot down on Georgian conflict',"b""Jewish Georgian minister: Thanks to Israeli ...",b'Georgian army flees in disarray as Russians ...,"b""Olympic opening ceremony fireworks 'faked'""",b'What were the Mossad with fraudulent New Zea...,b'Russia angered by Israeli military sale to G...,b'An American citizen living in S.Ossetia blam...,...,b'Israel and the US behind the Georgian aggres...,"b'""Do not believe TV, neither Russian nor Geor...",b'Riots are still going on in Montreal (Canada...,b'China to overtake US as largest manufacturer',b'War in South Ossetia [PICS]',b'Israeli Physicians Group Condemns State Tort...,b' Russia has just beaten the United States ov...,b'Perhaps *the* question about the Georgia - R...,b'Russia is so much better at war',"b""So this is what it's come to: trading sex fo..."
2,2008-08-12,0,b'Remember that adorable 9-year-old who sang a...,"b""Russia 'ends Georgia operation'""","b'""If we had no sexual harassment we would hav...","b""Al-Qa'eda is losing support in Iraq because ...",b'Ceasefire in Georgia: Putin Outmaneuvers the...,b'Why Microsoft and Intel tried to kill the XO...,b'Stratfor: The Russo-Georgian War and the Bal...,"b""I'm Trying to Get a Sense of This Whole Geor...",...,b'U.S. troops still in Georgia (did you know t...,b'Why Russias response to Georgia was right',"b'Gorbachev accuses U.S. of making a ""serious ...","b'Russia, Georgia, and NATO: Cold War Two'",b'Remember that adorable 62-year-old who led y...,b'War in Georgia: The Israeli connection',b'All signs point to the US encouraging Georgi...,b'Christopher King argues that the US and NATO...,b'America: The New Mexico?',"b""BBC NEWS | Asia-Pacific | Extinction 'by man..."
3,2008-08-13,0,b' U.S. refuses Israel weapons to attack Iran:...,"b""When the president ordered to attack Tskhinv...",b' Israel clears troops who killed Reuters cam...,b'Britain\'s policy of being tough on drugs is...,b'Body of 14 year old found in trunk; Latest (...,b'China has moved 10 *million* quake survivors...,"b""Bush announces Operation Get All Up In Russi...",b'Russian forces sink Georgian ships ',...,b'Elephants extinct by 2020?',b'US humanitarian missions soon in Georgia - i...,"b""Georgia's DDOS came from US sources""","b'Russian convoy heads into Georgia, violating...",b'Israeli defence minister: US against strike ...,b'Gorbachev: We Had No Choice',b'Witness: Russian forces head towards Tbilisi...,b' Quarter of Russians blame U.S. for conflict...,b'Georgian president says US military will ta...,b'2006: Nobel laureate Aleksander Solzhenitsyn...
4,2008-08-14,1,b'All the experts admit that we should legalis...,b'War in South Osetia - 89 pictures made by a ...,b'Swedish wrestler Ara Abrahamian throws away ...,b'Russia exaggerated the death toll in South O...,b'Missile That Killed 9 Inside Pakistan May Ha...,"b""Rushdie Condemns Random House's Refusal to P...",b'Poland and US agree to missle defense deal. ...,"b'Will the Russians conquer Tblisi? Bet on it,...",...,b'Bank analyst forecast Georgian crisis 2 days...,"b""Georgia confict could set back Russia's US r...",b'War in the Caucasus is as much the product o...,"b'""Non-media"" photos of South Ossetia/Georgia ...",b'Georgian TV reporter shot by Russian sniper ...,b'Saudi Arabia: Mother moves to block child ma...,b'Taliban wages war on humanitarian aid workers',"b'Russia: World ""can forget about"" Georgia\'s...",b'Darfur rebels accuse Sudan of mounting major...,b'Philippines : Peace Advocate say Muslims nee...


Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2016-07-01,17924.240234,18002.380859,17916.910156,17949.369141,82160000,17949.369141
1,2016-06-30,17712.759766,17930.609375,17711.800781,17929.990234,133030000,17929.990234
2,2016-06-29,17456.019531,17704.509766,17456.019531,17694.679688,106380000,17694.679688
3,2016-06-28,17190.509766,17409.720703,17190.509766,17409.720703,112190000,17409.720703
4,2016-06-27,17355.210938,17355.210938,17063.080078,17140.240234,138740000,17140.240234


Merged dataset: (1989, 4)


Unnamed: 0,Date,Label,combined_headlines,Close
0,2008-08-08,0,"b""Georgia 'downs two Russian warplanes' as cou...",11734.320312
1,2008-08-11,1,b'Why wont America and Nato help us? If they w...,11782.349609
2,2008-08-12,0,b'Remember that adorable 9-year-old who sang a...,11642.469727
3,2008-08-13,0,b' U.S. refuses Israel weapons to attack Iran:...,11532.959961
4,2008-08-14,1,b'All the experts admit that we should legalis...,11615.929688


Calculating sentiment scores... (this can take some seconds)
Sentiment Model Accuracy: 0.5244
Sentiment Model F1 Score: 0.6081


Unnamed: 0,Model,Accuracy,F1_Score
0,Sentiment (TextBlob),0.524384,0.608119


VBox(children=(HBox(children=(Dropdown(description='View:', options=('Sentiment vs Market',), value='Sentiment…