In [None]:
import numpy as np
import pandas as pd

In [None]:
from pathlib import Path

# Prompt for the dataset only when it is not already in the working directory,
# so re-running the notebook does not block on a manual upload. The Colab
# import is deferred so this cell also runs outside Colab once the file exists.
if not Path("all-data.csv").exists():
    from google.colab import files
    files.upload()

In [None]:
import pandas as pd

# Column names are passed explicitly, so pandas reads the first row as data.
df = pd.read_csv(
    "all-data.csv",
    names=["Sentiment", "News"],
    encoding="ISO-8859-1",
)
df.head()


In [None]:
# (n_rows, n_columns) of the loaded frame.
df.shape

In [None]:
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Fetch the stop-word corpus before it is first read below.
nltk.download('stopwords')

# Shared by clean_text: the stemmer instance and the English stop-word set.
stemmer = PorterStemmer()
stop_words = {*stopwords.words('english')}

def clean_text(text):
    """Normalise a news snippet into space-separated, stemmed tokens.

    The text is lower-cased, every character other than a-z is replaced by a
    space, words found in ``stop_words`` are dropped, and the remaining words
    are passed through ``stemmer.stem``.

    Args:
        text: Raw text. Non-string values (e.g. ``None`` or the ``NaN`` that
            pandas uses for empty CSV cells) are treated as empty text.

    Returns:
        str: The cleaned tokens joined by single spaces; ``""`` when no token
        survives cleaning or ``text`` is not a string.
    """
    if not isinstance(text, str):
        # Guard: ``.lower()`` would raise AttributeError on None/NaN.
        return ""
    letters_only = re.sub(r"[^a-zA-Z]", " ", text.lower())
    return " ".join(
        stemmer.stem(word)
        for word in letters_only.split()
        if word not in stop_words
    )

# Clean every headline element-wise and store the result alongside the raw text.
df["Cleaned_News"] = df["News"].map(clean_text)


In [None]:
# Encode the three sentiment classes as integers; any other value becomes NaN.
df['Label'] = df['Sentiment'].map({
    'negative': -1,
    'neutral': 0,
    'positive': 1,
})


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Cap the vocabulary at the 3000 most frequent terms.
tfidf = TfidfVectorizer(max_features=3000)

# Leave X as the sparse matrix fit_transform returns: the estimators and
# train_test_split used below accept sparse input, and calling .toarray()
# would materialise a dense (n_documents x 3000) array that is mostly zeros.
# NOTE(review): the vectorizer is fitted on every row before the train/test
# split, so the IDF weights have seen the test documents — consider fitting on
# the training portion only.
X = tfidf.fit_transform(df["Cleaned_News"])
y = df["Label"]


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Multinomial Naive Bayes fitted on the training portion only.
model = MultinomialNB()
model.fit(X_train, y_train)


In [None]:
# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)


In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression on the same split; max_iter is set explicitly to 1000
# to give the solver more iterations to converge.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)

lr_pred = lr_model.predict(X_test)
print("LR Accuracy:", accuracy_score(y_test, lr_pred))


In [None]:
!pip install gradio


In [None]:
# Step 1: Install necessary libraries
!pip install -q gradio sklearn pandas nltk

# Step 2: Import packages
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
import gradio as gr

# Step 3: Download NLTK stopwords
# The corpus must be present before the stop-word list is read.
nltk.download('stopwords')
stemmer = PorterStemmer()
stop_words = {*stopwords.words('english')}

# Step 4: Text cleaning function
def clean_text(text):
    """Normalise a news snippet into space-separated, stemmed tokens.

    The text is lower-cased, every character other than a-z is replaced by a
    space, words found in ``stop_words`` are dropped, and the remaining words
    are passed through ``stemmer.stem``.

    Args:
        text: Raw text. Non-string values (e.g. ``None`` or the ``NaN`` that
            pandas uses for empty CSV cells) are treated as empty text.

    Returns:
        str: The cleaned tokens joined by single spaces; ``""`` when no token
        survives cleaning or ``text`` is not a string.
    """
    if not isinstance(text, str):
        # Guard: ``.lower()`` would raise AttributeError on None/NaN.
        return ""
    letters_only = re.sub(r"[^a-zA-Z]", " ", text.lower())
    return " ".join(
        stemmer.stem(word)
        for word in letters_only.split()
        if word not in stop_words
    )

# Step 5: Load and preprocess data
url = "all-data.csv"
df = pd.read_csv(url, names=["Sentiment", "News"], encoding="ISO-8859-1")

# Derive the model inputs: cleaned text and an integer class label
# (sentiments outside the three listed classes become NaN).
df["Cleaned_News"] = df["News"].map(clean_text)
df["Label"] = df["Sentiment"].map({
    "negative": -1,
    "neutral": 0,
    "positive": 1,
})

# Step 6: Vectorize and train model
# The vocabulary is capped at 3000 terms.
tfidf = TfidfVectorizer(max_features=3000)
y = df["Label"]
X = tfidf.fit_transform(df["Cleaned_News"])

# Fitted on every row (no hold-out set is kept in this cell).
model = MultinomialNB().fit(X, y)

# Step 7: Prediction function
def predict_sentiment(news):
    """Classify one piece of news text and return its sentiment name.

    The text goes through ``clean_text``, is vectorized with the fitted
    ``tfidf``, and is scored by ``model``; the predicted integer label is then
    translated into its display string.

    Args:
        news: Raw headline text.

    Returns:
        str: ``"Positive "``, ``"Neutral "`` or ``"Negative "``.
    """
    label_names = {1: "Positive ", 0: "Neutral ", -1: "Negative "}
    features = tfidf.transform([clean_text(news)])
    predicted = model.predict(features)[0]
    return label_names[predicted]

# Step 8: Gradio UI
# Two-line text box in, plain-text sentiment out.
headline_input = gr.Textbox(
    lines=2,
    placeholder="Enter a financial news headline...",
)
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=headline_input,
    outputs="text",
    title="Financial News Sentiment Analyzer",
    description="Enter a headline to get its sentiment: Positive, Negative, or Neutral.",
)

# Step 9: Launch UI
iface.launch()
