<a href="https://colab.research.google.com/github/rutripathi96/Financial_Sentiment_Analysis/blob/main/Financial_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Financial Sentiment Analysis Project

Welcome to my Financial Sentiment Analysis project on Google Colab! In this project, I leverage machine learning techniques to analyze sentiments related to financial data. The goal is to extract valuable insights from textual data, helping to understand market sentiment dynamics. Explore the notebook to dive into the world of sentiment analysis in the financial domain.






# Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing the dataset

In [None]:
# Load the labelled sentences from a CSV expected in the working directory.
# Later cells read the 'Sentence' column as the text and the last column as the label.
dataset = pd.read_csv('Financial_Sentiment.csv')

# Text cleaning

In [None]:
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# The stemmer and the stop-word lookup are the same for every sentence, so
# build them once here instead of once per row (and the set once per token).
ps = PorterStemmer()
all_stopwords = stopwords.words('english')
stopword_set = set(all_stopwords)


def clean_text(sentence):
  """Normalise one raw sentence for the bag-of-words model.

  Every non-letter character is replaced by a space, the text is lower-cased
  and split on whitespace, English stop words are dropped, and the remaining
  tokens are Porter-stemmed and re-joined with single spaces.

  Args:
    sentence: the raw sentence as a string.

  Returns:
    The cleaned sentence as a single space-separated string.
  """
  tokens = re.sub('[^a-zA-Z]', ' ', sentence).lower().split()
  return ' '.join(ps.stem(token) for token in tokens if token not in stopword_set)


# Cleaned sentences, in the same order as the rows of `dataset`.
# Iterating over the column values avoids the label-based lookup
# dataset['Sentence'][i], which only works when the index is exactly 0..n-1.
texts = [clean_text(sentence) for sentence in dataset['Sentence']]

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Preview a handful of cleaned sentences; printing the whole list floods the
# notebook output with the entire corpus.
print(texts[:5])

['geosolut technolog leverag benefon gp solut provid locat base search technolog commun platform locat relev multimedia content new power commerci model', 'esi low bk real possibl', 'last quarter componenta net sale doubl eur eur period year earlier move zero pre tax profit pre tax loss eur', 'accord finnish russian chamber commerc major construct compani finland oper russia', 'swedish buyout firm sold remain percent stake almost eighteen month take compani public finland', 'spi surpris see green close', 'shell billion bg deal meet sharehold skeptic', 'ssh commun secur corp stock exchang releas octob pm compani updat full year outlook estim result remain loss full year', 'kone net sale rose year year first nine month', 'stockmann depart store total floor space squar metr stockmann invest project price tag eur million', 'circul revenu increas finland sweden', 'sap q disappoint softwar licens real problem cloud growth trail msft orcl goog crm adb http co jndphllzq', 'subdivis made sale r

# Creating the bag of words model

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Learn the vocabulary from the cleaned sentences and count token occurrences
# per sentence; the fitted vectorizer stays in `cv` so new text can be
# transformed with the same vocabulary later.
cv = CountVectorizer()
term_counts = cv.fit_transform(texts)

# Dense feature matrix: one row per sentence, one column per vocabulary term.
X = term_counts.toarray()

# The label is taken from the last column of the dataset.
y = dataset.iloc[:, -1].values

In [None]:
# Inspect the raw string labels before encoding.
print(y)

['positive' 'negative' 'positive' ... 'neutral' 'neutral' 'positive']


# Encoding the dependent variable vector

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the string-label -> integer mapping first, then apply it.
# The fitted encoder is kept in `le`; `y` is overwritten with the integer codes.
le = LabelEncoder().fit(y)
y = le.transform(y)


In [None]:
# Encoded labels; comparing with the string labels printed earlier gives
# 0 = negative, 1 = neutral, 2 = positive.
print(y)

[2 0 2 ... 1 1 2]


# Splitting the data into training and testing set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  test_size=0.2,
  random_state=0,
)

In [None]:
# Inspect the training feature matrix (bag-of-words counts, mostly zeros).
print(X_train)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


# Training the logistic regression model

In [None]:
from sklearn.linear_model import LogisticRegression

# With the default iteration budget the solver stops before converging on this
# data (scikit-learn reports "TOTAL NO. of ITERATIONS REACHED LIMIT"), so the
# fitted coefficients are not the optimum. Give the solver more headroom.
classifier = LogisticRegression(max_iter=1000)
classifier.fit(X_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Predict encoded sentiment classes for the held-out test rows using the
# logistic regression model currently stored in `classifier`.
y_pred = classifier.predict(X_test)

# Predicting the sentiment of a single sentence using the Logistic Regression model

In [None]:
sentence = 'the company stocks are increasing amazingly well'

# Apply the same cleaning steps used for the training sentences so the tokens
# line up with the vocabulary learned by `cv`.
ps = PorterStemmer()
all_stopwords = stopwords.words('english')
stopword_set = set(all_stopwords)  # built once instead of once per token
tokens = re.sub('[^a-zA-Z]',' ',sentence).lower().split()
text = ' '.join(ps.stem(word) for word in tokens if word not in stopword_set)

new_corpus = [text]
new_X_test = cv.transform(new_corpus).toarray()
new_y_pred = classifier.predict(new_X_test)
# Prints the encoded class (0 = negative, 1 = neutral, 2 = positive).
print(new_y_pred)

[2]


# Confusion Matrix and Accuracy (Logistic Regression)

In [None]:
from sklearn.metrics import confusion_matrix
# Rows are the true classes and columns the predicted classes (scikit-learn's
# convention), in encoded order 0 = negative, 1 = neutral, 2 = positive.
# `y_pred` here holds the logistic regression predictions.
cm = confusion_matrix(y_test,y_pred)
print(cm)



[[ 42 118  32]
 [ 74 515  54]
 [ 13  89 232]]


In [None]:
from sklearn.metrics import accuracy_score
# Fraction of test rows whose predicted class equals the true class
# (logistic regression predictions).
ac = accuracy_score(y_test,y_pred)
print(ac)

0.6749358426005133


# Training the KNN model

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Replace the model held in `classifier` with a KNN classifier using 10 neighbours.
knn_settings = {'n_neighbors': 10}
classifier = KNeighborsClassifier(**knn_settings)
classifier.fit(X_train, y_train)

In [None]:
# `classifier` is now the KNN model; this overwrites the logistic regression
# predictions previously stored in `y_pred`.
y_pred = classifier.predict(X_test)

# predicting the outcome of a single text using KNN Model

In [None]:
sentence = 'the company stocks are increasing amazingly well'

# Clean the sentence exactly as the training data was cleaned, then score it
# with the KNN model currently held in `classifier`.
ps = PorterStemmer()
all_stopwords = stopwords.words('english')
stopword_lookup = set(all_stopwords)  # hoisted: was rebuilt for every token
words = re.sub('[^a-zA-Z]',' ',sentence).lower().split()
text = ' '.join(ps.stem(word) for word in words if word not in stopword_lookup)

new_corpus = [text]
new_X_test = cv.transform(new_corpus).toarray()
new_y_pred = classifier.predict(new_X_test)
# Prints the encoded class (0 = negative, 1 = neutral, 2 = positive).
print(new_y_pred)

[1]


# Confusion Matrix and Accuracy (KNN)

In [None]:
from sklearn.metrics import confusion_matrix
# Same layout as before (rows = true class, columns = predicted class);
# `y_pred` now holds the KNN predictions.
cm = confusion_matrix(y_test,y_pred)
print(cm)

[[ 15 158  19]
 [ 24 600  19]
 [  7 262  65]]


In [None]:
from sklearn.metrics import accuracy_score
# Test-set accuracy of the KNN predictions.
ac = accuracy_score(y_test,y_pred)
print(ac)

0.58169375534645


# Training the naive bayes model

In [None]:
from sklearn.naive_bayes import GaussianNB

# Replace the model held in `classifier` with a Gaussian naive Bayes model.
# NOTE(review): the features are bag-of-words counts; MultinomialNB is the
# variant usually paired with count data - worth comparing before drawing
# conclusions from this model's score.
classifier = GaussianNB().fit(X_train, y_train)
classifier

In [None]:
# `classifier` is now the naive Bayes model; this overwrites the KNN
# predictions previously stored in `y_pred`.
y_pred = classifier.predict(X_test)

# Predicting the outcome of a single text using Naive Bayes

In [None]:
sentence = 'the company stocks are increasing amazingly superb well'

# Clean the sentence exactly as the training data was cleaned, then score it
# with the naive Bayes model currently held in `classifier`.
ps = PorterStemmer()
all_stopwords = stopwords.words('english')
stopword_lookup = set(all_stopwords)  # hoisted: was rebuilt for every token
words = re.sub('[^a-zA-Z]',' ',sentence).lower().split()
text = ' '.join(ps.stem(word) for word in words if word not in stopword_lookup)

new_corpus = [text]
new_X_test = cv.transform(new_corpus).toarray()
new_y_pred = classifier.predict(new_X_test)
# Prints the encoded class (0 = negative, 1 = neutral, 2 = positive).
print(new_y_pred)

[0]


# Confusion Matrix and Accuracy (Naive Bayes)

In [None]:
from sklearn.metrics import confusion_matrix
# Same layout as before (rows = true class, columns = predicted class);
# `y_pred` now holds the naive Bayes predictions.
cm = confusion_matrix(y_test,y_pred)
print(cm)

[[ 98  46  48]
 [182 258 203]
 [107  61 166]]


In [None]:
from sklearn.metrics import accuracy_score
# Test-set accuracy of the naive Bayes predictions.
ac = accuracy_score(y_test,y_pred)
print(ac)

0.446535500427716
