# Stock Movement Analysis

In [39]:
# Importing required libraries
import praw
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

In [40]:
# Reddit API credentials
reddit = praw.Reddit(
    client_id='wLMcSW8KYWa3WtT8Eqcruw',
    client_secret='TB26qbgfYxtlBQNAXRsYemIxwbavhw',
    user_agent='Stock Movement Analysis',
    username='Puzzleheaded_22514',
    password='Sakshi@22514'
)

In [41]:
print(reddit.read_only) # Should print False if connected successfull

False


In [42]:
# Scraping data from r/stocks
def scrape_reddit(subreddit_name, limit=500):
    subreddit = reddit.subreddit(subreddit_name)
    posts = []
    for post in subreddit.hot(limit=limit):
        posts.append({
            'title': post.title,
            'selftext': post.selftext,
            'created_utc': post.created_utc
        })
    return pd.DataFrame(posts)

In [43]:
data = scrape_reddit('stocks')
data.head()

Unnamed: 0,title,selftext,created_utc
0,Rate My Portfolio - r/Stocks Quarterly Thread ...,Please use this thread to discuss your portfol...,1733047000.0
1,r/Stocks Daily Discussion & Options Trading Th...,"This is the daily discussion, so anything stoc...",1733395000.0
2,"Waymo to expand to Miami, aims to launch robot...",Waymo is setting its sights on its next locati...,1733415000.0
3,Gelsinger's push into software and edge comput...,Intel was making some interesting moves into s...,1733468000.0
4,Many stocks still down 50-90% from 2021 highs.,Lately I have been sifting through a lot of st...,1733400000.0


In [44]:
# Combining title and selftext for analysis
data['content'] = data['title'] + ' ' + data['selftext']
data.drop(['title', 'selftext'], axis=1, inplace=True)

In [45]:
data = data.dropna(subset=['content'])

In [46]:
# Converting timestamp to datetime
data['date'] = pd.to_datetime(data['created_utc'], unit='s')
data.drop(['created_utc'], axis=1, inplace=True)

In [47]:
data.head()

Unnamed: 0,content,date
0,Rate My Portfolio - r/Stocks Quarterly Thread ...,2024-12-01 10:00:22
1,r/Stocks Daily Discussion & Options Trading Th...,2024-12-05 10:30:13
2,"Waymo to expand to Miami, aims to launch robot...",2024-12-05 16:06:20
3,Gelsinger's push into software and edge comput...,2024-12-06 06:55:35
4,Many stocks still down 50-90% from 2021 highs....,2024-12-05 12:02:09


In [48]:
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Paras\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [49]:
data['sentiment'] = data['content'].apply(lambda x: sid.polarity_scores(x)['compound'])

In [50]:
data['sentiment_label'] = data['sentiment'].apply(
    lambda x: 'positive' if x > 0 else ('negative' if x < 0 else 'neutral')
)

In [51]:
data.head()

Unnamed: 0,content,date,sentiment,sentiment_label
0,Rate My Portfolio - r/Stocks Quarterly Thread ...,2024-12-01 10:00:22,0.9416,positive
1,r/Stocks Daily Discussion & Options Trading Th...,2024-12-05 10:30:13,0.9706,positive
2,"Waymo to expand to Miami, aims to launch robot...",2024-12-05 16:06:20,0.4735,positive
3,Gelsinger's push into software and edge comput...,2024-12-06 06:55:35,0.3736,positive
4,Many stocks still down 50-90% from 2021 highs....,2024-12-05 12:02:09,0.9483,positive


In [52]:
# Extracting relevant features
data['word_count'] = data['content'].apply(lambda x: len(x.split()))
data['char_count'] = data['content'].apply(lambda x: len(x))

In [53]:
# Preparing data for modeling
X = data[['sentiment', 'word_count', 'char_count']]
y = data['sentiment_label']
y = y.map({'positive': 1, 'negative': -1, 'neutral': 0})

In [54]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [55]:
# Training Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [56]:
y_pred = model.predict(X_test)

In [57]:
# Evaluation metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

          -1       1.00      1.00      1.00         6
           1       1.00      1.00      1.00        26

    accuracy                           1.00        32
   macro avg       1.00      1.00      1.00        32
weighted avg       1.00      1.00      1.00        32



In [None]:
# Sentiment distribution
sns.countplot(x=data['sentiment_label'], palette='viridis')
plt.title('Sentiment Distribution')
plt.show()