# Sentiment Analysis using Random Forest
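This notebook demonstrates sentiment analysis on simulated LinkedIn post data: each post is converted to TF-IDF features and classified as Positive or Negative with a Random Forest classifier. The dataset contains only ten hand-written posts, so the reported metrics are illustrative rather than meaningful.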

In [None]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Step 2: Simulated LinkedIn-style Sentiment Data
# Each post is kept next to its label so the pairing is visible at a glance;
# the two columns of the DataFrame are derived from these pairs.
labelled_posts = [
    ("I love how data science is changing the world!", 'Positive'),
    ("Feeling confused about my career direction.", 'Negative'),
    ("Great networking event on LinkedIn yesterday!", 'Positive'),
    ("Not happy with the current job market trends.", 'Negative'),
    ("Excited to start my new data analyst role!", 'Positive'),
    ("This article was hard to follow and too technical.", 'Negative'),
    ("Enjoying the connections and opportunities here.", 'Positive'),
    ("Disappointed by the lack of response on applications.", 'Negative'),
    ("Proud to share that I completed a certification today!", 'Positive'),
    ("The experience was underwhelming and poorly managed.", 'Negative'),
]
data = {
    'text': [post for post, _ in labelled_posts],
    'sentiment': [label for _, label in labelled_posts],
}
df = pd.DataFrame(data)
df.head()  # last expression in the cell: previews the first rows

In [None]:
# Step 3: Train/Test Split
# Hold out 20% of the posts for evaluation. Stratifying on the label keeps
# the Positive/Negative balance in both partitions, so the very small test
# set cannot end up containing only one sentiment class.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['sentiment'],
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
    stratify=df['sentiment'],
)

In [None]:
# Step 4: TF-IDF Vectorization
# Learn the vocabulary and IDF weights from the training posts only, then
# project both splits into that same feature space. Fitting on the training
# split alone keeps the test posts out of the learned statistics.
tfidf = TfidfVectorizer(stop_words='english')
tfidf.fit(X_train)
X_train_vec = tfidf.transform(X_train)
X_test_vec = tfidf.transform(X_test)

In [None]:
# Step 5: Train Random Forest Classifier
# Build a 100-tree forest with a fixed seed and fit it on the TF-IDF training
# matrix (fit() returns the estimator itself), then predict labels for the
# held-out posts.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train_vec, y_train)
y_pred = model.predict(X_test_vec)

In [None]:
# Step 6: Evaluation
# Print a heading followed by the per-class precision/recall/F1 report for
# the held-out posts, each on its own line.
print(
    "Classification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

In [None]:
# Step 7: Confusion Matrix
# Pass the classifier's class list explicitly so the matrix always has one
# row/column per class the model knows, in the same order as the tick labels
# used below. Without `labels`, confusion_matrix only covers the classes that
# occur in y_test or y_pred, so its shape could disagree with the tick labels.
conf_matrix = confusion_matrix(y_test, y_pred, labels=model.classes_)
sns.heatmap(
    conf_matrix,
    annot=True,   # write the count inside each cell
    fmt='d',      # counts are integers
    cmap='Blues',
    xticklabels=model.classes_,
    yticklabels=model.classes_,
)
# Rows of the matrix are the true labels, columns the predicted labels.
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()