In [4]:
# Import necessary libraries
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [8]:
# Function to clean tweet text
def clean_tweet(tweet):
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())
# Load dataset
data = pd.read_csv("test_train.csv")  # Replace with your training data path
# Clean the tweet content
data['cleaned_content'] = data['Content'].apply(clean_tweet)

In [10]:
# Vectorize the text data
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(data['cleaned_content'])
y = data['Sentiment']  # Assuming the training data has a 'sentiment' column with labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:

# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [12]:
# Evaluate the model
y_pred = model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, y_pred)}")

Model Accuracy: 0.92


In [16]:
# Function to predict the sentiment of a tweet
def get_tweet_sentiment(tweet, vectorizer, model):
    cleaned_tweet = clean_tweet(tweet)
    tweet_vector = vectorizer.transform([cleaned_tweet])
    sentiment = model.predict(tweet_vector)[0]
    return sentiment

In [17]:
# Load new tweets from a CSV file for prediction
def get_tweets_from_csv(file_path):
    try:
        data = pd.read_csv(file_path)
        tweets = data.to_dict(orient='records')
        for tweet in tweets:
            tweet['sentiment'] = get_tweet_sentiment(tweet['content'], vectorizer, model)
        return tweets
    except Exception as e:
        print(f"Error reading CSV: {str(e)}")
        return []

In [20]:
# Path to the new CSV file containing tweets to analyze
new_csv_path = "tweets_500.csv"  # Replace with the path to your new CSV file
predicted_tweets = get_tweets_from_csv(new_csv_path)


In [21]:
# Display the results
for tweet in predicted_tweets:
    print(f"Tweet: {tweet['content']}\nSentiment: {tweet['sentiment']}\n")

Tweet: Is history repeating itself...?#DONTNORMALIZEHATE https://t.co/ngG11quhmK
Sentiment: Positive

Tweet: @barackobama Thank you for your incredible grace in leadership and for being an exceptional… https://t.co/ZuQLZpt6df
Sentiment: Positive

Tweet: Life goals. https://t.co/XIn1qKMKQl
Sentiment: Positive

Tweet: Me right now 🙏🏻 https://t.co/gW55C1wrwd
Sentiment: Positive

Tweet: SISTERS ARE DOIN' IT FOR THEMSELVES! 🙌🏻💪🏻❤️ https://t.co/0shuUYUBEv
Sentiment: Positive

Tweet: happy 96th gma #fourmoreyears! 🎈 @ LACMA Los Angeles County Museum of Art https://t.co/M9n7X8xdmA
Sentiment: Positive

Tweet: Kyoto, Japan 
1. 5. 17. https://t.co/o28M0vw9lR
Sentiment: Positive

Tweet: 🇯🇵 @ Sanrio Puroland https://t.co/eXVev5UMBx
Sentiment: Positive

Tweet: 2017 resolution: to embody authenticity!
Sentiment: Positive

Tweet: sisters. https://t.co/5ZE21x2aNk
Sentiment: Positive

Tweet: Happy Holidays! Sending love and light to every corner of the earth 🎁
Sentiment: Positive

Tweet: Damn, it's hard