In [3]:
# # Festus.ipynb

# # =========================================
# # 0. Import Necessary Libraries
# # =========================================
# import pandas as pd
# import numpy as np
# import re
# import matplotlib.pyplot as plt
# import seaborn as sns
# import nltk
# from nltk.sentiment.vader import SentimentIntensityAnalyzer
# from wordcloud import WordCloud
# from sklearn.feature_extraction.text import CountVectorizer
# from sklearn.decomposition import LatentDirichletAllocation

# # Set plot styles for better aesthetics
# sns.set(style='whitegrid')
# plt.rcParams['figure.figsize'] = (12, 8)

# # Download the VADER lexicon for sentiment analysis
# nltk.download('vader_lexicon')

# # =========================================
# # 1. Load and Preprocess Tweet Data
# # =========================================

# # Load tweet data from 'Tesla.csv'
# try:
#     data = pd.read_csv('Tesla.csv')
#     print("Tesla.csv loaded successfully.")
# except FileNotFoundError:
#     raise FileNotFoundError("Tesla.csv file not found in the current directory.")

# # Display the first few rows to verify
# print("\nSample Tweet Data:")
# print(data.head())

# # Ensure necessary columns are present
# required_columns = ['tweet', 'date']
# missing_columns = [col for col in required_columns if col not in data.columns]
# if missing_columns:
#     raise ValueError(f"Missing columns in Tesla.csv: {missing_columns}")

# # Convert 'date' column to datetime
# data['date'] = pd.to_datetime(data['date'], errors='coerce')

# # Check for 'created_at' column for temporal analysis
# has_created_at = 'created_at' in data.columns

# if has_created_at:
#     data['created_at'] = pd.to_datetime(data['created_at'], errors='coerce')

# # Preprocessing function for tweets
# def preprocess_tweet(tweet):
#     tweet = str(tweet)
#     tweet = re.sub(r'http\S+|www\S+|https\S+', '', tweet)      # Remove URLs
#     tweet = re.sub(r'@\w+', '', tweet)                         # Remove mentions
#     tweet = re.sub(r'#', '', tweet)                            # Remove hash symbol
#     tweet = re.sub(r'[^\w\s]', '', tweet)                      # Remove special characters/punctuation
#     tweet = re.sub(r'\d+', '', tweet)                          # Remove numbers
#     tweet = tweet.strip()                                      # Remove leading/trailing whitespace
#     return tweet

# # Apply preprocessing to tweets
# data['cleaned_tweet'] = data['tweet'].apply(preprocess_tweet)

# # =========================================
# # 2. Sentiment Analysis with VADER
# # =========================================

# # Initialize VADER sentiment analyzer
# sentiment_analyzer = SentimentIntensityAnalyzer()

# # Calculate sentiment scores for each tweet
# sentiment_scores = data['cleaned_tweet'].apply(sentiment_analyzer.polarity_scores)

# # Extract individual sentiment scores into separate columns
# data['compound_score'] = sentiment_scores.apply(lambda x: x['compound'])
# data['positive_score'] = sentiment_scores.apply(lambda x: x['pos'])
# data['negative_score'] = sentiment_scores.apply(lambda x: x['neg'])
# data['neutral_score'] = sentiment_scores.apply(lambda x: x['neu'])

# # Categorize tweets based on compound scores
# def categorize_sentiment(score):
#     if score >= 0.05:
#         return 'Positive'
#     elif score <= -0.05:
#         return 'Negative'
#     else:
#         return 'Neutral'

# data['sentiment_category'] = data['compound_score'].apply(categorize_sentiment)

# # Display sentiment distribution
# print("\nSentiment Category Counts:")
# print(data['sentiment_category'].value_counts())

# # =========================================
# # 3. Descriptive Sentiment Analysis
# # =========================================

# # Overall sentiment distribution
# sentiment_counts = data['sentiment_category'].value_counts(normalize=True) * 100

# # Pie Chart of Sentiment Distribution
# plt.figure(figsize=(8,6))
# plt.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', 
#         colors=['#66b3ff','#ff9999','#99ff99'], startangle=140)
# plt.title('Sentiment Distribution of TSLA Tweets (Single Day)')
# plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
# plt.tight_layout()
# plt.savefig("sentiment_distribution_pie.png", dpi=300)
# plt.close()
# print("\nSentiment distribution pie chart saved as 'sentiment_distribution_pie.png'.")

# # Distribution of compound scores
# plt.figure(figsize=(10,6))
# sns.histplot(data['compound_score'], bins=20, kde=True, color='skyblue')
# plt.title('Distribution of Compound Sentiment Scores')
# plt.xlabel('Compound Score')
# plt.ylabel('Number of Tweets')
# plt.grid(True)
# plt.tight_layout()
# plt.savefig("compound_score_distribution.png", dpi=300)
# plt.close()
# print("Compound score distribution histogram saved as 'compound_score_distribution.png'.")

# # Top 5 Positive Tweets
# top_positive = data.sort_values(by='compound_score', ascending=False).head(5)
# print("\nTop 5 Positive Tweets:")
# print(top_positive[['tweet', 'compound_score']].to_string(index=False))

# # Top 5 Negative Tweets
# top_negative = data.sort_values(by='compound_score').head(5)
# print("\nTop 5 Negative Tweets:")
# print(top_negative[['tweet', 'compound_score']].to_string(index=False))

# # =========================================
# # 4. Temporal Analysis (If 'created_at' Available)
# # =========================================

# if has_created_at:
#     # Check if 'created_at' has valid timestamps
#     if data['created_at'].notnull().any():
#         # Extract hour from 'created_at'
#         data['hour'] = data['created_at'].dt.hour
        
#         # Aggregate average compound scores per hour
#         hourly_sentiment = data.groupby('hour')['compound_score'].mean().reset_index()
        
#         # Line Plot of Hourly Sentiment
#         plt.figure(figsize=(12,6))
#         sns.lineplot(data=hourly_sentiment, x='hour', y='compound_score', marker='o', color='purple')
#         plt.title('Hourly Average Compound Sentiment Scores')
#         plt.xlabel('Hour of Day')
#         plt.ylabel('Average Compound Sentiment Score')
#         plt.xticks(range(0,24))
#         plt.grid(True)
#         plt.tight_layout()
#         plt.savefig("hourly_sentiment.png", dpi=300)
#         plt.close()
#         print("Hourly sentiment line plot saved as 'hourly_sentiment.png'.")
#     else:
#         print("No valid timestamps in 'created_at' to perform hourly analysis.")
# else:
#     print("No 'created_at' column present. Skipping temporal analysis.")

# # =========================================
# # 5. Content Analysis - Word Clouds
# # =========================================

# # Function to generate and save word clouds
# def generate_wordcloud(text_array, title, filename):
#     text_combined = ' '.join(text_array)
#     wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text_combined)
#     plt.figure(figsize=(10,5))
#     plt.imshow(wordcloud, interpolation='bilinear')
#     plt.title(title, fontsize=20)
#     plt.axis('off')
#     plt.tight_layout()
#     plt.savefig(filename, dpi=300)
#     plt.close()
#     print(f"Word cloud '{filename}' generated and saved.")

# # Generate word cloud for Positive Tweets
# positive_tweets = data[data['sentiment_category'] == 'Positive']['cleaned_tweet'].values
# if len(positive_tweets) > 0:
#     generate_wordcloud(positive_tweets, 'Word Cloud of Positive Tweets', "positive_wordcloud.png")
# else:
#     print("No positive tweets available to generate a word cloud.")

# # Generate word cloud for Negative Tweets
# negative_tweets = data[data['sentiment_category'] == 'Negative']['cleaned_tweet'].values
# if len(negative_tweets) > 0:
#     generate_wordcloud(negative_tweets, 'Word Cloud of Negative Tweets', "negative_wordcloud.png")
# else:
#     print("No negative tweets available to generate a word cloud.")

# # =========================================
# # 6. Topic Modeling (LDA) on All Tweets
# # =========================================

# # Prepare data for LDA
# all_tweets = data['cleaned_tweet'].values

# # Vectorize the text data
# vectorizer = CountVectorizer(max_df=0.95, min_df=2, stop_words='english')
# dtm = vectorizer.fit_transform(all_tweets)

# # Initialize and fit LDA model with 5 topics
# lda = LatentDirichletAllocation(n_components=5, random_state=42)
# lda.fit(dtm)

# # Display the top words for each topic
# print("\nTopics Identified via LDA:")
# for index, topic in enumerate(lda.components_):
#     top_features = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[-10:]]
#     print(f"Topic #{index + 1}: {', '.join(top_features)}")

# # =========================================
# # 7. Load and Inspect Stock Data
# # =========================================

# # Load stock data from 'TSLA_stock_data.csv'
# try:
#     stock_data = pd.read_csv('TSLA_stock_data.csv')
#     print("\nTSLA_stock_data.csv loaded successfully.")
# except FileNotFoundError:
#     raise FileNotFoundError("TSLA_stock_data.csv file not found in the current directory.")

# # Display the first few rows to verify
# print("\nSample Stock Data:")
# print(stock_data.head())

# # Ensure necessary columns are present
# required_stock_columns = ['Date', 'Close']
# missing_stock_columns = [col for col in required_stock_columns if col not in stock_data.columns]
# if missing_stock_columns:
#     raise ValueError(f"Missing columns in TSLA_stock_data.csv: {missing_stock_columns}")

# # Convert 'Date' to datetime
# stock_data['Date'] = pd.to_datetime(stock_data['Date'], errors='coerce')

# # Ensure 'Close' is numeric
# stock_data['Close'] = pd.to_numeric(stock_data['Close'], errors='coerce')

# # Drop rows with missing 'Close' or 'Date'
# stock_data.dropna(subset=['Close', 'Date'], inplace=True)

# # Sort stock data by date
# stock_data.sort_values('Date', inplace=True)

# # Calculate daily returns
# stock_data['Return'] = stock_data['Close'].pct_change()

# # Display the last 5 rows of stock data
# print("\nProcessed Stock Data with Returns:")
# print(stock_data.tail())

# # =========================================
# # 8. Acknowledging Correlation Limitation
# # =========================================

# print("\nNote: Correlation analysis between sentiment and stock performance is not feasible with only one day of sentiment data.")
# print("For correlation, multiple days of sentiment and corresponding stock returns are required.")
# print("However, you can still analyze and interpret the sentiment distribution and content for the single day.")

# # =========================================
# # 9. Save the DataFrames (Optional)
# # =========================================

# # Uncomment the following lines if you wish to save the processed data for future use.

# # data.to_csv('Processed_Tesla_Tweets.csv', index=False)
# # stock_data.to_csv('Processed_TSLA_stock_data.csv', index=False)
# # print("\nProcessed data saved as 'Processed_Tesla_Tweets.csv' and 'Processed_TSLA_stock_data.csv'.")

# # =========================================
# # 10. Summary of Outputs
# # =========================================

# print("\nAnalysis Complete.")
# print("Generated the following visualizations:")
# print("- 'sentiment_distribution_pie.png'")
# print("- 'compound_score_distribution.png'")
# if has_created_at and 'hour' in data.columns:
#     print("- 'hourly_sentiment.png'")
# if len(positive_tweets) > 0:
#     print("- 'positive_wordcloud.png'")
# if len(negative_tweets) > 0:
#     print("- 'negative_wordcloud.png'")
# print("\nTop positive and negative tweets have been printed to the console.")
# print("Identified topics via LDA have been printed to the console.")
# print("Sample and processed stock data have been printed to the console.")

# =========================================
# 0. Import Necessary Libraries
# =========================================
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from wordcloud import WordCloud
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Set plot styles for better aesthetics
sns.set(style='whitegrid')
plt.rcParams['figure.figsize'] = (12, 8)

# Download the VADER lexicon for sentiment analysis.
# nltk.download() normally reports failure by returning False rather than
# raising, so both outcomes are checked. Only Exception is caught so that
# Ctrl-C / SystemExit are not swallowed.
try:
    vader_lexicon_ready = nltk.download('vader_lexicon')
except Exception:
    vader_lexicon_ready = False

if not vader_lexicon_ready:
    print("Could not download VADER lexicon. Please ensure it is available.")

# =========================================
# 1. Load and Preprocess Tweet Data
# =========================================

# Load tweet data from 'Tesla.csv'
# Only the read itself is guarded; a missing file is re-raised with a
# clearer message while keeping the original error as the cause.
try:
    data = pd.read_csv('Tesla.csv')
except FileNotFoundError as err:
    raise FileNotFoundError("Tesla.csv file not found in the current directory.") from err
print("Tesla.csv loaded successfully.")

# Display the first few rows to verify
print("\nSample Tweet Data:")
print(data.head())

# Ensure necessary columns are present
required_columns = ['tweet', 'date']
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
    raise ValueError(f"Missing columns in Tesla.csv: {missing_columns}")

# Convert 'date' column to datetime (unparseable values become NaT)
data['date'] = pd.to_datetime(data['date'], errors='coerce')

# Check for 'created_at' column for temporal analysis
has_created_at = 'created_at' in data.columns

if has_created_at:
    try:
        # 'created_at' is interpreted as an epoch timestamp in milliseconds.
        data['created_at'] = pd.to_datetime(data['created_at'], unit='ms', errors='coerce')
    except Exception as e:
        print(f"Error converting 'created_at': {e}")
        # The column was left unconverted, so the later hourly analysis
        # (which relies on the .dt accessor) must be skipped, not crash.
        has_created_at = False

# Preprocessing function for tweets
def preprocess_tweet(tweet):
    """Return *tweet* as plain text with tweet-specific noise stripped out.

    The input is coerced with ``str`` and passed through a fixed sequence of
    regex deletions (URLs, @mentions, '#' symbols, remaining non-word /
    non-space characters, digit runs). Only the ends are trimmed afterwards;
    whitespace left inside the text by the deletions is not collapsed.
    """
    # Order matters: mentions must be removed before the punctuation pass
    # strips the '@' marker that identifies them.
    removal_patterns = (
        r'http\S+|www\S+|https\S+',  # URLs
        r'@\w+',                     # mentions
        r'#',                        # hash symbol (the tag word is kept)
        r'[^\w\s]',                  # special characters / punctuation
        r'\d+',                      # numbers
    )
    text = str(tweet)
    for pattern in removal_patterns:
        text = re.sub(pattern, '', text)
    return text.strip()

# Apply preprocessing to tweets
data['cleaned_tweet'] = data['tweet'].apply(preprocess_tweet)

# =========================================
# 2. Sentiment Analysis with VADER
# =========================================

# One analyzer instance scores every cleaned tweet; each result is a dict
# holding the 'compound', 'pos', 'neg' and 'neu' values read below.
sentiment_analyzer = SentimentIntensityAnalyzer()
sentiment_scores = data['cleaned_tweet'].apply(sentiment_analyzer.polarity_scores)

# Spread the per-tweet score dicts over dedicated DataFrame columns.
for score_column, score_key in (
    ('compound_score', 'compound'),
    ('positive_score', 'pos'),
    ('negative_score', 'neg'),
    ('neutral_score', 'neu'),
):
    # score_key is bound as a default so each lambda keeps its own key.
    data[score_column] = sentiment_scores.apply(
        lambda scores, score_key=score_key: scores[score_key]
    )

# Categorize tweets based on compound scores
def categorize_sentiment(score):
    """Map a compound sentiment score to a category label.

    Returns 'Positive' for scores of at least 0.05, 'Negative' for scores of
    at most -0.05, and 'Neutral' for everything else (including values that
    compare false against both thresholds, such as NaN).
    """
    if score <= -0.05:
        return 'Negative'
    if score >= 0.05:
        return 'Positive'
    return 'Neutral'

data['sentiment_category'] = data['compound_score'].apply(categorize_sentiment)

# Display sentiment distribution
print("\nSentiment Category Counts:")
print(data['sentiment_category'].value_counts())

# =========================================
# 3. Descriptive Sentiment Analysis
# =========================================

# Overall sentiment distribution, as percentages of all tweets
sentiment_counts = data['sentiment_category'].value_counts(normalize=True) * 100

# value_counts() orders categories by frequency, so a positional colour list
# would give a category a different colour depending on the data. Tie each
# colour to its category instead.
category_colors = {
    'Positive': '#99ff99',
    'Negative': '#ff9999',
    'Neutral': '#66b3ff',
}
pie_colors = [category_colors.get(label, '#cccccc') for label in sentiment_counts.index]

# Pie Chart of Sentiment Distribution
plt.figure(figsize=(8,6))
plt.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%',
        colors=pie_colors, startangle=140)
plt.title('Sentiment Distribution of TSLA Tweets (Single Day)')
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.tight_layout()
plt.savefig("sentiment_distribution_pie.png", dpi=300)
plt.close()
print("\nSentiment distribution pie chart saved as 'sentiment_distribution_pie.png'.")

# Distribution of compound scores
# Histogram (with KDE overlay) of the compound scores, drawn on an explicit
# figure/axes pair and written to disk.
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['compound_score'], bins=20, kde=True, color='skyblue', ax=hist_ax)
hist_ax.set_title('Distribution of Compound Sentiment Scores')
hist_ax.set_xlabel('Compound Score')
hist_ax.set_ylabel('Number of Tweets')
hist_ax.grid(True)
hist_fig.tight_layout()
hist_fig.savefig("compound_score_distribution.png", dpi=300)
plt.close(hist_fig)
print("Compound score distribution histogram saved as 'compound_score_distribution.png'.")

# The five highest- and five lowest-scoring tweets, printed with the raw
# tweet text next to its compound score.
tweet_report_columns = ['tweet', 'compound_score']
top_positive = data.sort_values(by='compound_score', ascending=False).head(5)
top_negative = data.sort_values(by='compound_score').head(5)

for report_heading, report_rows in (
    ("\nTop 5 Positive Tweets:", top_positive),
    ("\nTop 5 Negative Tweets:", top_negative),
):
    print(report_heading)
    print(report_rows[tweet_report_columns].to_string(index=False))

# =========================================
# 4. Temporal Analysis (If 'created_at' Available)
# =========================================

# Hourly sentiment is only computed when a 'created_at' column exists and
# holds at least one non-null timestamp; otherwise the reason is printed.
if not has_created_at:
    print("No 'created_at' column present. Skipping temporal analysis.")
elif not data['created_at'].notnull().any():
    print("No valid timestamps in 'created_at' to perform hourly analysis.")
else:
    # Hour of day for each tweet
    data['hour'] = data['created_at'].dt.hour

    # Mean compound score per hour, flattened back to ordinary columns
    mean_by_hour = data.groupby('hour')['compound_score'].mean()
    hourly_sentiment = mean_by_hour.reset_index()

    # Line plot of the hourly averages
    hourly_fig, hourly_ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(data=hourly_sentiment, x='hour', y='compound_score',
                 marker='o', color='purple', ax=hourly_ax)
    hourly_ax.set_title('Hourly Average Compound Sentiment Scores')
    hourly_ax.set_xlabel('Hour of Day')
    hourly_ax.set_ylabel('Average Compound Sentiment Score')
    hourly_ax.set_xticks(range(0, 24))
    hourly_ax.grid(True)
    hourly_fig.tight_layout()
    hourly_fig.savefig("hourly_sentiment.png", dpi=300)
    plt.close(hourly_fig)
    print("Hourly sentiment line plot saved as 'hourly_sentiment.png'.")

# =========================================
# 5. Content Analysis - Word Clouds
# =========================================

# Function to generate and save word clouds
def generate_wordcloud(text_array, title, filename):
    """Render a word cloud from a collection of texts and save it as an image.

    Args:
        text_array: Iterable of strings; they are joined with single spaces
            to form the text the cloud is built from.
        title: Title drawn above the image.
        filename: Path the figure is saved to.

    Returns:
        None. A confirmation is printed after saving. If there is nothing to
        draw (blank text, or the word cloud library rejects the text), a
        notice is printed and no file is written.
    """
    text_combined = ' '.join(text_array)

    # Tweets can be reduced to empty strings by cleaning; bail out early
    # instead of letting the word cloud library fail on blank input.
    if not text_combined.strip():
        print(f"No words available for word cloud '{filename}'; skipping.")
        return

    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text_combined)
    except ValueError as exc:
        # Raised when no usable words remain (e.g. only stop words).
        print(f"Could not generate word cloud '{filename}': {exc}")
        return

    plt.figure(figsize=(10,5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.title(title, fontsize=20)
    plt.axis('off')
    plt.tight_layout()
    plt.savefig(filename, dpi=300)
    plt.close()
    print(f"Word cloud '{filename}' generated and saved.")

# Generate word cloud for Positive Tweets
# Cleaned text of every tweet labelled 'Positive'
is_positive = data['sentiment_category'] == 'Positive'
positive_tweets = data.loc[is_positive, 'cleaned_tweet'].values
if len(positive_tweets) == 0:
    print("No positive tweets available to generate a word cloud.")
else:
    generate_wordcloud(positive_tweets, 'Word Cloud of Positive Tweets', "positive_wordcloud.png")

# Cleaned text of every tweet labelled 'Negative'
is_negative = data['sentiment_category'] == 'Negative'
negative_tweets = data.loc[is_negative, 'cleaned_tweet'].values
if len(negative_tweets) == 0:
    print("No negative tweets available to generate a word cloud.")
else:
    generate_wordcloud(negative_tweets, 'Word Cloud of Negative Tweets', "negative_wordcloud.png")

# =========================================
# 6. Topic Modeling (LDA) on All Tweets
# =========================================

# Prepare data for LDA
all_tweets = data['cleaned_tweet'].values

# Vectorize the text data: English stop words are removed, and terms that
# appear in more than 95% of tweets or in fewer than 2 tweets are ignored.
vectorizer = CountVectorizer(max_df=0.95, min_df=2, stop_words='english')
dtm = vectorizer.fit_transform(all_tweets)

# Initialize and fit LDA model with 5 topics (fixed seed for repeatability)
lda = LatentDirichletAllocation(n_components=5, random_state=42)
lda.fit(dtm)

# The vocabulary does not change between topics, so fetch it once rather
# than once per printed word.
feature_names = vectorizer.get_feature_names_out()

# Display the top words for each topic. argsort() is ascending, so the ten
# highest-weighted words are listed with the strongest one last.
print("\nTopics Identified via LDA:")
for topic_number, topic in enumerate(lda.components_, start=1):
    top_features = [feature_names[i] for i in topic.argsort()[-10:]]
    print(f"Topic #{topic_number}: {', '.join(top_features)}")

# =========================================
# 7. Load and Inspect Stock Data
# =========================================

# Load stock data from 'TSLA_stock_data.csv'
try:
    stock_data = pd.read_csv('TSLA_stock_data.csv')
except FileNotFoundError:
    raise FileNotFoundError("TSLA_stock_data.csv file not found in the current directory.")
else:
    print("\nTSLA_stock_data.csv loaded successfully.")

# Show the head of the raw file so its layout can be checked
print("\nSample Stock Data:")
print(stock_data.head())

# Both 'Date' and 'Close' are needed below; stop early if either is absent
required_stock_columns = ['Date', 'Close']
missing_stock_columns = []
for column_name in required_stock_columns:
    if column_name not in stock_data.columns:
        missing_stock_columns.append(column_name)
if missing_stock_columns:
    raise ValueError(f"Missing columns in TSLA_stock_data.csv: {missing_stock_columns}")

# Coerce 'Date' to datetime and 'Close' to numeric (bad values become
# NaT/NaN), drop the rows where either is missing, order by date, and add
# the day-over-day percentage change of 'Close' as 'Return'.
stock_data = (
    stock_data
    .assign(
        Date=pd.to_datetime(stock_data['Date'], errors='coerce'),
        Close=pd.to_numeric(stock_data['Close'], errors='coerce'),
    )
    .dropna(subset=['Close', 'Date'])
    .sort_values('Date')
    .assign(Return=lambda frame: frame['Close'].pct_change())
)

# Show the tail of the processed data
print("\nProcessed Stock Data with Returns:")
print(stock_data.tail())

# =========================================
# 8. Acknowledging Correlation Limitation
# =========================================

# Explain why no sentiment/return correlation is computed in this script.
for limitation_note in (
    "\nNote: Correlation analysis between sentiment and stock performance is not feasible with only one day of sentiment data.",
    "For correlation, multiple days of sentiment and corresponding stock returns are required.",
    "However, you can still analyze and interpret the sentiment distribution and content for the single day.",
):
    print(limitation_note)

# =========================================
# 9. Save the DataFrames (Optional)
# =========================================

# Uncomment the following lines if you wish to save the processed data for future use.

# data.to_csv('Processed_Tesla_Tweets.csv', index=False)
# stock_data.to_csv('Processed_TSLA_stock_data.csv', index=False)
# print("\nProcessed data saved as 'Processed_Tesla_Tweets.csv' and 'Processed_TSLA_stock_data.csv'.")

# =========================================
# 10. Summary of Outputs
# =========================================

# Collect the summary first, then emit it line by line. The optional chart
# entries are listed only when the corresponding step produced its inputs.
summary_lines = [
    "\nAnalysis Complete.",
    "Generated the following visualizations:",
    "- 'sentiment_distribution_pie.png'",
    "- 'compound_score_distribution.png'",
]
if has_created_at and 'hour' in data.columns:
    summary_lines.append("- 'hourly_sentiment.png'")
if len(positive_tweets) > 0:
    summary_lines.append("- 'positive_wordcloud.png'")
if len(negative_tweets) > 0:
    summary_lines.append("- 'negative_wordcloud.png'")
summary_lines.extend([
    "\nTop positive and negative tweets have been printed to the console.",
    "Identified topics via LDA have been printed to the console.",
    "Sample and processed stock data have been printed to the console.",
])

for summary_line in summary_lines:
    print(summary_line)



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\folan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Tesla.csv loaded successfully.

Sample Tweet Data:
   Unnamed: 0                   id      conversation_id    created_at  \
0           0  1546541426317590528  1545826164564000768  1.657559e+12   
1           1  1546541415857102850  1545826164564000768  1.657559e+12   
2           2  1546541411897581568  1546541411897581568  1.657559e+12   
3           3  1546541379110805508  1546340000500813824  1.657559e+12   
4           4  1546541363470028800  1546541363470028800  1.657559e+12   

                  date  timezone place  \
0  2022-07-11 17:06:24         0   NaN   
1  2022-07-11 17:06:21         0   NaN   
2  2022-07-11 17:06:20         0   NaN   
3  2022-07-11 17:06:12         0   NaN   
4  2022-07-11 17:06:09         0   NaN   

                                               tweet language  \
0  @GailAlfarATX @elonmusk @Tesla @teslacn @Tesla...       en   
1  @elonmusk @GailAlfarATX @Tesla @teslacn @Tesla...       en   
2  @elonmusk #Think about buying a country , #Mex...       en 

  with pd.option_context('mode.use_inf_as_na', True):


Compound score distribution histogram saved as 'compound_score_distribution.png'.

Top 5 Positive Tweets:
                                                                                                                                                                                                                                                                                                       tweet  compound_score
                            @quineatal A great portion of wealthy people can trace their success to luck but the greatest part succeeds through hard work. Life is a choice and I made mine which is hard work because Newton, Einstein, and Tesla are my heroes. Sometimes, luck= opportunity + preparation          0.9829
A mind of Einstein and Tesla , a pure rising talent in India , a pure talent house of a passionate actor and most importantly a passionate human being whose heart is filled with love , affection and kindness. This is our Sushant Singh Rajput  IntellectualPower

  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):


Hourly sentiment line plot saved as 'hourly_sentiment.png'.
Word cloud 'positive_wordcloud.png' generated and saved.
Word cloud 'negative_wordcloud.png' generated and saved.

Topics Identified via LDA:
Topic #1: plaid, auto, new, nikola, di, nie, amp, speed, model, tesla
Topic #2: da, ceo, el, la, en, que, elon, twitter, musk, tesla
Topic #3: das, el, es, der, und, en, die, que, la, tesla
Topic #4: car, dont, musk, buy, like, elon, stock, just, twitter, tesla
Topic #5: teslas, model, children, people, future, car, new, electric, ev, tesla

TSLA_stock_data.csv loaded successfully.

Sample Stock Data:
         Date Adj Close   Close    High     Low    Open     Volume
0         NaN      TSLA    TSLA    TSLA    TSLA    TSLA       TSLA
1  2022-07-05    233.07  233.07  233.15  216.17   223.0   84581100
2  2022-07-06    231.73  231.73  234.56  227.19  230.78   71853600
3  2022-07-07    244.54  244.54  245.36  232.21  233.92   81930600
4  2022-07-08    250.76  250.76  254.98  241.16  242.33  1