### **Importing Libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
from scipy import stats as stat

%matplotlib inline
sns.set()

# To avoid Warning message inbetween ...
import warnings
warnings.filterwarnings('ignore')

### **Importing Data**

In [None]:
# Load the three input CSV files from the Kaggle input directory.
# The paths (including their spelling) are kept exactly as given.
# company_tweet and tweet are joined on "tweet_id" later in the notebook.
company_tweet = pd.read_csv('/kaggle/input/tweets-about-the-top-companies-from-2015-to-2020/Company_Tweet.csv')
tweet = pd.read_csv('/kaggle/input/tweets-about-the-top-companies-from-2015-to-2020/Tweet.csv')
# Stock values; filtered by "day_date" and merged with the tweets further down.
company_stocks = pd.read_csv('/kaggle/input/values-of-top-nasdaq-copanies-from-2010-to-2020/CompanyValues.csv')

### **Exploratory Data Analysis (EDA)**

In [None]:
# Quick Analysis on Dataset : DataTypes, Rows and Columns ,Null values, Unique values ...

def quick_analysis(df):
    """Print a quick textual overview of a DataFrame.

    The report contains, in order: ``df.info()``, the column dtypes, the
    shape, the column names, the fraction of missing values per column,
    the number of unique values per column and ``df.describe()``.

    Args:
        df: The pandas DataFrame to summarise.

    Returns:
        None. Everything is written to stdout.
    """
    separator = "-------------****----------------"

    print(f"{separator}\nData info:\n")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would add a stray "None" line to the output.
    df.info()

    print(f"\n\n{separator}\nData Types:\n")
    print(df.dtypes)

    print(f"\n\n{separator}\nRows and Columns:\n")
    print(df.shape)

    print(f"\n\n{separator}\nColumn names:\n")
    print(df.columns)

    print(f"\n\n{separator}\nNull Values:\n")
    # Fraction (0..1) of missing values per column. The vectorised form is much
    # faster than summing in Python and yields NaN instead of raising
    # ZeroDivisionError when the frame has no rows.
    print(df.isnull().mean())

    print(f"\n\n{separator}\nUnique values:\n")
    print(df.nunique())

    print(f"\n\n{separator}\nDescribes:\n")
    print(df.describe())


In [None]:
quick_analysis(company_tweet)

In [None]:
company_tweet

In [None]:
quick_analysis(tweet)

In [None]:
tweet

Both the "**company_tweet**" and "**tweet**" datasets have a common attribute called **tweet_id**, so the company_tweet and tweet dataframes can be merged on tweet_id into a new dataframe, "**company_tweet_new**".

In [None]:
company_tweet_new = pd.merge(company_tweet , tweet , on="tweet_id", how= "inner")
company_tweet_new

In [None]:
quick_analysis(company_stocks)

In [None]:
company_stocks

The **company_stocks** and **company_tweet_new** dataframes can later be joined on the "**ticker_symbol**" column together with the date, i.e. "**post_date**" in the tweet data and "**day_date**" in the stock data.

***Total engangement***

We assume that the **total of the comment, retweet, and like counts** reflects the impact of a tweet on the community, because only influential tweets are likely to affect the stock price of a company.

So we add the comment, retweet, and like counts together into a new column named **total_engangement**.

In [None]:
company_tweet_new["total_engangement"] = company_tweet_new["comment_num"] + company_tweet_new["retweet_num"] + company_tweet_new["like_num"]

Now we can plot the histogram of "total_engangement" for the tweets and **filter out the low-impact tweets**.

In [None]:
def column_distplot(df, column_name, bins=50, describe=True):
    """Plot the distribution of one DataFrame column as a histogram with a KDE curve.

    Args:
        df: DataFrame holding the data.
        column_name: Name of the column to plot.
        bins: Number of histogram bins.
        describe: When True, print ``df[column_name].describe()`` before plotting.
    """
    if describe:
        print(df[column_name].describe())
    plt.figure(figsize=(9, 7))
    # sns.distplot is deprecated; histplot with kde=True and stat="density"
    # is the documented replacement for the default distplot appearance.
    sns.histplot(df[column_name], bins=bins, kde=True, stat="density", color='b', alpha=0.4)

In [None]:
column_distplot(company_tweet_new, column_name="total_engangement")

**To obtain high-impact** tweets, we keep only the rows whose total engagement is greater than engagement_threshold and drop all others.

**engagement_threshold** is a heuristic parameter that we chose.

In [None]:
engagement_threshold = 40

In [None]:
# .copy() makes the filtered frame independent of the unfiltered one, so the
# column assignments in the following cells do not write into a slice
# (which triggers SettingWithCopyWarning and can silently have no effect).
company_tweet_new = company_tweet_new[company_tweet_new["total_engangement"] > engagement_threshold].copy()
company_tweet_new.shape

**Day date**

Converting "post_date" in tweet_df and "day_date" in company_value to datetime type

In [None]:
company_tweet_new['post_date'] = pd.to_datetime(company_tweet_new['post_date'], unit='s')
company_tweet_new['post_date']

Because we only care about the day on which a tweet was posted, so that it can be compared with the daily stock data, we create a new column named "**day_date**".

In [None]:
# Truncate each timestamp to midnight of its day. dt.normalize() is vectorised
# and keeps the datetime64 dtype, unlike a row-wise apply(lambda d: d.date())
# followed by a second pd.to_datetime conversion.
company_tweet_new['day_date'] = company_tweet_new['post_date'].dt.normalize()
company_tweet_new['day_date']

In [None]:
company_tweet_new.columns

Then, "company_tweet_new" **data set is sorted** based on 'day_date'

In [None]:
company_tweet_new = company_tweet_new.sort_values(by="day_date")
company_tweet_new

We drop the columns "post_date", "tweet_id", "comment_num", "retweet_num", and "like_num" because they are not used anymore.

In [None]:
company_tweet_new = company_tweet_new.drop(['tweet_id', 'post_date', 'comment_num', 'retweet_num', 'like_num'], axis=1)

In [None]:
company_stocks['day_date']  = pd.to_datetime(company_stocks['day_date'])
company_stocks.sample(5)

We only keep the rows of company_stocks whose day_date lies between the first day (start_day) and the last day (end_day) of the tweet data, both inclusive.

In [None]:
# Series.min()/max() are vectorised and skip missing dates (NaT); the Python
# built-ins iterate element by element and compare against NaT unreliably.
start_day = company_tweet_new['day_date'].min()
print(f"Start day: {start_day}")

end_day = company_tweet_new['day_date'].max()
print(f"End day: {end_day}")

In [None]:
company_stocks = company_stocks[(company_stocks['day_date'] >= start_day) & (company_stocks['day_date'] <= end_day)]

Then, "company_stocks" data set is sorted based on 'day_date'

In [None]:
company_stocks = company_stocks.sort_values(by="day_date")

Then, followings are the current updated datasets

**company_tweet_new** dataset

In [None]:
company_tweet_new.sample(5)

In [None]:
company_tweet_new.shape

**company_stocks** dataset

In [None]:
company_stocks.sample(5)

In [None]:
company_stocks.shape

### **Text preprocessing for Body tweet in tweet_df**

There are many text preprocessing steps that can be used to prepare the input data for a later stage such as a machine learning task:

* Tokenizing the string
* Lowercasing
* Removing stop words and punctuation
* Stemming
* Lemmatization

In this project, however, the text is only cleaned and converted to lowercase, because the **Affin** *algorithm* used here does its own text processing and uses whole sentences to compute the sentiment score.

In [None]:
import nltk
# from nltk.corpus import twitter_samples
# from nltk.corpus import stopwords
# from nltk.stem import PorterStemmer
# from nltk.tokenize import TweetTokenizer

import random
import re
import string

# nltk.download('stopwords')

In [None]:
tweets = company_tweet_new["body"]
tweets

**Remove hyperlinks, Twitter marks and styles**

In [None]:
def remove_special_character(tweet):
    """Strip Twitter-specific markup from a tweet body.

    Removes, in this order: a leading old-style retweet marker ("RT"),
    hyperlinks, the '#' sign of hashtags (the word itself is kept) and all
    digits.

    Args:
        tweet: The raw tweet text. Non-string values (e.g. NaN for a missing
            body) are treated as an empty tweet.

    Returns:
        The cleaned tweet text as a string.
    """
    # A missing body arrives as NaN/None; re.sub would raise TypeError on it.
    if not isinstance(tweet, str):
        return ''

    # remove the old style retweet text "RT"
    tweet = re.sub(r'^RT[\s]+', '', tweet)

    # remove hyperlinks: match only the URL token itself. The previous pattern
    # ended in a greedy ".*" and therefore also deleted every word that
    # followed the link on the same line.
    tweet = re.sub(r'https?://\S+', '', tweet)

    # remove hashtags. only removing the hash # sign from the word
    tweet = re.sub(r'#', '', tweet)

    # remove every digit character (this also strips digits inside words)
    tweet = re.sub(r'[0-9]', '', tweet)

    return tweet

In [None]:
company_tweet_new["tweet"] = company_tweet_new["body"].apply(lambda tweet: remove_special_character(tweet))

**Convert to lowercase**

We will also convert all words to lowercase.

In [None]:
company_tweet_new["tweet"] = company_tweet_new["tweet"].str.lower()

In [None]:
company_tweet_new[["body", "tweet"]].sample(10)

In [None]:
from pathlib import Path

# Write into a relative "output" folder instead of a machine-specific Windows
# path. The old literal 'D:\Final Yr Project\...' also contained invalid escape
# sequences (\F, \c) and only resolved to a real folder on one computer.
tweets_output_path = Path("output") / "company_tweet_new.csv"
tweets_output_path.parent.mkdir(parents=True, exist_ok=True)
company_tweet_new.to_csv(tweets_output_path, index=False)

### **Sentiment Analysis**

In [None]:
# pip install vaderSentiment
#     ERROR: Could not find a version that satisfies the requirement vaderSentiment (from versions: none)
#     ERROR: No matching distribution found for vaderSentiment
# Therefor, I downloaded the company_tweet_new.csv as follows and did the vader sentimant analysis using google colab
#company_tweet_new.to_csv('D:\Final Yr Project\company_tweet_new.csv', index=False)

In [None]:
company_tweet_new_sentimant_updated = pd.read_csv('/kaggle/input/twitter-data-with-sentiments/twitter_data_with_sentiments_updated.csv')

In [None]:
company_tweet_new_sentimant_updated.head()

In [None]:
company_stocks.head(10)

As you can see, we cannot merge these two datasets based only on ['day_date'], because many tweets were posted on the same day. That is why they are merged on both **['ticker_symbol', 'day_date']** as follows.

In [None]:
import pandas as pd

# The join key must be a datetime column on both sides of the merge
for frame in (company_tweet_new_sentimant_updated, company_stocks):
    frame['day_date'] = pd.to_datetime(frame['day_date'])

join_keys = ['ticker_symbol', 'day_date']
stock_value_columns = ['open_value', 'high_value', 'low_value', 'volume', 'close_value']

# Left join: every tweet row is kept and receives that day's stock values for
# its ticker; the closing value is exposed under the name 'stock_price'.
company_tweets_stocks = (
    company_tweet_new_sentimant_updated
    .merge(company_stocks[join_keys + stock_value_columns], on=join_keys, how='left')
    .rename(columns={'close_value': 'stock_price'})
)

print("Merged DataFrame:")

# Column selections sharing the same leading columns; only the tail differs
sentiment_columns = ['polarity_score', 'negative', 'neutral', 'positive']
leading_columns = join_keys + sentiment_columns + ['open_value', 'high_value', 'low_value']

company_tweets_stock_price = company_tweets_stocks[leading_columns + ['volume', 'stock_price']]
company_tweets_volume = company_tweets_stocks[leading_columns + ['stock_price', 'volume']]

# Daily price range (high minus low)
company_tweets_stocks['volatility'] = company_tweets_stocks['high_value'] - company_tweets_stocks['low_value']
company_tweets_volatility = company_tweets_stocks[leading_columns + ['volume', 'stock_price', 'volatility']]

In [None]:
company_tweets_volume.head()

In [None]:
# company_tweet_new_vader['day_date']  = pd.to_datetime(company_tweet_new_vader['day_date'])
# company_stocks['day_date']  = pd.to_datetime(company_stocks['day_date'])

# #company_tweets_stocks = pd.merge(company_tweet_new_vader, company_stocks, on=['ticker_symbol', 'day_date'])
# # Merge the two DataFrames based on 'ticker_symbol' and 'day_date'

# company_tweet_new_sentimant_updated = company_tweet_new_sentimant[['ticker_symbol','day_date','polarity_score','negative','neutral','positive']]

# company_tweets_stocks = pd.merge(company_tweet_new_sentimant_updated, company_stocks[['ticker_symbol', 'day_date', 'open_value', 'high_value', 'low_value', 'volume', 'close_value']], on=['ticker_symbol', 'day_date'], how='left')

# # Rename the 'Close' column to 'stock_price'
# company_tweets_stocks.rename(columns={'close_value': 'stock_price'}, inplace=True)

In [None]:
from pathlib import Path

# Save the merged data exactly once, into a relative "output" folder.
# Previously the guarded write was repeated unguarded right after the try
# block, so a failure was first reported and then raised anyway. The target
# was also a machine-specific Windows path with invalid escape sequences.
stocks_output_path = Path("output") / "company_tweets_stocks.csv"
try:
    stocks_output_path.parent.mkdir(parents=True, exist_ok=True)
    company_tweets_stocks.to_csv(stocks_output_path, index=False)
    print("File successfully saved.")
except OSError as e:
    # Saving is best effort: report the problem and let the notebook continue.
    print("An error occurred:", e)

In [None]:
company_stocks.head()

In [None]:
company_tweets_stocks.head(10)

In [None]:
company_tweets_stocks.shape

In [None]:
missing_values_percentage = company_tweets_stocks['stock_price'].isnull().mean() * 100
print("Percentage of missing values in 'stock_price' column:", missing_values_percentage)


### **Stock Price**

### **The correlation between different stocks closing prices**

The merged data contains many rows per day (one per tweet), so the index column 'day_date' has duplicate entries and the DataFrame cannot be pivoted directly. Before pivoting, we need to ensure that the index does not contain duplicate entries. One way to handle this is to aggregate the stock prices so that there is only one value per day and ticker symbol. We use the **groupby function** to achieve this:

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One row per day and one column per ticker symbol; each cell holds the mean
# stock price of the rows for that (day, ticker) pair.
daily_mean_price = company_tweets_stocks.groupby(['day_date', 'ticker_symbol'])['stock_price'].mean()
stock_prices_df = daily_mean_price.unstack()

# Pairwise correlation between the ticker columns
correlation_matrix = stock_prices_df.corr()

# Draw the annotated heatmap on an explicit axes object
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(correlation_matrix, annot=True, cmap='summer', ax=ax)
ax.set_title('Correlation of stock prices between ticker symbols')
plt.show()


## **Forecasting the stock price with Twitter Sentiment Analysis through MLP, 1D_CNN, LSTM & CNN-LSTM**

#### **Scale the data**

In [None]:
from sklearn.preprocessing import MinMaxScaler

def scalevalue(dataset, scaler=None, fit=True):
    """Scale every column of ``dataset`` with a MinMaxScaler.

    Called as ``scalevalue(dataset)`` it behaves as before: a new scaler with
    feature range (0, 1) is fitted on ``dataset`` and the scaled data is
    returned. The optional arguments allow a scaler to be reused, e.g. to
    transform test data with the statistics learned from the training data.

    Args:
        dataset: 2-D array-like / DataFrame of numeric columns.
        scaler: Optional scaler object to use. When None, a new
            ``MinMaxScaler(feature_range=(0, 1))`` is created.
        fit: When True (default) the scaler is fitted on ``dataset`` before
            transforming. When False, ``scaler`` must already be fitted and is
            only applied.

    Returns:
        The scaled data as returned by the scaler.

    Raises:
        ValueError: If ``fit`` is False but no scaler was supplied.
    """
    if scaler is None:
        if not fit:
            raise ValueError("fit=False requires an already fitted scaler")
        scaler = MinMaxScaler(feature_range=(0,1))

    if fit:
        return scaler.fit_transform(dataset)
    return scaler.transform(dataset)

#### **Train-Test Dataset Generating Function**


In [None]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
    """Cut a multivariate sequence into sliding input/output windows.

    ``sequences`` is a 2-D NumPy array whose last column is the target and
    whose other columns are the input features. Each sample consists of
    ``n_steps_in`` consecutive feature rows and ``n_steps_out`` target values;
    the first target value belongs to the same row as the last input row.

    Args:
        sequences: 2-D NumPy array of shape (rows, features + 1).
        n_steps_in: Number of rows in each input window.
        n_steps_out: Number of target values in each output window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the collected windows.
        Both are empty when the data is too short for a single window.
    """
    total_rows = len(sequences)
    # Rows covered by one sample: the input window plus the forecast steps,
    # which overlap by one row (hence the -1).
    window_span = n_steps_in + n_steps_out - 1

    inputs, targets = [], []
    start = 0
    # Slide forward one row at a time while a full sample still fits
    while start < total_rows and start + window_span <= total_rows:
        input_stop = start + n_steps_in
        inputs.append(sequences[start:input_stop, :-1])
        targets.append(sequences[input_stop - 1:start + window_span, -1])
        start += 1

    return np.array(inputs), np.array(targets)

#### **Training & Testing Datasets Generator**

In [None]:
def training_dataset_generator(dataset, train_fraction=0.7):
    """Split ``dataset`` by position into a leading training part and a trailing test part.

    The data is not shuffled: the first ``ceil(len(dataset) * train_fraction)``
    rows form the training set and the remaining rows form the test set.

    Args:
        dataset: Any sliceable sequence (DataFrame, array, list, ...).
        train_fraction: Share of rows used for training, in the range (0, 1].
            Defaults to 0.7, i.e. 70% train / 30% test.

    Returns:
        Tuple ``(train_sequences, test_sequences)``.

    Raises:
        ValueError: If ``train_fraction`` is outside (0, 1].
    """
    if not 0 < train_fraction <= 1:
        raise ValueError("train_fraction must be in the range (0, 1]")

    # Number of rows for training (rounded up so that a non-empty dataset
    # always yields at least one training row)
    training_df_len = math.ceil(len(dataset) * train_fraction)

    # Split the data into training and testing sets
    train_sequences = dataset[:training_df_len]
    test_sequences = dataset[training_df_len:]

    return train_sequences, test_sequences

### **Making Dataset For Each Company**

In [None]:
# One frame per company; Google is listed under two ticker symbols.
apple_df = company_tweets_stocks.loc[company_tweets_stocks['ticker_symbol'] == 'AAPL']
google_df = company_tweets_stocks.loc[company_tweets_stocks['ticker_symbol'].isin(['GOOG', 'GOOGL'])]
microsoft_df = company_tweets_stocks.loc[company_tweets_stocks['ticker_symbol'] == 'MSFT']
amazon_df = company_tweets_stocks.loc[company_tweets_stocks['ticker_symbol'] == 'AMZN']
tesla_df = company_tweets_stocks.loc[company_tweets_stocks['ticker_symbol'] == 'TSLA']

# Report the size of every company frame. The loop keeps the labels uniform
# (the Microsoft label used to be misspelled "microsoft_df_shapse").
for label, company_frame in (
    ("apple_df", apple_df),
    ("google_df", google_df),
    ("microsoft_df", microsoft_df),
    ("amazon_df", amazon_df),
    ("tesla_df", tesla_df),
):
    print(f"{label}_shape: {company_frame.shape}")

### **Apple Company**

#### ***Data Processing with Twitter Data***

In [None]:
# Select the date, sentiment and stock columns for Apple and split the rows
# by position into a training and a testing part.
# NOTE(review): the split is applied before the per-day aggregation in the next
# cells, so rows of the day at the split boundary can presumably end up in
# both parts — confirm whether that overlap is acceptable.
dataset = apple_df[['day_date', 'polarity_score','negative','neutral','positive','open_value','high_value','low_value','volume','stock_price']]
apple_df_train,apple_df_test = training_dataset_generator(dataset)
print(apple_df_train.shape, apple_df_test.shape)

In [None]:
# Aggregate to one row per 'day_date': mean of the sentiment scores, first value of the daily stock columns
apple_df_train = apple_df_train.groupby(['day_date'], as_index=False).agg({
    'polarity_score': 'mean',
    'negative': 'mean',
    'neutral': 'mean',
    'positive': 'mean',
    'open_value': 'first',
    'high_value': 'first',
    'low_value': 'first',
    'volume': 'first',
    'stock_price': 'first'
})

apple_df_train.shape

In [None]:
apple_df_train.head()

In [None]:
# Aggregate to one row per 'day_date': mean of the sentiment scores, first value of the daily stock columns
apple_df_test = apple_df_test.groupby(['day_date'], as_index=False).agg({
    'polarity_score': 'mean',
    'negative': 'mean',
    'neutral': 'mean',
    'positive': 'mean',
    'open_value': 'first',
    'high_value': 'first',
    'low_value': 'first',
    'volume': 'first',
    'stock_price': 'first'
})

apple_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Create a figure with two subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

# Plot the training data
ax1.plot(apple_df_train['day_date'], apple_df_train['stock_price'], color='c')
ax1.set_ylabel('Stock Price')
ax1.set_xlabel('Day Date')
ax1.set_title('Closing Price of Apple - Training Data')
ax1.tick_params(axis='x', rotation=60)

# Plot the testing data
ax2.plot(apple_df_test['day_date'], apple_df_test['stock_price'], color='m')
ax2.set_ylabel('Stock Price')
ax2.set_xlabel('Day Date')
ax2.set_title('Closing Price of Apple - Testing Data')
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


#### Generating X-train & y_train

In [None]:
# Build the model input without overwriting apple_df_train. Dropping the date
# column in place made this cell fail with a KeyError when run a second time
# and was inconsistent with the matching test-set cell.
dataset = apple_df_train.drop(columns=['day_date'])

# Scale the dataset
scaled_dataset = scalevalue(dataset)

# choose a number of time steps
n_steps_in, n_steps_out = 60, 20

# Generate input/output for training sets
X_train_apple, y_train_apple = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_train_apple.shape, y_train_apple.shape)

In [None]:
dataset = apple_df_test.drop(columns=['day_date'])

# Scale the dataset
scaled_dataset = scalevalue(dataset)

# choose a number of time steps
n_steps_in, n_steps_out = 60, 20

# Generate input/output for testing sets
X_test_apple, y_test_apple = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_test_apple.shape, y_test_apple.shape)

#### Flattening the Input for Training

In [None]:
# flatten input
n_input = X_train_apple.shape[1] * X_train_apple.shape[2]

X_train_apple_flatteded = X_train_apple.reshape((X_train_apple.shape[0], n_input))
X_test_apple_flatteded = X_test_apple.reshape((X_test_apple.shape[0], n_input))

print(X_train_apple_flatteded.shape, X_test_apple_flatteded.shape)

#### Data Processing without Twitter Data

In [None]:
dataset = apple_df[['day_date','open_value','high_value','low_value','volume','stock_price']]
apple_stock_df_train,apple_stock_df_test = training_dataset_generator(dataset)
print(apple_stock_df_train.shape, apple_stock_df_test.shape)

In [None]:
# Aggregate to one row per 'day_date', keeping the first value of each daily stock column
apple_stock_df_train = apple_stock_df_train.groupby(['day_date'], as_index=False).agg({
    'open_value': 'first',
    'high_value': 'first',
    'low_value': 'first',
    'volume': 'first',
    'stock_price': 'first'
})

apple_stock_df_train.shape

In [None]:
# Aggregate to one row per 'day_date', keeping the first value of each daily stock column
apple_stock_df_test = apple_stock_df_test.groupby(['day_date'], as_index=False).agg({
    'open_value': 'first',
    'high_value': 'first',
    'low_value': 'first',
    'volume': 'first',
    'stock_price': 'first'
})

apple_stock_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Create a figure with two subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

# Plot the training data
ax1.plot(apple_stock_df_train['day_date'], apple_stock_df_train['stock_price'], color='c')
ax1.set_ylabel('Stock Price')
ax1.set_xlabel('Day Date')
ax1.set_title('Closing Price of Apple - Training Data')
ax1.tick_params(axis='x', rotation=60)

# Plot the testing data
ax2.plot(apple_stock_df_test['day_date'], apple_stock_df_test['stock_price'], color='m')
ax2.set_ylabel('Stock Price')
ax2.set_xlabel('Day Date')
ax2.set_title('Closing Price of Apple - Testing Data')
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


#### **Generating X-train & y_train**

In [None]:
dataset = apple_stock_df_train.drop(columns=['day_date'])

# Scale the dataset
scaled_dataset = scalevalue(dataset)

# choose a number of time steps
n_steps_in, n_steps_out = 60, 20

# Generate input/output for training sets
X_train_apple_stock, y_train_apple_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_train_apple_stock.shape, y_train_apple_stock.shape)

In [None]:
dataset = apple_stock_df_test.drop(columns=['day_date'])

# Scale the dataset
scaled_dataset = scalevalue(dataset)

# choose a number of time steps
n_steps_in, n_steps_out = 60, 20

# Generate input/output for testing sets
X_test_apple_stock, y_test_apple_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_test_apple_stock.shape, y_test_apple_stock.shape)

#### **Flattening the Input for Training**

In [None]:
# flatten input
n_input = X_train_apple_stock.shape[1] * X_train_apple_stock.shape[2]

X_train_apple_stock_flatteded = X_train_apple_stock.reshape((X_train_apple_stock.shape[0], n_input))
X_test_apple_stock_flatteded = X_test_apple_stock.reshape((X_test_apple_stock.shape[0], n_input))

print(X_train_apple_stock_flatteded.shape, X_test_apple_stock_flatteded.shape)

#### **Deep Learning Model Feeding**

##### **MLP Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Size the input layer from the data this model is actually trained on.
# The shared `n_input` variable cannot be used here: by this point it has been
# overwritten by the flattening cell of the stock-only data, whose rows have a
# different width than X_train_apple_flatteded.
n_input_apple = X_train_apple_flatteded.shape[1]

# Define the model: one hidden layer, light dropout, one output per forecast step
model = Sequential()
MLP_Apple = model
MLP_Apple.add(Dense(50, activation='relu', input_dim=n_input_apple))
MLP_Apple.add(Dropout(0.1))
MLP_Apple.add(Dense(n_steps_out))
MLP_Apple.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(MLP_Apple.summary())

# Fit the model
MLP_Apple.fit(X_train_apple_flatteded, y_train_apple,batch_size=32, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Predict the values for the test set
y_pred_apple = MLP_Apple.predict(X_test_apple_flatteded)

# Calculate MSE
mse = mean_squared_error(y_test_apple, y_pred_apple)
print(f'Mean Squared Error (MSE): {mse}')

# Calculate MAE
mae = mean_absolute_error(y_test_apple, y_pred_apple)
print(f'Mean Absolute Error (MAE): {mae}')

# Calculate MAPE
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **MLP Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Width of the flattened stock-only input, taken from the training data itself
n_input_apple_stock = X_train_apple_stock_flatteded.shape[1]

# Define the model: two hidden layers, each followed by dropout.
# The dropout layers belong to THIS model; they used to be appended to
# MLP_Apple by mistake, which left this network without any dropout and
# modified the already trained Twitter model.
model = Sequential()
MLP_Apple_Stock = model
MLP_Apple_Stock.add(Dense(50, activation='relu', input_dim=n_input_apple_stock))
MLP_Apple_Stock.add(Dropout(0.2))
# Only the first layer needs the input size; later layers infer it
MLP_Apple_Stock.add(Dense(50, activation='relu'))
MLP_Apple_Stock.add(Dropout(0.2))
MLP_Apple_Stock.add(Dense(n_steps_out))
MLP_Apple_Stock.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(MLP_Apple_Stock.summary())

# Fit the model
MLP_Apple_Stock.fit(X_train_apple_stock_flatteded, y_train_apple_stock,batch_size=16, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Predict the values for the test set
y_pred_apple_stock = MLP_Apple_Stock.predict(X_test_apple_stock_flatteded)

# Calculate MSE
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
print(f'Mean Squared Error (MSE): {mse}')

# Calculate MAE
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
print(f'Mean Absolute Error (MAE): {mae}')

# Calculate MAPE
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Predict stock prices using both MLP models for Apple
predicted_stock_prices_apple = MLP_Apple.predict(X_test_apple_flatteded)
predicted_stock_prices_apple_stock = MLP_Apple_Stock.predict(X_test_apple_stock_flatteded)

# Fit the price scaler on the test closing prices. The frame is referenced
# explicitly instead of relying on whatever the global `dataset` variable
# happens to hold when this cell runs.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(apple_df_test[['stock_price']])

# Inverse transform the scaled predictions and keep the last forecast step of each window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Align predictions with the actual prices.
# split_sequences builds window i from rows i .. i + n_steps_in - 1 and its
# last target is row i + n_steps_in + n_steps_out - 2, so the last forecast
# step of the final window is the final test row. The predictions therefore
# belong to the END of the test series, not to its beginning.
all_actual_prices = apple_df_test['stock_price'].values
all_dates = apple_df_test['day_date'].values

n_points = min(len(last_predicted_values_apple),
               len(last_predicted_values_apple_stock),
               len(all_actual_prices))

# len(x) - n_points is used instead of -n_points so that n_points == 0 yields an empty slice
actual_stock_prices = all_actual_prices[len(all_actual_prices) - n_points:]
dates = all_dates[len(all_dates) - n_points:]
last_predicted_values_apple = last_predicted_values_apple[len(last_predicted_values_apple) - n_points:]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[len(last_predicted_values_apple_stock) - n_points:]

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# Plot for predicted_stock_prices_apple
ax1.plot(dates, actual_stock_prices, color='darkblue', label='Actual Stock Prices', linewidth=2)
ax1.plot(dates, last_predicted_values_apple, color='darkred', label='Predicted Stock Prices (With Twitter)', linewidth=2)
ax1.set_xlabel('Date')
ax1.set_ylabel('Stock Price USD ($)')
ax1.set_title('Apple Stock Prices - Actual vs Predicted (With Twitter)')
ax1.legend()
ax1.tick_params(axis='x', rotation=60)

# Plot for predicted_stock_prices_apple_stock
ax2.plot(dates, actual_stock_prices, color='darkblue', label='Actual Stock Prices', linewidth=2)
ax2.plot(dates, last_predicted_values_apple_stock, color='olive', label='Predicted Stock Prices (Without Twitter)', linewidth=2)
ax2.set_xlabel('Date')
ax2.set_ylabel('Stock Price USD ($)')
ax2.set_title('Apple Stock Prices - Actual vs Predicted (Without Twitter)')
ax2.legend()
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


##### **1D-CNN Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Dropout

# Define the model
One_D_CNN_Apple = Sequential()
One_D_CNN_Apple.add(Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps_in,8)))
One_D_CNN_Apple.add(MaxPooling1D(pool_size=2))
One_D_CNN_Apple.add(Flatten())
One_D_CNN_Apple.add(Dense(100, activation='relu'))
One_D_CNN_Apple.add(Dropout(0.1))
One_D_CNN_Apple.add(Dense(n_steps_out))
One_D_CNN_Apple.compile(optimizer='adam', loss='mse')

# Print model summary
print(One_D_CNN_Apple.summary())

# Fit the model
One_D_CNN_Apple.fit(X_train_apple, y_train_apple, batch_size=64, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Predict the values for the test set
y_pred_apple = One_D_CNN_Apple.predict(X_test_apple)

# Calculate MSE
mse = mean_squared_error(y_test_apple, y_pred_apple)
print(f'Mean Squared Error (MSE): {mse}')

# Calculate MAE
mae = mean_absolute_error(y_test_apple, y_pred_apple)
print(f'Mean Absolute Error (MAE): {mae}')

# Calculate MAPE
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **1D-CNN Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Dropout

# Define the model
One_D_CNN_Apple_Stock = Sequential()
One_D_CNN_Apple_Stock.add(Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps_in,4)))
One_D_CNN_Apple_Stock.add(MaxPooling1D(pool_size=3))
One_D_CNN_Apple_Stock.add(Flatten())
One_D_CNN_Apple_Stock.add(Dense(100, activation='relu'))
One_D_CNN_Apple_Stock.add(Dropout(0.1))
One_D_CNN_Apple_Stock.add(Dense(n_steps_out))
One_D_CNN_Apple_Stock.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(One_D_CNN_Apple_Stock.summary())

# Fit the model
One_D_CNN_Apple_Stock.fit(X_train_apple_stock, y_train_apple_stock, batch_size=32, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Predict the values for the test set
y_pred_apple_stock = One_D_CNN_Apple_Stock.predict(X_test_apple_stock)

# Calculate MSE
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
print(f'Mean Squared Error (MSE): {mse}')

# Calculate MAE
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
print(f'Mean Absolute Error (MAE): {mae}')

# Calculate MAPE
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Predict stock prices using both 1D-CNN models for Apple
predicted_stock_prices_apple = One_D_CNN_Apple.predict(X_test_apple)
predicted_stock_prices_apple_stock = One_D_CNN_Apple_Stock.predict(X_test_apple_stock)

# Fit the price scaler on the test closing prices. The frame is referenced
# explicitly instead of relying on whatever the global `dataset` variable
# happens to hold when this cell runs.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(apple_df_test[['stock_price']])

# Inverse transform the scaled predictions and keep the last forecast step of each window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Align predictions with the actual prices.
# split_sequences builds window i from rows i .. i + n_steps_in - 1 and its
# last target is row i + n_steps_in + n_steps_out - 2, so the last forecast
# step of the final window is the final test row. The predictions therefore
# belong to the END of the test series, not to its beginning.
all_actual_prices = apple_df_test['stock_price'].values
all_dates = apple_df_test['day_date'].values

n_points = min(len(last_predicted_values_apple),
               len(last_predicted_values_apple_stock),
               len(all_actual_prices))

# len(x) - n_points is used instead of -n_points so that n_points == 0 yields an empty slice
actual_stock_prices = all_actual_prices[len(all_actual_prices) - n_points:]
dates = all_dates[len(all_dates) - n_points:]
last_predicted_values_apple = last_predicted_values_apple[len(last_predicted_values_apple) - n_points:]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[len(last_predicted_values_apple_stock) - n_points:]

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# Plot for predicted_stock_prices_apple
ax1.plot(dates, actual_stock_prices, color='darkblue', label='Actual Stock Prices', linewidth=2)
ax1.plot(dates, last_predicted_values_apple, color='darkred', label='Predicted Stock Prices (With Twitter)', linewidth=2)
ax1.set_xlabel('Date')
ax1.set_ylabel('Stock Price USD ($)')
ax1.set_title('Apple Stock Prices - Actual vs Predicted (With Twitter)')
ax1.legend()
ax1.tick_params(axis='x', rotation=60)

# Plot for predicted_stock_prices_apple_stock
ax2.plot(dates, actual_stock_prices, color='darkblue', label='Actual Stock Prices', linewidth=2)
ax2.plot(dates, last_predicted_values_apple_stock, color='olive', label='Predicted Stock Prices (Without Twitter)', linewidth=2)
ax2.set_xlabel('Date')
ax2.set_ylabel('Stock Price USD ($)')
ax2.set_title('Apple Stock Prices - Actual vs Predicted (Without Twitter)')
ax2.legend()
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **LSTM Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM  # Dropout is used below, so import it with the other layers

# LSTM forecaster for Apple fed with the Twitter-augmented inputs.
# Each sample is a window of n_steps_in time steps with 8 features; the
# Dense head emits n_steps_out values per window.
LSTMM_A = Sequential()
LSTMM_A.add(LSTM(50, activation='relu', return_sequences=False, input_shape=(n_steps_in, 8)))
LSTMM_A.add(Dropout(0.3))
LSTMM_A.add(Dense(n_steps_out))
LSTMM_A.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(LSTMM_A.summary())

# Train silently (verbose=0) and keep the returned History object
history = LSTMM_A.fit(X_train_apple, y_train_apple, batch_size=32, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every test window with the Twitter-fed LSTM
y_pred_apple = LSTMM_A.predict(X_test_apple)

# Score the forecasts against the held-out targets
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

# Report the three error metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **LSTM Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM  # Dropout is used below, so import it with the other layers

# LSTM forecaster for Apple fed with stock-only inputs (4 features per time step).
# return_sequences must be False: with True the Dense head would be applied to
# every time step and the model would emit (batch, n_steps_in, n_steps_out),
# which does not line up with the 2-D targets, the sklearn metrics, or the
# scaler's inverse_transform used in the following cells.
LSTMM_AM = Sequential()
LSTMM_AM.add(LSTM(50, activation='relu', return_sequences=False, input_shape=(n_steps_in, 4)))
LSTMM_AM.add(Dropout(0.1))
LSTMM_AM.add(Dense(n_steps_out))
LSTMM_AM.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(LSTMM_AM.summary())

# Train silently (verbose=0)
LSTMM_AM.fit(X_train_apple_stock, y_train_apple_stock, batch_size=16, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every test window with the stock-only LSTM
y_pred_apple_stock = LSTMM_AM.predict(X_test_apple_stock)

# Score the forecasts against the held-out targets
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

# Report the three error metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Predict stock prices using the models for Apple
predicted_stock_prices_apple = LSTMM_A.predict(X_test_apple)
predicted_stock_prices_apple_stock = LSTMM_AM.predict(X_test_apple_stock)

# Extract the actual stock prices and the dates
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the price scaler directly on the test-set prices so this cell does not
# depend on whatever the global `dataset` happened to hold last.
# NOTE(review): assumes scalevalue() min-max scaled the test frame on its own
# range - confirm against its definition.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Map the scaled forecasts back to prices and keep the last forecast step of each window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to one common length. The previous pairwise checks sliced
# the longer-or-equal array to the longer length (a no-op), so a real length
# mismatch would still have reached ax.plot and raised.
n_points = min(len(actual_stock_prices), len(last_predicted_values_apple), len(last_predicted_values_apple_stock))
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]
# NOTE(review): the i-th forecast is plotted against the i-th test date; check
# split_sequences to see whether the forecasts should be shifted by the window length.

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# One panel per model: (axis, forecast series, line colour, legend label, title)
panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'Predicted Stock Prices (With Twitter)',
     'Apple Stock Prices - Actual vs Predicted (With Twitter)'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Predicted Stock Prices (Without Twitter)',
     'Apple Stock Prices - Actual vs Predicted (Without Twitter)'),
)
for axis, predicted_series, predicted_color, predicted_label, panel_title in panels:
    axis.plot(dates, actual_stock_prices, color='darkblue', label='Actual Stock Prices', linewidth=2)
    axis.plot(dates, predicted_series, color=predicted_color, label=predicted_label, linewidth=2)
    axis.set_xlabel('Date')
    axis.set_ylabel('Stock Price USD ($)')
    axis.set_title(panel_title)
    axis.legend()
    axis.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **CNN-LSTM Model Feeding with Twitter Data**

In [None]:
# The former `from keras...` imports were immediately shadowed by these
# tensorflow.keras imports, so only the effective ones are kept.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Each input window is cut into n_seq sub-sequences of n_steps time steps
n_seq = 5
n_steps = X_train_apple.shape[1] // n_seq

# CNN-LSTM for Apple with Twitter features (8 per time step): a Conv1D/pooling
# stack runs on every sub-sequence, and the LSTM reads the flattened results in order.
CNN_LSTM = Sequential()
CNN_LSTM.add(TimeDistributed(Conv1D(filters=32, kernel_size=1, activation='relu'), input_shape=(n_seq, n_steps, 8)))
CNN_LSTM.add(TimeDistributed(MaxPooling1D(pool_size=2)))
CNN_LSTM.add(TimeDistributed(Flatten()))
CNN_LSTM.add(LSTM(150, activation='relu'))
CNN_LSTM.add(Dropout(0.1))
# Output width follows the targets instead of a hard-coded 20
CNN_LSTM.add(Dense(y_train_apple.shape[1]))

# Compile the model
CNN_LSTM.compile(optimizer=RMSprop(), loss='mse')

# Print model summary
print(CNN_LSTM.summary())

# Reshape X_train to (samples, n_seq, n_steps, features)
X_train_reshaped = np.reshape(X_train_apple, (X_train_apple.shape[0], n_seq, n_steps, X_train_apple.shape[2]))

# Train silently (verbose=0) and keep the returned History object
history = CNN_LSTM.fit(X_train_reshaped, y_train_apple, batch_size=32, epochs=100, verbose=0)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Regroup every test window into (n_seq, n_steps) sub-sequences using the
# n_seq / n_steps values currently in the notebook namespace
X_test_reshaped = np.reshape(
    X_test_apple,
    (X_test_apple.shape[0], n_seq, n_steps, X_test_apple.shape[2]),
)

# Forecast every test window with the Twitter-fed CNN-LSTM
y_pred_apple = CNN_LSTM.predict(X_test_reshaped)

# Score the forecasts against the held-out targets
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

# Report the three error metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **CNN-LSTM Model Feeding without Twitter Data**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Each input window is cut into n_seq sub-sequences of n_steps time steps
n_seq = 6
n_steps = X_train_apple_stock.shape[1] // n_seq

# CNN-LSTM for Apple with stock-only features (4 per time step): a Conv1D/pooling
# stack runs on every sub-sequence, and the LSTM reads the flattened results in order.
CNN_LSTM_stock = Sequential()
CNN_LSTM_stock.add(TimeDistributed(Conv1D(filters=32, kernel_size=2, activation='relu'), input_shape=(n_seq, n_steps, 4)))
CNN_LSTM_stock.add(TimeDistributed(MaxPooling1D(pool_size=2)))
CNN_LSTM_stock.add(TimeDistributed(Flatten()))
CNN_LSTM_stock.add(LSTM(100, activation='relu'))
CNN_LSTM_stock.add(Dropout(0.1))
# Output width follows the targets instead of a hard-coded 20
CNN_LSTM_stock.add(Dense(y_train_apple_stock.shape[1]))

# Compile the model
CNN_LSTM_stock.compile(optimizer=RMSprop(), loss='mse')

# Print model summary
print(CNN_LSTM_stock.summary())

# Reshape X_train to (samples, n_seq, n_steps, features)
X_train_stock_reshaped = np.reshape(X_train_apple_stock, (X_train_apple_stock.shape[0], n_seq, n_steps, X_train_apple_stock.shape[2]))

# Train silently (verbose=0) and keep the returned History object
history = CNN_LSTM_stock.fit(X_train_stock_reshaped, y_train_apple_stock, batch_size=32, epochs=100, verbose=0)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Regroup every test window into (n_seq, n_steps) sub-sequences using the
# n_seq / n_steps values currently in the notebook namespace
X_test_reshaped_stock = np.reshape(
    X_test_apple_stock,
    (X_test_apple_stock.shape[0], n_seq, n_steps, X_test_apple_stock.shape[2]),
)

# Forecast every test window with the stock-only CNN-LSTM
y_pred_apple_stock = CNN_LSTM_stock.predict(X_test_reshaped_stock)

# Score the forecasts against the held-out targets
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

# Report the three error metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# The two CNN-LSTM models were built with different sub-sequence layouts
# (n_seq=5 with Twitter, n_seq=6 without). Reshaping both test sets with a
# single n_seq=6 fed the Twitter model a shape it was not built for, so each
# layout is now read back from the model it belongs to.
_, n_seq_twitter, n_steps_twitter, _ = CNN_LSTM.input_shape
_, n_seq, n_steps, _ = CNN_LSTM_stock.input_shape

# Reshape X_test to match the Twitter model's input shape
X_test_reshaped = np.reshape(X_test_apple, (X_test_apple.shape[0], n_seq_twitter, n_steps_twitter, X_test_apple.shape[2]))

# Predict stock prices using the models for Apple
predicted_stock_prices_apple = CNN_LSTM.predict(X_test_reshaped)

# Reshape X_test_apple_stock to match the stock-only model's input shape
X_test_stock_reshaped = np.reshape(X_test_apple_stock, (X_test_apple_stock.shape[0], n_seq, n_steps, X_test_apple_stock.shape[2]))

# Predict stock prices using the models for Apple stock without Twitter
predicted_stock_prices_apple_stock = CNN_LSTM_stock.predict(X_test_stock_reshaped)

# Extract the actual stock prices and the dates
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the scaler on the stock prices
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Map the scaled forecasts back to prices and keep the last forecast step of each window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to one common length. The previous pairwise checks sliced
# the longer-or-equal array to the longer length (a no-op), so a real length
# mismatch would still have reached ax.plot and raised.
n_points = min(len(actual_stock_prices), len(last_predicted_values_apple), len(last_predicted_values_apple_stock))
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]
# NOTE(review): the i-th forecast is plotted against the i-th test date; check
# split_sequences to see whether the forecasts should be shifted by the window length.

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# One panel per model: (axis, forecast series, line colour, legend label, title)
panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'Predicted Stock Prices (With Twitter)',
     'Apple Stock Prices - Actual vs Predicted (With Twitter)'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Predicted Stock Prices (Without Twitter)',
     'Apple Stock Prices - Actual vs Predicted (Without Twitter)'),
)
for axis, predicted_series, predicted_color, predicted_label, panel_title in panels:
    axis.plot(dates, actual_stock_prices, color='darkblue', label='Actual Stock Prices', linewidth=2)
    axis.plot(dates, predicted_series, color=predicted_color, label=predicted_label, linewidth=2)
    axis.set_xlabel('Date')
    axis.set_ylabel('Stock Price USD ($)')
    axis.set_title(panel_title)
    axis.legend()
    axis.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


### **Google Company**

#### ***Data Processing with Twitter Data***

In [None]:
# Columns for the Twitter-augmented Google experiments: the date, the four
# sentiment columns, the four market columns, and the stock_price column
dataset = google_df[[
    'day_date',
    'polarity_score', 'negative', 'neutral', 'positive',
    'open_value', 'high_value', 'low_value', 'volume',
    'stock_price',
]]

# Split into train / test frames with the notebook's helper and report their sizes
google_df_train, google_df_test = training_dataset_generator(dataset)
print(google_df_train.shape, google_df_test.shape)

In [None]:
# Collapse the training frame to one row per 'day_date': sentiment columns are
# averaged over the day, market columns keep the first value seen for that day.
google_df_train = google_df_train.groupby(['day_date'], as_index=False).agg({
    'polarity_score': 'mean',
    'negative': 'mean',
    'neutral': 'mean',
    'positive': 'mean',
    'open_value': 'first',
    'high_value': 'first',
    'low_value': 'first',
    'volume': 'first',
    'stock_price': 'first'
})

# Show the size of the Google frame just aggregated (previously displayed apple_df_train by mistake)
google_df_train.shape

In [None]:
# Preview the first rows of the Google training frame
google_df_train.head()

In [None]:
# Collapse the test frame to one row per 'day_date': sentiment columns are
# averaged over the day, market columns keep the first value seen for that day.
google_df_test = (
    google_df_test
    .groupby(['day_date'], as_index=False)
    .agg({
        **dict.fromkeys(['polarity_score', 'negative', 'neutral', 'positive'], 'mean'),
        **dict.fromkeys(['open_value', 'high_value', 'low_value', 'volume', 'stock_price'], 'first'),
    })
)

google_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Side-by-side panels: training prices on the left, testing prices on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

# (axis, frame, line colour, title) for each panel
for axis, frame, line_color, panel_title in (
    (ax1, google_df_train, 'c', 'Closing Price of Google - Training Data'),
    (ax2, google_df_test, 'm', 'Closing Price of Google - Testing Data'),
):
    axis.plot(frame['day_date'], frame['stock_price'], color=line_color)
    axis.set_ylabel('Stock Price')
    axis.set_xlabel('Day Date')
    axis.set_title(panel_title)
    axis.tick_params(axis='x', rotation=60)

# Keep the rotated tick labels from overlapping, then render
plt.tight_layout()
plt.show()


#### Generating X-train & y_train

In [None]:
# Drop the date column before scaling. errors='ignore' keeps this cell
# re-runnable: google_df_train is overwritten here, so a second run would
# otherwise raise KeyError because 'day_date' is already gone.
google_df_train = google_df_train.drop(columns=['day_date'], errors='ignore')
dataset = google_df_train

# Scale the dataset
scaled_dataset = scalevalue(dataset)

# choose a number of time steps
n_steps_in, n_steps_out = 60, 20

# Generate input/output for training sets
X_train_google, y_train_google = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_train_google.shape, y_train_google.shape)

In [None]:
# Window geometry: n_steps_in input steps, n_steps_out forecast steps
n_steps_in, n_steps_out = 60, 20

# Test features without the date column, rescaled with the notebook's scalevalue helper
dataset = google_df_test.drop(columns=['day_date'])
scaled_dataset = scalevalue(dataset)

# Slice the scaled test data into input windows and their targets
X_test_google, y_test_google = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_test_google.shape, y_test_google.shape)

#### Flattening the Input for Training

In [None]:
# Width of one flattened window: time steps x features (taken from the training set)
n_input = X_train_google.shape[1] * X_train_google.shape[2]

# Give every window a single flat feature vector for the MLP
X_train_google_flatteded = X_train_google.reshape(X_train_google.shape[0], n_input)
X_test_google_flatteded = X_test_google.reshape(X_test_google.shape[0], n_input)

print(X_train_google_flatteded.shape, X_test_google_flatteded.shape)

#### Data Processing without Twitter Data

In [None]:
# Columns for the stock-only Google experiments: the date, the four market
# columns, and the stock_price column (no sentiment columns)
dataset = google_df[[
    'day_date',
    'open_value', 'high_value', 'low_value', 'volume',
    'stock_price',
]]

# Split into train / test frames with the notebook's helper and report their sizes
google_stock_df_train, google_stock_df_test = training_dataset_generator(dataset)
print(google_stock_df_train.shape, google_stock_df_test.shape)

In [None]:
# Collapse the stock-only training frame to one row per 'day_date', keeping
# the first value seen for every market column on that day.
google_stock_df_train = (
    google_stock_df_train
    .groupby(['day_date'], as_index=False)
    .agg(dict.fromkeys(['open_value', 'high_value', 'low_value', 'volume', 'stock_price'], 'first'))
)

google_stock_df_train.shape

In [None]:
# Collapse the stock-only test frame to one row per 'day_date', keeping
# the first value seen for every market column on that day.
google_stock_df_test = (
    google_stock_df_test
    .groupby(['day_date'], as_index=False)
    .agg(dict.fromkeys(['open_value', 'high_value', 'low_value', 'volume', 'stock_price'], 'first'))
)

google_stock_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Side-by-side panels: training prices on the left, testing prices on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

# (axis, frame, line colour, title) for each panel
for axis, frame, line_color, panel_title in (
    (ax1, google_stock_df_train, 'c', 'Closing Price of Google - Training Data'),
    (ax2, google_stock_df_test, 'm', 'Closing Price of Google - Testing Data'),
):
    axis.plot(frame['day_date'], frame['stock_price'], color=line_color)
    axis.set_ylabel('Stock Price')
    axis.set_xlabel('Day Date')
    axis.set_title(panel_title)
    axis.tick_params(axis='x', rotation=60)

# Keep the rotated tick labels from overlapping, then render
plt.tight_layout()
plt.show()


#### **Generating X-train & y_train**

In [None]:
# Window geometry: n_steps_in input steps, n_steps_out forecast steps
n_steps_in, n_steps_out = 60, 20

# Stock-only training features without the date column, rescaled with the notebook's scalevalue helper
dataset = google_stock_df_train.drop(columns=['day_date'])
scaled_dataset = scalevalue(dataset)

# Slice the scaled training data into input windows and their targets
X_train_google_stock, y_train_google_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_train_google_stock.shape, y_train_google_stock.shape)

In [None]:
# Window geometry: n_steps_in input steps, n_steps_out forecast steps
n_steps_in, n_steps_out = 60, 20

# Stock-only test features without the date column, rescaled with the notebook's scalevalue helper
dataset = google_stock_df_test.drop(columns=['day_date'])
scaled_dataset = scalevalue(dataset)

# Slice the scaled test data into input windows and their targets
X_test_google_stock, y_test_google_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_test_google_stock.shape, y_test_google_stock.shape)

#### **Flattening the Input for Training**

In [None]:
# Width of one flattened window: time steps x features (taken from the training set)
n_input = X_train_google_stock.shape[1] * X_train_google_stock.shape[2]

# Give every window a single flat feature vector for the MLP
X_train_google_stock_flatteded = X_train_google_stock.reshape(X_train_google_stock.shape[0], n_input)
X_test_google_stock_flatteded = X_test_google_stock.reshape(X_test_google_stock.shape[0], n_input)

print(X_train_google_stock_flatteded.shape, X_test_google_stock_flatteded.shape)

#### **Deep Learning Model Feeding**

##### **MLP Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# MLP forecaster for Google fed with the flattened Twitter-augmented windows.
# `model` is kept as an alias of the same object, as before.
MLP_Google = model = Sequential()
# Take the input width from the data this model is trained on. The global
# n_input is overwritten by the stock-only flattening cell, so on a top-to-bottom
# run it no longer matches the Twitter-augmented windows.
MLP_Google.add(Dense(100, activation='relu', input_dim=X_train_google_flatteded.shape[1]))
MLP_Google.add(Dropout(0.3))
MLP_Google.add(Dense(n_steps_out))
MLP_Google.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(MLP_Google.summary())

# Train silently (verbose=0)
MLP_Google.fit(X_train_google_flatteded, y_train_google, batch_size=16, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every flattened test window with the Twitter-fed MLP
y_pred_google = MLP_Google.predict(X_test_google_flatteded)

# Score the forecasts against the held-out targets
mse = mean_squared_error(y_test_google, y_pred_google)
mae = mean_absolute_error(y_test_google, y_pred_google)
mape = mean_absolute_percentage_error(y_test_google, y_pred_google)

# Report the three error metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **MLP Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# MLP forecaster for Google fed with the flattened stock-only windows.
# `model` is kept as an alias of the same object, as before.
MLP_Google_Stock = model = Sequential()
# Input width comes from the data this model is trained on rather than the shared n_input global
MLP_Google_Stock.add(Dense(150, activation='relu', input_dim=X_train_google_stock_flatteded.shape[1]))
# The dropout layer belongs to this model; it was previously appended to the
# already-trained MLP_Google by mistake.
MLP_Google_Stock.add(Dropout(0.1))
MLP_Google_Stock.add(Dense(n_steps_out))
MLP_Google_Stock.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(MLP_Google_Stock.summary())

# Train silently (verbose=0)
MLP_Google_Stock.fit(X_train_google_stock_flatteded, y_train_google_stock, batch_size=32, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every flattened test window with the stock-only MLP
y_pred_google_stock = MLP_Google_Stock.predict(X_test_google_stock_flatteded)

# Score the forecasts against the held-out targets
mse = mean_squared_error(y_test_google_stock, y_pred_google_stock)
mae = mean_absolute_error(y_test_google_stock, y_pred_google_stock)
mape = mean_absolute_percentage_error(y_test_google_stock, y_pred_google_stock)

# Report the three error metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Predict stock prices using the models for Google
predicted_stock_prices_google = MLP_Google.predict(X_test_google_flatteded)
predicted_stock_prices_google_stock = MLP_Google_Stock.predict(X_test_google_stock_flatteded)

# Extract the actual stock prices and the dates
actual_stock_prices = google_df_test['stock_price'].values
dates = google_df_test['day_date'].values

# Fit the price scaler directly on the test-set prices so this cell does not
# depend on whatever the global `dataset` happened to hold last.
# NOTE(review): assumes scalevalue() min-max scaled the test frame on its own
# range - confirm against its definition.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Map the scaled forecasts back to prices and keep the last forecast step of each window
predicted_stock_prices_google_original = stock_price_scaler.inverse_transform(predicted_stock_prices_google)
last_predicted_values_google = predicted_stock_prices_google_original[:, -1]

predicted_stock_prices_google_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_google_stock)
last_predicted_values_google_stock = predicted_stock_prices_google_stock_original[:, -1]

# Trim every series to one common length. The previous pairwise checks sliced
# the longer-or-equal array to the longer length (a no-op), so a real length
# mismatch would still have reached ax.plot and raised.
n_points = min(len(actual_stock_prices), len(last_predicted_values_google), len(last_predicted_values_google_stock))
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_google = last_predicted_values_google[:n_points]
last_predicted_values_google_stock = last_predicted_values_google_stock[:n_points]
# NOTE(review): the i-th forecast is plotted against the i-th test date; check
# split_sequences to see whether the forecasts should be shifted by the window length.

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# One panel per model: (axis, forecast series, line colour, legend label, title)
panels = (
    (ax1, last_predicted_values_google, 'darkred', 'Predicted Stock Prices (With Twitter)',
     'Google Stock Prices - Actual vs Predicted (With Twitter)'),
    (ax2, last_predicted_values_google_stock, 'olive', 'Predicted Stock Prices (Without Twitter)',
     'Google Stock Prices - Actual vs Predicted (Without Twitter)'),
)
for axis, predicted_series, predicted_color, predicted_label, panel_title in panels:
    axis.plot(dates, actual_stock_prices, color='darkgreen', label='Actual Stock Prices', linewidth=2)
    axis.plot(dates, predicted_series, color=predicted_color, label=predicted_label, linewidth=2)
    axis.set_xlabel('Date')
    axis.set_ylabel('Stock Price USD ($)')
    axis.set_title(panel_title)
    axis.legend()
    axis.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


##### **1D-CNN Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Dropout, Flatten, MaxPooling1D

# 1D-CNN forecaster for Google fed with the Twitter-augmented windows
# (n_steps_in time steps x 8 features); the last Dense emits n_steps_out values.
One_D_CNN_Google = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps_in,8)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dropout(0.2),
    Dense(n_steps_out),
])
One_D_CNN_Google.compile(optimizer='adam', loss='mse')

# Show the layer stack
print(One_D_CNN_Google.summary())

# Train silently (verbose=0)
One_D_CNN_Google.fit(X_train_google, y_train_google, batch_size=32, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every test window with the Twitter-fed 1D-CNN
y_pred_google = One_D_CNN_Google.predict(X_test_google)

# Score the forecasts against the held-out targets
mse = mean_squared_error(y_test_google, y_pred_google)
mae = mean_absolute_error(y_test_google, y_pred_google)
mape = mean_absolute_percentage_error(y_test_google, y_pred_google)

# Report the three error metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **1D-CNN Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Dropout, Flatten, MaxPooling1D

# 1D-CNN forecaster for Google fed with the stock-only windows
# (n_steps_in time steps x 4 features); the last Dense emits n_steps_out values.
One_D_CNN_Google_Stock = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps_in,4)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.1),
    Dense(n_steps_out),
])
One_D_CNN_Google_Stock.compile(optimizer='rmsprop', loss='mse')

# Show the layer stack
print(One_D_CNN_Google_Stock.summary())

# Train silently (verbose=0)
One_D_CNN_Google_Stock.fit(X_train_google_stock, y_train_google_stock, batch_size=32, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every test window with the stock-only 1D-CNN
y_pred_google_stock = One_D_CNN_Google_Stock.predict(X_test_google_stock)

# Score the forecasts against the held-out targets
mse = mean_squared_error(y_test_google_stock, y_pred_google_stock)
mae = mean_absolute_error(y_test_google_stock, y_pred_google_stock)
mape = mean_absolute_percentage_error(y_test_google_stock, y_pred_google_stock)

# Report the three error metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Predict stock prices using the models for Google
predicted_stock_prices_google = One_D_CNN_Google.predict(X_test_google)
predicted_stock_prices_google_stock = One_D_CNN_Google_Stock.predict(X_test_google_stock)

# Extract the actual stock prices and the dates
actual_stock_prices = google_df_test['stock_price'].values
dates = google_df_test['day_date'].values

# Fit the price scaler directly on the test-set prices so this cell does not
# depend on whatever the global `dataset` happened to hold last.
# NOTE(review): assumes scalevalue() min-max scaled the test frame on its own
# range - confirm against its definition.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Map the scaled forecasts back to prices and keep the last forecast step of each window
predicted_stock_prices_google_original = stock_price_scaler.inverse_transform(predicted_stock_prices_google)
last_predicted_values_google = predicted_stock_prices_google_original[:, -1]

predicted_stock_prices_google_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_google_stock)
last_predicted_values_google_stock = predicted_stock_prices_google_stock_original[:, -1]

# Trim every series to one common length. The previous pairwise checks sliced
# the longer-or-equal array to the longer length (a no-op), so a real length
# mismatch would still have reached ax.plot and raised.
n_points = min(len(actual_stock_prices), len(last_predicted_values_google), len(last_predicted_values_google_stock))
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_google = last_predicted_values_google[:n_points]
last_predicted_values_google_stock = last_predicted_values_google_stock[:n_points]
# NOTE(review): the i-th forecast is plotted against the i-th test date; check
# split_sequences to see whether the forecasts should be shifted by the window length.

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# One panel per model: (axis, forecast series, line colour, legend label, title)
panels = (
    (ax1, last_predicted_values_google, 'darkred', 'Predicted Stock Prices (With Twitter)',
     'Google Stock Prices - Actual vs Predicted (With Twitter)'),
    (ax2, last_predicted_values_google_stock, 'olive', 'Predicted Stock Prices (Without Twitter)',
     'Google Stock Prices - Actual vs Predicted (Without Twitter)'),
)
for axis, predicted_series, predicted_color, predicted_label, panel_title in panels:
    axis.plot(dates, actual_stock_prices, color='darkgreen', label='Actual Stock Prices', linewidth=2)
    axis.plot(dates, predicted_series, color=predicted_color, label=predicted_label, linewidth=2)
    axis.set_xlabel('Date')
    axis.set_ylabel('Stock Price USD ($)')
    axis.set_title(panel_title)
    axis.legend()
    axis.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **LSTM Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM  # Dropout is used below, so import it with the other layers

# LSTM forecaster for Google fed with the Twitter-augmented inputs.
# Each sample is a window of n_steps_in time steps with 8 features; the
# Dense head emits n_steps_out values per window.
# Note: this rebinds LSTMM_A, the name the Apple section used for its own LSTM.
LSTMM_A = Sequential()
LSTMM_A.add(LSTM(150, activation='relu', return_sequences=False, input_shape=(n_steps_in, 8)))
LSTMM_A.add(Dropout(0.2))
LSTMM_A.add(Dense(n_steps_out))
LSTMM_A.compile(optimizer='adam', loss='mse')

# Print model summary
print(LSTMM_A.summary())

# Train silently (verbose=0) and keep the returned History object
history = LSTMM_A.fit(X_train_google, y_train_google, batch_size=16, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every test window with the Twitter-fed LSTM
y_pred_google = LSTMM_A.predict(X_test_google)

# Score the forecasts against the held-out targets
mse = mean_squared_error(y_test_google, y_pred_google)
mae = mean_absolute_error(y_test_google, y_pred_google)
mape = mean_absolute_percentage_error(y_test_google, y_pred_google)

# Report the three error metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **LSTM Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# Google LSTM trained on the stock-only feature set (4 features per time step).
# return_sequences must be False: the Dense head then emits one vector of
# n_steps_out values per sample, which is the 2-D shape the metric calls and
# the scaler's inverse_transform in the following cells operate on. With
# return_sequences=True the output would be 3-D (one vector per input time step).
LSTMM_AM = Sequential()
LSTMM_AM.add(LSTM(50, activation='relu', return_sequences=False, input_shape=(n_steps_in, 4)))
LSTMM_AM.add(Dropout(0.1))
LSTMM_AM.add(Dense(n_steps_out))
LSTMM_AM.compile(optimizer='adam', loss='mse')

# summary() prints the table itself; wrapping it in print() only adds a stray "None"
LSTMM_AM.summary()

# Fit the model
LSTMM_AM.fit(X_train_google_stock, y_train_google_stock, batch_size=32, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every Google test window with the stock-only LSTM
y_pred_google_stock = LSTMM_AM.predict(X_test_google_stock)

# Calculate MSE
mse = mean_squared_error(y_test_google_stock, y_pred_google_stock)
print(f'Mean Squared Error (MSE): {mse}')

# Calculate MAE
mae = mean_absolute_error(y_test_google_stock, y_pred_google_stock)
print(f'Mean Absolute Error (MAE): {mae}')

# Calculate MAPE on the Google arrays (the Apple arrays used here before were a
# copy-paste slip and reported another company's error).
# scikit-learn reports MAPE as a fraction (it is not multiplied by 100).
mape = mean_absolute_percentage_error(y_test_google_stock, y_pred_google_stock)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Forecasts from both Google LSTM models
predicted_stock_prices_google = LSTMM_A.predict(X_test_google)
predicted_stock_prices_google_stock = LSTMM_AM.predict(X_test_google_stock)

# Actual stock prices and dates of the Google test period
actual_stock_prices = google_df_test['stock_price'].values
dates = google_df_test['day_date'].values

# Fit the inverse scaler on the test-period prices explicitly instead of on
# whatever the global `dataset` currently holds, so this cell does not depend
# on which cell was executed last.
# NOTE(review): assumes scalevalue() min-max scales each column to (0, 1) — confirm.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(google_df_test[['stock_price']])

# Back to USD; column -1 is the last forecast step of each window
predicted_stock_prices_google_original = stock_price_scaler.inverse_transform(predicted_stock_prices_google)
last_predicted_values_google = predicted_stock_prices_google_original[:, -1]

predicted_stock_prices_google_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_google_stock)
last_predicted_values_google_stock = predicted_stock_prices_google_stock_original[:, -1]

# Trim all series to one common length. The previous two-step matching sliced
# the already-shorter array in one branch (a no-op) and could leave the first
# forecast longer than the dates after the second step.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_google),
    len(last_predicted_values_google_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_google = last_predicted_values_google[:n_points]
last_predicted_values_google_stock = last_predicted_values_google_stock[:n_points]

# NOTE(review): the forecasts are drawn against the FIRST n_points test dates,
# although each value is the final step of a forecast window — confirm the
# intended date alignment against split_sequences().

# One panel per model, same actual-price curve in both
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

panels = (
    (ax1, last_predicted_values_google, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_google_stock, 'olive', 'Without Twitter'),
)
for ax, predicted, colour, variant in panels:
    ax.plot(dates, actual_stock_prices, color='darkgreen', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted, color=colour, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    ax.set_title(f'Google Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **CNN-LSTM Model Feeding with Twitter Data**

In [None]:
# Only the tensorflow.keras imports are kept: the plain `keras` imports that
# used to precede them were rebound by these lines before ever being used.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Each input window is cut into n_seq sub-sequences of n_steps time steps
n_seq = 6
window_len, n_features = X_train_google.shape[1], X_train_google.shape[2]
if window_len % n_seq != 0:
    # np.reshape below would otherwise fail with a far less helpful message
    raise ValueError(f'window length {window_len} is not divisible by n_seq={n_seq}')
n_steps = window_len // n_seq

# Define the model: per-sub-sequence Conv1D feature extraction, then an LSTM
# across the sub-sequences. Feature count and output width are read from the
# training arrays instead of being hard-coded.
CNN_LSTM = Sequential()
CNN_LSTM.add(TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'), input_shape=(n_seq, n_steps, n_features)))
CNN_LSTM.add(TimeDistributed(MaxPooling1D(pool_size=2)))
CNN_LSTM.add(TimeDistributed(Flatten()))
CNN_LSTM.add(LSTM(150, activation='relu'))
CNN_LSTM.add(Dropout(0.1))
CNN_LSTM.add(Dense(y_train_google.shape[1]))  # Match y_train shape

# Compile the model
CNN_LSTM.compile(optimizer=RMSprop(), loss='mse')

# summary() prints the table itself; wrapping it in print() only adds a stray "None"
CNN_LSTM.summary()

# Reshape X_train to (samples, n_seq, n_steps, features)
X_train_reshaped = np.reshape(X_train_google, (X_train_google.shape[0], n_seq, n_steps, n_features))

# Fit the model
history = CNN_LSTM.fit(X_train_reshaped, y_train_google, batch_size=16, epochs=100, verbose=0)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Give the test windows the (samples, n_seq, n_steps, features) layout the CNN-LSTM takes
X_test_reshaped = np.reshape(
    X_test_google,
    (X_test_google.shape[0], n_seq, n_steps, X_test_google.shape[2]),
)

# Forecast every test window
y_pred_google = CNN_LSTM.predict(X_test_reshaped)

# Error metrics between the test targets and the forecasts.
# scikit-learn reports MAPE as a fraction (it is not multiplied by 100).
mse = mean_squared_error(y_true=y_test_google, y_pred=y_pred_google)
print(f'Mean Squared Error (MSE): {mse}')

mae = mean_absolute_error(y_true=y_test_google, y_pred=y_pred_google)
print(f'Mean Absolute Error (MAE): {mae}')

mape = mean_absolute_percentage_error(y_true=y_test_google, y_pred=y_pred_google)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **CNN-LSTM Model Feeding without Twitter Data**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Each input window is cut into n_seq sub-sequences of n_steps time steps
n_seq = 6
window_len, n_features = X_train_google_stock.shape[1], X_train_google_stock.shape[2]
if window_len % n_seq != 0:
    # np.reshape below would otherwise fail with a far less helpful message
    raise ValueError(f'window length {window_len} is not divisible by n_seq={n_seq}')
n_steps = window_len // n_seq

# Define the stock-only CNN-LSTM. Feature count and output width are read from
# the training arrays instead of being hard-coded.
CNN_LSTM_stock = Sequential()
CNN_LSTM_stock.add(TimeDistributed(Conv1D(filters=32, kernel_size=1, activation='relu'), input_shape=(n_seq, n_steps, n_features)))
CNN_LSTM_stock.add(TimeDistributed(MaxPooling1D(pool_size=2)))
CNN_LSTM_stock.add(TimeDistributed(Flatten()))
CNN_LSTM_stock.add(LSTM(150, activation='relu'))
CNN_LSTM_stock.add(Dropout(0.1))
CNN_LSTM_stock.add(Dense(y_train_google_stock.shape[1]))  # Match y_train_google_stock shape

# Compile the model
CNN_LSTM_stock.compile(optimizer=RMSprop(), loss='mse')

# summary() prints the table itself; wrapping it in print() only adds a stray "None"
CNN_LSTM_stock.summary()

# Reshape X_train to (samples, n_seq, n_steps, features)
X_train_stock_reshaped = np.reshape(X_train_google_stock, (X_train_google_stock.shape[0], n_seq, n_steps, n_features))

# Fit the model
history = CNN_LSTM_stock.fit(X_train_stock_reshaped, y_train_google_stock, batch_size=32, epochs=200, verbose=0)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Give the stock-only test windows the (samples, n_seq, n_steps, features) layout
X_test_reshaped_stock = np.reshape(
    X_test_google_stock,
    (X_test_google_stock.shape[0], n_seq, n_steps, X_test_google_stock.shape[2]),
)

# Forecast every test window
y_pred_google_stock = CNN_LSTM_stock.predict(X_test_reshaped_stock)

# Error metrics between the test targets and the forecasts.
# scikit-learn reports MAPE as a fraction (it is not multiplied by 100).
mse = mean_squared_error(y_true=y_test_google_stock, y_pred=y_pred_google_stock)
print(f'Mean Squared Error (MSE): {mse}')

mae = mean_absolute_error(y_true=y_test_google_stock, y_pred=y_pred_google_stock)
print(f'Mean Absolute Error (MAE): {mae}')

mape = mean_absolute_percentage_error(y_true=y_test_google_stock, y_pred=y_pred_google_stock)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Parameters
n_seq = 6
n_steps = X_test_google.shape[1] // n_seq

# Reshape X_test to (samples, n_seq, n_steps, features) and forecast with the Twitter-fed model
X_test_reshaped = np.reshape(X_test_google, (X_test_google.shape[0], n_seq, n_steps, X_test_google.shape[2]))
predicted_stock_prices_google = CNN_LSTM.predict(X_test_reshaped)

# Same for the stock-only model
X_test_stock_reshaped = np.reshape(X_test_google_stock, (X_test_google_stock.shape[0], n_seq, n_steps, X_test_google_stock.shape[2]))
predicted_stock_prices_google_stock = CNN_LSTM_stock.predict(X_test_stock_reshaped)

# Actual stock prices and dates of the Google test period
actual_stock_prices = google_df_test['stock_price'].values
dates = google_df_test['day_date'].values

# Fit the inverse scaler on the full test-period prices (before any trimming)
# NOTE(review): assumes the targets were min-max scaled to (0, 1) — confirm against scalevalue().
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Back to USD; column -1 is the last forecast step of each window
predicted_stock_prices_google_original = stock_price_scaler.inverse_transform(predicted_stock_prices_google)
last_predicted_values_google = predicted_stock_prices_google_original[:, -1]

predicted_stock_prices_google_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_google_stock)
last_predicted_values_google_stock = predicted_stock_prices_google_stock_original[:, -1]

# Trim all series to one common length. The previous two-step matching sliced
# the already-shorter array in one branch (a no-op) and could leave the first
# forecast longer than the dates after the second step.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_google),
    len(last_predicted_values_google_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_google = last_predicted_values_google[:n_points]
last_predicted_values_google_stock = last_predicted_values_google_stock[:n_points]

# NOTE(review): the forecasts are drawn against the FIRST n_points test dates,
# although each value is the final step of a forecast window — confirm the
# intended date alignment against split_sequences().

# One panel per model, same actual-price curve in both
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

panels = (
    (ax1, last_predicted_values_google, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_google_stock, 'olive', 'Without Twitter'),
)
for ax, predicted, colour, variant in panels:
    ax.plot(dates, actual_stock_prices, color='darkgreen', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted, color=colour, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    ax.set_title(f'Google Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


### **Microsoft Company**

#### ***Data Processing with Twitter Data***

In [None]:
# Columns used by the Twitter-fed Microsoft models: date, sentiment scores, market data.
# NOTE: the apple_* names are reused below but hold Microsoft data from here on.
twitter_feature_columns = [
    'day_date',
    'polarity_score', 'negative', 'neutral', 'positive',
    'open_value', 'high_value', 'low_value', 'volume',
    'stock_price',
]
dataset = microsoft_df[twitter_feature_columns]

# Split into training and testing frames and report their sizes
apple_df_train, apple_df_test = training_dataset_generator(dataset)
print(apple_df_train.shape, apple_df_test.shape)

In [None]:
# Collapse all rows sharing a day_date into one row per day:
# sentiment columns are averaged, market columns keep the day's first value.
sentiment_cols = ['polarity_score', 'negative', 'neutral', 'positive']
market_cols = ['open_value', 'high_value', 'low_value', 'volume', 'stock_price']
daily_aggregation = {
    **dict.fromkeys(sentiment_cols, 'mean'),
    **dict.fromkeys(market_cols, 'first'),
}

apple_df_train = apple_df_train.groupby(['day_date'], as_index=False).agg(daily_aggregation)

apple_df_train.shape

In [None]:
# Preview the first rows of the daily-aggregated training frame (Microsoft data, despite the apple_ prefix)
apple_df_train.head()

In [None]:
# Collapse all rows sharing a day_date into one row per day:
# sentiment columns are averaged, market columns keep the day's first value.
sentiment_cols = ['polarity_score', 'negative', 'neutral', 'positive']
market_cols = ['open_value', 'high_value', 'low_value', 'volume', 'stock_price']
daily_aggregation = {
    **dict.fromkeys(sentiment_cols, 'mean'),
    **dict.fromkeys(market_cols, 'first'),
}

apple_df_test = apple_df_test.groupby(['day_date'], as_index=False).agg(daily_aggregation)

apple_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Side-by-side view of the training and testing price series.
# The frames come from microsoft_df, so the titles name Microsoft (they
# previously said "Apple", a leftover from the copied Apple section).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

for ax, frame, colour, split_name in (
    (ax1, apple_df_train, 'c', 'Training'),
    (ax2, apple_df_test, 'm', 'Testing'),
):
    ax.plot(frame['day_date'], frame['stock_price'], color=colour)
    ax.set_ylabel('Stock Price')
    ax.set_xlabel('Day Date')
    ax.set_title(f'Closing Price of Microsoft - {split_name} Data')
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


#### Generating X-train & y_train

In [None]:
# Model input = training frame without the date column.
# apple_df_train itself is left untouched (it used to be overwritten here), so
# re-running this cell no longer raises KeyError on the already-dropped column
# and the cell matches the sibling train/test cells.
dataset = apple_df_train.drop(columns=['day_date'])

# Scale the dataset
scaled_dataset = scalevalue(dataset)

# Window lengths handed to split_sequences
n_steps_in, n_steps_out = 60, 20

# Generate input/output for training sets
X_train_apple, y_train_apple = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_train_apple.shape, y_train_apple.shape)

In [None]:
# Window lengths handed to split_sequences
n_steps_in = 60
n_steps_out = 20

# Test frame without the date column, then scaled
# NOTE(review): the test frame is scaled on its own here rather than with the
# scaling derived from the training frame — confirm this is intended.
dataset = apple_df_test.drop(columns='day_date')
scaled_dataset = scalevalue(dataset)

# Build the testing inputs / targets and report their shapes
X_test_apple, y_test_apple = split_sequences(scaled_dataset, n_steps_in, n_steps_out)
print(X_test_apple.shape, y_test_apple.shape)

#### Flattening the Input for Training

In [None]:
# Flatten each (time steps, features) window into a single row for the MLP
window_len, feature_count = X_train_apple.shape[1:3]
n_input = window_len * feature_count

X_train_apple_flatteded = X_train_apple.reshape(X_train_apple.shape[0], n_input)
X_test_apple_flatteded = X_test_apple.reshape(X_test_apple.shape[0], n_input)

print(X_train_apple_flatteded.shape, X_test_apple_flatteded.shape)

#### Data Processing without Twitter Data

In [None]:
# Columns used by the stock-only Microsoft models: date and market data, no sentiment scores.
# NOTE: the apple_stock_* names are reused below but hold Microsoft data.
stock_feature_columns = ['day_date', 'open_value', 'high_value', 'low_value', 'volume', 'stock_price']
dataset = microsoft_df[stock_feature_columns]

# Split into training and testing frames and report their sizes
apple_stock_df_train, apple_stock_df_test = training_dataset_generator(dataset)
print(apple_stock_df_train.shape, apple_stock_df_test.shape)

In [None]:
# Collapse all rows sharing a day_date into one row per day,
# keeping the first value of every market column.
market_cols = ['open_value', 'high_value', 'low_value', 'volume', 'stock_price']

apple_stock_df_train = (
    apple_stock_df_train
    .groupby(['day_date'], as_index=False)
    .agg(dict.fromkeys(market_cols, 'first'))
)

apple_stock_df_train.shape

In [None]:
# Collapse all rows sharing a day_date into one row per day,
# keeping the first value of every market column.
market_cols = ['open_value', 'high_value', 'low_value', 'volume', 'stock_price']

apple_stock_df_test = (
    apple_stock_df_test
    .groupby(['day_date'], as_index=False)
    .agg(dict.fromkeys(market_cols, 'first'))
)

apple_stock_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Side-by-side view of the stock-only training and testing price series.
# The frames come from microsoft_df, so the titles name Microsoft (they
# previously said "Apple", a leftover from the copied Apple section).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

for ax, frame, colour, split_name in (
    (ax1, apple_stock_df_train, 'c', 'Training'),
    (ax2, apple_stock_df_test, 'm', 'Testing'),
):
    ax.plot(frame['day_date'], frame['stock_price'], color=colour)
    ax.set_ylabel('Stock Price')
    ax.set_xlabel('Day Date')
    ax.set_title(f'Closing Price of Microsoft - {split_name} Data')
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


#### **Generating X-train & y_train**

In [None]:
# Window lengths handed to split_sequences
n_steps_in = 60
n_steps_out = 20

# Stock-only training frame without the date column, then scaled
dataset = apple_stock_df_train.drop(columns='day_date')
scaled_dataset = scalevalue(dataset)

# Build the training inputs / targets and report their shapes
X_train_apple_stock, y_train_apple_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)
print(X_train_apple_stock.shape, y_train_apple_stock.shape)

In [None]:
# Window lengths handed to split_sequences
n_steps_in = 60
n_steps_out = 20

# Stock-only test frame without the date column, then scaled.
# `dataset` is also read by later visualisation cells when fitting their inverse scaler.
dataset = apple_stock_df_test.drop(columns='day_date')
scaled_dataset = scalevalue(dataset)

# Build the testing inputs / targets and report their shapes
X_test_apple_stock, y_test_apple_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)
print(X_test_apple_stock.shape, y_test_apple_stock.shape)

#### **Flattening the Input for Training**

In [None]:
# Flatten each (time steps, features) window into a single row for the MLP.
# Note: this rebinds the global n_input to the stock-only width.
window_len, feature_count = X_train_apple_stock.shape[1:3]
n_input = window_len * feature_count

X_train_apple_stock_flatteded = X_train_apple_stock.reshape(X_train_apple_stock.shape[0], n_input)
X_test_apple_stock_flatteded = X_test_apple_stock.reshape(X_test_apple_stock.shape[0], n_input)

print(X_train_apple_stock_flatteded.shape, X_test_apple_stock_flatteded.shape)

#### **Deep Learning Model Feeding**

##### **MLP Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# MLP for the Twitter-fed Microsoft data: three hidden layers of 150 units.
# The input width is read from the flattened training array. The global
# n_input cannot be used here: it was last assigned by the stock-only flatten
# cell, so on a top-to-bottom run it no longer matches X_train_apple_flatteded.
model = Sequential()
MLP_Apple = model
MLP_Apple.add(Dense(150, activation='relu', input_dim=X_train_apple_flatteded.shape[1]))
MLP_Apple.add(Dropout(0.1))
MLP_Apple.add(Dense(150, activation='relu'))  # input_dim is only meaningful on the first layer
MLP_Apple.add(Dropout(0.1))
MLP_Apple.add(Dense(150, activation='relu'))
MLP_Apple.add(Dropout(0.1))
MLP_Apple.add(Dense(n_steps_out))
MLP_Apple.compile(optimizer='rmsprop', loss='mse')

# summary() prints the table itself; wrapping it in print() only adds a stray "None"
MLP_Apple.summary()

# Fit the model
MLP_Apple.fit(X_train_apple_flatteded, y_train_apple,batch_size=32, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every flattened test window with the Twitter-fed MLP
y_pred_apple = MLP_Apple.predict(X_test_apple_flatteded)

# Error metrics between the test targets and the forecasts.
# scikit-learn reports MAPE as a fraction (it is not multiplied by 100).
# NOTE(review): if scalevalue() maps targets into [0, 1], targets at or near 0 inflate MAPE — confirm.
mse = mean_squared_error(y_true=y_test_apple, y_pred=y_pred_apple)
print(f'Mean Squared Error (MSE): {mse}')

mae = mean_absolute_error(y_true=y_test_apple, y_pred=y_pred_apple)
print(f'Mean Absolute Error (MAE): {mae}')

mape = mean_absolute_percentage_error(y_true=y_test_apple, y_pred=y_pred_apple)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **MLP Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# MLP for the stock-only Microsoft data: one hidden layer of 100 units.
# The input width is read from the flattened training array.
model = Sequential()
MLP_Apple_Stock = model
MLP_Apple_Stock.add(Dense(100, activation='relu', input_dim=X_train_apple_stock_flatteded.shape[1]))
# Dropout belongs to THIS model; it was previously appended to MLP_Apple,
# which mutated the already-trained Twitter model and left this one without dropout.
MLP_Apple_Stock.add(Dropout(0.1))
MLP_Apple_Stock.add(Dense(n_steps_out))
MLP_Apple_Stock.compile(optimizer='adam', loss='mse')

# summary() prints the table itself; wrapping it in print() only adds a stray "None"
MLP_Apple_Stock.summary()

# Fit the model
MLP_Apple_Stock.fit(X_train_apple_stock_flatteded, y_train_apple_stock,batch_size=16, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every flattened test window with the stock-only MLP
y_pred_apple_stock = MLP_Apple_Stock.predict(X_test_apple_stock_flatteded)

# Error metrics between the test targets and the forecasts.
# scikit-learn reports MAPE as a fraction (it is not multiplied by 100).
mse = mean_squared_error(y_true=y_test_apple_stock, y_pred=y_pred_apple_stock)
print(f'Mean Squared Error (MSE): {mse}')

mae = mean_absolute_error(y_true=y_test_apple_stock, y_pred=y_pred_apple_stock)
print(f'Mean Absolute Error (MAE): {mae}')

mape = mean_absolute_percentage_error(y_true=y_test_apple_stock, y_pred=y_pred_apple_stock)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Forecasts from both Microsoft MLP models (the apple_* names hold Microsoft data)
predicted_stock_prices_apple = MLP_Apple.predict(X_test_apple_flatteded)
predicted_stock_prices_apple_stock = MLP_Apple_Stock.predict(X_test_apple_stock_flatteded)

# Actual stock prices and dates of the test period
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the inverse scaler on the test-period prices explicitly instead of on
# whatever the global `dataset` currently holds, so this cell does not depend
# on which cell was executed last.
# NOTE(review): assumes scalevalue() min-max scales each column to (0, 1) — confirm.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(apple_df_test[['stock_price']])

# Back to USD; column -1 is the last forecast step of each window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim all series to one common length. The previous two-step matching sliced
# the already-shorter array in one branch (a no-op) and could leave the first
# forecast longer than the dates after the second step.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_apple),
    len(last_predicted_values_apple_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# NOTE(review): the forecasts are drawn against the FIRST n_points test dates,
# although each value is the final step of a forecast window — confirm the
# intended date alignment against split_sequences().

# One panel per model, same actual-price curve in both
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Without Twitter'),
)
for ax, predicted, colour, variant in panels:
    ax.plot(dates, actual_stock_prices, color='indigo', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted, color=colour, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    ax.set_title(f'Microsoft Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


##### **1D-CNN Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Dropout

# 1D-CNN for the Twitter-fed Microsoft data: 8 features per time step,
# one output unit per forecast step.
One_D_CNN_Apple = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps_in,8)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dropout(0.2),
    Dense(n_steps_out),
])
One_D_CNN_Apple.compile(loss='mse', optimizer='adam')

# Show the layer / parameter table
print(One_D_CNN_Apple.summary())

# Train silently
One_D_CNN_Apple.fit(
    X_train_apple,
    y_train_apple,
    epochs=200,
    batch_size=64,
    verbose=0,
)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every test window with the Twitter-fed 1D-CNN
y_pred_apple = One_D_CNN_Apple.predict(X_test_apple)

# Error metrics between the test targets and the forecasts.
# scikit-learn reports MAPE as a fraction (it is not multiplied by 100).
mse = mean_squared_error(y_true=y_test_apple, y_pred=y_pred_apple)
print(f'Mean Squared Error (MSE): {mse}')

mae = mean_absolute_error(y_true=y_test_apple, y_pred=y_pred_apple)
print(f'Mean Absolute Error (MAE): {mae}')

mape = mean_absolute_percentage_error(y_true=y_test_apple, y_pred=y_pred_apple)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **1D-CNN Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Dropout

# 1D-CNN for the stock-only Microsoft data: 4 features per time step,
# one output unit per forecast step.
One_D_CNN_Apple_Stock = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps_in,4)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dropout(0.2),
    Dense(n_steps_out),
])
One_D_CNN_Apple_Stock.compile(loss='mse', optimizer='adam')

# Show the layer / parameter table
print(One_D_CNN_Apple_Stock.summary())

# Train silently
One_D_CNN_Apple_Stock.fit(
    X_train_apple_stock,
    y_train_apple_stock,
    epochs=200,
    batch_size=32,
    verbose=0,
)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every test window with the stock-only 1D-CNN
y_pred_apple_stock = One_D_CNN_Apple_Stock.predict(X_test_apple_stock)

# Error metrics between the test targets and the forecasts.
# scikit-learn reports MAPE as a fraction (it is not multiplied by 100).
mse = mean_squared_error(y_true=y_test_apple_stock, y_pred=y_pred_apple_stock)
print(f'Mean Squared Error (MSE): {mse}')

mae = mean_absolute_error(y_true=y_test_apple_stock, y_pred=y_pred_apple_stock)
print(f'Mean Absolute Error (MAE): {mae}')

mape = mean_absolute_percentage_error(y_true=y_test_apple_stock, y_pred=y_pred_apple_stock)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Forecasts from both Microsoft 1D-CNN models (the apple_* names hold Microsoft data)
predicted_stock_prices_apple = One_D_CNN_Apple.predict(X_test_apple)
predicted_stock_prices_apple_stock = One_D_CNN_Apple_Stock.predict(X_test_apple_stock)

# Actual stock prices and dates of the test period
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the inverse scaler on the test-period prices explicitly instead of on
# whatever the global `dataset` currently holds, so this cell does not depend
# on which cell was executed last.
# NOTE(review): assumes scalevalue() min-max scales each column to (0, 1) — confirm.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(apple_df_test[['stock_price']])

# Back to USD; column -1 is the last forecast step of each window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim all series to one common length. The previous two-step matching sliced
# the already-shorter array in one branch (a no-op) and could leave the first
# forecast longer than the dates after the second step.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_apple),
    len(last_predicted_values_apple_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# NOTE(review): the forecasts are drawn against the FIRST n_points test dates,
# although each value is the final step of a forecast window — confirm the
# intended date alignment against split_sequences().

# One panel per model, same actual-price curve in both
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Without Twitter'),
)
for ax, predicted, colour, variant in panels:
    ax.plot(dates, actual_stock_prices, color='indigo', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted, color=colour, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    ax.set_title(f'Microsoft Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **LSTM Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# LSTM for the Twitter-fed Microsoft data: 8 features per time step,
# one output unit per forecast step. Rebinds the LSTMM_A name used earlier.
LSTMM_A = Sequential([
    LSTM(100, activation='relu', return_sequences=False, input_shape=(n_steps_in, 8)),
    Dropout(0.1),
    Dense(n_steps_out),
])
LSTMM_A.compile(loss='mse', optimizer='rmsprop')

# Show the layer / parameter table
print(LSTMM_A.summary())

# Train silently; `history` holds the object returned by fit()
history = LSTMM_A.fit(
    X_train_apple,
    y_train_apple,
    epochs=100,
    batch_size=32,
    verbose=0,
)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast every test window with the Twitter-fed LSTM
y_pred_apple = LSTMM_A.predict(X_test_apple)

# Error metrics between the test targets and the forecasts.
# scikit-learn reports MAPE as a fraction (it is not multiplied by 100).
mse = mean_squared_error(y_true=y_test_apple, y_pred=y_pred_apple)
print(f'Mean Squared Error (MSE): {mse}')

mae = mean_absolute_error(y_true=y_test_apple, y_pred=y_pred_apple)
print(f'Mean Absolute Error (MAE): {mae}')

mape = mean_absolute_percentage_error(y_true=y_test_apple, y_pred=y_pred_apple)
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **LSTM Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# LSTM for the stock-only Microsoft data (4 features per time step).
# return_sequences must be False: the Dense head then emits one vector of
# n_steps_out values per sample, which is the 2-D shape the metric calls and
# the scaler's inverse_transform in the following cells operate on. With
# return_sequences=True the output would be 3-D (one vector per input time step).
LSTMM_AM = Sequential()
LSTMM_AM.add(LSTM(50, activation='relu', return_sequences=False, input_shape=(n_steps_in, 4)))
LSTMM_AM.add(Dropout(0.1))
LSTMM_AM.add(Dense(n_steps_out))
LSTMM_AM.compile(optimizer='rmsprop', loss='mse')

# summary() prints the table itself; wrapping it in print() only adds a stray "None"
LSTMM_AM.summary()

# Fit the model (verbose=0 like every other training cell; verbose=1 floods the
# notebook with 200 epochs of progress output)
LSTMM_AM.fit(X_train_apple_stock, y_train_apple_stock,batch_size=16, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Forecast every test window with the stock-only LSTM
y_pred_apple_stock = LSTMM_AM.predict(X_test_apple_stock)

# Same three error metrics as for the Twitter-aware model, so the two runs
# can be compared line by line (MAPE is a fraction, not a percentage).
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Forecasts from the two LSTM models (with and without Twitter features)
predicted_stock_prices_apple = LSTMM_A.predict(X_test_apple)
predicted_stock_prices_apple_stock = LSTMM_AM.predict(X_test_apple_stock)

# Actual stock prices and their dates, taken from the test frame
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the inverse-scaling helper on the test prices themselves rather than on
# whatever the global `dataset` last pointed to, so this cell does not depend
# on which data-preparation cell ran most recently.
# NOTE(review): assumes scalevalue() min-max scaled stock_price over this same
# set of test prices — confirm against its definition.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Undo the scaling and keep only the last forecast step of every window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to the shortest one so all plotted arrays share a length.
# (The original second if/elif sliced the shorter array to the longer length,
# which is a no-op, and could leave mismatched lengths for plotting.)
# NOTE(review): each prediction is the last step of a forecast window but is
# plotted against the first n_points test dates; confirm the intended date
# alignment against split_sequences.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_apple),
    len(last_predicted_values_apple_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# Left panel: model trained with Twitter features
ax1.plot(dates, actual_stock_prices, color='indigo', label='Actual Stock Prices', linewidth=2)
ax1.plot(dates, last_predicted_values_apple, color='darkred', label='Predicted Stock Prices (With Twitter)', linewidth=2)
ax1.set_xlabel('Date')
ax1.set_ylabel('Stock Price USD ($)')
ax1.set_title('Microsoft Stock Prices - Actual vs Predicted (With Twitter)')
ax1.legend()
ax1.tick_params(axis='x', rotation=60)

# Right panel: model trained without Twitter features
ax2.plot(dates, actual_stock_prices, color='indigo', label='Actual Stock Prices', linewidth=2)
ax2.plot(dates, last_predicted_values_apple_stock, color='olive', label='Predicted Stock Prices (Without Twitter)', linewidth=2)
ax2.set_xlabel('Date')
ax2.set_ylabel('Stock Price USD ($)')
ax2.set_title('Microsoft Stock Prices - Actual vs Predicted (Without Twitter)')
ax2.legend()
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **CNN-LSTM Model Feeding with Twitter Data**

In [None]:
# Only the tensorflow.keras imports are kept: the original cell first imported
# the same names from `keras` and then immediately shadowed every one of them.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Each input window is split into n_seq sub-sequences of n_steps time steps
n_seq = 5
n_steps = X_train_apple.shape[1] // n_seq
assert n_seq * n_steps == X_train_apple.shape[1], "window length must be divisible by n_seq"

# Define the model: a per-sub-sequence Conv1D/pooling front end feeding an LSTM
CNN_LSTM = Sequential()
CNN_LSTM.add(TimeDistributed(Conv1D(filters=32, kernel_size=1, activation='relu'), input_shape=(n_seq, n_steps, 8)))
CNN_LSTM.add(TimeDistributed(MaxPooling1D(pool_size=3)))
CNN_LSTM.add(TimeDistributed(Flatten()))
CNN_LSTM.add(LSTM(150, activation='relu'))
CNN_LSTM.add(Dropout(0.3))
# One output unit per forecast step, like the other models in this notebook
# (previously hard-coded as 20)
CNN_LSTM.add(Dense(n_steps_out))

# Compile the model
CNN_LSTM.compile(optimizer=RMSprop(), loss='mse')

# summary() prints the table itself and returns None, so it is not wrapped in print()
CNN_LSTM.summary()

# Reshape X_train to (windows, n_seq, n_steps, features) to match the model input
X_train_reshaped = np.reshape(X_train_apple, (X_train_apple.shape[0], n_seq, n_steps, X_train_apple.shape[2]))

# Fit the model
history = CNN_LSTM.fit(X_train_reshaped, y_train_apple, batch_size=32, epochs=100, verbose=1)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Give the test windows the same (windows, n_seq, n_steps, features) layout
# that was used for training
X_test_reshaped = X_test_apple.reshape(
    (X_test_apple.shape[0], n_seq, n_steps, X_test_apple.shape[2])
)

# Forecast every test window with the Twitter-aware CNN-LSTM
y_pred_apple = CNN_LSTM.predict(X_test_reshaped)

# Error metrics (MAPE is returned as a fraction, not a percentage)
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **CNN-LSTM Model Feeding without Twitter Data**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Each input window is split into n_seq sub-sequences of n_steps time steps.
# Note that this overwrites the n_seq / n_steps globals set for the
# Twitter-aware CNN-LSTM.
n_seq = 6
n_steps = X_train_apple_stock.shape[1] // n_seq
assert n_seq * n_steps == X_train_apple_stock.shape[1], "window length must be divisible by n_seq"

# Define the model: a per-sub-sequence Conv1D/pooling front end feeding an LSTM
CNN_LSTM_stock = Sequential()
CNN_LSTM_stock.add(TimeDistributed(Conv1D(filters=32, kernel_size=2, activation='relu'), input_shape=(n_seq, n_steps, 4)))
CNN_LSTM_stock.add(TimeDistributed(MaxPooling1D(pool_size=3)))
CNN_LSTM_stock.add(TimeDistributed(Flatten()))
CNN_LSTM_stock.add(LSTM(150, activation='relu'))
CNN_LSTM_stock.add(Dropout(0.1))
# One output unit per forecast step, like the other models in this notebook
# (previously hard-coded as 20)
CNN_LSTM_stock.add(Dense(n_steps_out))

# Compile the model
CNN_LSTM_stock.compile(optimizer=RMSprop(), loss='mse')

# summary() prints the table itself and returns None, so it is not wrapped in print()
CNN_LSTM_stock.summary()

# Reshape X_train to (windows, n_seq, n_steps, features) to match the model input
X_train_stock_reshaped = np.reshape(X_train_apple_stock, (X_train_apple_stock.shape[0], n_seq, n_steps, X_train_apple_stock.shape[2]))

# Fit the model
history = CNN_LSTM_stock.fit(X_train_stock_reshaped, y_train_apple_stock, batch_size=16, epochs=100, verbose=1)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Give the stock-only test windows the (windows, n_seq, n_steps, features)
# layout that was used for training
X_test_reshaped_stock = X_test_apple_stock.reshape(
    (X_test_apple_stock.shape[0], n_seq, n_steps, X_test_apple_stock.shape[2])
)

# Forecast every test window with the stock-only CNN-LSTM
y_pred_apple_stock = CNN_LSTM_stock.predict(X_test_reshaped_stock)

# Error metrics (MAPE is returned as a fraction, not a percentage)
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Reshape each test set to the layout its own model was built with.
# The two CNN-LSTM models use different (n_seq, n_steps) splits, so the shape
# is read from each model instead of from the shared n_seq / n_steps globals
# (the original reshaped both inputs with n_seq = 6, which does not match the
# n_seq = 5 input of CNN_LSTM).
X_test_reshaped = np.reshape(
    X_test_apple, (X_test_apple.shape[0],) + tuple(CNN_LSTM.input_shape[1:])
)
X_test_stock_reshaped = np.reshape(
    X_test_apple_stock, (X_test_apple_stock.shape[0],) + tuple(CNN_LSTM_stock.input_shape[1:])
)

# Forecasts from the two CNN-LSTM models (with and without Twitter features)
predicted_stock_prices_apple = CNN_LSTM.predict(X_test_reshaped)
predicted_stock_prices_apple_stock = CNN_LSTM_stock.predict(X_test_stock_reshaped)

# Actual stock prices and their dates, taken from the test frame
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the inverse-scaling helper on the test prices.
# NOTE(review): assumes scalevalue() min-max scaled stock_price over this same
# set of test prices — confirm against its definition.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Undo the scaling and keep only the last forecast step of every window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to the shortest one so all plotted arrays share a length.
# (The original second if/elif sliced the shorter array to the longer length,
# which is a no-op, and could leave mismatched lengths for plotting.)
# NOTE(review): each prediction is the last step of a forecast window but is
# plotted against the first n_points test dates; confirm the intended date
# alignment against split_sequences.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_apple),
    len(last_predicted_values_apple_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# Left panel: model trained with Twitter features
ax1.plot(dates, actual_stock_prices, color='indigo', label='Actual Stock Prices', linewidth=2)
ax1.plot(dates, last_predicted_values_apple, color='darkred', label='Predicted Stock Prices (With Twitter)', linewidth=2)
ax1.set_xlabel('Date')
ax1.set_ylabel('Stock Price USD ($)')
ax1.set_title('Microsoft Stock Prices - Actual vs Predicted (With Twitter)')
ax1.legend()
ax1.tick_params(axis='x', rotation=60)

# Right panel: model trained without Twitter features
ax2.plot(dates, actual_stock_prices, color='indigo', label='Actual Stock Prices', linewidth=2)
ax2.plot(dates, last_predicted_values_apple_stock, color='olive', label='Predicted Stock Prices (Without Twitter)', linewidth=2)
ax2.set_xlabel('Date')
ax2.set_ylabel('Stock Price USD ($)')
ax2.set_title('Microsoft Stock Prices - Actual vs Predicted (Without Twitter)')
ax2.legend()
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


### **Amazon Company**

#### ***Data Processing with Twitter Data***

In [None]:
# Amazon rows restricted to the date, the four sentiment columns and the
# market columns; the apple_* variable names are reused for the Amazon data.
dataset = amazon_df[[
    'day_date',
    'polarity_score', 'negative', 'neutral', 'positive',
    'open_value', 'high_value', 'low_value', 'volume',
    'stock_price',
]]
apple_df_train, apple_df_test = training_dataset_generator(dataset)
print(apple_df_train.shape, apple_df_test.shape)

In [None]:
# Collapse the training frame to one row per day_date: sentiment columns are
# averaged over the day, market columns keep their first value of the day.
apple_df_train = (
    apple_df_train
    .groupby(['day_date'], as_index=False)
    .agg({
        **dict.fromkeys(['polarity_score', 'negative', 'neutral', 'positive'], 'mean'),
        **dict.fromkeys(['open_value', 'high_value', 'low_value', 'volume', 'stock_price'], 'first'),
    })
)

apple_df_train.shape

In [None]:
# Preview the first five daily rows of the training frame
apple_df_train.head(n=5)

In [None]:
# Collapse the test frame to one row per day_date: sentiment columns are
# averaged over the day, market columns keep their first value of the day.
apple_df_test = (
    apple_df_test
    .groupby(['day_date'], as_index=False)
    .agg({
        **dict.fromkeys(['polarity_score', 'negative', 'neutral', 'positive'], 'mean'),
        **dict.fromkeys(['open_value', 'high_value', 'low_value', 'volume', 'stock_price'], 'first'),
    })
)

apple_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Side-by-side view of the training and testing stock-price series
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

# The apple_* frames hold the Amazon data in this section, so the titles name
# Amazon (they previously said "Apple").

# Plot the training data
ax1.plot(apple_df_train['day_date'], apple_df_train['stock_price'], color='c')
ax1.set(xlabel='Day Date', ylabel='Stock Price', title='Closing Price of Amazon - Training Data')
ax1.tick_params(axis='x', rotation=60)

# Plot the testing data
ax2.plot(apple_df_test['day_date'], apple_df_test['stock_price'], color='m')
ax2.set(xlabel='Day Date', ylabel='Stock Price', title='Closing Price of Amazon - Testing Data')
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


#### Generating X_train & y_train

In [None]:
# Drop the date column before scaling. errors='ignore' keeps the cell
# re-runnable: apple_df_train is overwritten in place, so on a second run the
# column is already gone and a plain drop would raise KeyError.
apple_df_train = apple_df_train.drop(columns=['day_date'], errors='ignore')
dataset = apple_df_train

# Scale the dataset
scaled_dataset = scalevalue(dataset)

# Window sizes handed to split_sequences (input steps, output steps)
n_steps_in, n_steps_out = 60, 20

# Generate input/output for training sets
X_train_apple, y_train_apple = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_train_apple.shape, y_train_apple.shape)

In [None]:
# Window sizes handed to split_sequences (input steps, output steps)
n_steps_in = 60
n_steps_out = 20

# Test frame without the date column, then scaled with the notebook's helper
dataset = apple_df_test.drop('day_date', axis=1)
scaled_dataset = scalevalue(dataset)

# Build the input/output windows for the test set
X_test_apple, y_test_apple = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_test_apple.shape, y_test_apple.shape)

#### Flattening the Input for Training

In [None]:
# Each window is flattened to a single vector of (time steps * features) values
n_input = X_train_apple.shape[1] * X_train_apple.shape[2]

# One row per window, n_input columns
X_train_apple_flatteded = np.reshape(X_train_apple, (X_train_apple.shape[0], n_input))
X_test_apple_flatteded = np.reshape(X_test_apple, (X_test_apple.shape[0], n_input))

print(X_train_apple_flatteded.shape, X_test_apple_flatteded.shape)

#### Data Processing without Twitter Data

In [None]:
# Amazon rows restricted to the date and the market columns (no sentiment
# features); the apple_* variable names are reused for the Amazon data.
dataset = amazon_df[[
    'day_date',
    'open_value', 'high_value', 'low_value', 'volume',
    'stock_price',
]]
apple_stock_df_train, apple_stock_df_test = training_dataset_generator(dataset)
print(apple_stock_df_train.shape, apple_stock_df_test.shape)

In [None]:
# Collapse the stock-only training frame to one row per day_date; every
# market column keeps its first value of the day.
apple_stock_df_train = (
    apple_stock_df_train
    .groupby(['day_date'], as_index=False)
    .agg(dict.fromkeys(['open_value', 'high_value', 'low_value', 'volume', 'stock_price'], 'first'))
)

apple_stock_df_train.shape

In [None]:
# Collapse the stock-only test frame to one row per day_date; every market
# column keeps its first value of the day.
apple_stock_df_test = (
    apple_stock_df_test
    .groupby(['day_date'], as_index=False)
    .agg(dict.fromkeys(['open_value', 'high_value', 'low_value', 'volume', 'stock_price'], 'first'))
)

apple_stock_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Side-by-side view of the stock-only training and testing price series
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

# The apple_stock_* frames hold the Amazon data in this section, so the titles
# name Amazon (they previously said "Apple").

# Plot the training data
ax1.plot(apple_stock_df_train['day_date'], apple_stock_df_train['stock_price'], color='c')
ax1.set(xlabel='Day Date', ylabel='Stock Price', title='Closing Price of Amazon - Training Data')
ax1.tick_params(axis='x', rotation=60)

# Plot the testing data
ax2.plot(apple_stock_df_test['day_date'], apple_stock_df_test['stock_price'], color='m')
ax2.set(xlabel='Day Date', ylabel='Stock Price', title='Closing Price of Amazon - Testing Data')
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


#### **Generating X_train & y_train**

In [None]:
# Window sizes handed to split_sequences (input steps, output steps)
n_steps_in = 60
n_steps_out = 20

# Stock-only training frame without the date column, then scaled with the
# notebook's helper
dataset = apple_stock_df_train.drop('day_date', axis=1)
scaled_dataset = scalevalue(dataset)

# Build the input/output windows for the training set
X_train_apple_stock, y_train_apple_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_train_apple_stock.shape, y_train_apple_stock.shape)

In [None]:
# Window sizes handed to split_sequences (input steps, output steps)
n_steps_in = 60
n_steps_out = 20

# Stock-only test frame without the date column, then scaled with the
# notebook's helper. `dataset` keeps pointing at this frame afterwards.
dataset = apple_stock_df_test.drop('day_date', axis=1)
scaled_dataset = scalevalue(dataset)

# Build the input/output windows for the test set
X_test_apple_stock, y_test_apple_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_test_apple_stock.shape, y_test_apple_stock.shape)

#### **Flattening the Input for Training**

In [None]:
# Each stock-only window is flattened to a single vector of
# (time steps * features) values. This reassigns the shared n_input global.
n_input = X_train_apple_stock.shape[1] * X_train_apple_stock.shape[2]

# One row per window, n_input columns
X_train_apple_stock_flatteded = np.reshape(X_train_apple_stock, (X_train_apple_stock.shape[0], n_input))
X_test_apple_stock_flatteded = np.reshape(X_test_apple_stock, (X_test_apple_stock.shape[0], n_input))

print(X_train_apple_stock_flatteded.shape, X_test_apple_stock_flatteded.shape)

#### **Deep Learning Model Feeding**

##### **MLP Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Define the model (`model` is kept as an alias, as in the original cell)
model = Sequential()
MLP_Apple = model
# The input width is read from the Twitter feature matrix itself. The shared
# `n_input` global is reassigned by the stock-only flattening cell that runs
# before this one, so on a top-to-bottom run it no longer matches the width
# of X_train_apple_flatteded.
MLP_Apple.add(Dense(150, activation='relu', input_dim=X_train_apple_flatteded.shape[1]))
MLP_Apple.add(Dropout(0.1))
# Hidden layers take their input size from the previous layer, so the
# redundant input_dim argument was dropped here
MLP_Apple.add(Dense(150, activation='relu'))
MLP_Apple.add(Dropout(0.1))
# One output unit per forecast step
MLP_Apple.add(Dense(n_steps_out))
MLP_Apple.compile(optimizer='adam', loss='mse')

# summary() prints the table itself and returns None, so it is not wrapped in print()
MLP_Apple.summary()

# Fit the model
MLP_Apple.fit(X_train_apple_flatteded, y_train_apple, batch_size=32, epochs=200, verbose=1)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Forecast every flattened test window with the Twitter-aware MLP
y_pred_apple = MLP_Apple.predict(X_test_apple_flatteded)

# Error metrics (MAPE is returned as a fraction, not a percentage)
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **MLP Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Define the model (`model` is kept as an alias, as in the original cell)
model = Sequential()
MLP_Apple_Stock = model
# Input width read from the stock-only feature matrix this model is trained on
MLP_Apple_Stock.add(Dense(100, activation='relu', input_dim=X_train_apple_stock_flatteded.shape[1]))
# Dropout belongs to this model. The original line called MLP_Apple.add(...),
# which appended a layer to the already-trained Twitter model and left this
# model without dropout.
MLP_Apple_Stock.add(Dropout(0.1))
# One output unit per forecast step
MLP_Apple_Stock.add(Dense(n_steps_out))
MLP_Apple_Stock.compile(optimizer='adam', loss='mse')

# summary() prints the table itself and returns None, so it is not wrapped in print()
MLP_Apple_Stock.summary()

# Fit the model
MLP_Apple_Stock.fit(X_train_apple_stock_flatteded, y_train_apple_stock, batch_size=16, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Forecast every flattened test window with the stock-only MLP
y_pred_apple_stock = MLP_Apple_Stock.predict(X_test_apple_stock_flatteded)

# Error metrics (MAPE is returned as a fraction, not a percentage)
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Forecasts from the two MLP models (with and without Twitter features)
predicted_stock_prices_apple = MLP_Apple.predict(X_test_apple_flatteded)
predicted_stock_prices_apple_stock = MLP_Apple_Stock.predict(X_test_apple_stock_flatteded)

# Actual stock prices and their dates, taken from the test frame
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the inverse-scaling helper on the test prices themselves rather than on
# whatever the global `dataset` last pointed to, so this cell does not depend
# on which data-preparation cell ran most recently.
# NOTE(review): assumes scalevalue() min-max scaled stock_price over this same
# set of test prices — confirm against its definition.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Undo the scaling and keep only the last forecast step of every window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to the shortest one so all plotted arrays share a length.
# (The original second if/elif sliced the shorter array to the longer length,
# which is a no-op, and could leave mismatched lengths for plotting.)
# NOTE(review): each prediction is the last step of a forecast window but is
# plotted against the first n_points test dates; confirm the intended date
# alignment against split_sequences.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_apple),
    len(last_predicted_values_apple_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# Left panel: model trained with Twitter features
ax1.plot(dates, actual_stock_prices, color='deeppink', label='Actual Stock Prices', linewidth=2)
ax1.plot(dates, last_predicted_values_apple, color='darkred', label='Predicted Stock Prices (With Twitter)', linewidth=2)
ax1.set_xlabel('Date')
ax1.set_ylabel('Stock Price USD ($)')
ax1.set_title('Amazon Stock Prices - Actual vs Predicted (With Twitter)')
ax1.legend()
ax1.tick_params(axis='x', rotation=60)

# Right panel: model trained without Twitter features
ax2.plot(dates, actual_stock_prices, color='deeppink', label='Actual Stock Prices', linewidth=2)
ax2.plot(dates, last_predicted_values_apple_stock, color='olive', label='Predicted Stock Prices (Without Twitter)', linewidth=2)
ax2.set_xlabel('Date')
ax2.set_ylabel('Stock Price USD ($)')
ax2.set_title('Amazon Stock Prices - Actual vs Predicted (Without Twitter)')
ax2.legend()
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


##### **1D-CNN Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Dropout, Flatten, MaxPooling1D

# 1D-CNN on the Twitter-augmented windows: convolution and pooling over the
# time axis, then a dense head with one output unit per forecast step.
One_D_CNN_Apple = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps_in,8)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dropout(0.1),
    Dense(n_steps_out),
])
One_D_CNN_Apple.compile(optimizer='adam', loss='mse')

# Show the layer table
print(One_D_CNN_Apple.summary())

# Train silently (verbose=0)
One_D_CNN_Apple.fit(X_train_apple, y_train_apple, batch_size=64, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Forecast every test window with the Twitter-aware 1D-CNN
y_pred_apple = One_D_CNN_Apple.predict(X_test_apple)

# Error metrics (MAPE is returned as a fraction, not a percentage)
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **1D-CNN Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Dropout, Flatten, MaxPooling1D

# 1D-CNN on the stock-only windows: convolution and pooling over the time
# axis, then a dense head with one output unit per forecast step.
One_D_CNN_Apple_Stock = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps_in,4)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.2),
    Dense(n_steps_out),
])
One_D_CNN_Apple_Stock.compile(optimizer='adam', loss='mse')

# Show the layer table
print(One_D_CNN_Apple_Stock.summary())

# Train silently (verbose=0)
One_D_CNN_Apple_Stock.fit(X_train_apple_stock, y_train_apple_stock, batch_size=32, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Forecast every test window with the stock-only 1D-CNN
y_pred_apple_stock = One_D_CNN_Apple_Stock.predict(X_test_apple_stock)

# Error metrics (MAPE is returned as a fraction, not a percentage)
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Forecasts from the two 1D-CNN models (with and without Twitter features)
predicted_stock_prices_apple = One_D_CNN_Apple.predict(X_test_apple)
predicted_stock_prices_apple_stock = One_D_CNN_Apple_Stock.predict(X_test_apple_stock)

# Actual stock prices and their dates, taken from the test frame
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the inverse-scaling helper on the test prices themselves rather than on
# whatever the global `dataset` last pointed to, so this cell does not depend
# on which data-preparation cell ran most recently.
# NOTE(review): assumes scalevalue() min-max scaled stock_price over this same
# set of test prices — confirm against its definition.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Undo the scaling and keep only the last forecast step of every window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to the shortest one so all plotted arrays share a length.
# (The original second if/elif sliced the shorter array to the longer length,
# which is a no-op, and could leave mismatched lengths for plotting.)
# NOTE(review): each prediction is the last step of a forecast window but is
# plotted against the first n_points test dates; confirm the intended date
# alignment against split_sequences.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_apple),
    len(last_predicted_values_apple_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# Left panel: model trained with Twitter features
ax1.plot(dates, actual_stock_prices, color='deeppink', label='Actual Stock Prices', linewidth=2)
ax1.plot(dates, last_predicted_values_apple, color='darkred', label='Predicted Stock Prices (With Twitter)', linewidth=2)
ax1.set_xlabel('Date')
ax1.set_ylabel('Stock Price USD ($)')
ax1.set_title('Amazon Stock Prices - Actual vs Predicted (With Twitter)')
ax1.legend()
ax1.tick_params(axis='x', rotation=60)

# Right panel: model trained without Twitter features
ax2.plot(dates, actual_stock_prices, color='deeppink', label='Actual Stock Prices', linewidth=2)
ax2.plot(dates, last_predicted_values_apple_stock, color='olive', label='Predicted Stock Prices (Without Twitter)', linewidth=2)
ax2.set_xlabel('Date')
ax2.set_ylabel('Stock Price USD ($)')
ax2.set_title('Amazon Stock Prices - Actual vs Predicted (Without Twitter)')
ax2.legend()
ax2.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **LSTM Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM

# Two stacked LSTM layers trained on the Twitter-augmented windows.
LSTMM_A = Sequential()
# The first LSTM must return the full sequence: a following LSTM layer needs
# 3-D (batch, time steps, features) input, and with return_sequences=False the
# first layer would hand it only a 2-D (batch, units) tensor.
# Input: n_steps_in time steps x 8 features per step.
LSTMM_A.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(n_steps_in, 8)))
LSTMM_A.add(Dropout(0.1))
# The second LSTM collapses the sequence to one vector; it takes its input
# shape from the previous layer, so the redundant input_shape was dropped
LSTMM_A.add(LSTM(50, activation='relu', return_sequences=False))
LSTMM_A.add(Dropout(0.1))
# One output unit per forecast step
LSTMM_A.add(Dense(n_steps_out))
LSTMM_A.compile(optimizer='rmsprop', loss='mse')

# summary() prints the table itself and returns None, so it is not wrapped in print()
LSTMM_A.summary()

# Fit the model; `history` keeps the per-epoch loss record returned by fit()
history = LSTMM_A.fit(X_train_apple, y_train_apple, batch_size=16, epochs=100, verbose=1)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Forecast every test window with the Twitter-aware LSTM
y_pred_apple = LSTMM_A.predict(X_test_apple)

# Error metrics (MAPE is returned as a fraction, not a percentage)
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **LSTM Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM

# LSTM forecaster trained on the stock-only windows (no Twitter features).
LSTMM_AM = Sequential()
# return_sequences must be False here: with True the LSTM emits one vector per
# time step, so Dense would produce a 3-D (batch, n_steps_in, n_steps_out)
# output that cannot be matched against the 2-D targets in y_train_apple_stock
# or passed to the sklearn metrics / inverse_transform used downstream.
# Input: n_steps_in time steps x 4 features per step.
LSTMM_AM.add(LSTM(100, activation='relu', return_sequences=False, input_shape=(n_steps_in, 4)))
LSTMM_AM.add(Dropout(0.1))
# One output unit per forecast step
LSTMM_AM.add(Dense(n_steps_out))
LSTMM_AM.compile(optimizer='rmsprop', loss='mse')

# summary() prints the table itself and returns None, so it is not wrapped in print()
LSTMM_AM.summary()

# Fit the model
LSTMM_AM.fit(X_train_apple_stock, y_train_apple_stock, batch_size=16, epochs=100, verbose=1)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Forecast every test window with the stock-only LSTM
y_pred_apple_stock = LSTMM_AM.predict(X_test_apple_stock)

# Error metrics (MAPE is returned as a fraction, not a percentage)
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Predict (scaled) stock prices with both LSTM models
predicted_stock_prices_apple = LSTMM_A.predict(X_test_apple)
predicted_stock_prices_apple_stock = LSTMM_AM.predict(X_test_apple_stock)

# Extract the actual stock prices and the dates
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the scaler on the test-period stock prices. The test frame is referenced
# explicitly instead of the notebook-global `dataset`, whose content depends on
# which cell happened to run last.
# NOTE(review): assumes `scalevalue` min-max scales each column independently
# to (0, 1), so this scaler reproduces the price column's scaling - confirm.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(apple_df_test[['stock_price']])

# Map the scaled forecasts back to prices and keep the last forecast step of every window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to one common length so all of them can share the x axis.
# (The previous two-step if/elif logic could leave the series with different
# lengths when the two prediction arrays were not equally long.)
# NOTE(review): the i-th forecast is drawn at dates[i]; whether that is the
# date the forecast refers to depends on how split_sequences builds its
# windows - confirm.
n_points = min(len(actual_stock_prices),
               len(last_predicted_values_apple),
               len(last_predicted_values_apple_stock))
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create subplots: one panel per model variant
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Without Twitter'),
)
for ax, predicted_values, line_color, variant in panels:
    ax.plot(dates, actual_stock_prices, color='deeppink', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted_values, color=line_color, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    ax.set_title(f'Amazon Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **CNN-LSTM Model Feeding with Twitter Data**

In [None]:
# Only the tensorflow.keras imports are kept: the plain `keras` imports that
# used to precede them were immediately shadowed by these names.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Parameters: every input window is cut into n_seq sub-sequences of n_steps time steps
n_seq = 6
n_steps = X_train_apple.shape[1] // n_seq
n_features = X_train_apple.shape[2]

# Define the model: a per-sub-sequence CNN feature extractor followed by an LSTM.
# The feature count and the output width are read from the training arrays so
# the layer shapes cannot drift away from the data they are fitted on.
CNN_LSTM = Sequential()
CNN_LSTM.add(TimeDistributed(Conv1D(filters=32, kernel_size=1, activation='relu'), input_shape=(n_seq, n_steps, n_features)))
CNN_LSTM.add(TimeDistributed(MaxPooling1D(pool_size=2)))
CNN_LSTM.add(TimeDistributed(Flatten()))
CNN_LSTM.add(LSTM(100, activation='relu'))
CNN_LSTM.add(Dropout(0.1))
CNN_LSTM.add(Dense(y_train_apple.shape[1]))  # Match y_train shape

# Compile the model
CNN_LSTM.compile(optimizer=RMSprop(), loss='mse')

# Print model summary
print(CNN_LSTM.summary())

# Reshape X_train to (samples, n_seq, n_steps, features); this raises if the
# window length is not an exact multiple of n_seq
X_train_reshaped = np.reshape(X_train_apple, (X_train_apple.shape[0], n_seq, n_steps, n_features))

# Fit the model
history = CNN_LSTM.fit(X_train_reshaped, y_train_apple, batch_size=32, epochs=100, verbose=1)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Lay the test windows out as (samples, n_seq, n_steps, features), reusing the
# n_seq / n_steps values set in the model cell
cnn_lstm_test_shape = (X_test_apple.shape[0], n_seq, n_steps, X_test_apple.shape[2])
X_test_reshaped = X_test_apple.reshape(cnn_lstm_test_shape)

# Forecast the test windows with the Twitter-fed CNN-LSTM
y_pred_apple = CNN_LSTM.predict(X_test_reshaped)

# Error metrics between the held-out targets and the forecasts
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
# scikit-learn returns MAPE as a fraction (1.0 means 100%), not as a percentage
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **CNN-LSTM Model Feeding without Twitter Data**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Parameters: every input window is cut into n_seq sub-sequences of n_steps time steps.
# NOTE: this overwrites the n_seq / n_steps values set by the Twitter CNN-LSTM
# cell; the evaluation cell right below relies on the values assigned here.
n_seq = 5
n_steps = X_train_apple_stock.shape[1] // n_seq
n_features = X_train_apple_stock.shape[2]

# Define the model. The feature count and the output width are read from the
# training arrays so the layer shapes cannot drift away from the data.
CNN_LSTM_stock = Sequential()
CNN_LSTM_stock.add(TimeDistributed(Conv1D(filters=32, kernel_size=2, activation='relu'), input_shape=(n_seq, n_steps, n_features)))
CNN_LSTM_stock.add(TimeDistributed(MaxPooling1D(pool_size=2)))
CNN_LSTM_stock.add(TimeDistributed(Flatten()))
CNN_LSTM_stock.add(LSTM(150, activation='relu'))
CNN_LSTM_stock.add(Dropout(0.1))
CNN_LSTM_stock.add(Dense(y_train_apple_stock.shape[1]))  # Match y_train_apple_stock shape

# Compile the model
CNN_LSTM_stock.compile(optimizer=RMSprop(), loss='mse')

# Print model summary
print(CNN_LSTM_stock.summary())

# Reshape X_train to (samples, n_seq, n_steps, features); this raises if the
# window length is not an exact multiple of n_seq
X_train_stock_reshaped = np.reshape(X_train_apple_stock, (X_train_apple_stock.shape[0], n_seq, n_steps, n_features))

# Fit the model
history = CNN_LSTM_stock.fit(X_train_stock_reshaped, y_train_apple_stock, batch_size=16, epochs=100, verbose=1)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Lay the test windows out as (samples, n_seq, n_steps, features), reusing the
# n_seq / n_steps values set in the stock-only model cell
cnn_lstm_stock_test_shape = (X_test_apple_stock.shape[0], n_seq, n_steps, X_test_apple_stock.shape[2])
X_test_reshaped_stock = X_test_apple_stock.reshape(cnn_lstm_stock_test_shape)

# Forecast the test windows with the stock-only CNN-LSTM
y_pred_apple_stock = CNN_LSTM_stock.predict(X_test_reshaped_stock)

# Error metrics between the held-out targets and the forecasts
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
# scikit-learn returns MAPE as a fraction (1.0 means 100%), not as a percentage
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Reshape each test set to the input layout of the model that will consume it.
# The two CNN-LSTM models were built with different n_seq / n_steps splits, so
# the layout is read from each model instead of sharing one global n_seq
# (reusing the Twitter model's split for the stock-only model gives it an
# input of the wrong shape).
twitter_input_shape = tuple(CNN_LSTM.input_shape[1:])
X_test_reshaped = np.reshape(X_test_apple, (X_test_apple.shape[0],) + twitter_input_shape)

stock_input_shape = tuple(CNN_LSTM_stock.input_shape[1:])
X_test_stock_reshaped = np.reshape(X_test_apple_stock, (X_test_apple_stock.shape[0],) + stock_input_shape)

# Predict (scaled) stock prices with the Twitter-fed and the stock-only model
predicted_stock_prices_apple = CNN_LSTM.predict(X_test_reshaped)
predicted_stock_prices_apple_stock = CNN_LSTM_stock.predict(X_test_stock_reshaped)

# Extract the actual stock prices and the dates
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the scaler on the (untrimmed) test-period stock prices
# NOTE(review): assumes `scalevalue` min-max scales each column independently
# to (0, 1), so this scaler reproduces the price column's scaling - confirm.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Map the scaled forecasts back to prices and keep the last forecast step of every window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to one common length so all of them can share the x axis.
# NOTE(review): the i-th forecast is drawn at dates[i]; whether that is the
# date the forecast refers to depends on how split_sequences builds its
# windows - confirm.
n_points = min(len(actual_stock_prices),
               len(last_predicted_values_apple),
               len(last_predicted_values_apple_stock))
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create subplots: one panel per model variant
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Without Twitter'),
)
for ax, predicted_values, line_color, variant in panels:
    ax.plot(dates, actual_stock_prices, color='deeppink', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted_values, color=line_color, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    ax.set_title(f'Amazon Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


### **Tesla Company**

#### ***Data Processing with Twitter Data***

In [None]:
# NOTE: the apple_* variable names are carried over from the earlier company
# sections; in this section they hold Tesla data.
tesla_twitter_columns = [
    'day_date',
    'polarity_score', 'negative', 'neutral', 'positive',
    'open_value', 'high_value', 'low_value', 'volume',
    'stock_price',
]
dataset = tesla_df[tesla_twitter_columns]

# Split into training and testing frames and report their sizes
apple_df_train, apple_df_test = training_dataset_generator(dataset)
print(apple_df_train.shape, apple_df_test.shape)

In [None]:
# Collapse the training frame to one row per 'day_date': the sentiment columns
# are averaged over the day, the market columns take the day's first row.
daily_aggregation = {
    **dict.fromkeys(['polarity_score', 'negative', 'neutral', 'positive'], 'mean'),
    **dict.fromkeys(['open_value', 'high_value', 'low_value', 'volume', 'stock_price'], 'first'),
}
apple_df_train = apple_df_train.groupby('day_date', as_index=False).agg(daily_aggregation)

apple_df_train.shape

In [None]:
# Preview the first five aggregated training rows
apple_df_train.head(5)

In [None]:
# Collapse the testing frame to one row per 'day_date': the sentiment columns
# are averaged over the day, the market columns take the day's first row.
daily_aggregation = {
    **dict.fromkeys(['polarity_score', 'negative', 'neutral', 'positive'], 'mean'),
    **dict.fromkeys(['open_value', 'high_value', 'low_value', 'volume', 'stock_price'], 'first'),
}
apple_df_test = apple_df_test.groupby('day_date', as_index=False).agg(daily_aggregation)

apple_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Create a figure with two subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

# One panel per split. The frames are built from tesla_df in this section, so
# the titles name Tesla (they previously said "Apple", left over from the
# section this cell was copied from).
panels = (
    (ax1, apple_df_train, 'c', 'Closing Price of Tesla - Training Data'),
    (ax2, apple_df_test, 'm', 'Closing Price of Tesla - Testing Data'),
)
for ax, frame, line_color, title in panels:
    ax.plot(frame['day_date'], frame['stock_price'], color=line_color)
    ax.set_ylabel('Stock Price')
    ax.set_xlabel('Day Date')
    ax.set_title(title)
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


#### Generating X_train & y_train

In [None]:
# Drop the date column into a separate frame instead of overwriting
# apple_df_train: the in-place reassignment made this cell fail with a KeyError
# when run a second time, and it is how the test cell below already works.
dataset = apple_df_train.drop(columns=['day_date'])

# Scale the dataset
scaled_dataset = scalevalue(dataset)

# choose a number of time steps (window lengths passed to split_sequences)
n_steps_in, n_steps_out = 60, 20

# Generate input/output for training sets
X_train_apple, y_train_apple = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_train_apple.shape, y_train_apple.shape)

In [None]:
# Window lengths handed to split_sequences
n_steps_in = 60
n_steps_out = 20

# Remove the date column and scale the remaining columns
# NOTE(review): the test frame is scaled on its own here, separately from the
# training frame - confirm that this is intended.
dataset = apple_df_test.drop(columns='day_date')
scaled_dataset = scalevalue(dataset)

# Build the input/output windows for the testing set
X_test_apple, y_test_apple = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_test_apple.shape, y_test_apple.shape)

#### Flattening the Input for Training

In [None]:
# Flatten every (time steps, features) window into one feature vector
n_timesteps_twitter, n_features_twitter = X_train_apple.shape[1], X_train_apple.shape[2]
n_input = n_timesteps_twitter * n_features_twitter

X_train_apple_flatteded = np.reshape(X_train_apple, (X_train_apple.shape[0], n_input))
X_test_apple_flatteded = np.reshape(X_test_apple, (X_test_apple.shape[0], n_input))

print(X_train_apple_flatteded.shape, X_test_apple_flatteded.shape)

#### Data Processing without Twitter Data

In [None]:
# Same Tesla frame as above, restricted to the date and market columns
# (no Twitter sentiment features)
tesla_stock_columns = ['day_date', 'open_value', 'high_value', 'low_value', 'volume', 'stock_price']
dataset = tesla_df[tesla_stock_columns]

# Split into training and testing frames and report their sizes
apple_stock_df_train, apple_stock_df_test = training_dataset_generator(dataset)
print(apple_stock_df_train.shape, apple_stock_df_test.shape)

In [None]:
# Collapse the stock-only training frame to one row per 'day_date', keeping
# the first row's market values for each day.
market_columns = ['open_value', 'high_value', 'low_value', 'volume', 'stock_price']
apple_stock_df_train = (
    apple_stock_df_train
    .groupby('day_date', as_index=False)
    .agg(dict.fromkeys(market_columns, 'first'))
)

apple_stock_df_train.shape

In [None]:
# Collapse the stock-only testing frame to one row per 'day_date', keeping
# the first row's market values for each day.
market_columns = ['open_value', 'high_value', 'low_value', 'volume', 'stock_price']
apple_stock_df_test = (
    apple_stock_df_test
    .groupby('day_date', as_index=False)
    .agg(dict.fromkeys(market_columns, 'first'))
)

apple_stock_df_test.shape

In [None]:
import matplotlib.pyplot as plt

# Create a figure with two subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), gridspec_kw={'width_ratios': [1, 1]})

# One panel per split. The frames are built from tesla_df in this section, so
# the titles name Tesla (they previously said "Apple", left over from the
# section this cell was copied from).
panels = (
    (ax1, apple_stock_df_train, 'c', 'Closing Price of Tesla - Training Data'),
    (ax2, apple_stock_df_test, 'm', 'Closing Price of Tesla - Testing Data'),
)
for ax, frame, line_color, title in panels:
    ax.plot(frame['day_date'], frame['stock_price'], color=line_color)
    ax.set_ylabel('Stock Price')
    ax.set_xlabel('Day Date')
    ax.set_title(title)
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


#### **Generating X_train & y_train**

In [None]:
# Window lengths handed to split_sequences
n_steps_in = 60
n_steps_out = 20

# Remove the date column and scale the remaining market columns
dataset = apple_stock_df_train.drop(columns='day_date')
scaled_dataset = scalevalue(dataset)

# Build the input/output windows for the stock-only training set
X_train_apple_stock, y_train_apple_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_train_apple_stock.shape, y_train_apple_stock.shape)

In [None]:
# Window lengths handed to split_sequences
n_steps_in = 60
n_steps_out = 20

# Remove the date column and scale the remaining market columns
# NOTE(review): the test frame is scaled on its own here, separately from the
# training frame - confirm that this is intended.
dataset = apple_stock_df_test.drop(columns='day_date')
scaled_dataset = scalevalue(dataset)

# Build the input/output windows for the stock-only testing set
X_test_apple_stock, y_test_apple_stock = split_sequences(scaled_dataset, n_steps_in, n_steps_out)

print(X_test_apple_stock.shape, y_test_apple_stock.shape)

#### **Flattening the Input for Training**

In [None]:
# Flatten every (time steps, features) window into one feature vector.
# NOTE: this rebinds n_input, which the Twitter flatten cell above also sets.
n_timesteps_stock, n_features_stock = X_train_apple_stock.shape[1], X_train_apple_stock.shape[2]
n_input = n_timesteps_stock * n_features_stock

X_train_apple_stock_flatteded = np.reshape(X_train_apple_stock, (X_train_apple_stock.shape[0], n_input))
X_test_apple_stock_flatteded = np.reshape(X_test_apple_stock, (X_test_apple_stock.shape[0], n_input))

print(X_train_apple_stock_flatteded.shape, X_test_apple_stock_flatteded.shape)

#### **Deep Learning Model Feeding**

##### **MLP Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Define the model.
# The input width is read from the flattened Twitter training matrix rather
# than from the global `n_input`: by the time this cell runs, `n_input` has
# been overwritten by the stock-only flatten cell and no longer matches the
# width of X_train_apple_flatteded.
MLP_Apple = Sequential()
MLP_Apple.add(Dense(150, activation='relu', input_dim=X_train_apple_flatteded.shape[1]))
MLP_Apple.add(Dropout(0.3))
MLP_Apple.add(Dense(n_steps_out))
MLP_Apple.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(MLP_Apple.summary())

# Fit the model
MLP_Apple.fit(X_train_apple_flatteded, y_train_apple,batch_size=16, epochs=200, verbose=1)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast the flattened test windows with the Twitter-fed MLP
y_pred_apple = MLP_Apple.predict(X_test_apple_flatteded)

# Error metrics between the scaled held-out targets and the forecasts
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
# scikit-learn returns MAPE as a fraction (1.0 means 100%), not as a percentage
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **MLP Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Define the stock-only model.
# The dropout layer is added to MLP_Apple_Stock itself. It used to be added to
# MLP_Apple, which left this model without dropout and appended a stray layer
# after the output of the already trained Twitter model.
# The input width is read from the flattened stock-only training matrix so it
# does not depend on which flatten cell last assigned the global `n_input`.
MLP_Apple_Stock = Sequential()
MLP_Apple_Stock.add(Dense(150, activation='relu', input_dim=X_train_apple_stock_flatteded.shape[1]))
MLP_Apple_Stock.add(Dropout(0.1))
MLP_Apple_Stock.add(Dense(n_steps_out))
MLP_Apple_Stock.compile(optimizer='adam', loss='mse')

# Print model summary
print(MLP_Apple_Stock.summary())

# Fit the model
MLP_Apple_Stock.fit(X_train_apple_stock_flatteded, y_train_apple_stock,batch_size=16, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast the flattened test windows with the stock-only MLP
y_pred_apple_stock = MLP_Apple_Stock.predict(X_test_apple_stock_flatteded)

# Error metrics between the scaled held-out targets and the forecasts
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
# scikit-learn returns MAPE as a fraction (1.0 means 100%), not as a percentage
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')


##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Predict (scaled) Tesla stock prices with both MLP models
predicted_stock_prices_apple = MLP_Apple.predict(X_test_apple_flatteded)
predicted_stock_prices_apple_stock = MLP_Apple_Stock.predict(X_test_apple_stock_flatteded)

# Extract the actual stock prices and the dates
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the scaler on the test-period stock prices. The test frame is referenced
# explicitly instead of the notebook-global `dataset`, whose content depends on
# which cell happened to run last.
# NOTE(review): assumes `scalevalue` min-max scales each column independently
# to (0, 1), so this scaler reproduces the price column's scaling - confirm.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(apple_df_test[['stock_price']])

# Map the scaled forecasts back to prices and keep the last of the n_steps_out
# forecast steps of every window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to one common length so all of them can share the x axis.
# (The previous two-step if/elif logic could leave the series with different
# lengths when the two prediction arrays were not equally long.)
# NOTE(review): the i-th forecast is drawn at dates[i]; whether that is the
# date the forecast refers to depends on how split_sequences builds its
# windows - confirm.
n_points = min(len(actual_stock_prices),
               len(last_predicted_values_apple),
               len(last_predicted_values_apple_stock))
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create subplots: one panel per model variant
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Without Twitter'),
)
for ax, predicted_values, line_color, variant in panels:
    ax.plot(dates, actual_stock_prices, color='black', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted_values, color=line_color, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    ax.set_title(f'Tesla Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()


##### **1D-CNN Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Dropout, Flatten, MaxPooling1D

# 1D-CNN fed with the Twitter feature set: convolution + pooling over the time
# axis, two dense layers, dropout, then one output unit per forecast step
One_D_CNN_Apple = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps_in,8)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(50, activation='relu'),
    Dropout(0.2),
    Dense(n_steps_out),
])
One_D_CNN_Apple.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(One_D_CNN_Apple.summary())

# Train silently (verbose=0) on the Twitter training windows
One_D_CNN_Apple.fit(X_train_apple, y_train_apple, batch_size=32, epochs=100, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast the test windows with the Twitter-fed 1D-CNN
y_pred_apple = One_D_CNN_Apple.predict(X_test_apple)

# Error metrics between the scaled held-out targets and the forecasts
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
# scikit-learn returns MAPE as a fraction (1.0 means 100%), not as a percentage
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **1D-CNN Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Dropout, Flatten, MaxPooling1D

# 1D-CNN fed with the stock-only feature set: convolution + pooling over the
# time axis, two dense layers, dropout, then one output unit per forecast step
One_D_CNN_Apple_Stock = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps_in,4)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(50, activation='relu'),
    Dropout(0.1),
    Dense(n_steps_out),
])
One_D_CNN_Apple_Stock.compile(optimizer='rmsprop', loss='mse')

# Print model summary
print(One_D_CNN_Apple_Stock.summary())

# Train silently (verbose=0) on the stock-only training windows
One_D_CNN_Apple_Stock.fit(X_train_apple_stock, y_train_apple_stock, batch_size=32, epochs=200, verbose=0)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast the test windows with the stock-only 1D-CNN
y_pred_apple_stock = One_D_CNN_Apple_Stock.predict(X_test_apple_stock)

# Error metrics between the scaled held-out targets and the forecasts
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
# scikit-learn returns MAPE as a fraction (1.0 means 100%), not as a percentage
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Predict (scaled) Tesla stock prices with both 1D-CNN models
predicted_stock_prices_apple = One_D_CNN_Apple.predict(X_test_apple)
predicted_stock_prices_apple_stock = One_D_CNN_Apple_Stock.predict(X_test_apple_stock)

# Extract the actual stock prices and the dates
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit the scaler on the test-period stock prices. The test frame is referenced
# explicitly instead of the notebook-global `dataset`, whose content depends on
# which cell happened to run last.
# NOTE(review): assumes `scalevalue` min-max scales each column independently
# to (0, 1), so this scaler reproduces the price column's scaling - confirm.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(apple_df_test[['stock_price']])

# Map the scaled forecasts back to prices and keep the last of the n_steps_out
# forecast steps of every window
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to one common length so all of them can share the x axis.
# (The previous two-step if/elif logic could leave the series with different
# lengths when the two prediction arrays were not equally long.)
# NOTE(review): the i-th forecast is drawn at dates[i]; whether that is the
# date the forecast refers to depends on how split_sequences builds its
# windows - confirm.
n_points = min(len(actual_stock_prices),
               len(last_predicted_values_apple),
               len(last_predicted_values_apple_stock))
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create subplots: one panel per model variant
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Without Twitter'),
)
for ax, predicted_values, line_color, variant in panels:
    ax.plot(dates, actual_stock_prices, color='black', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted_values, color=line_color, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    ax.set_title(f'Tesla Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **LSTM Model Feeding with Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM

# Define the stacked LSTM model.
# The first LSTM must return the full sequence (return_sequences=True): the
# second LSTM layer expects a 3-D (batch, time steps, features) input, and
# with return_sequences=False it would receive only the final 2-D state and
# fail when the layer is added. This matches the stock-only LSTM cell below.
LSTMM_A = Sequential()
LSTMM_A.add(LSTM(100, activation='relu', return_sequences=True, input_shape=(n_steps_in, 8)))
LSTMM_A.add(LSTM(100, activation='relu'))
LSTMM_A.add(Dropout(0.2))
LSTMM_A.add(Dense(n_steps_out))
LSTMM_A.compile(optimizer='adam', loss='mse')

# Print model summary
print(LSTMM_A.summary())

# Fit the model on the Twitter-feature training windows
history = LSTMM_A.fit(X_train_apple, y_train_apple, batch_size=64, epochs=200, verbose=1)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast the test windows with the Twitter-fed LSTM
y_pred_apple = LSTMM_A.predict(X_test_apple)

# Error metrics between the scaled held-out targets and the forecasts
mse = mean_squared_error(y_test_apple, y_pred_apple)
mae = mean_absolute_error(y_test_apple, y_pred_apple)
# scikit-learn returns MAPE as a fraction (1.0 means 100%), not as a percentage
mape = mean_absolute_percentage_error(y_test_apple, y_pred_apple)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **LSTM Model Feeding without Twitter Data**

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# Stacked LSTM for the stock-only feature set: the first layer hands its full
# output sequence to the second, whose final state feeds dropout and the dense
# forecast head. (Dropout is taken from an import made in an earlier cell.)
LSTMM_AM = Sequential([
    LSTM(100, activation='relu', return_sequences=True, input_shape=(n_steps_in, 4)),
    LSTM(100, activation='relu'),
    Dropout(0.1),
    Dense(n_steps_out),
])
LSTMM_AM.compile(optimizer='adam', loss='mse')

# Print model summary
print(LSTMM_AM.summary())

# Train on the stock-only training windows
LSTMM_AM.fit(X_train_apple_stock, y_train_apple_stock,batch_size=16, epochs=100, verbose=1)

##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Forecast the test windows with the stock-only LSTM
y_pred_apple_stock = LSTMM_AM.predict(X_test_apple_stock)

# Error metrics between the scaled held-out targets and the forecasts
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
# scikit-learn returns MAPE as a fraction (1.0 means 100%), not as a percentage
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Predictions of both Apple LSTM models: LSTMM_A (with Twitter features) and
# LSTMM_AM (stock-only features).
predicted_stock_prices_apple = LSTMM_A.predict(X_test_apple)
predicted_stock_prices_apple_stock = LSTMM_AM.predict(X_test_apple_stock)

# Extract the actual stock prices and the dates
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit a scaler on the stock-price column so predictions can be mapped back to USD.
# NOTE(review): assumes `dataset` holds the unscaled prices the models' targets
# were scaled from -- confirm against the preprocessing cells.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(dataset[['stock_price']])

# Inverse transform the scaled predictions and keep the last step of each
# predicted horizon as the value to plot.
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to one common length. Truncating only some of them (or
# slicing the shorter series by the longer length, which is a no-op) would
# leave mismatched x/y lengths and make plot() fail.
# NOTE(review): this pairs prediction i with row i of apple_df_test; confirm
# that matches how the test windows were built.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_apple),
    len(last_predicted_values_apple_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create side-by-side subplots of equal width
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# One panel per model: (axis, predicted series, line colour, variant label)
panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Without Twitter'),
)
for ax, predicted, color, variant in panels:
    ax.plot(dates, actual_stock_prices, color='black', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted, color=color, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    # The data plotted here is the Apple test set, so the title says Apple.
    ax.set_title(f'Apple Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

##### **CNN-LSTM Model Feeding with Twitter Data**

In [None]:
# Only the tensorflow.keras imports are kept: the previous `keras.*` imports of
# the same names were immediately shadowed by these and had no effect.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Parameters: each input window is split into n_seq sub-sequences of n_steps steps.
n_seq = 5
if X_train_apple.shape[1] % n_seq != 0:
    # Fail early with a clear message instead of a cryptic reshape error below.
    raise ValueError(
        f'Window length {X_train_apple.shape[1]} is not divisible by n_seq={n_seq}'
    )
n_steps = X_train_apple.shape[1] // n_seq

# Define the model: a Conv1D/MaxPooling feature extractor applied to every
# sub-sequence (TimeDistributed), followed by an LSTM over the sub-sequences.
# The feature count and output width are read from the training arrays so the
# model always matches the data it is fitted on.
CNN_LSTM = Sequential()
CNN_LSTM.add(TimeDistributed(Conv1D(filters=32, kernel_size=2, activation='relu'),
                             input_shape=(n_seq, n_steps, X_train_apple.shape[2])))
CNN_LSTM.add(TimeDistributed(MaxPooling1D(pool_size=2)))
CNN_LSTM.add(TimeDistributed(Flatten()))
CNN_LSTM.add(LSTM(100, activation='relu'))
CNN_LSTM.add(Dropout(0.2))
CNN_LSTM.add(Dense(y_train_apple.shape[1]))  # Match y_train_apple width

# Compile the model
CNN_LSTM.compile(optimizer=RMSprop(), loss='mse')

# summary() prints the layer table itself and returns None, so it is not
# wrapped in print() (that would append a stray "None" line).
CNN_LSTM.summary()

# Reshape X_train from (samples, steps, features) to (samples, n_seq, n_steps, features)
X_train_reshaped = np.reshape(X_train_apple, (X_train_apple.shape[0], n_seq, n_steps, X_train_apple.shape[2]))

# Fit the model
history = CNN_LSTM.fit(X_train_reshaped, y_train_apple, batch_size=32, epochs=150, verbose=1)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Bring the test inputs into the 4-D layout (samples, n_seq, n_steps, features).
# n_seq and n_steps are read from the notebook namespace, so this cell relies on
# the values most recently assigned before it runs.
X_test_reshaped = np.reshape(
    X_test_apple,
    (X_test_apple.shape[0], n_seq, n_steps, X_test_apple.shape[2]),
)

# Evaluate CNN_LSTM on the reshaped test inputs against y_test_apple.
y_pred_apple = CNN_LSTM.predict(X_test_reshaped)

# Compute the three error metrics in one pass, then report them in the usual order.
# Note: scikit-learn returns MAPE as a fraction (it is not multiplied by 100).
mse, mae, mape = (
    metric(y_test_apple, y_pred_apple)
    for metric in (mean_squared_error, mean_absolute_error, mean_absolute_percentage_error)
)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}')

##### **CNN-LSTM Model Feeding without Twitter Data**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np

# Parameters: each input window is split into n_seq sub-sequences of n_steps steps.
n_seq = 6
if X_train_apple_stock.shape[1] % n_seq != 0:
    # Fail early with a clear message instead of a cryptic reshape error below.
    raise ValueError(
        f'Window length {X_train_apple_stock.shape[1]} is not divisible by n_seq={n_seq}'
    )
n_steps = X_train_apple_stock.shape[1] // n_seq

# Define the model: a Conv1D/MaxPooling feature extractor applied to every
# sub-sequence (TimeDistributed), followed by an LSTM over the sub-sequences.
# The feature count and output width are read from the training arrays so the
# model always matches the data it is fitted on.
CNN_LSTM_stock = Sequential()
CNN_LSTM_stock.add(TimeDistributed(Conv1D(filters=32, kernel_size=2, activation='relu'),
                                   input_shape=(n_seq, n_steps, X_train_apple_stock.shape[2])))
CNN_LSTM_stock.add(TimeDistributed(MaxPooling1D(pool_size=2)))
CNN_LSTM_stock.add(TimeDistributed(Flatten()))
CNN_LSTM_stock.add(LSTM(100, activation='relu'))
CNN_LSTM_stock.add(Dropout(0.1))
CNN_LSTM_stock.add(Dense(y_train_apple_stock.shape[1]))  # Match y_train_apple_stock width

# Compile the model
CNN_LSTM_stock.compile(optimizer=RMSprop(), loss='mse')

# summary() prints the layer table itself and returns None, so it is not
# wrapped in print() (that would append a stray "None" line).
CNN_LSTM_stock.summary()

# Reshape X_train from (samples, steps, features) to (samples, n_seq, n_steps, features)
X_train_stock_reshaped = np.reshape(X_train_apple_stock, (X_train_apple_stock.shape[0], n_seq, n_steps, X_train_apple_stock.shape[2]))

# Fit the model
history = CNN_LSTM_stock.fit(X_train_stock_reshaped, y_train_apple_stock, batch_size=16, epochs=100, verbose=1)


##### **Model Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Bring the stock-only test inputs into the 4-D layout
# (samples, n_seq, n_steps, features). n_seq and n_steps are read from the
# notebook namespace, so this cell relies on the values most recently assigned.
X_test_reshaped_stock = np.reshape(
    X_test_apple_stock,
    (X_test_apple_stock.shape[0], n_seq, n_steps, X_test_apple_stock.shape[2]),
)

# Evaluate CNN_LSTM_stock on the reshaped test inputs against y_test_apple_stock.
y_pred_apple_stock = CNN_LSTM_stock.predict(X_test_reshaped_stock)

# Error metrics for the stock-only model.
# Note: scikit-learn returns MAPE as a fraction (it is not multiplied by 100).
mse = mean_squared_error(y_test_apple_stock, y_pred_apple_stock)
mae = mean_absolute_error(y_test_apple_stock, y_pred_apple_stock)
mape = mean_absolute_percentage_error(y_test_apple_stock, y_pred_apple_stock)

# Report all three metrics, one per line.
print(
    f'Mean Squared Error (MSE): {mse}',
    f'Mean Absolute Error (MAE): {mae}',
    f'Mean Absolute Percentage Error (MAPE): {mape}',
    sep='\n',
)

##### **Visualize the Final Output**

In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Sub-sequence counts must match the layout each model was built with:
# CNN_LSTM was defined with n_seq = 5 and CNN_LSTM_stock with n_seq = 6.
# Reusing a single n_seq for both would give one model an incompatible input shape.
n_seq_twitter = 5
n_seq_stock = 6

# Reshape X_test_apple to (samples, n_seq, n_steps, features) for CNN_LSTM
X_test_reshaped = np.reshape(
    X_test_apple,
    (X_test_apple.shape[0], n_seq_twitter, X_test_apple.shape[1] // n_seq_twitter, X_test_apple.shape[2]),
)

# Predict stock prices with the Twitter-fed model
predicted_stock_prices_apple = CNN_LSTM.predict(X_test_reshaped)

# Reshape X_test_apple_stock to (samples, n_seq, n_steps, features) for CNN_LSTM_stock
X_test_stock_reshaped = np.reshape(
    X_test_apple_stock,
    (X_test_apple_stock.shape[0], n_seq_stock, X_test_apple_stock.shape[1] // n_seq_stock, X_test_apple_stock.shape[2]),
)

# Predict stock prices with the stock-only model (without Twitter)
predicted_stock_prices_apple_stock = CNN_LSTM_stock.predict(X_test_stock_reshaped)

# Extract the actual stock prices and the dates
actual_stock_prices = apple_df_test['stock_price'].values
dates = apple_df_test['day_date'].values

# Fit a scaler on the test-set prices so predictions can be mapped back to USD.
# NOTE(review): the LSTM visualisation cell fits this scaler on
# dataset[['stock_price']] instead; the inverse transform is only correct if it
# uses the same range the model targets were scaled with -- confirm which one
# matches the preprocessing.
stock_price_scaler = MinMaxScaler(feature_range=(0, 1))
stock_price_scaler.fit(actual_stock_prices.reshape(-1, 1))

# Inverse transform the scaled predictions and keep the last step of each
# predicted horizon as the value to plot.
predicted_stock_prices_apple_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple)
last_predicted_values_apple = predicted_stock_prices_apple_original[:, -1]

predicted_stock_prices_apple_stock_original = stock_price_scaler.inverse_transform(predicted_stock_prices_apple_stock)
last_predicted_values_apple_stock = predicted_stock_prices_apple_stock_original[:, -1]

# Trim every series to one common length. Truncating only some of them (or
# slicing the shorter series by the longer length, which is a no-op) would
# leave mismatched x/y lengths and make plot() fail.
# NOTE(review): this pairs prediction i with row i of apple_df_test; confirm
# that matches how the test windows were built.
n_points = min(
    len(actual_stock_prices),
    len(last_predicted_values_apple),
    len(last_predicted_values_apple_stock),
)
actual_stock_prices = actual_stock_prices[:n_points]
dates = dates[:n_points]
last_predicted_values_apple = last_predicted_values_apple[:n_points]
last_predicted_values_apple_stock = last_predicted_values_apple_stock[:n_points]

# Create side-by-side subplots of equal width
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), gridspec_kw={'width_ratios': [1, 1]})

# One panel per model: (axis, predicted series, line colour, variant label)
panels = (
    (ax1, last_predicted_values_apple, 'darkred', 'With Twitter'),
    (ax2, last_predicted_values_apple_stock, 'olive', 'Without Twitter'),
)
for ax, predicted, color, variant in panels:
    ax.plot(dates, actual_stock_prices, color='black', label='Actual Stock Prices', linewidth=2)
    ax.plot(dates, predicted, color=color, label=f'Predicted Stock Prices ({variant})', linewidth=2)
    ax.set_xlabel('Date')
    ax.set_ylabel('Stock Price USD ($)')
    # The data plotted here is the Apple test set, so the title says Apple.
    ax.set_title(f'Apple Stock Prices - Actual vs Predicted ({variant})')
    ax.legend()
    ax.tick_params(axis='x', rotation=60)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
