<a href="https://colab.research.google.com/github/mercuryandmercury/address/blob/main/CNNonfinance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from statsmodels.graphics.tsaplots import plot_acf
import seaborn as sb
import plotly.express as px
from matplotlib import pyplot as plt
import yfinance as yf

In [None]:
# Ticker whose price history is analysed throughout the notebook (Apple Inc.).
symbol = 'AAPL'

# Fetch the Yahoo Finance price history for the requested date range.
data = yf.download(
    tickers=symbol,
    start='2000-01-01',
    end='2023-01-01',
)

In [None]:

# Feature extraction: rolling statistics of the closing price.
# One rolling object is shared so the 20-observation window is defined once.
close_rolling = data['Close'].rolling(window=20)
data['mean'] = close_rolling.mean()
data['median'] = close_rolling.median()
data['std_dev'] = close_rolling.std()
data['skewness'] = close_rolling.skew()
data['kurtosis'] = close_rolling.kurt()

# Rows before the first complete window hold NaN in the rolling columns.
# Dropping them once leaves no missing values, so no additional fill or
# second drop is required afterwards.
data.dropna(inplace=True)

# Visualise the rolling features on a 2x2 grid, one statistic family per panel.
fig, axes = plt.subplots(2, 2, figsize=(15, 15))

axes[0, 0].plot(data.index, data['mean'], label='Mean')
axes[0, 0].set_title('Rolling Mean')
axes[0, 0].legend()

axes[0, 1].plot(data.index, data['median'], label='Median', color='orange')
axes[0, 1].set_title('Rolling Median')
axes[0, 1].legend()

axes[1, 0].plot(data.index, data['std_dev'], label='Standard Deviation', color='green')
axes[1, 0].set_title('Rolling Standard Deviation')
axes[1, 0].legend()

# Skewness and kurtosis share the last panel, so the title names both series.
axes[1, 1].plot(data.index, data['skewness'], label='Skewness', color='red')
axes[1, 1].plot(data.index, data['kurtosis'], label='Kurtosis', color='blue')
axes[1, 1].set_title('Rolling Skewness and Kurtosis')
axes[1, 1].legend()

fig.tight_layout()
plt.show()

In [None]:
# Report how many missing values each column contains.
print(data.isnull().sum())

# Forward-fill any gaps. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling.
data = data.ffill()

# Keep only the first row for any duplicated timestamp.
data = data[~data.index.duplicated(keep='first')]

# Outlier detection on the closing price: a value is an outlier when it lies
# more than three standard deviations away from the mean, on EITHER side.
# NOTE(review): the mean/std are taken over the whole series; for a strongly
# trending price this can flag an entire recent period rather than isolated
# spikes -- confirm that this is the intended treatment.
std = data['Close'].std()
mean = data['Close'].mean()
outlier_threshold = mean + (3 * std)  # upper bound
lower_outlier_threshold = mean - (3 * std)  # lower bound

# True for rows whose close lies inside [lower, upper]; NaN closes are False.
within_bounds = data['Close'].between(lower_outlier_threshold, outlier_threshold)
outliers = data[(data['Close'] > outlier_threshold) | (data['Close'] < lower_outlier_threshold)]

# Remove outliers
data = data[within_bounds]

# Confirm the changes by showing the first rows of the cleaned frame.
print(data.head())

In [None]:
# Forward-fill any remaining gaps before plotting. DataFrame.ffill() replaces
# the deprecated fillna(method='ffill') spelling.
if data.isnull().values.any():
    data = data.ffill()

# Traded volume over time for the selected ticker.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data.index, data['Volume'], color='blue')
ax.set_title('Date vs Volume for ' + symbol)
ax.set_xlabel('Date')
ax.set_ylabel('Volume')
ax.grid(True)
plt.show()

In [None]:
# High-price history for the selected ticker, drawn on an explicit axes object.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data.index, data['High'], color='green')
ax.set_title('Date vs High Prices for ' + symbol)
ax.set_xlabel('Date')
ax.set_ylabel('High Prices')
ax.grid(True)
plt.show()

In [None]:
# Simple Moving Average (SMA) of the closing price.
window = 5  # number of observations averaged per SMA value
data['SMA'] = data['Close'].rolling(window=window).mean()

# Forward-fill interior gaps. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling. The first `window - 1` SMA values stay NaN
# because a forward fill has no earlier value to propagate into them.
if data.isnull().values.any():
    data = data.ffill()

# Close price and its SMA on a shared axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data.index, data['Close'], label='Close Prices', color='blue')
ax.plot(data.index, data['SMA'], label='SMA (' + str(window) + ')', color='red')
ax.set_title('Simple Moving Average (SMA) for ' + symbol)
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

In [None]:
# Forward-fill any gaps so the autocorrelation is computed on a complete
# series. DataFrame.ffill() replaces the deprecated fillna(method='ffill').
if data.isnull().values.any():
    data = data.ffill()

# Autocorrelation of the closing price for lags 0..50.
# plot_acf creates its own figure unless an axes is supplied, so the axes is
# created first and passed in; otherwise the sized figure would stay empty and
# the title/labels would be applied to a different, default-sized figure.
fig, ax = plt.subplots(figsize=(10, 6))
plot_acf(data['Close'], lags=50, ax=ax)  # Change 'Close' to any other column if needed
ax.set_title('Autocorrelation Plot of Close Prices for ' + symbol)
ax.set_xlabel('Lag')
ax.set_ylabel('Autocorrelation')
ax.grid(True)
plt.show()

In [None]:
import seaborn as sns
# Pairwise correlation between every column of the frame.
correlation_matrix = data.corr()

# Annotated heatmap of the correlation matrix, drawn on an explicit axes.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    ax=ax,
)
ax.set_title('Correlation Heatmap of Stock Market Data for ' + symbol)
plt.show()

In [None]:
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable from one run to the next.
set_random_seed(42)

# Retrieve historical stock data from Yahoo Finance
symbol = 'AAPL'  # Example stock symbol (Apple Inc.)
data = yf.download(symbol, start='2021-01-01', end='2023-01-01')

# Forward-fill missing values if any are present. DataFrame.ffill() replaces
# the deprecated fillna(method='ffill') spelling.
if data.isnull().values.any():
    data = data.ffill()

# Regression target: the raw (un-normalised) closing price.
target = data['Close'].values

# Build the 3D CNN input (samples, window_size, n_features) from the RAW
# feature values. Sample k holds rows k .. k+window_size-1 and is paired with
# the close on row k+window_size, i.e. the observation right after the window.
window_size = 20  # Example window size
features = data.to_numpy(dtype=float)
X = np.array([features[i - window_size:i] for i in range(window_size, len(features))])
y = target[window_size:]

# Chronological split (no shuffling): the last 20% of windows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Normalise with statistics taken from the TRAINING windows only. Computing
# the mean/std over the whole series would leak test-period information into
# the training inputs.
feature_mean = X_train.mean(axis=(0, 1), keepdims=True)
feature_std = X_train.std(axis=(0, 1), keepdims=True)
feature_std[feature_std == 0] = 1.0  # constant feature: avoid division by zero
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

# Constructing the CNN model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dense(1))  # Output layer: one predicted price per window

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model.
# NOTE(review): the test set doubles as validation data here. It is only used
# for monitoring (no early stopping or checkpoint selection is configured), but
# a separate validation split would keep the test set fully held out.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Plotting training history (loss)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.show()

# Evaluating model on test data
test_loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss}')

# Generating predictions
predicted_prices = model.predict(X_test)

# Visualizing predictions vs actual prices
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Prices')
plt.plot(predicted_prices, label='Predicted Prices', linestyle='--')
plt.legend()
plt.xlabel('Time')
plt.ylabel('Price')
plt.title('Actual vs Predicted Prices')
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Error metrics of the predictions against the held-out prices:
# mean absolute error, mean squared error, and the square root of the latter.
mae, mse = (
    mean_absolute_error(y_test, predicted_prices),
    mean_squared_error(y_test, predicted_prices),
)
rmse = np.sqrt(mse)

# One metric per output line.
print(
    f'Mean Absolute Error (MAE): {mae}',
    f'Mean Squared Error (MSE): {mse}',
    f'Root Mean Squared Error (RMSE): {rmse}',
    sep='\n',
)
