# Stock Market Analysis and Prediction

## Import Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import plotly.graph_objects as go
import seaborn as sns
import dask.dataframe as dd
from pyspark.sql import SparkSession

## Fetch Historical Stock Data

In [None]:
# Symbol and date window for the historical download. `ticker`, `start_date`,
# `end_date` and `data` are read again by later cells.
ticker = 'AAPL'
start_date = '2020-01-01'
end_date = '2023-01-01'
data = yf.download(ticker, start=start_date, end=end_date)

# Newer yfinance releases can return two-level (field, ticker) column labels
# even for a single symbol; keep only the field level so that data['Close']
# is a plain Series regardless of the installed version.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# A failed download comes back as an empty frame instead of an exception.
# Stop here with a clear message rather than failing later inside the scaler.
if data.empty:
    raise ValueError(
        f"No price data returned for {ticker!r} "
        f"between {start_date} and {end_date}"
    )

print(data.head())

## Fetch Real-Time Stock Data

In [None]:
# Ask yfinance for a one-day history window of the same symbol and show it.
ticker_handle = yf.Ticker(ticker)
real_time_data = ticker_handle.history(period='1d')
print(real_time_data)

## Big Data Concepts: Processing Multiple Stocks

In [None]:
# Download several symbols over the same date window, stack them into one
# long table tagged by symbol, then compute the mean close per symbol twice:
# once with Dask and once with Spark.
tickers = ['AAPL', 'GOOGL', 'MSFT']
all_data = []
for t in tickers:
    stock_data = yf.download(t, start=start_date, end=end_date)

    # A failed download yields an empty frame; skip it instead of letting it
    # silently vanish from the averages below.
    if stock_data.empty:
        print(f"Skipping {t}: no data returned")
        continue

    # Newer yfinance releases can label columns as (field, ticker) pairs.
    # Flatten to the field name so every symbol shares the same 'Close'
    # column and the frame has plain string column names.
    if isinstance(stock_data.columns, pd.MultiIndex):
        stock_data.columns = stock_data.columns.get_level_values(0)

    stock_data = stock_data.reset_index()
    stock_data['Ticker'] = t
    all_data.append(stock_data)

if not all_data:
    raise ValueError("No data could be downloaded for any of the tickers")

# reset_index() already moved the dates into a column, so the leftover
# positional index would only repeat per symbol; renumber it.
combined_df = pd.concat(all_data, ignore_index=True)

# Use Dask for parallel processing
dask_df = dd.from_pandas(combined_df, npartitions=4)
mean_prices = dask_df.groupby('Ticker')['Close'].mean().compute()
print("Mean close prices using Dask:")
print(mean_prices)

# Use Spark for distributed processing
spark = SparkSession.builder.appName("StockAnalysis").getOrCreate()
try:
    spark_df = spark.createDataFrame(combined_df)
    spark_df.createOrReplaceTempView("stocks")
    result = spark.sql("SELECT Ticker, AVG(Close) as avg_close FROM stocks GROUP BY Ticker")
    print("Average close prices using Spark:")
    result.show()
finally:
    # Always release the session, even when a Spark call above fails.
    spark.stop()

## Preprocess the Data

In [None]:
# Turn the closing prices into supervised samples for the LSTM: each sample
# is `sequence_length` consecutive scaled prices and its target is the scaled
# price that immediately follows. The first 80% of samples (in time order)
# are used for training and the rest for testing.
close_prices = data['Close']
price_column = close_prices.values.reshape(-1, 1)

sequence_length = 60
n_windows = len(price_column) - sequence_length
if n_windows <= 0:
    raise ValueError(
        f"Need more than {sequence_length} rows to build sequences, "
        f"got {len(price_column)}"
    )

train_size = int(n_windows * 0.8)

# Fit the scaler only on the rows that training samples can see (inputs and
# targets of the first `train_size` windows). Fitting on the whole series
# would leak the test period's min/max into training and evaluation.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(price_column[:train_size + sequence_length])
scaled_data = scaler.transform(price_column)

window_ends = range(sequence_length, len(scaled_data))
X = np.array([scaled_data[end - sequence_length:end, 0] for end in window_ends])
y = np.array([scaled_data[end, 0] for end in window_ends])

# The LSTM expects (samples, timesteps, features); there is one feature.
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

# Chronological split, no shuffling.
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

## Perform Time-Series Analysis

In [None]:
# Line chart of the closing price against the frame's index.
plt.plot(data.index, close_prices)
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Historical Close Prices')
plt.show()

## Build and Train LSTM Model

In [None]:
# Two stacked LSTM layers followed by two dense layers ending in a single
# output unit. The first LSTM returns the full sequence so the second LSTM
# receives one vector per timestep.
network_layers = [
    LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)),
    LSTM(50),
    Dense(25),
    Dense(1),
]
model = Sequential(network_layers)
model.compile(loss='mean_squared_error', optimizer='adam')

# Use more epochs for better results
model.fit(X_train, y_train, epochs=1, batch_size=1)

## Make Predictions

In [None]:
# Predict on the held-out windows and map the outputs back to price units.
scaled_predictions = model.predict(X_test)
predictions = scaler.inverse_transform(scaled_predictions)

# Bring the targets back to price units as well, so the error is in price units.
actual_prices = scaler.inverse_transform(y_test.reshape(-1, 1))
mse = mean_squared_error(actual_prices, predictions)
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')

## Visualize Trends and Results

In [None]:
# Overlay actual and predicted prices on the trailing index entries of `data`,
# taking as many entries as there are test targets / predictions.
actual_dates = data.index[-len(y_test):]
predicted_dates = data.index[-len(predictions):]
actual_values = scaler.inverse_transform(y_test.reshape(-1, 1))

plt.figure(figsize=(14, 7))
plt.plot(actual_dates, actual_values, label='Actual Prices')
plt.plot(predicted_dates, predictions, label='Predicted Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Stock Price Prediction')
plt.legend()
plt.show()