In [4]:
import pandas as pd
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load the dataset
data = pd.read_csv('../datasets/hemanth-final/TCS_funda_final.csv')
data['Year'] = pd.to_datetime(data['Year'], format='%d-%b', errors='coerce')
data.set_index('Year', inplace=True)

# Extract the 'Close' prices and drop any missing values
closing_prices = data['Close'].dropna().values.reshape(-1, 1)

# Scale the data to be between 0 and 1
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_closing_prices = scaler.fit_transform(closing_prices)|

# Function to create sequences of data for LSTM
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

# Define sequence length (e.g., using 60 previous days to predict the next day)
seq_length = 10
X, y = create_sequences(scaled_closing_prices, seq_length)

# Split the data into training and testing sets
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Reshape X for LSTM [samples, time steps, features]
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build the LSTM model
model = Sequential()

# Adding the LSTM layer and some Dropout regularization
model.add(LSTM(units=50, return_sequences=True, input_shape=(seq_length, 1)))
model.add(Dropout(0.2))

# Adding another LSTM layer
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))

# Adding the output layer
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Predict on training and test data
train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)

# Inverse transform predictions and actual values to their original scale
train_predictions = scaler.inverse_transform(train_predictions)
test_predictions = scaler.inverse_transform(test_predictions)

y_train_scaled = scaler.inverse_transform(y_train.reshape(-1, 1))
y_test_scaled = scaler.inverse_transform(y_test.reshape(-1, 1))

# Prepare the data for plotting
full_closing_prices = scaler.inverse_transform(scaled_closing_prices)
all_predictions = np.concatenate([train_predictions, test_predictions], axis=0)

# Generate 30 additional future predictions
future_predictions = []
last_sequence = scaled_closing_prices[-seq_length:]  # Start with the last available sequence

num_future_predictions = 4
for _ in range(num_future_predictions):
    next_pred = model.predict(last_sequence.reshape(1, seq_length, 1))
    future_predictions.append(next_pred[0, 0])
    # Update the sequence by removing the first value and adding the predicted value
    last_sequence = np.append(last_sequence[1:], next_pred).reshape(seq_length, 1)

# Inverse transform future predictions to original scale
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))

# Combine all predictions for plotting
extended_predictions = np.concatenate([all_predictions, future_predictions])

# Plotting observed, predicted, and future closing prices
plt.figure(figsize=(8,4))
plt.plot(full_closing_prices, color='blue', label='Observed Closing Prices')
plt.plot(np.arange(seq_length, seq_length + len(train_predictions)), train_predictions, color='orange', label='Train Predictions')
plt.plot(np.arange(seq_length + len(train_predictions), seq_length + len(train_predictions) + len(test_predictions)), test_predictions, color='red', label='Test Predictions')
plt.plot(np.arange(len(full_closing_prices), len(full_closing_prices) + num_future_predictions), future_predictions, color='green', linestyle='--', label='Future Predictions')
plt.axvline(x=len(full_closing_prices) - len(test_predictions), color='green', linestyle='--', label='Prediction Start')
plt.title('Closing Price Prediction with Future Points')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

# Performance metrics
mae = mean_absolute_error(y_test_scaled, test_predictions)
mse = mean_squared_error(y_test_scaled, test_predictions)
rmse = np.sqrt(mse)
mape = np.mean(np.abs((y_test_scaled - test_predictions) / y_test_scaled)) * 100

# Print performance metrics
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}%')

SyntaxError: invalid syntax (1049528873.py, line 20)