<a href="https://colab.research.google.com/github/polydeuces32/s-p500-predicter-on-Python-/blob/main/k_means_algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: Gather Data
# Download daily price history for the example tickers.
# NOTE(review): 'LINK' and 'BTC' are not S&P 500 constituents; presumably the
# crypto assets were intended (Yahoo symbols would be 'LINK-USD' / 'BTC-USD').
# Confirm before relying on the "S&P 500" wording used in this cell.
tickers = ['AAPL', 'LINK', 'BTC', 'AMZN', 'TSLA']  # Example tickers
# auto_adjust=False keeps the separate 'Adj Close' column; recent yfinance
# releases default to auto_adjust=True, which removes it and would make the
# lookup below raise KeyError.
data = yf.download(tickers, start="2020-01-01", end="2024-07-01", auto_adjust=False)

# Discard tickers that came back with no prices at all. Without this, the
# row-wise dropna() on the returns would delete every row as soon as a single
# ticker failed to download.
prices = data['Adj Close'].dropna(axis=1, how='all')
if prices.empty:
    raise ValueError("No price data was downloaded for the requested tickers.")

# Calculate daily returns over the dates where every remaining ticker traded
returns = prices.pct_change().dropna()
if returns.empty:
    raise ValueError("The downloaded tickers share no overlapping trading days.")

# Step 2: Preprocess Data
# Calculate annualized mean return and volatility (252 trading days per year)
mean_returns = returns.mean() * 252
volatility = returns.std() * np.sqrt(252)

# Create a DataFrame with the features (one row per ticker)
features = pd.DataFrame({'Mean Returns': mean_returns, 'Volatility': volatility})

# Standardize the features so both columns contribute equally to distances
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Step 3: Apply k-means Clustering
# Set number of clusters
k = 3
# KMeans cannot form more clusters than there are samples, so cap the count at
# the number of tickers that actually have data. n_init is pinned because its
# default differs between scikit-learn versions.
kmeans = KMeans(n_clusters=min(k, len(features)), n_init=10, random_state=42)
kmeans.fit(scaled_features)

# Assign clusters to the original (unscaled) data
features['Cluster'] = kmeans.labels_

# Step 4: Analyze and Interpret Results
print(features)

# Plot the clusters in risk/return space
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Volatility', y='Mean Returns', hue='Cluster', data=features, palette='viridis')
plt.title('K-means Clustering of S&P 500 Stocks')
plt.xlabel('Volatility (Annualized)')
plt.ylabel('Mean Returns (Annualized)')
plt.show()


In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Step 1: Gather Data
ticker = 'GBTC'
data = yf.download(ticker, start="2010-01-01", end="2025-01-01")

# Step 2: Preprocess Data
# Using 'Close' price for prediction; rows without a close are dropped so that
# NaNs cannot propagate through the scaler into the training sequences.
data = data[['Close']].dropna()
if data.empty:
    raise ValueError(f"No price data was downloaded for {ticker}.")

# Chronological 80/20 split point (no shuffling: this is a time series)
train_size = int(len(data) * 0.8)

# Scale the data
# The scaler is fitted on the training period only. Fitting on the full series
# would leak the test period's min/max into training (look-ahead bias). As a
# consequence, scaled test values may fall outside the (0, 1) range.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data.iloc[:train_size])
scaled_data = scaler.transform(data)

# Create training and testing datasets
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]

# Create sequences for training
def create_sequences(data, seq_length):
    """Slice a series into overlapping input windows and next-step targets.

    For each start index ``i`` in ``range(len(data) - seq_length)`` the input
    window is ``data[i:i + seq_length]`` and its target is the element that
    immediately follows it, ``data[i + seq_length]``.

    Args:
        data: Sliceable, indexable sequence (e.g. a NumPy array).
        seq_length: Number of consecutive elements in each input window.

    Returns:
        A ``(windows, targets)`` tuple of NumPy arrays. Both are empty when
        ``data`` holds no more than ``seq_length`` elements.
    """
    starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in starts]
    targets = [data[start + seq_length] for start in starts]
    return np.array(windows), np.array(targets)

import matplotlib.pyplot as plt

# Window size: 60 consecutive days are used to predict the following day
seq_length = 60
(x_train, y_train), (x_test, y_test) = (
    create_sequences(split, seq_length) for split in (train_data, test_data)
)

# Step 3: Build LSTM Model
# Two stacked LSTM layers followed by a single-unit dense output. The first
# LSTM returns its full sequence so the second one receives a sequence input.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(seq_length, 1)),
    LSTM(units=50),
    Dense(units=1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Step 4: Train the Model
model.fit(x_train, y_train, epochs=20, batch_size=32)

# Step 5: Make Predictions
# Both series are mapped back through the scaler so the plot is in price units
actual = scaler.inverse_transform(y_test.reshape(-1, 1))
predictions = scaler.inverse_transform(model.predict(x_test))

# Plot predicted against actual prices over the test period
plt.figure(figsize=(10, 6))
plt.title('Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.plot(actual, color='blue', label='Actual Stock Price')
plt.plot(predictions, color='red', label='Predicted Stock Price')
plt.legend()
plt.show()
