<a href="https://colab.research.google.com/github/techiejay215/forexanalysis/blob/main/REALFINANCE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report


In [None]:
# Function to load data from Yahoo Finance
def load_data(ticker, period='1y', interval='1d'):
    """
    Download price history for ``ticker`` from Yahoo Finance and add features.

    The downloaded columns are kept and four columns are added:

    * ``Return``     - percentage change of the price column
    * ``MA_50``      - 50-row rolling mean of the price column
    * ``MA_200``     - 200-row rolling mean of the price column
    * ``Volatility`` - 50-row rolling standard deviation of ``Return``

    The price column is ``Adj Close`` when the download contains it,
    otherwise ``Close``.

    Parameters
    ----------
    ticker : str
        Yahoo Finance symbol, e.g. ``"USDCHF=X"`` or ``"^GSPC"``.
    period : str, default '1y'
        Look-back period forwarded to ``yf.download``.
    interval : str, default '1d'
        Bar interval forwarded to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        Feature frame with every row containing a NaN removed. Because
        ``MA_200`` needs 200 observations, the first 199 rows are always
        dropped; a download with fewer than 200 rows yields an empty frame.

    Raises
    ------
    ValueError
        If Yahoo Finance returns no rows for ``ticker``.
    """
    # Download data from Yahoo Finance
    df = yf.download(ticker, period=period, interval=interval)

    # Fail early with a clear message instead of a KeyError / empty-array
    # error further down the pipeline.
    if df is None or df.empty:
        raise ValueError(f"No data returned by Yahoo Finance for ticker {ticker!r}")

    # Some yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol; keep only the field level so df['Close'] is a Series.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    # Prefer the adjusted close when it is available
    price_col = 'Adj Close' if 'Adj Close' in df.columns else 'Close'
    price = df[price_col]

    df['Return'] = price.pct_change()
    df['MA_50'] = price.rolling(window=50).mean()
    df['MA_200'] = price.rolling(window=200).mean()

    # Calculate volatility (50-day rolling standard deviation of returns)
    df['Volatility'] = df['Return'].rolling(window=50).std()

    # Drop rows with NaN values generated by rolling window calculations
    df.dropna(inplace=True)

    return df


In [None]:
# Wrapper around load_data that adds a next-day direction label
def load_data_with_direction(ticker, period='1y', interval='1d'):
    """
    Load the feature frame for ``ticker`` and add a next-day direction label.

    Calls ``load_data(ticker, period, interval)`` and adds the column
    ``Next_Day_Direction``: 1 when the following row's ``Close`` is strictly
    higher than the current row's ``Close``, otherwise 0.

    The final row has no following observation, so its direction is unknown.
    It is removed from the result rather than being labelled 0 ("down"),
    which is what a plain ``NaN > close`` comparison would produce.

    Returns
    -------
    pandas.DataFrame
        The frame from ``load_data`` without its last row, plus the
        ``Next_Day_Direction`` column (int).
    """
    df = load_data(ticker, period, interval)  # Load the data

    # Create a new column for Next Day's Direction (1 for up, 0 for down)
    next_close = df['Close'].shift(-1)
    df['Next_Day_Direction'] = (next_close > df['Close']).astype(int)

    # Drop the last row: its next close is NaN, so its label is not a real observation.
    # .copy() gives callers an independent frame they can add columns to.
    return df.iloc[:-1].copy()


In [None]:
# Load data for the USD/CHF exchange rate (Yahoo Finance symbol "USDCHF=X")
ticker = "USDCHF=X"
df = load_data(ticker)
print(df.head())  # Verify the downloaded data


In [None]:
# Function to perform clustering and market regime detection
def detect_market_regimes(df, optimal_k=3):
    """
    Cluster the rows of ``df`` into ``optimal_k`` market regimes with KMeans.

    The clustering uses the standardized columns ``Return``, ``MA_50``,
    ``MA_200`` and ``Volatility``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the four feature columns. It is modified in place.
    optimal_k : int, default 3
        Number of clusters.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with two added columns: ``Regime`` (integer
        cluster id) and ``Regime_Label`` (string name of the cluster).
    """
    # Selecting the features for clustering
    features = df[['Return', 'MA_50', 'MA_200', 'Volatility']]

    # Normalize the features using StandardScaler
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)

    # Apply KMeans clustering
    kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
    df['Regime'] = kmeans.fit_predict(features_scaled)

    # Assign regime labels (modify as needed)
    regime_labels = {0: 'Bull', 1: 'Bear', 2: 'Neutral'}  # Example labels; adjust based on analysis

    # Cluster ids beyond the named ones get a generic label so that
    # optimal_k > 3 does not leave NaN in Regime_Label.
    labels_by_id = {
        cluster_id: regime_labels.get(cluster_id, f'Regime {cluster_id}')
        for cluster_id in range(optimal_k)
    }
    df['Regime_Label'] = df['Regime'].map(labels_by_id)

    return df


In [None]:
# Detect market regimes for the USD/CHF data (adds the 'Regime' and 'Regime_Label' columns)
df = detect_market_regimes(df, optimal_k=3)


In [None]:
# Function to plot stock price and market regimes
def plot_market_regimes(df):
    """
    Plot the ``Close`` price and overlay a coloured scatter for each regime.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Close`` column and an integer ``Regime`` column; the
        index is used as the x-axis.

    Notes
    -----
    Regimes 0, 1 and 2 are drawn in red, green and blue. Further regime ids
    continue through the palette and wrap around, so any number of clusters
    can be plotted.
    """
    plt.figure(figsize=(14, 7))
    plt.plot(df.index, df['Close'], label='Stock Price', color='black')

    # Plot different regimes with different colors
    colors = ['red', 'green', 'blue', 'orange', 'purple', 'brown', 'cyan', 'magenta']

    # Iterate over the ids actually present instead of assuming 0..n-1
    for regime_id in sorted(df['Regime'].dropna().unique()):
        regime_id = int(regime_id)
        in_regime = df['Regime'] == regime_id
        plt.scatter(df.index[in_regime],
                    df['Close'][in_regime],
                    label=f'Regime {regime_id}', alpha=0.6,
                    color=colors[regime_id % len(colors)])

    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Market Regimes Detected by Clustering')
    plt.legend()
    plt.show()


In [None]:
# Plot market regimes for the USD/CHF data
plot_market_regimes(df)


In [None]:
# Function to calculate and plot cumulative returns by regime
def plot_cumulative_returns(df):
    """
    Draw one cumulative-return line per regime label.

    The compounded return over the whole frame is stored in a new
    ``Cumulative_Return`` column on ``df`` (the frame is modified in place).
    For each distinct ``Regime_Label``, the rows carrying that label are then
    plotted against the index.
    """
    # Compound the per-row returns over the full history
    growth = df['Return'].add(1).cumprod()
    df['Cumulative_Return'] = growth

    plt.figure(figsize=(14, 7))

    # One line per label, in order of first appearance
    for regime_name in df['Regime_Label'].unique():
        in_regime = df['Regime_Label'] == regime_name
        plt.plot(df.loc[in_regime, 'Cumulative_Return'], label=f'Regime: {regime_name}')

    plt.title('Cumulative Returns by Market Regime')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Return')
    plt.legend()
    plt.show()


In [None]:
# Plot cumulative returns for the USD/CHF data (adds the 'Cumulative_Return' column to df)
plot_cumulative_returns(df)


In [None]:
# Function to train and evaluate a Logistic Regression model
def train_logistic_regression(df):
    """
    Fit a Logistic Regression that predicts ``Regime`` and print its report.

    Features are ``Return``, ``MA_50``, ``MA_200`` and ``Volatility``; the
    target is the ``Regime`` column. The data is split 70/30 with a fixed
    seed, and a classification report for the held-out 30% is printed.

    The scaler is fitted on the training split only and then applied to the
    test split, so no statistics from the test rows leak into training.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the four feature columns and ``Regime``.

    Returns
    -------
    None
    """
    # Prepare features and target variable
    features = df[['Return', 'MA_50', 'MA_200', 'Volatility']]
    y = df['Regime']

    # Split the data into training and testing sets (before any scaling)
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        features, y, test_size=0.3, random_state=42
    )

    # Normalize features using training-set statistics only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)

    # Train the Logistic Regression model
    model = LogisticRegression(random_state=42)
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Evaluate the model
    print(classification_report(y_test, y_pred))


In [None]:
# Train and evaluate a logistic regression model on the USD/CHF data (prints a classification report)
train_logistic_regression(df)


In [None]:
# Repeat the full workflow on the S&P 500 index, using a separate frame (df_sp500)
# so the forex frame `df` is left untouched.
ticker = "^GSPC"  # S&P 500 Index
df_sp500 = load_data(ticker)

# Detect market regimes (adds the 'Regime' and 'Regime_Label' columns)
df_sp500 = detect_market_regimes(df_sp500, optimal_k=3)

# Plot market regimes (needs the 'Regime' column created above)
plot_market_regimes(df_sp500)

# Plot cumulative returns (needs 'Regime_Label'; adds 'Cumulative_Return')
plot_cumulative_returns(df_sp500)

# Train and evaluate logistic regression (prints a classification report)
train_logistic_regression(df_sp500)


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression

def train_logistic_regression_cross_val(df):
    """
    Evaluate a Logistic Regression on ``Regime`` with 5-fold cross-validation.

    Features are ``Return``, ``MA_50``, ``MA_200`` and ``Volatility``; the
    target is the ``Regime`` column. The mean accuracy and its standard
    deviation across the folds are printed as percentages.

    Scaling is part of the cross-validated pipeline, so the scaler is refitted
    on the training portion of every fold and never sees that fold's
    validation rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the four feature columns and ``Regime``.

    Returns
    -------
    None
    """
    # Local import: scikit-learn is already a dependency of this notebook
    from sklearn.pipeline import make_pipeline

    # Prepare features and target variable
    features = df[['Return', 'MA_50', 'MA_200', 'Volatility']]
    y = df['Regime']

    # Scaler + classifier are fitted together inside each fold
    model = make_pipeline(StandardScaler(), LogisticRegression(random_state=42))
    scores = cross_val_score(model, features, y, cv=5, scoring='accuracy')  # 5-fold cross-validation

    # Print the cross-validation accuracy results
    print(f"Cross-Validation Accuracy: {scores.mean() * 100:.2f}% (+/- {scores.std() * 100:.2f}%)")

# Usage example: cross-validate on `df`, the USD/CHF frame that already has a 'Regime' column
train_logistic_regression_cross_val(df)
