In [1]:
import streamlit as st
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

def fetch_data(ticker="MXN=X", lookback_days=365 * 3):
    """Download daily closing quotes for ``ticker`` over a trailing window.

    The window ends at the current local date and starts ``lookback_days``
    days before it; both bounds are passed to yfinance as ``YYYY-MM-DD``
    strings.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        lookback_days: Length of the trailing window, in days.

    Returns:
        A DataFrame holding only the ``Close`` column of the downloaded
        history, with rows containing missing values removed.
    """
    from datetime import datetime, timedelta

    date_format = "%Y-%m-%d"
    window_end = datetime.now()
    window_start = window_end - timedelta(days=lookback_days)

    history = yf.Ticker(ticker).history(
        start=window_start.strftime(date_format),
        end=window_end.strftime(date_format),
        interval='1d',
    )
    closes = history[['Close']]
    return closes.dropna()

def perform_eda(df):
    """Render the exploratory-analysis section of the Streamlit page.

    Displays summary statistics, per-column missing-value counts, a histogram
    of the closing price, the closing price against its 30-row rolling mean,
    the 30-row rolling standard deviation, and a correlation heatmap.

    Args:
        df: DataFrame with a ``Close`` column and an index exposing
            ``dayofweek``, ``month`` and ``dayofyear`` (e.g. a DatetimeIndex).

    Side effects:
        Adds the columns ``Rolling_Mean``, ``Volatility``, ``day_of_week``,
        ``month`` and ``day_of_year`` to ``df`` in place. This is kept
        deliberately: ``compare_models`` reads the calendar columns from the
        same frame.
    """
    st.title("Exploratory Data Analysis (EDA) - MXN/USD Exchange Rate")

    st.subheader("Data Overview")
    st.write(df.describe())

    st.subheader("Missing Values Check")
    st.write(df.isnull().sum())

    st.subheader("Closing Price Distribution")
    fig, ax = plt.subplots()
    sns.histplot(df['Close'], bins=30, kde=True, ax=ax)
    ax.set_title("Distribution of Closing Prices")
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; the
    # script is re-run on each interaction, so close after rendering.
    plt.close(fig)

    st.subheader("Rolling Mean and Volatility")
    df['Rolling_Mean'] = df['Close'].rolling(window=30).mean()
    df['Volatility'] = df['Close'].rolling(window=30).std()

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(df.index, df['Close'], label='Closing Price', alpha=0.5)
    ax.plot(df.index, df['Rolling_Mean'], label='30-day Rolling Mean', color='red')
    ax.set_title("Closing Price with 30-Day Moving Average")
    ax.legend()
    st.pyplot(fig)
    plt.close(fig)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(df.index, df['Volatility'], label='30-day Volatility', color='green')
    ax.set_title("30-Day Rolling Volatility")
    ax.legend()
    st.pyplot(fig)
    plt.close(fig)

    st.subheader("Correlation Matrix")
    # Calendar features are stored on the frame so they take part in the
    # correlation matrix below.
    df['day_of_week'] = df.index.dayofweek
    df['month'] = df.index.month
    df['day_of_year'] = df.index.dayofyear
    corr_matrix = df.corr()
    fig, ax = plt.subplots()
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
    ax.set_title("Correlation Matrix")
    st.pyplot(fig)
    plt.close(fig)

def _build_model_frame(df):
    """Return a new frame with the model features and the next-day target.

    Only ``Close`` and any calendar columns already present are copied from
    ``df``, so the caller's frame is never modified and rows are not dropped
    because of NaNs in unrelated columns.
    """
    # Column name -> index attribute used to derive it when it is missing.
    calendar_attrs = {
        'day_of_week': 'dayofweek',
        'month': 'month',
        'day_of_year': 'dayofyear',
    }
    present = [name for name in calendar_attrs if name in df.columns]
    data = df[['Close'] + present].copy()

    closes = data['Close']
    data['S_3'] = closes.rolling(window=3).mean()
    data['S_9'] = closes.rolling(window=9).mean()
    data['volatility'] = closes.rolling(window=5).std()
    for name, attr in calendar_attrs.items():
        if name not in data.columns:
            data[name] = getattr(data.index, attr)
    # Target: the close of the following row; the last row has none.
    data['next_day_rate'] = closes.shift(-1)
    return data.dropna()


def compare_models(df):
    """Fit and compare a linear regression and a decision tree on the rates.

    Features are the 3- and 9-row rolling means of ``Close``, its 5-row
    rolling standard deviation and three calendar fields; the target is the
    next row's ``Close``. The final 20% of rows (in row order) is held out,
    both models are scored on it (MSE, MAE, R²) and their predictions are
    plotted against the actual values.

    Args:
        df: DataFrame with a ``Close`` column, rows in chronological order.
            The calendar columns ``day_of_week``, ``month`` and
            ``day_of_year`` are used when present and otherwise derived from
            the index. ``df`` itself is not modified.

    Returns:
        None. Output is written to the Streamlit page.
    """
    st.subheader("Model Comparison: Linear Regression vs Decision Tree")
    data = _build_model_frame(df)

    # An 80/20 split needs at least 6 rows to leave two test samples, the
    # minimum for which the R² score is defined.
    if len(data) < 6:
        st.warning("Not enough data to train and evaluate the models.")
        return

    X = data[['S_3', 'S_9', 'volatility', 'day_of_week', 'month', 'day_of_year']]
    y = data['next_day_rate']
    # Hold out the most recent rows instead of a random sample: shuffling a
    # time series lets the models train on rows that come after the ones they
    # are tested on, and leaves the test index out of order for plotting.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # Linear Regression Model
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train)
    lr_preds = lr_model.predict(X_test)

    # Decision Tree Model (seeded so the reported metrics are reproducible)
    dt_model = DecisionTreeRegressor(random_state=42)
    dt_model.fit(X_train, y_train)
    dt_preds = dt_model.predict(X_test)

    # Metrics
    metrics = {
        "Linear Regression": {
            "MSE": mean_squared_error(y_test, lr_preds),
            "MAE": mean_absolute_error(y_test, lr_preds),
            "R²": lr_model.score(X_test, y_test)
        },
        "Decision Tree": {
            "MSE": mean_squared_error(y_test, dt_preds),
            "MAE": mean_absolute_error(y_test, dt_preds),
            "R²": dt_model.score(X_test, y_test)
        }
    }

    st.write(pd.DataFrame(metrics))

    fig, ax = plt.subplots()
    ax.plot(y_test.index, y_test, label='Actual Rate', color='blue')
    ax.plot(y_test.index, lr_preds, label='Linear Regression Predictions', color='red', linestyle='dashed')
    ax.plot(y_test.index, dt_preds, label='Decision Tree Predictions', color='green', linestyle='dotted')
    ax.set_title("Model Predictions vs Actual Rates")
    ax.legend()
    st.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it alive across re-runs.
    plt.close(fig)

def main():
    """Configure the page, download the rates and render both report sections.

    The same DataFrame is handed first to ``perform_eda`` and then to
    ``compare_models``.
    """
    st.set_page_config(layout="wide", page_title="EDA - MXN/USD Exchange Rate")
    rates = fetch_data()
    for render_section in (perform_eda, compare_models):
        render_section(rates)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'matplotlib'