# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import detrend
from pandas.tseries.offsets import BDay
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from dash import Dash, html, dcc, Input, Output, State, no_update
import yfinance as yf
from datetime import datetime, timedelta
from scipy.signal import find_peaks
# Ask where the price data comes from: '1' downloads it with yfinance,
# any other answer loads the local CSV file.
data_source = input("Enter '1' for yfinance or '2' for CSV: ").strip()

if data_source == '1':
    # Load from yfinance
    symbol = input("Enter symbol (e.g., GLD, AAPL): ").strip()
    # Load more data to allow historical lookback
    stock_df_new2 = yf.download(symbol, start='2000-01-01')
    if stock_df_new2.empty:
        # Fail here with a clear message instead of an obscure IndexError later.
        raise ValueError(f"No data returned by yfinance for symbol {symbol!r}")
    # The download may come back with (field, ticker) MultiIndex columns.
    # Keep only the field level ('Close', 'Open', ...) and leave columns that
    # are already flat untouched, so 'Close' is always a plain column.
    if isinstance(stock_df_new2.columns, pd.MultiIndex):
        stock_df_new2.columns = stock_df_new2.columns.get_level_values(0)
    stock_df_new2['mid_price'] = stock_df_new2['Close']
    # Move the date index into a regular 'Date' column for the shared cleanup.
    stock_df_new2.reset_index(inplace=True)
else:
    # Load from CSV (expected to provide 'Date' and 'mid_price' columns,
    # since the cleanup below requires both).
    file = "/Users/mohammadsattar/Desktop/ML/Storage/Gold/FFt/gold_usd_data2.csv"
    stock_df_new2 = pd.read_csv(file)

# Clean the data: drop rows without a date or price, index by date.
stock_df_new2 = stock_df_new2.dropna(subset=['Date', 'mid_price']).copy()
stock_df_new2['Date'] = pd.to_datetime(stock_df_new2['Date'])
stock_df_new2.set_index('Date', inplace=True)
# Later code slices this frame with .loc[start:end] and feeds the rows to an
# FFT in order, so the index must be chronological.
stock_df_new2.sort_index(inplace=True)

# ====== User Inputs ======
# Collect the analysis parameters interactively, in a fixed order:
# start date, period length, number of periods, one gap per period,
# and finally how many wave components to keep per period.
print("\n=== Defining Analysis Parameters ===")
start_date_str = input("Enter start date (YYYY-MM-DD): ")
start_date = pd.to_datetime(start_date_str)
period_length = int(input("Enter period length in days (e.g., 88): "))
num_periods = int(input("Enter number of periods (e.g., 5): "))

# One gap (in days) per period: the first is measured from the start date,
# every following one from the previous period.
intervals_between_periods = []
for i in range(num_periods):
    prompt = (
        "Enter days from start date to first period: "
        if i == 0
        else f"Enter days from period {i} to period {i+1}: "
    )
    interval = int(input(prompt))
    intervals_between_periods.append(interval)

top_n_components = int(input("Enter number of wave components per period (e.g., 2): "))

# ====== Function to extract wave components from a given period ======
def extract_frequency_components(df, top_n=2):
    """Return the strongest sinusoidal components of ``df['mid_price']``.

    The price column is transformed with ``np.fft.fft`` using a sample
    spacing of one row (``d=1``). Frequencies are therefore expressed in
    cycles per row, and ``period_days`` is the number of rows per cycle
    (it equals days only if the rows are evenly spaced daily samples).
    Only strictly positive frequencies are considered, so the mean (DC)
    term is never returned.

    Args:
        df: DataFrame with a numeric ``'mid_price'`` column.
        top_n: Maximum number of components to return. Values ``<= 0``
            yield an empty list.

    Returns:
        A list of dicts ordered by decreasing amplitude, each with keys
        ``'amplitude'`` (``2 * |X_k| / N``), ``'frequency'``, ``'phase'``
        (``np.angle`` of the FFT bin, i.e. the phase of a cosine) and
        ``'period_days'`` (``1 / frequency``). Empty when ``df`` has fewer
        than two rows or no positive-frequency bin exists.
    """
    # A negative top_n would otherwise slice "[:top_n]" and silently return
    # almost every component instead of none.
    if top_n <= 0 or len(df) < 2:
        return []

    fft_result = np.fft.fft(df['mid_price'].values)
    n_samples = len(fft_result)
    frequencies = np.fft.fftfreq(n_samples, d=1)
    amplitudes = np.abs(fft_result)

    # Keep only positive frequencies (drops DC and the mirrored negative half).
    positive_freq_indices = np.where(frequencies > 0)[0]
    if len(positive_freq_indices) == 0:
        return []

    # Rank the positive-frequency bins by amplitude, strongest first.
    ranked = np.argsort(amplitudes[positive_freq_indices])[::-1]
    top_frequencies = positive_freq_indices[ranked[:top_n]]

    return [
        {
            # Factor 2 folds in the energy of the matching negative bin.
            'amplitude': 2 * amplitudes[freq_idx] / n_samples,
            'frequency': frequencies[freq_idx],
            'phase': np.angle(fft_result[freq_idx]),
            # Frequencies here are strictly positive, so this never divides by 0.
            'period_days': 1 / frequencies[freq_idx],
        }
        for freq_idx in top_frequencies
    ]

# ====== Collect components from all periods ======
# Walk backwards from the start date: each period ends `cumulative_days`
# before the start date and spans `period_length` days before that end.
all_components = []
period_dates = []
cumulative_days = 0  # Running total of the per-period gaps

print(f"\n=== Analyzing {num_periods} periods ===")
for i in range(num_periods):
    cumulative_days += intervals_between_periods[i]
    period_end = start_date - timedelta(days=cumulative_days)
    period_start = period_end - timedelta(days=period_length)

    print(f"\nPeriod {i+1}: from {period_start.date()} to {period_end.date()}")
    print(f"  (lookback {cumulative_days} days from start date)")

    try:
        # Rows of the price history that fall inside this period.
        window_data = stock_df_new2.loc[period_start:period_end].copy()
        if len(window_data) < 2:
            print(f"  Warning: insufficient data for period {i+1}")
            continue

        found = extract_frequency_components(window_data, top_n=top_n_components)
        if not found:
            print(f"  Warning: no components extracted for period {i+1}")
            continue

        # Only periods that produced components are recorded.
        all_components.extend(found)
        period_dates.append((period_start, period_end))
        print(f"  Extracted {len(found)} components")
        for rank, c in enumerate(found, 1):
            print(f"    Component {rank}: frequency={c['frequency']:.4f}, period={c['period_days']:.1f} days, amplitude={c['amplitude']:.2f}")
    except Exception as e:
        # Best effort: report the failure and move on to the next period.
        print(f"  Error processing period {i+1}: {str(e)}")

# ====== Merge similar components (optional) ======
# No merging is performed; the components are only counted here.
print("\n=== Merging Components ===")
print(f"Total components extracted: {len(all_components)}")


def _component_wave(comp, t):
    """Evaluate one extracted component on the sample axis ``t``.

    ``comp['phase']`` comes from ``np.angle`` of an FFT bin, which is the
    phase of a cosine term in the inverse DFT. The wave must therefore be
    rebuilt with ``cos``; using ``sin`` shifts every component by 90 degrees.
    """
    return comp['amplitude'] * np.cos(
        2 * np.pi * comp['frequency'] * t + comp['phase']
    )


# ====== Create composite wave for future period ======
# Time axis for the future period: one point per calendar day from start_date.
# NOTE(review): component frequencies are in cycles per data row, while this
# axis uses calendar days - confirm the input rows are gap-free daily samples.
future_dates = pd.date_range(start=start_date, periods=period_length, freq='D')
time_points = np.arange(period_length)

# Sum every component extracted from every analysed period.
composite_wave = np.zeros(period_length)
for comp in all_components:
    composite_wave += _component_wave(comp, time_points)

# Average by the number of periods that contributed components.
if all_components and period_dates:
    composite_wave = composite_wave / len(period_dates)

# ====== Plot results ======
fig, axes = plt.subplots(3, 1, figsize=(14, 12))

# 1. Plot historical periods used
ax1 = axes[0]
for i, (p_start, p_end) in enumerate(period_dates):
    period_data = stock_df_new2.loc[p_start:p_end]['mid_price']
    ax1.plot(period_data.index, period_data.values, alpha=0.5, label=f'Period {i+1}')

ax1.set_title('Historical Periods Used in Analysis')
ax1.set_xlabel('Date')
ax1.set_ylabel('Price')
ax1.legend(loc='best')
ax1.grid(True, alpha=0.3)

# 2. Plot individual components (same reconstruction as the composite sum)
ax2 = axes[1]
for comp in all_components[:10]:  # Display first 10 components only
    ax2.plot(time_points, _component_wave(comp, time_points), alpha=0.3,
             label=f"f={comp['frequency']:.3f}")

ax2.set_title('Individual Wave Components (First 10)')
ax2.set_xlabel('Days')
ax2.set_ylabel('Amplitude')
ax2.legend(loc='best', ncol=2, fontsize='small')
ax2.grid(True, alpha=0.3)

# 3. Plot final composite wave
ax3 = axes[2]
ax3.plot(future_dates, composite_wave, 'b-', linewidth=2, label='Composite Wave')
ax3.fill_between(future_dates, composite_wave, alpha=0.3)
ax3.set_title(f'Predicted Composite Wave for {period_length} days after {start_date.date()}')
ax3.set_xlabel('Date')
ax3.set_ylabel('Predicted Value')
ax3.legend()
ax3.grid(True, alpha=0.3)

# Improve layout
plt.tight_layout()
plt.show()

# ====== Plot composite signal in real price domain ======

# Reference price: the most recent price at or before start_date.
# An exact-match test on the index would miss weekends and holidays and fall
# back to the newest price in the whole dataset, which may lie after
# start_date. The boolean mask works even if the index is not sorted.
prices_up_to_start = stock_df_new2.loc[
    stock_df_new2.index <= start_date, 'mid_price'
].sort_index()
if not prices_up_to_start.empty:
    last_known_price = prices_up_to_start.iloc[-1]
else:
    # start_date precedes all available data: use the last row as before.
    last_known_price = stock_df_new2['mid_price'].iloc[-1]

# Pool the prices of all analysed periods to get their mean and spread.
all_period_prices = []
for p_start, p_end in period_dates:
    period_prices = stock_df_new2.loc[p_start:p_end]['mid_price'].values
    all_period_prices.extend(period_prices)

if len(all_period_prices) > 0:
    mean_price = np.mean(all_period_prices)
    std_price = np.std(all_period_prices)
else:
    # No analysed period: centre on the reference price with zero spread.
    mean_price = last_known_price
    std_price = 0

# Convert composite wave to actual price values
# Option 1: Add oscillations to last known price
predicted_prices_from_last = last_known_price + composite_wave

# Option 2: Add oscillations to average historical prices
predicted_prices_from_mean = mean_price + composite_wave

# Option 3: Rescale the wave so its standard deviation matches the
# historical one, then centre it on the historical mean.
if std_price > 0 and len(all_components) > 0:
    wave_std = np.std(composite_wave)
    # A flat wave (zero spread) cannot be rescaled; keep it unchanged.
    normalized_wave = (composite_wave / wave_std) * std_price if wave_std > 0 else composite_wave
    predicted_prices_normalized = mean_price + normalized_wave
else:
    predicted_prices_normalized = predicted_prices_from_mean

# ====== Plotting ======
# Three stacked panels: price history with the analysed periods, the
# prediction anchored on the last known price, and the normalized prediction.
fig3, axes = plt.subplots(3, 1, figsize=(15, 12))

# 1. Plot actual historical prices with analyzed periods
ax1 = axes[0]
# Show only the 730 days before the start date for clarity
two_years_ago = start_date - timedelta(days=730)
historical_data = stock_df_new2.loc[two_years_ago:start_date]['mid_price']
if len(historical_data) > 0:
    ax1.plot(historical_data.index, historical_data.values, 'gray', alpha=0.5, label='Historical Price')

# Highlight analyzed periods, one colour per period
colors = plt.cm.rainbow(np.linspace(0, 1, len(period_dates)))
for i, (p_start, p_end) in enumerate(period_dates):
    period_data = stock_df_new2.loc[p_start:p_end]['mid_price']
    ax1.plot(period_data.index, period_data.values, color=colors[i],
             linewidth=2, label=f'Period {i+1}')

ax1.axvline(x=start_date, color='r', linestyle='--', alpha=0.7, label='Start Date')
ax1.set_title('Historical Prices and Analyzed Periods')
ax1.set_xlabel('Date')
ax1.set_ylabel('Price')
ax1.legend(loc='best', fontsize='small')
ax1.grid(True, alpha=0.3)

# 2. Plot predictions from last known price
ax2 = axes[1]
ax2.plot(future_dates, predicted_prices_from_last, 'b-', linewidth=2, label='Prediction from Last Price')
ax2.axhline(y=last_known_price, color='r', linestyle='--', alpha=0.5,
            label=f'Last Known Price: {last_known_price:.2f}')
ax2.fill_between(future_dates, predicted_prices_from_last, last_known_price, alpha=0.2)

# Shaded band of one historical standard deviation around the prediction.
# The label previously showed the mojibake 'Â±' instead of the '±' sign.
upper_bound = predicted_prices_from_last + std_price
lower_bound = predicted_prices_from_last - std_price
ax2.fill_between(future_dates, upper_bound, lower_bound, alpha=0.1, color='gray',
                 label=f'Confidence Interval (±{std_price:.2f})')

ax2.set_title(f'Price Prediction for Next {period_length} Days (from Last Price)')
ax2.set_xlabel('Date')
ax2.set_ylabel('Predicted Price')
ax2.legend()
ax2.grid(True, alpha=0.3)

# 3. Plot normalized predictions
ax3 = axes[2]
ax3.plot(future_dates, predicted_prices_normalized, 'g-', linewidth=2,
         label='Normalized Prediction')
ax3.axhline(y=mean_price, color='orange', linestyle='--', alpha=0.5,
            label=f'Historical Average Price: {mean_price:.2f}')

# Support / resistance are simply the min / max of the predicted curve
resistance_level = np.max(predicted_prices_normalized)
support_level = np.min(predicted_prices_normalized)
ax3.axhline(y=resistance_level, color='r', linestyle=':', alpha=0.7,
            label=f'Resistance: {resistance_level:.2f}')
ax3.axhline(y=support_level, color='g', linestyle=':', alpha=0.7,
            label=f'Support: {support_level:.2f}')

# Turning points: local maxima of the curve, and local maxima of its
# negation (i.e. local minima). find_peaks is imported at the top of the file.
peaks, _ = find_peaks(predicted_prices_normalized)
valleys, _ = find_peaks(-predicted_prices_normalized)

if len(peaks) > 0:
    ax3.scatter(future_dates[peaks], predicted_prices_normalized[peaks],
                color='red', s=50, zorder=5, label='Predicted Peaks')
if len(valleys) > 0:
    ax3.scatter(future_dates[valleys], predicted_prices_normalized[valleys],
                color='green', s=50, zorder=5, label='Predicted Valleys')

ax3.set_title(f'Normalized Price Prediction for Next {period_length} Days')
ax3.set_xlabel('Date')
ax3.set_ylabel('Predicted Price')
ax3.legend(loc='best', fontsize='small')
ax3.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# ====== Print predicted price information ======
# Text report of the reference prices, the predicted range and up to five
# predicted peaks / valleys, followed by an optional CSV export.
print("\n=== Price Predictions ===")
print(f"Last Known Price: {last_known_price:.2f}")
print(f"Historical Average Price: {mean_price:.2f}")
print(f"Historical Standard Deviation: {std_price:.2f}")
print("\nPredicted Price Range:")
print(f"  Upper Bound: {predicted_prices_normalized.max():.2f}")
print(f"  Lower Bound: {predicted_prices_normalized.min():.2f}")
print(f"  Predicted Average: {predicted_prices_normalized.mean():.2f}")

# Peaks first, then valleys; each list is capped at its first five entries.
for heading, turning_points in (
    ("\nPredicted Peak Dates:", peaks),
    ("\nPredicted Valley Dates:", valleys),
):
    if len(turning_points) == 0:
        continue
    print(heading)
    for i, idx in enumerate(turning_points[:5], 1):
        print(f"  {i}. {future_dates[idx].date()}: {predicted_prices_normalized[idx]:.2f}")

# Save predictions with prices
save_price_predictions = input("\nDo you want to save price predictions to CSV file? (y/n): ")
if save_price_predictions.lower() == 'y':
    # Bounds are the normalized prediction plus/minus one historical std.
    prediction_columns = {
        'Date': future_dates,
        'Composite_Wave': composite_wave,
        'Price_From_Last': predicted_prices_from_last,
        'Price_Normalized': predicted_prices_normalized,
        'Upper_Bound': predicted_prices_normalized + std_price,
        'Lower_Bound': predicted_prices_normalized - std_price,
    }
    predictions_df = pd.DataFrame(prediction_columns)
    filename = f"price_predictions_{start_date.date()}_{period_length}days.csv"
    predictions_df.to_csv(filename, index=False)
    print(f"Predictions saved to: {filename}")

# ====== Print results summary ======
# Overall counts plus basic statistics of the composite wave, followed by an
# optional CSV export of the wave itself.
print("\n=== Results Summary ===")
print(f"Number of analyzed periods: {len(period_dates)}")
print(f"Total components extracted: {len(all_components)}")
print(f"Prediction period: from {future_dates[0].date()} to {future_dates[-1].date()}")
print(f"Maximum composite wave value: {composite_wave.max():.2f}")
print(f"Minimum composite wave value: {composite_wave.min():.2f}")
print(f"Average: {composite_wave.mean():.2f}")
print(f"Standard Deviation: {composite_wave.std():.2f}")

# Save results to CSV file (optional)
save_results = input("\nDo you want to save results to CSV file? (y/n): ")
if save_results.lower() == 'y':
    wave_columns = {'Date': future_dates, 'Composite_Wave': composite_wave}
    results_df = pd.DataFrame(wave_columns)
    filename = f"composite_wave_{start_date.date()}_{period_length}days.csv"
    results_df.to_csv(filename, index=False)
    print(f"Results saved to: {filename}")