In [5]:
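# NOTE: this cell is intentionally disabled. Uncomment and run it once to
# (re)create "synthetic_stock_prices.csv", which the LP cell below reads.
# import numpy as np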
# import pandas as pd

# # Set random seed for reproducibility
# np.random.seed(42)

# # Parameters for synthetic data generation
# num_stocks = 10      # Number of stocks (columns)
# num_days = 1000      # Number of days (rows)

# # Generate random initial prices for each stock
# initial_prices = np.random.uniform(50, 150, num_stocks)

# # Simulate daily returns (normally distributed)
# daily_returns = np.random.normal(0, 0.02, (num_days, num_stocks))

# # Initialize price matrix
# prices = np.zeros((num_days, num_stocks))
# prices[0] = initial_prices

# # Simulate stock prices over time
# for t in range(1, num_days):
#     prices[t] = prices[t-1] * (1 + daily_returns[t])

# # Create a DataFrame with dates and stock prices
# dates = pd.date_range(start="2020-01-01", periods=num_days)
# columns = [f"Stock_{i+1}" for i in range(num_stocks)]
# price_data = pd.DataFrame(prices, index=dates, columns=columns)

# # Save to CSV
# csv_file_path = "synthetic_stock_prices.csv"
# price_data.to_csv(csv_file_path)

# print(f"Synthetic stock price data saved to: {csv_file_path}")



#### Dimension Reduction via Linear Programming (LP)

In [7]:
import pandas as pd
import numpy as np
import cvxpy as cp

def lp_portfolio_optimization_from_csv(file_path, gamma=0.5):
    """
    LP-based portfolio optimization using Mean Absolute Deviation (MAD).

    Reads a price table, converts it to simple period-over-period returns
    and solves the linear program

        maximize    gamma * mu @ w - (1 / T) * sum(v)
        subject to  -v <= A @ w <= v,  sum(w) == 1,  w >= 0,  v >= 0

    where ``A`` is the matrix of mean-centered returns, so each ``v[t]``
    bounds the absolute deviation of the portfolio return in period t.

    Parameters:
        file_path : str : Path to the CSV file containing stock prices
                          (rows: dates, columns: stocks; first column is used as the index)
        gamma     : float : Risk-return tradeoff parameter (higher gamma = more return focus)
    Returns:
        pd.Series : Optimal portfolio weights (indexed by stock name)
    Raises:
        ValueError   : If no usable return rows remain, or returns are not finite.
        RuntimeError : If the solver does not report an optimal solution.
    """
    # 1. Read the CSV file (dates as index, stocks as columns)
    price_data = pd.read_csv(file_path, index_col=0)

    # 2. Calculate daily returns; dropna() removes the first row (always NaN
    #    after pct_change) and any other row with a missing value.
    returns = price_data.pct_change().dropna()
    T, N = returns.shape
    if T == 0 or N == 0:
        # Guard explicitly: otherwise (1 / T) below fails with an opaque
        # ZeroDivisionError (or the LP is built with no assets).
        raise ValueError(
            "Need at least two complete price rows and one stock column "
            f"to compute returns (got {T} return rows, {N} stocks)."
        )
    if not np.isfinite(returns.values).all():
        # e.g. a zero price produces an infinite return, which would only
        # surface later as a solver failure.
        raise ValueError("Returns contain non-finite values; check for zero prices.")

    mu = returns.mean(axis=0).values  # Mean returns for each stock

    # 3. Deviation matrix (centered returns)
    A = returns.values - mu

    # 4. Decision variables
    w = cp.Variable(N)  # Portfolio weights
    v = cp.Variable(T)  # Auxiliary variables for absolute deviations

    # 5. Objective function: Maximize return - risk penalty
    objective = cp.Maximize(gamma * mu @ w - (1 / T) * cp.sum(v))

    # 6. Constraints
    constraints = [
        A @ w <= v,       # Upper bound on deviations
        A @ w >= -v,      # Lower bound on deviations
        cp.sum(w) == 1,   # Fully invested
        w >= 0,           # No short selling
        v >= 0            # Deviation bounds are non-negative
    ]

    # 7. Solve the problem. ECOS stays the preferred solver; if it is not
    #    installed, fall back to cvxpy's default solver for this problem.
    problem = cp.Problem(objective, constraints)
    if cp.ECOS in cp.installed_solvers():
        problem.solve(solver=cp.ECOS)
    else:
        problem.solve()

    # Fail loudly instead of returning a Series of NaN when no solution exists.
    if problem.status not in (cp.OPTIMAL, cp.OPTIMAL_INACCURATE) or w.value is None:
        raise RuntimeError(
            f"Portfolio optimization did not converge (solver status: {problem.status})."
        )

    # 8. Return results as a Series with stock names
    return pd.Series(w.value, index=returns.columns)

# Example Usage
if __name__ == "__main__":
    # Input price table; produced by the synthetic-data generator cell.
    file_path = "synthetic_stock_prices.csv"

    # Solve the MAD portfolio LP with a return-leaning tradeoff.
    weights = lp_portfolio_optimization_from_csv(file_path, gamma=0.7)

    # Report only weights above a small numerical tolerance, largest first.
    print(
        "Optimal Portfolio Weights (nonzero):",
        weights.loc[weights.gt(1e-4)].sort_values(ascending=False),
        sep="\n",
    )


Optimal Portfolio Weights (nonzero):
Stock_7     0.113510
Stock_3     0.105155
Stock_2     0.104827
Stock_1     0.102969
Stock_9     0.102633
Stock_4     0.099270
Stock_8     0.094658
Stock_5     0.092877
Stock_6     0.092450
Stock_10    0.091653
dtype: float64
