## Setup and Data Loading

In [None]:
#imports
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import mean_squared_error

# Input CSV locations
file_path_non_prime = "/home/jovyan/Capstone/Capstone/Data/MasterCapstoneData.xlsx - ProductNonPrime.csv"
file_path_prime = "/home/jovyan/Capstone/Capstone/Data/MasterCapstoneData.xlsx - ProductPrime.csv"
file_path_walmart = "/home/jovyan/Capstone/Capstone/Data/MasterCapstoneData.xlsx - ProductWalmart.csv"
file_path_tiktok = "/home/jovyan/Capstone/Capstone/Data/MasterCapstoneData.xlsx - ProductTikTok.csv"

# Read the four CSV files in the same order as the paths above
data_non_prime, data_prime, data_walmart, data_tiktok = (
    pd.read_csv(csv_path)
    for csv_path in (
        file_path_non_prime,
        file_path_prime,
        file_path_walmart,
        file_path_tiktok,
    )
)

# Column labels used as model inputs (historical) and as prediction targets
historical_dates = [
    "10/7", "10/8", "10/9", "10/10",
    "10/11", "10/12", "10/13", "10/14",
]
target_dates = [
    "11/26", "11/27", "11/28", "11/29",
    "11/30", "12/1", "12/2", "12/3",
]

# Data preparation function
def prepare_data(df, historical_dates, target_dates):
    """
    Build the feature and label arrays used for training.

    Missing values in the historical columns are replaced by that column's
    mean; the target columns are returned exactly as stored in `df`.
    Args:
        df: Input dataframe with price columns.
        historical_dates: Labels of the columns used as features.
        target_dates: Labels of the columns used as labels.
    Returns:
        X: 2-D array of imputed historical prices.
        y: 2-D array of target prices (not imputed).
    """
    history = df[historical_dates]
    column_means = history.mean()
    features = history.fillna(column_means).values
    labels = df[target_dates].values
    return features, labels

# Build (features, labels) for each dataset with the shared date columns
(
    (X_non_prime, y_non_prime),
    (X_prime, y_prime),
    (X_walmart, y_walmart),
    (X_tiktok, y_tiktok),
) = (
    prepare_data(dataset, historical_dates, target_dates)
    for dataset in (data_non_prime, data_prime, data_walmart, data_tiktok)
)

## Simplified Pricing Model

In [2]:
# Simplified Pricing Model
def simplified_predict_prices(df):
    """
    Baseline model: repeat each row's mean historical price for every target date.
    Args:
        df: Input dataframe containing the `historical_dates` columns.
    Returns:
        predictions: DataFrame indexed like `df`, with one column per entry in
        `target_dates`, each holding the row-wise mean of the historical columns.
    """
    row_average = df[historical_dates].mean(axis=1)
    return pd.DataFrame(
        {target_date: row_average for target_date in target_dates},
        index=df.index,
    )

# Run the simplified baseline on every dataset
(
    data_non_prime_simplified,
    data_prime_simplified,
    data_walmart_simplified,
    data_tiktok_simplified,
) = map(
    simplified_predict_prices,
    (data_non_prime, data_prime, data_walmart, data_tiktok),
)

## Dynamic Pricing Model

In [3]:
# Dynamic Pricing Model
def predict_dynamic_pricing(df):
    """
    Predict prices using linear regression for dynamic pricing.

    One LinearRegression is fitted per target date, with the historical price
    columns as features and that target date's column as the label. The
    returned values are the model's predictions for the same rows it was
    fitted on.
    Args:
        df: Input dataframe containing the `historical_dates` and
            `target_dates` columns.
    Returns:
        predictions: DataFrame indexed like `df`, one column per target date.
    """
    # LinearRegression rejects NaN inputs, so fill gaps in the historical
    # prices with the column mean -- the same rule prepare_data applies.
    history = df[historical_dates]
    features = history.fillna(history.mean())

    predictions = {}
    for target_date in target_dates:
        model = LinearRegression()
        model.fit(features, df[target_date])
        predictions[target_date] = model.predict(features)

    return pd.DataFrame(predictions, index=df.index)

# Run the dynamic (linear regression) model on every dataset
(
    data_non_prime_dynamic,
    data_prime_dynamic,
    data_walmart_dynamic,
    data_tiktok_dynamic,
) = map(
    predict_dynamic_pricing,
    (data_non_prime, data_prime, data_walmart, data_tiktok),
)

## Value-Based Pricing Model

In [4]:
# Value-Based Pricing Model
def predict_value_based_pricing(df):
    """
    Predict prices using polynomial regression for value-based pricing.

    The historical price columns are expanded to degree-2 polynomial features
    once, then one LinearRegression is fitted per target date on that shared
    expansion. The returned values are the model's predictions for the same
    rows it was fitted on.
    Args:
        df: Input dataframe containing the `historical_dates` and
            `target_dates` columns.
    Returns:
        predictions: DataFrame indexed like `df`, one column per target date.
    """
    # PolynomialFeatures and LinearRegression both reject NaN inputs, so fill
    # gaps in the historical prices with the column mean -- the same rule
    # prepare_data applies.
    history = df[historical_dates]
    features = history.fillna(history.mean())

    poly = PolynomialFeatures(degree=2)
    X_poly = poly.fit_transform(features)

    predictions = {}
    for target_date in target_dates:
        model = LinearRegression()
        model.fit(X_poly, df[target_date])
        predictions[target_date] = model.predict(X_poly)

    return pd.DataFrame(predictions, index=df.index)

# Run the value-based (polynomial regression) model on every dataset
(
    data_non_prime_value,
    data_prime_value,
    data_walmart_value,
    data_tiktok_value,
) = map(
    predict_value_based_pricing,
    (data_non_prime, data_prime, data_walmart, data_tiktok),
)

## Competition-Based Pricing Model

In [8]:
# Corrected Competition-Based Pricing Model
def predict_competition_based_pricing(df, competitors, random_state=None):
    """
    Predict prices using competition-based regression (Random Forest).

    The regression target is each row's average competitor price: the
    competitor frames are stacked, rows sharing an index label are averaged,
    and the result is averaged across the historical date columns. The
    features are `df`'s own historical prices with gaps filled by column means.

    The target does not vary by target date, so a single forest is fitted and
    its predictions are reused for every target-date column. Refitting per
    date would only add seed-to-seed noise between otherwise identical columns.
    Args:
        df: Input dataframe with historical prices.
        competitors: List of competitor dataframes containing the
            `historical_dates` columns.
        random_state: Optional seed forwarded to RandomForestRegressor for
            reproducible output. Defaults to None (unseeded).
    Returns:
        predictions: DataFrame indexed like `df`, one column per target date.
    """
    # Stack the competitor frames, then average rows that share an index label
    competitor_history = pd.concat(
        [comp[historical_dates] for comp in competitors], axis=0
    )
    avg_competitor_prices = competitor_history.groupby(level=0).mean()

    history = df[historical_dates]
    X = history.fillna(history.mean())
    # NOTE(review): the model pairs X and y by position, so this assumes every
    # competitor frame shares `df`'s row index -- confirm against the data.
    y = avg_competitor_prices.mean(axis=1)

    model = RandomForestRegressor(n_estimators=100, random_state=random_state)
    model.fit(X, y)
    fitted_prices = model.predict(X)

    predictions = {target_date: fitted_prices for target_date in target_dates}
    return pd.DataFrame(predictions, index=df.index)

# Run the competition-based model; each dataset is paired with its own competitor list
competitors = [data_walmart, data_tiktok]
(
    data_non_prime_competition,
    data_prime_competition,
    data_walmart_competition,
    data_tiktok_competition,
) = (
    predict_competition_based_pricing(own_data, rival_data)
    for own_data, rival_data in (
        (data_non_prime, competitors),
        (data_prime, competitors),
        (data_walmart, [data_non_prime, data_tiktok]),
        (data_tiktok, [data_non_prime, data_walmart]),
    )
)

## XGBoost Model

In [6]:
# XGBoost Model
def train_xgboost_model(X, y):
    """
    Fit an XGBoost regressor with fixed hyperparameters.
    Args:
        X: Training features.
        y: Training labels.
    Returns:
        The fitted XGBRegressor.
    """
    hyperparameters = {
        "n_estimators": 100,
        "learning_rate": 0.1,
        "max_depth": 5,
        "random_state": 42,
    }
    regressor = XGBRegressor(**hyperparameters)
    regressor.fit(X, y)
    return regressor

# Fit one XGBoost model per dataset
xgb_model_non_prime = train_xgboost_model(X_non_prime, y_non_prime)
xgb_model_prime = train_xgboost_model(X_prime, y_prime)
xgb_model_walmart = train_xgboost_model(X_walmart, y_walmart)
xgb_model_tiktok = train_xgboost_model(X_tiktok, y_tiktok)

# Predict with each model on the same features it was trained on
xgb_predictions_non_prime = xgb_model_non_prime.predict(X_non_prime)
xgb_predictions_prime = xgb_model_prime.predict(X_prime)
xgb_predictions_walmart = xgb_model_walmart.predict(X_walmart)
xgb_predictions_tiktok = xgb_model_tiktok.predict(X_tiktok)

# Wrap the raw prediction arrays in DataFrames labelled by target date
(
    xgb_pred_non_prime_df,
    xgb_pred_prime_df,
    xgb_pred_walmart_df,
    xgb_pred_tiktok_df,
) = (
    pd.DataFrame(raw_predictions, columns=target_dates)
    for raw_predictions in (
        xgb_predictions_non_prime,
        xgb_predictions_prime,
        xgb_predictions_walmart,
        xgb_predictions_tiktok,
    )
)

## Logistic Regression

In [9]:
# Logistic Regression Model
def train_logistic_regression_models(X, y, target_dates):
    """
    Fit one Logistic Regression classifier per target date.

    Features are standardised and then expanded to degree-2 polynomial terms.
    For each target date the matching column of `y` is rounded to the nearest
    integer and used as the class label, so predictions are integer values.
    Args:
        X: Features (historical prices).
        y: 2-D array of target prices; column i corresponds to target_dates[i].
        target_dates: List of target dates.
    Returns:
        models: Dictionary mapping each target date to its fitted model.
        predictions: DataFrame with one column of predictions per target date.
    """
    # Scale first, then build the polynomial design matrix shared by all models
    standardized = StandardScaler().fit_transform(X)
    design_matrix = PolynomialFeatures(degree=2).fit_transform(standardized)

    models = {}
    predicted = {}
    for column, target_date in enumerate(target_dates):
        # Rounded prices act as the class labels for this date
        class_labels = np.round(y[:, column]).astype(int)

        classifier = LogisticRegression(max_iter=1000, solver='lbfgs')
        classifier.fit(design_matrix, class_labels)

        models[target_date] = classifier
        predicted[target_date] = classifier.predict(design_matrix)

    return models, pd.DataFrame(predicted, columns=target_dates)

# Fit the per-date logistic models and collect predictions for every dataset
(
    (lr_models_non_prime, lr_pred_non_prime_df),
    (lr_models_prime, lr_pred_prime_df),
    (lr_models_walmart, lr_pred_walmart_df),
    (lr_models_tiktok, lr_pred_tiktok_df),
) = (
    train_logistic_regression_models(features, labels, target_dates)
    for features, labels in (
        (X_non_prime, y_non_prime),
        (X_prime, y_prime),
        (X_walmart, y_walmart),
        (X_tiktok, y_tiktok),
    )
)

## Results

In [10]:
# Destination CSV path for every (model, dataset) pair.
# Outer keys name the pricing model; inner keys name the dataset.
output_paths = {
    "dynamic": {
        "non_prime": "/home/jovyan/Capstone/Capstone/Data/NonPrime_Dynamic.csv",
        "prime": "/home/jovyan/Capstone/Capstone/Data/Prime_Dynamic.csv",
        "walmart": "/home/jovyan/Capstone/Capstone/Data/Walmart_Dynamic.csv",
        "tiktok": "/home/jovyan/Capstone/Capstone/Data/TikTok_Dynamic.csv",
    },
    "value": {
        "non_prime": "/home/jovyan/Capstone/Capstone/Data/NonPrime_Value.csv",
        "prime": "/home/jovyan/Capstone/Capstone/Data/Prime_Value.csv",
        "walmart": "/home/jovyan/Capstone/Capstone/Data/Walmart_Value.csv",
        "tiktok": "/home/jovyan/Capstone/Capstone/Data/TikTok_Value.csv",
    },
    "competition": {
        "non_prime": "/home/jovyan/Capstone/Capstone/Data/NonPrime_Competition.csv",
        "prime": "/home/jovyan/Capstone/Capstone/Data/Prime_Competition.csv",
        "walmart": "/home/jovyan/Capstone/Capstone/Data/Walmart_Competition.csv",
        "tiktok": "/home/jovyan/Capstone/Capstone/Data/TikTok_Competition.csv",
    },
    "simplified": {
        "non_prime": "/home/jovyan/Capstone/Capstone/Data/NonPrime_Simplified.csv",
        "prime": "/home/jovyan/Capstone/Capstone/Data/Prime_Simplified.csv",
        "walmart": "/home/jovyan/Capstone/Capstone/Data/Walmart_Simplified.csv",
        "tiktok": "/home/jovyan/Capstone/Capstone/Data/TikTok_Simplified.csv",
    },
    "xgboost": {
        "non_prime": "/home/jovyan/Capstone/Capstone/Data/NonPrime_XGBoost.csv",
        "prime": "/home/jovyan/Capstone/Capstone/Data/Prime_XGBoost.csv",
        "walmart": "/home/jovyan/Capstone/Capstone/Data/Walmart_XGBoost.csv",
        "tiktok": "/home/jovyan/Capstone/Capstone/Data/TikTok_XGBoost.csv",
    },
    "logistic": {
        "non_prime": "/home/jovyan/Capstone/Capstone/Data/NonPrime_Logistic.csv",
        "prime": "/home/jovyan/Capstone/Capstone/Data/Prime_Logistic.csv",
        "walmart": "/home/jovyan/Capstone/Capstone/Data/Walmart_Logistic.csv",
        "tiktok": "/home/jovyan/Capstone/Capstone/Data/TikTok_Logistic.csv",
    },
}

# Prediction frames to persist, keyed the same way as output_paths
# (pricing model -> dataset)
results_by_model = {
    "dynamic": {
        "non_prime": data_non_prime_dynamic,
        "prime": data_prime_dynamic,
        "walmart": data_walmart_dynamic,
        "tiktok": data_tiktok_dynamic,
    },
    "value": {
        "non_prime": data_non_prime_value,
        "prime": data_prime_value,
        "walmart": data_walmart_value,
        "tiktok": data_tiktok_value,
    },
    "competition": {
        "non_prime": data_non_prime_competition,
        "prime": data_prime_competition,
        "walmart": data_walmart_competition,
        "tiktok": data_tiktok_competition,
    },
    "simplified": {
        "non_prime": data_non_prime_simplified,
        "prime": data_prime_simplified,
        "walmart": data_walmart_simplified,
        "tiktok": data_tiktok_simplified,
    },
    "xgboost": {
        "non_prime": xgb_pred_non_prime_df,
        "prime": xgb_pred_prime_df,
        "walmart": xgb_pred_walmart_df,
        "tiktok": xgb_pred_tiktok_df,
    },
    "logistic": {
        "non_prime": lr_pred_non_prime_df,
        "prime": lr_pred_prime_df,
        "walmart": lr_pred_walmart_df,
        "tiktok": lr_pred_tiktok_df,
    },
}

# Write every frame to its configured path, leaving out the index column
for model_name, frames_by_dataset in results_by_model.items():
    for dataset_name, result_frame in frames_by_dataset.items():
        result_frame.to_csv(output_paths[model_name][dataset_name], index=False)