In [71]:
import pandas as pd

# Read the quarterly sheet (revenue + e-commerce) and drop the 'tic' column,
# which is not used anywhere below. Replace the path with the actual file location.
df_revenue = pd.read_excel(
    "02_final-dataset.xlsx", sheet_name="quarterly to python"
).drop(columns=['tic'])

# Time index: sequential quarter number, 1 for the first row.
df_revenue['Time index'] = range(1, len(df_revenue) + 1)

# Quarter number: the last character of the quarter label (e.g. "22Q3" -> "3").
df_revenue['qtr'] = df_revenue['quarter'].str[-1:]

# Dummy variables for the quarter; drop_first leaves the first category as baseline.
df_revenue = pd.get_dummies(df_revenue, columns=['qtr'], drop_first=True)

# Every boolean column is recoded to 0/1 integers.
bool_columns = df_revenue.select_dtypes(include=['bool']).columns
for bool_column in bool_columns:
    df_revenue[bool_column] = df_revenue[bool_column].map({True: 1, False: 0})

# Rounded quarterly totals and dummies under their report-friendly names,
# then remove the source columns they were derived from.
df_revenue = df_revenue.assign(**{
    'Walmart US Sales revenue': df_revenue['wmt us net sales'].round(0).astype(int),
    'Walmart US Ecommerce revenue': df_revenue['wmt ecommerce revt'].round(0).astype(int),
    'Q2 dummy variable': df_revenue['qtr_2'],
    'Q3 dummy variable': df_revenue['qtr_3'],
    'Q4 dummy variable': df_revenue['qtr_4'],
}).drop(columns=['wmt us net sales', 'wmt ecommerce revt', 'qtr_2', 'qtr_3', 'qtr_4'])

df_revenue

Unnamed: 0,quarter,Time index,Walmart US Sales revenue,Walmart US Ecommerce revenue,Q2 dummy variable,Q3 dummy variable,Q4 dummy variable
0,21Q4,1,99585,1390,0,0,1
1,22Q1,2,93167,1130,0,0,0
2,22Q2,3,98192,1120,1,0,0
3,22Q3,4,96609,1110,0,1,0
4,22Q4,5,105279,1420,0,0,1
5,23Q1,6,96904,1140,0,0,0
6,23Q2,7,105130,1250,1,0,0
7,23Q3,8,104775,1290,0,1,0
8,23Q4,9,113744,1660,0,0,1
9,24Q1,10,103901,1450,0,0,0


In [72]:
from sklearn.linear_model import LinearRegression

# Predictors: linear time trend plus the three seasonal dummies.
trend_and_season = ['Time index', 'Q2 dummy variable', 'Q3 dummy variable', 'Q4 dummy variable']
X = df_revenue[trend_and_season]
y = df_revenue['Walmart US Sales revenue']

linreg = LinearRegression().fit(X, y)

fit_summary = (
    ("Intercept:", linreg.intercept_),
    ("Coefficients:", linreg.coef_),
    ("R^2:", linreg.score(X, y)),
)
for label, value in fit_summary:
    print(label, value)

# 26Q4 is one step after the row labelled 26Q3, so take that row's time index + 1.
is_26q3 = df_revenue['quarter'] == '26Q3'
time_index_26Q4 = df_revenue.loc[is_26q3, 'Time index'].iloc[0] + 1

# Single input row for 26Q4: only the Q4 dummy is switched on.
X_new = pd.DataFrame([[time_index_26Q4, 0, 0, 1]], columns=trend_and_season)

forecast_26Q4 = linreg.predict(X_new)
print("Forecasted Net Sales (Revenue - Total) for 2026 Q4:", forecast_26Q4[0])


Intercept: 88967.8125
Coefficients: [ 1399.31875  5726.48125  3511.5625  10393.11875]
R^2: 0.9879280101442904
Forecasted Net Sales (Revenue - Total) for 2026 Q4: 128746.625


In [78]:
import statsmodels.api as sm

# Columns shared by the three models below.
seasonal_trend_cols = ['Time index', 'Q2 dummy variable', 'Q3 dummy variable', 'Q4 dummy variable']
sales_revenue = df_revenue['Walmart US Sales revenue']
ecommerce_revenue = df_revenue['Walmart US Ecommerce revenue']

# --- Model 1: sales revenue regressed on e-commerce revenue (with intercept) ---
model_revenue_ts1 = sm.OLS(sales_revenue, sm.add_constant(ecommerce_revenue)).fit()
coef1, pval1, rsq1 = (
    model_revenue_ts1.params,
    model_revenue_ts1.pvalues,
    model_revenue_ts1.rsquared,
)

print("Coefficients:\n", coef1)
print("p-values:\n", pval1)
print("r-square:\n", rsq1)

# --- Model 2: sales revenue regressed on time trend + seasonal dummies ---
model_revenue_ts2 = sm.OLS(
    sales_revenue, sm.add_constant(df_revenue[seasonal_trend_cols])
).fit()
coef2, pval2, rsq2 = (
    model_revenue_ts2.params,
    model_revenue_ts2.pvalues,
    model_revenue_ts2.rsquared,
)
ci2 = model_revenue_ts2.conf_int(alpha=0.05)

print("Coefficients:\n", coef2)
print("P-values:\n", pval2)
print("95% CI:\n", ci2)
print("R-square:\n", rsq2)

# --- Model 3: e-commerce revenue regressed on time trend + seasonal dummies ---
# The design matrix and target stay bound to X_revenue_ts / y_revenue_ts afterwards.
X_revenue_ts = sm.add_constant(df_revenue[seasonal_trend_cols])
y_revenue_ts = ecommerce_revenue
model_revenue_ts3 = sm.OLS(y_revenue_ts, X_revenue_ts).fit()
coef3, pval3, rsq3 = (
    model_revenue_ts3.params,
    model_revenue_ts3.pvalues,
    model_revenue_ts3.rsquared,
)
ci3 = model_revenue_ts3.conf_int(alpha=0.05)

print("Coefficients:\n", coef3)
print("p-values:\n", pval3)
print("95% CI:\n", ci3)
print("r-square:\n", rsq3)

Coefficients:
 const                           77767.107840
Walmart US Ecommerce revenue       18.684466
dtype: float64
p-values:
 const                           8.082979e-16
Walmart US Ecommerce revenue    2.809342e-09
dtype: float64
r-square:
 0.8657122278056228
Coefficients:
 const                88967.81250
Time index            1399.31875
Q2 dummy variable     5726.48125
Q3 dummy variable     3511.56250
Q4 dummy variable    10393.11875
dtype: float64
P-values:
 const                1.372467e-24
Time index           2.892375e-15
Q2 dummy variable    6.107396e-07
Q3 dummy variable    1.539480e-04
Q4 dummy variable    2.077546e-10
dtype: float64
95% CI:
                               0             1
const              87570.098915  90365.526085
Time index          1306.752907   1491.884593
Q2 dummy variable   4242.537892   7210.424608
Q3 dummy variable   2018.983123   5004.141877
Q4 dummy variable   8909.175392  11877.062108
R-square:
 0.9879280101442905
Coefficients:
 const        

In [74]:
def regression_equation(model):
    """Render a fitted model as a one-line equation string.

    Reads ``model.params`` by position, ``model.model.endog_names`` for the
    left-hand side and ``model.model.exog_names`` for the term names. The first
    coefficient is written as the bare intercept (the first exog name is
    skipped, assumed to be 'const'); each remaining coefficient is appended as
    `` + (coef * name)`` with two decimals.
    """
    estimates = model.params
    pieces = [f"{model.model.endog_names} = {estimates.iloc[0]:.2f}"]
    pieces.extend(
        f"({estimates.iloc[position]:.2f} * {term})"
        for position, term in enumerate(model.model.exog_names[1:], start=1)
    )
    return " + ".join(pieces)

# Heading / fitted-model pairs, printed in order as "<heading> <equation>".
equation_reports = (
    ("Regression Equation (relationship between wmt revt and wmt ecom revt):\n", model_revenue_ts1),
    ("Regression Equation (time-series forecst of quarterly wmt revt):\n", model_revenue_ts2),
    ("Regression Equation (time-series forecst of quarterly wmt ecom revt):\n", model_revenue_ts3),
)
for heading, fitted_model in equation_reports:
    print(heading, regression_equation(fitted_model))

Regression Equation (relationship between wmt revt and wmt ecom revt):
 Walmart US Sales revenue = 77767.11 + (18.68 * Walmart US Ecommerce revenue)
Regression Equation (time-series forecst of quarterly wmt revt):
 Walmart US Sales revenue = 88967.81 + (1399.32 * Time index) + (5726.48 * Q2 dummy variable) + (3511.56 * Q3 dummy variable) + (10393.12 * Q4 dummy variable)
Regression Equation (time-series forecst of quarterly wmt ecom revt):
 Walmart US Ecommerce revenue = 800.25 + (72.37 * Time index) + (39.62 * Q2 dummy variable) + (17.25 * Q3 dummy variable) + (296.38 * Q4 dummy variable)


In [75]:
import pandas as pd
import statsmodels.api as sm
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import get_column_letter

# Helper function to run regression and extract results
def run_regression(X, y):
    """Fit an OLS model of ``y`` on ``X`` plus an intercept and tabulate the fit.

    Parameters
    ----------
    X : predictor column(s); a constant column is added via ``sm.add_constant``.
    y : dependent variable passed to ``sm.OLS``.

    Returns
    -------
    pandas.DataFrame
        One row per model term (indexed by term name) with the columns
        "Predictor", "Coefficient", "P-value" and the two 95% confidence
        bounds, all rounded to 3 decimals. "R-square" holds the model
        R-squared as a percentage string on the first row and None elsewhere.
    """
    X = sm.add_constant(X)
    model = sm.OLS(y, X).fit()
    # Compute the interval table once; column 0 feeds the low bound, column 1 the high bound.
    conf_bounds = model.conf_int(alpha=0.05)
    results = pd.DataFrame({
        "Predictor": model.params.index,
        "Coefficient": model.params.round(3),
        "P-value": model.pvalues.round(3),
        "95% Confidence Inteveral_low": conf_bounds[0].round(3),
        "95% Confidence Inteveral_high": conf_bounds[1].round(3)
    })
    results["R-square"] = None
    # The frame is indexed by term name, so `.loc[0, ...]` would append a brand-new
    # row labelled 0 (with NaN in every other column). Write into the first row
    # by position instead.
    results.iloc[0, results.columns.get_loc("R-square")] = f"{model.rsquared * 100:.3f}%"  # R-squared as percentage with 3 decimals
    return results

# --- Run the three models ---
trend_season_predictors = df_revenue[['Time index', 'Q2 dummy variable', 'Q3 dummy variable', 'Q4 dummy variable']]
sales_target = df_revenue['Walmart US Sales revenue']
ecommerce_series = df_revenue['Walmart US Ecommerce revenue']

# 1: sales revenue explained by e-commerce revenue
results1 = run_regression(ecommerce_series, sales_target)

# 2: sales revenue explained by time trend + seasonal dummies
results2 = run_regression(trend_season_predictors, sales_target)

# 3: e-commerce revenue explained by time trend + seasonal dummies
results3 = run_regression(trend_season_predictors, ecommerce_series)

# Combine into one Excel workbook with separate sheets.
# One path is used for writing, re-opening and saving, so the formatting step
# always works on the workbook that was just written (previously the writer
# targeted a different file name than the one loaded below).
regression_results_path = "02a_WMT-Regression-Results.xlsx"

with pd.ExcelWriter(regression_results_path, engine="openpyxl") as writer:
    results1.to_excel(writer, sheet_name="Quarterly_Revenue_Ecom", index=False)
    results2.to_excel(writer, sheet_name="Quarterly_Revenue_Time", index=False)
    results3.to_excel(writer, sheet_name="Quarterly_Ecom_Time", index=False)

# --- Professional formatting ---
wb = load_workbook(regression_results_path)
for sheet in wb.sheetnames:
    ws = wb[sheet]
    # Header formatting
    for cell in ws[1]:
        cell.font = Font(bold=True, name="Segoe UI", size=12)
    # Column width: longest rendered value in the column plus padding
    for col in ws.columns:
        max_len = max(len(str(cell.value)) for cell in col)
        ws.column_dimensions[get_column_letter(col[0].column)].width = max_len + 10
    # Highlight significant p-values: only column 3 (the "P-value" column) is visited
    for (pvalue_cell,) in ws.iter_rows(min_row=2, min_col=3, max_col=3):
        if isinstance(pvalue_cell.value, (float, int)) and pvalue_cell.value <= 0.05:
            pvalue_cell.fill = PatternFill(start_color="90EE90", end_color="90EE90", fill_type="solid")  # light green
wb.save(regression_results_path)
print('complete')

complete


In [77]:
import pandas as pd

# ----------------------------
# 1) Load regression results
# ----------------------------
file = "02a_WMT-Regression-Results.xlsx"

# One frame per time-series model sheet: sales revenue first, e-commerce second.
rev_time, ecom_time = (
    pd.read_excel(file, sheet_name=sheet_name)
    for sheet_name in ("Quarterly_Revenue_Time", "Quarterly_Ecom_Time")
)

# ----------------------------
# 2) Extract coefficients
# ----------------------------
def get_coef_dict(df):
    """Map each "Predictor" value in ``df`` to the "Coefficient" on the same row."""
    return {term: value for term, value in zip(df["Predictor"], df["Coefficient"])}

# Coefficient lookups for the sales-revenue and e-commerce time-series models.
coef_rev_q, coef_ecom_q = (
    get_coef_dict(model_table) for model_table in (rev_time, ecom_time)
)

# ----------------------------
# 3) Quarterly Forecast (next 12 quarters)
# ----------------------------
last_index = 20  # replace with actual max time_index from your df_revenue
future_idx = range(last_index + 1, last_index + 13)

# Remainder of the time index modulo 4 at which each seasonal dummy is switched on:
# remainder 3 -> Q2, remainder 0 -> Q3, remainder 1 -> Q4 (remainder 2 leaves all dummies at 0).
dummy_remainders = {
    "Q2 dummy variable": 3,
    "Q3 dummy variable": 0,
    "Q4 dummy variable": 1,
}
quarters = [
    {
        "Time index": idx,
        **{name: int(idx % 4 == remainder) for name, remainder in dummy_remainders.items()},
    }
    for idx in future_idx
]
future_q = pd.DataFrame(quarters)

def forecast(df, coef):
    """Evaluate the trend + seasonal-dummy equation for every row of ``df``.

    ``coef`` maps term names to coefficients; any missing term (including
    "const") counts as 0. Returns a list with one predicted value per row,
    in row order.
    """
    term_names = (
        "Time index",
        "Q2 dummy variable",
        "Q3 dummy variable",
        "Q4 dummy variable",
    )

    def predict_row(row):
        # Start at the intercept and add each term in a fixed left-to-right order.
        total = coef.get("const", 0)
        for term in term_names:
            total = total + coef.get(term, 0) * row[term]
        return total

    return [predict_row(row) for _, row in df.iterrows()]

# Add one forecast column per model, sales revenue first and e-commerce second.
for forecast_column, coefficients in (
    ("Walmart US Sales revenue Forecast", coef_rev_q),
    ("Walmart US Ecommerce revenue Forecast", coef_ecom_q),
):
    future_q[forecast_column] = forecast(future_q, coefficients)

# ----------------------------
# 3b) Add quarter_ labels
# ----------------------------
# Assume time_index 21 corresponds to 26Q4
# That means: time_index 21 → year 26, quarter 4
# General formula: n = 26*4 + (4 - 1) + (idx - 21); year = n // 4, quarter = (n % 4) + 1

def make_quarter_label(idx, base_index=21, base_year=26, base_quarter=4):
    """Return the "<year>Q<quarter>" label for time index ``idx``.

    ``base_index`` is the time index that corresponds to quarter
    ``base_quarter`` of ``base_year``; other indices are counted from there
    in whole quarters, rolling over into the next (or previous) year.
    """
    # Absolute quarter count, with the quarter number shifted to 0-3.
    absolute_quarter = base_year * 4 + (base_quarter - 1) + (idx - base_index)
    year, zero_based_quarter = divmod(absolute_quarter, 4)
    return f"{year}Q{zero_based_quarter + 1}"

# Label every forecast row with its "<year>Q<quarter>" string.
future_q["Quarter"] = future_q["Time index"].map(make_quarter_label)

# ----------------------------
# 4) Export forecast tables
# ----------------------------
with pd.ExcelWriter("02b_WMT-Forecast-Output.xlsx", engine="openpyxl") as writer:
    future_q.to_excel(writer, sheet_name="Quarterly_Forecast", index=False)

# --- Professional formatting ---
wb = load_workbook("02b_WMT-Forecast-Output.xlsx")
for sheet_name in wb.sheetnames:
    ws = wb[sheet_name]

    # Bold 12pt Segoe UI on the header row
    for header_cell in ws[1]:
        header_cell.font = Font(bold=True, name="Segoe UI", size=12)

    # Each column is as wide as its longest rendered value plus 10 characters of padding
    for column_cells in ws.columns:
        widest_value = max(len(str(entry.value)) for entry in column_cells)
        column_letter = get_column_letter(column_cells[0].column)
        ws.column_dimensions[column_letter].width = widest_value + 10

wb.save("02b_WMT-Forecast-Output.xlsx")
print("Forecasts saved to 02b_WMT-Forecast-Output.xlsx")


Forecasts saved to WMT_Forecast_Output.xlsx
