In [1]:
# %load improved_find_data_start_row\(file_path\).py
from openpyxl import load_workbook
import pandas as pd

def improved_find_data_start_row(file_path):
    """
    Improved version to find the data start row in an Excel file.
    This version scans the first sheet to find the likely start of the data based on a heuristic that considers
    both the number of non-empty cells in a row and the consistency of data types in the row.
    """
    workbook = load_workbook(filename=file_path, read_only=True)
    sheet = workbook.worksheets[0]  # Assuming data is in the first sheet
    
    min_filled_cells = 2  # Minimum number of non-empty cells to consider a row as potential data start
    likely_data_start_row = 0
    consecutive_data_like_rows = 0
    
    for i, row in enumerate(sheet.iter_rows(values_only=True)):
        # Count non-empty cells and types of data in the row
        non_empty_cells = [cell for cell in row if cell is not None]
        filled_cells = len(non_empty_cells)
        data_types = {type(cell) for cell in non_empty_cells}
        
        # Check for a row with enough filled cells and more than one type of data (mixed types suggest data row)
        if filled_cells >= min_filled_cells and len(data_types) > 1:
            consecutive_data_like_rows += 1
        else:
            consecutive_data_like_rows = 0  # Reset if the row doesn't seem like a data row
        
        # If we find 2 consecutive rows that look like data, it's likely the start of the data
        if consecutive_data_like_rows >= 2:
            likely_data_start_row = i - 2  # Adjusting for 0-based index and to include the first data-like row
            break
    
    return likely_data_start_row

def improved_load_excel_with_autodetect(file_path):
    """
    Load an Excel file into a pandas DataFrame, attempting to auto-detect the start of the actual data using
    an improved mechanism.
    """
    start_row = improved_find_data_start_row(file_path)
    df = pd.read_excel(file_path, sheet_name=0, skiprows=start_row)
    df.dropna(axis=1, how='all', inplace=True)
    return df

def load_all_sheets_with_data_start_detection(file_path):
    """
    Load all sheets from an Excel workbook, applying an improved mechanism to detect
    the start of actual data in each sheet.
    
    :param file_path: Path to the Excel workbook.
    :return: A dictionary of DataFrames, one for each sheet, with data start auto-detected.
    """
    # Load all sheets into a dictionary of DataFrames
    all_sheets = pd.read_excel(file_path, sheet_name=None, header=None)
    
    # Apply the improved data start detection mechanism to each sheet
    for sheet_name, df in all_sheets.items():
        # Find the likely data start row using the improved mechanism
        start_row = improved_find_data_start_row(file_path)
        # Reload the sheet with detected start row, if there is meaningful data to skip
        if start_row > 0:
            all_sheets[sheet_name] = pd.read_excel(file_path, sheet_name=sheet_name, skiprows=start_row)
    
    return all_sheets


In [2]:
financial_data=improved_load_excel_with_autodetect("/Users/myself/Desktop/Walmart USA Serching for Growth/walmartHistoricalFinancials.xlsx")
financial_data=financial_data.dropna()
financial_data=financial_data.fillna(0)

In [3]:
# Correctly identify and assign unique years to each financial data column
# Assuming the first column after 'Financial Metric' is the most recent year (2021) and decrement for each column after

number_of_years = financial_data.shape[1] - 1  # Total columns minus the 'Financial Metric' column
base_year = 2021
years = [str(base_year - i) for i in range(number_of_years)]

# Map the new year labels to the columns
financial_data.columns = ['Financial Metric'] + years

In [4]:
import numpy as np

financial_data['Financial Metric']=financial_data['Financial Metric'].str.replace('∟', '', regex=True)
financial_data.replace('\xa0', '', regex=True, inplace=True)
financial_data.replace('n.a.', np.nan, inplace=True)

financial_data=financial_data.dropna(thresh=3)

In [5]:
df = financial_data.copy()

if df.index.name != 'Financial Metric':
    dfcopy=df
    df.set_index('Financial Metric', inplace=True)  # Setting 'Financial Metric' as index

df.index=df.index.str.strip()

# Determine categories based on keyword presence or row structure
assets = [metric for metric in df.index if "asset" in metric.lower()]
liabilities_and_equity = [metric for metric in df.index if "liabilit" in metric.lower() or "equity" in metric.lower() or "fund" in metric.lower()]
p_and_l = [metric for metric in df.index if metric not in assets and metric not in liabilities_and_equity]

# Example to show categorized lists
print("Assets:", assets)
print("Liabilities & Equity:", liabilities_and_equity)
print("P&L Items:", p_and_l)

Assets: ['Non-current assets', 'Intangible assets', 'Tangible fixed assets', 'Other non-current assets', 'Current assets', 'Other current assets', 'Total assets', 'Net current assets']
Liabilities & Equity: ['Shareholders funds', 'Other shareholders funds', 'Non-current liabilities', 'Other non-current liabilities', 'Current liabilities', 'Other current liabilities', "Total shareholders' funds and liabilities"]
P&L Items: ['Stock', 'Debtors', 'Of which cash and cash equivalent', 'Capital', 'Long term debt', 'Loans & short-term debt', 'Creditors', 'Working capital', 'Enterprise value', 'Number of employees', 'Operating revenue (Turnover)', 'Sales', 'Costs of goods sold', 'Gross profit', 'Other operating expense (income)', 'Operating profit (loss) [EBIT]', 'Financial profit (loss)', 'Financial revenue', 'Financial expenses', 'Profit (loss) before tax [PBT]', 'Income tax expenses (benefit)', 'Profit (loss) after tax [PAT]', 'Net extraordinary revenues (expenses)', 'Profit (loss) for the p

In [6]:
# Create a base mapping dictionary
base_mapping = {}

# Map assets to 'Total assets'
for item in assets:
    base_mapping[item] = 'Total assets'

# Map liabilities and equity to 'Total shareholders' funds and liabilities'
for item in liabilities_and_equity:
    base_mapping[item] = 'Total shareholders\' funds and liabilities'

# Map P&L items to 'Sales' (this could be adjusted to 'Operating revenue' if more appropriate)
for item in p_and_l:
    if 'Revenue' in item or 'Sales' in item:
        base_mapping[item] = 'Sales'
    else:
        base_mapping[item] = 'Sales'  # or 'Total operating revenue' if defined in your dataset

# Print or return the base mapping
base_mapping.items()

dict_items([('Non-current assets', 'Total assets'), ('Intangible assets', 'Total assets'), ('Tangible fixed assets', 'Total assets'), ('Other non-current assets', 'Total assets'), ('Current assets', 'Total assets'), ('Other current assets', 'Total assets'), ('Total assets', 'Total assets'), ('Net current assets', 'Total assets'), ('Shareholders funds', "Total shareholders' funds and liabilities"), ('Other shareholders funds', "Total shareholders' funds and liabilities"), ('Non-current liabilities', "Total shareholders' funds and liabilities"), ('Other non-current liabilities', "Total shareholders' funds and liabilities"), ('Current liabilities', "Total shareholders' funds and liabilities"), ('Other current liabilities', "Total shareholders' funds and liabilities"), ("Total shareholders' funds and liabilities", "Total shareholders' funds and liabilities"), ('Stock', 'Sales'), ('Debtors', 'Sales'), ('Of which cash and cash equivalent', 'Sales'), ('Capital', 'Sales'), ('Long term debt', '

In [7]:
df.index.name
df.columns

Index(['2021', '2020', '2019', '2018', '2017', '2016', '2015', '2014', '2013',
       '2012', '2011', '2010', '2009', '2008', '2007', '2006', '2005', '2004',
       '2003', '2002', '2001', '2000', '1999', '1998', '1997', '1996', '1995'],
      dtype='object')

## Forecast Base Financial Metrics with Assumed YOY Rates

In [8]:
# Define the growth assumptions based on the details provided earlier
assumptions = {
    'Sales': {'type': 'YOY', 'rates': [0.0814, 0.0392, 0.0261, 0.0244, 0.0237, 0.0238]},
    'Costs of goods sold': {'type': '% of revenue', 'rates': [0.7541, 0.7513, 0.7513, 0.7513, 0.7513, 0.7513]},
    'Gross profit': {'type': 'direct', 'rates': None},  # Calculated directly as difference
    'Operating and SG&A costs': {'type': '% of sales', 'rates': [0.1863, 0.1834, 0.1834, 0.1834, 0.1834, 0.1834]}
}

# Base year for forecast
base_year = '2021'
forecast_years = ['2022F','2023F', '2024F', '2025F', '2026F', '2027F']

# Initialize a DataFrame to hold the forecasted values
yoy_forecast_df = pd.DataFrame(index=df.index, columns=forecast_years)

# Iterate over each forecast year and apply the assumptions to calculate forecast values
for idx, year in enumerate(forecast_years):
    if 'Sales' in assumptions:
        if idx == 0:  # First forecast year, base it on the last historical year
            last_sales = df.loc[df.index.str.contains("Sales"), base_year].values[0]
        growth_rate = assumptions['Sales']['rates'][idx]
        forecast_sales = last_sales * (1 + growth_rate)
        yoy_forecast_df.loc['Sales', year] = forecast_sales
        last_sales = forecast_sales  # Update for next year's calculation

    if 'Costs of goods sold' in assumptions:
        cost_rate = assumptions['Costs of goods sold']['rates'][idx]
        forecast_costs = forecast_sales * cost_rate
        yoy_forecast_df.loc['Costs of goods sold', year] = forecast_costs

    if 'Gross profit' in assumptions:
        forecast_gross_profit = forecast_sales - forecast_costs
        yoy_forecast_df.loc['Gross profit', year] = forecast_gross_profit

    if 'Operating and SG&A costs' in assumptions:
        sgna_rate = assumptions['Operating and SG&A costs']['rates'][idx]
        forecast_sgna = forecast_sales * sgna_rate
        yoy_forecast_df.loc['Operating and SG&A costs', year] = forecast_sgna

yoy_forecast_df = yoy_forecast_df / 1e3
# Calculate CAGR for the forecast period

yoy_cagr_df = (yoy_forecast_df[forecast_years].astype(float).iloc[:, -1] / yoy_forecast_df[forecast_years].astype(float).iloc[:, 0]) ** (1 / (len(forecast_years) - 1)) - 1
yoy_forecast_df.loc[:, 'CAGR'] = yoy_cagr_df*100


# Display the forecast results
yoy_forecast_df






Unnamed: 0_level_0,2022F,2023F,2024F,2025F,2026F,2027F,CAGR
Financial Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Non-current assets,,,,,,,
Intangible assets,,,,,,,
Tangible fixed assets,,,,,,,
Other non-current assets,,,,,,,
Current assets,,,,,,,
Stock,,,,,,,
Debtors,,,,,,,
Other current assets,,,,,,,
Of which cash and cash equivalent,,,,,,,
Total assets,,,,,,,


In [9]:
# # # Define growth assumptions based on the details provided earlier
# assumptions = {
#     'Sales': {'type': 'YOY', 'rates': [0.0814, 0.0392, 0.0261, 0.0244, 0.0237, 0.0238]},
#     'Costs of goods sold': {'type': '% of revenue', 'rates': [0.7541, 0.7513, 0.7513, 0.7513, 0.7513, 0.7513]},
#     'Gross profit': {'type': 'direct', 'rates': None},  # Calculated directly as difference
#     'Operating and SG&A costs': {'type': '% of sales', 'rates': [0.1863, 0.1834, 0.1834, 0.1834, 0.1834, 0.1834]}
# }

# # # # Calculate future bases from assumptions
# # base_year = '2021'
# # forecast_years = ['2022F', '2023F', '2024F', '2025F', '2026F', '2027F']
# # last_values = df[base_year]

# # for year in forecast_years:
# #     for metric, config in assumptions.items():
# #         if config['type'] == 'YOY':
# #             growth_rate = config['rates'][int(year[:4]) - 2022]
# #             forecast_value = last_values[metric] * (1 + growth_rate)
# #         elif config['type'] == '% of revenue':
# #             revenue_based_rate = config['rates'][int(year[:4]) - 2022]
# #             forecast_value = forecast_df.loc['Sales', year] * revenue_based_rate
# #         elif config['type'] == 'direct':
# #             forecast_value = forecast_df.loc['Sales', year] - forecast_df.loc['Costs of goods sold', year]
# #         forecast_df.loc[metric, year] = forecast_value
# #         last_values[metric] = forecast_value


## Handle NaNs

In [10]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler


if 'Costs of employees' not in df.columns: df = df.T
# Assuming df is your DataFrame loaded with data

# Identify columns with missing values
cols_with_missing = df.columns[df.isnull().any()].tolist()

# Decide on an imputation strategy for each column
# Example: Use mean for numerical data and a model for 'Costs of employees'
imputation_strategies = {
    col: ('model' if df[col].dtype.kind in 'biufc' else 'most_frequent')
    for col in cols_with_missing if col != 'Costs of employees'
}
imputation_strategies['Costs of employees'] = 'model'  # Deciding to use a model for 'Costs of employees'

# Apply imputation or model prediction
for col, strategy in imputation_strategies.items():
    if strategy != 'model':
        # Simple imputation
        imputer = SimpleImputer(strategy=strategy)
        df[col] = imputer.fit_transform(df[[col]])
    else:
        # Setup for predictive modeling
        # Assuming you've already identified features to use
        features = df.columns.difference(cols_with_missing).tolist()
        train_data = df.dropna(subset=[col] + features)
        target = train_data[col]
        train_features = train_data[features]

        # Scaling features
        scaler = StandardScaler()
        train_features_scaled = scaler.fit_transform(train_features)
        
        # Model fitting
        model = RandomForestRegressor(random_state=0)
        model.fit(train_features_scaled, target)
        
        # Predicting missing values
        test_features = df.loc[df[col].isnull(), features]
        test_features_scaled = scaler.transform(test_features)
        predicted_values = model.predict(test_features_scaled)
        
        # Fill in the missing values
        df.loc[df[col].isnull(), col] = predicted_values

print("Missing values handled for columns:", cols_with_missing)
print(df.info())


Missing values handled for columns: ['Enterprise value', 'Financial revenue', 'Costs of employees', 'Added value']
<class 'pandas.core.frame.DataFrame'>
Index: 27 entries, 2021 to 1995
Data columns (total 46 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   Non-current assets                         27 non-null     float64
 1   Intangible assets                          27 non-null     float64
 2   Tangible fixed assets                      27 non-null     float64
 3   Other non-current assets                   27 non-null     float64
 4   Current assets                             27 non-null     float64
 5   Stock                                      27 non-null     float64
 6   Debtors                                    27 non-null     float64
 7   Other current assets                       27 non-null     float64
 8   Of which cash and cash equivalent          27 non-null   

In [11]:
df.columns

Index(['Non-current assets', 'Intangible assets', 'Tangible fixed assets',
       'Other non-current assets', 'Current assets', 'Stock', 'Debtors',
       'Other current assets', 'Of which cash and cash equivalent',
       'Total assets', 'Shareholders funds', 'Capital',
       'Other shareholders funds', 'Non-current liabilities', 'Long term debt',
       'Other non-current liabilities', 'Current liabilities',
       'Loans & short-term debt', 'Creditors', 'Other current liabilities',
       'Total shareholders' funds and liabilities', 'Working capital',
       'Net current assets', 'Enterprise value', 'Number of employees',
       'Operating revenue (Turnover)', 'Sales', 'Costs of goods sold',
       'Gross profit', 'Other operating expense (income)',
       'Operating profit (loss) [EBIT]', 'Financial profit (loss)',
       'Financial revenue', 'Financial expenses',
       'Profit (loss) before tax [PBT]', 'Income tax expenses (benefit)',
       'Profit (loss) after tax [PAT]',
    

## Forecast all values on Historical Data

In [12]:
if '2021' not in df.columns: df = df.T
from sklearn.linear_model import LinearRegression

# Forecast years and base year definition
forecast_years = ['2022F', '2023F', '2024F', '2025F', '2026F', '2027F']
historical_years = df.columns[df.columns.str.isnumeric()]

# Forecast function
def forecast_metric(values, years, forecast_years):
    years_reshaped = years.values.reshape(-1, 1)
    values_reshaped = values.values.reshape(-1, 1)
    model = LinearRegression()
    model.fit(years_reshaped, values_reshaped)
    future_years = pd.Series(forecast_years).str[:-1].astype(int).values.reshape(-1, 1)
    predictions = model.predict(future_years).flatten()
    return pd.Series(predictions, index=forecast_years)

# Apply forecasting
historical_regression_forecast = df.apply(lambda x: forecast_metric(x, historical_years, forecast_years), axis=1) / 1e3
historical_cagr_values = (historical_regression_forecast[forecast_years].astype(float).iloc[:, -1] / historical_regression_forecast[forecast_years].astype(float).iloc[:, 0]) ** (1 / (len(forecast_years) - 1)) - 1
historical_regression_forecast.loc[:, 'CAGR'] = historical_cagr_values*100


# Print results
historical_regression_forecast

Unnamed: 0_level_0,2022F,2023F,2024F,2025F,2026F,2027F,CAGR
Financial Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Non-current assets,185282.94302,191509.245828,197735.548637,203961.851445,210188.154253,216414.457061,3.154972
Intangible assets,28450.982906,29500.434066,30549.885226,31599.336386,32648.787546,33698.238706,3.443209
Tangible fixed assets,143498.880342,148108.006716,152717.133089,157326.259463,161935.385836,166544.51221,3.023515
Other non-current assets,13333.079772,13900.805047,14468.530322,15036.255596,15603.980871,16171.706146,3.935778
Current assets,76152.541311,78437.267806,80721.994302,83006.720798,85291.447293,87576.173789,2.834849
Stock,51244.769231,52608.736874,53972.704518,55336.672161,56700.639805,58064.607448,2.530338
Debtors,7464.635328,7736.844729,8009.054131,8281.263533,8553.472934,8825.682336,3.406524
Other current assets,17443.136752,18091.686203,18740.235653,19388.785104,20037.334554,20685.884005,3.468916
Of which cash and cash equivalent,11307.384615,11716.864469,12126.344322,12535.824176,12945.304029,13354.783883,3.384383
Total assets,261435.48433,269946.513635,278457.542939,286968.572243,295479.601547,303990.630851,3.062135


## Forecast Using Ratios of Non Base Financial Metrics to Base Metrics

In [13]:
# from sklearn.linear_model import LinearRegression
# import numpy as np
# #Assuming 'financial_data' is your DataFrame
# if df.index.name != 'Financial Metric':
#     dfcopy=df
#     df.set_index('Financial Metric', inplace=True)  # Setting 'Financial Metric' as index

# # Calculate historical ratios
# historical_ratios = {}
# for metric, base in base_mapping.items():
#     print(metric,base)
#     if metric in df.index and base in df.index:
#         historical_ratios[metric] = df.loc[metric] / df.loc[base]
# # Example of performing linear regression on one of the metrics

# print(historical_ratios)
# model = LinearRegression()
# if 'Operating profit (loss) [EBIT]' in historical_ratios:
#     ratios = historical_ratios['Operating profit (loss) [EBIT]'].dropna()  # Drop NaN to avoid fitting errors
#     years = np.array([int(year) for year in ratios.index]).reshape(-1, 1)
#     model.fit(years, ratios.values.reshape(-1, 1))
#     # Predict for a future year, e.g., 2022
#     future_year = np.array([[2022]])
#     forecast_ratio = model.predict(future_year)
#     print(f"Forecasted Ratio for 2022: {forecast_ratio[0][0]}")

# # Use similar logic for other metrics


In [14]:

# Now apply these dynamic bases to your regression forecasts
future_bases = {
    'Sales': historical_regression_forecast.loc['Sales'],
    'Total assets': historical_regression_forecast.loc['Total assets']  # Assuming 'Total assets' forecast is calculated similarly
}
print(future_bases)
#Calculate historical ratios for regression
historical_ratios = {}
for metric, base in base_mapping.items():
    historical_ratios[metric] = df.loc[metric] / df.loc[base]

historical_ratios=pd.DataFrame.from_dict(historical_ratios)
historical_ratios=historical_ratios.dropna(axis='columns')

#for metric, ratios in historical_ratios.items():
#    print(ratios.index.tolist(), ratios.values.tolist())

# Perform regression and forecast future ratios
forecasts = {}
projected_ratios = {}

yeardf = pd.DataFrame(df.columns)


for metric, ratios in historical_ratios.items():
    model = LinearRegression()
    X = yeardf.values.reshape(-1, 1)  # Ensure X is correctly shaped
    y = ratios

    # Fit the linear model
    model.fit(X, y)
    historical_signs = np.sign(ratios).tolist()
    last_valid_value = ratios[0]# Start with the last historical value

    # Create X and y for model fitting
    for year in forecast_years:
        projected_ratio = model.predict(np.array([[int(year[:-1])]]))[0]
         # Forecast each future year
        projected_sign = np.sign(projected_ratio)
        # Check if the projected sign is not in historical signs
        if projected_sign not in historical_signs:
            #print(metric, projected_sign, historical_signs, int(year[:-1]))
            # If mismatch, use the last valid value
            projected_ratio = last_valid_value
            #print(f"Sign mismatch for {metric} in {year}; using last valid value {last_valid_value}")
        else:
            # If match, update last valid value
            last_valid_value = projected_ratio

        # Store the forecasts
        forecasts[metric, year] = projected_ratio * future_bases[base][year]
        projected_ratios[metric, year] = projected_ratio

historical_ratios['Capital']

{'Sales': 2022F    606581.823362
2023F    625840.744607
2024F    645099.665853
2025F    664358.587098
2026F    683617.508344
2027F    702876.429589
CAGR          2.990678
Name: Sales, dtype: float64, 'Total assets': 2022F    261435.484330
2023F    269946.513635
2024F    278457.542939
2025F    286968.572243
2026F    295479.601547
2027F    303990.630851
CAGR          3.062135
Name: Total assets, dtype: float64}


2021    0.000504
2020    0.000542
2019    0.000560
2018    0.000590
2017    0.000628
2016    0.000657
2015    0.000665
2014    0.000678
2013    0.000708
2012    0.000766
2011    0.000834
2010    0.000926
2009    0.000972
2008    0.001053
2007    0.001186
2006    0.001336
2005    0.001488
2004    0.001666
2003    0.001900
2002    0.002162
2001    0.002315
2000    0.002674
1999    0.003197
1998    0.001878
1997    0.002147
1996    0.002416
1995    0.002757
Name: Capital, dtype: float64

In [15]:
# Process the dictionary to fit DataFrame constructor
# Create an empty list to hold the data
processed_data = []

# Iterate through the dictionary to populate the list
for (metric, year), value in projected_ratios.items():
    processed_data.append({
        'Financial Metric': metric,
        'Year': year,
        'Value': value  # Convert numpy array to a scalar
    })

forecasts_data = []
for (metric, year), value in forecasts.items():
    forecasts_data.append({
        'Financial Metric': metric,
        'Year': year,
        'Value': value  # Convert numpy array to a scalar
    })





# Create a DataFrame from the list


ratio_regression_forecast = pd.DataFrame(forecasts_data)
ratio_regression_percentages = pd.DataFrame(processed_data)


# Pivot the DataFrame to get years as columns


result_ratio_regression_forecast = ratio_regression_forecast.pivot(index='Financial Metric', columns='Year', values='Value')
cagr_ratio_regression_forecast = (result_ratio_regression_forecast[forecast_years].astype(float).iloc[:, -1] / result_ratio_regression_forecast[forecast_years].astype(float).iloc[:, 0]) ** (1 / (len(forecast_years) - 1)) - 1
result_ratio_regression_forecast.loc[:, 'CAGR'] = cagr_ratio_regression_forecast * 100

result_ratio_regression_percentages = ratio_regression_percentages.pivot(index='Financial Metric', columns='Year', values='Value')*100



# Display the resulting DataFrame

result_ratio_regression_forecast.loc['Sales'], result_ratio_regression_percentages.loc['Costs of goods sold']

(Year
 2022F    606581.823362
 2023F    625840.744607
 2024F    645099.665853
 2025F    664358.587098
 2026F    683617.508344
 2027F    702876.429589
 CAGR          2.990678
 Name: Sales, dtype: float64,
 Year
 2022F    71.680104
 2023F    71.478386
 2024F    71.276669
 2025F    71.074951
 2026F    70.873233
 2027F    70.671516
 Name: Costs of goods sold, dtype: float64)

In [16]:
# output_file_path = '/Users/myself/Desktop/Walmart USA Serching for Growth/BaselineForecasts.xlsx'




# yoy_cagr_df = (yoy_forecast_df[forecast_years].astype(float).iloc[:, -1] / yoy_forecast_df[forecast_years].astype(float).iloc[:, 0]) ** (1 / (len(forecast_years) - 1)) - 1
# yoy_forecast_df=yoy_forecast_df.astype(float).round(0)
# yoy_forecast_df.loc[:, 'CAGR'] = yoy_cagr_df.round(4)*100

# historical_cagr_values = (historical_regression_forecast[forecast_years].astype(float).iloc[:, -1] / historical_regression_forecast[forecast_years].astype(float).iloc[:, 0]) ** (1 / (len(forecast_years) - 1)) - 1
# historical_regression_forecast=historical_regression_forecast.astype(float).round(0)
# historical_regression_forecast.loc[:, 'CAGR'] = historical_cagr_values.round(4)*100

# cagr_ratio_regression_forecast = (result_ratio_regression_forecast[forecast_years].astype(float).iloc[:, -1] / result_ratio_regression_forecast[forecast_years].astype(float).iloc[:, 0]) ** (1 / (len(forecast_years) - 1)) - 1
# result_ratio_regression_forecast=result_ratio_regression_forecast.astype(float).round(0)
# result_ratio_regression_forecast.loc[:, 'CAGR'] = cagr_ratio_regression_forecast.round(4) * 100

# result_ratio_regression_percentages=result_ratio_regression_percentages.astype(float).round(4)

# with pd.ExcelWriter(output_file_path) as writer:
#     yoy_forecast_df.to_excel(writer, sheet_name='CAGR Raw')
#     historical_regression_forecast.to_excel(writer, sheet_name='Historical Regression')
#     result_ratio_regression_forecast.to_excel(writer, sheet_name='Historical Base Val Reg')
#     result_ratio_regression_percentages.to_excel(writer, sheet_name='Historical Base Rat Reg')

# print("File printed to: " + output_file_path)

# yoy_forecast_df

File printed to: /Users/myself/Desktop/Walmart USA Serching for Growth/BaselineForecasts.xlsx


Unnamed: 0_level_0,2022F,2023F,2024F,2025F,2026F,2027F,CAGR
Financial Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Non-current assets,,,,,,,
Intangible assets,,,,,,,
Tangible fixed assets,,,,,,,
Other non-current assets,,,,,,,
Current assets,,,,,,,
Stock,,,,,,,
Debtors,,,,,,,
Other current assets,,,,,,,
Of which cash and cash equivalent,,,,,,,
Total assets,,,,,,,
