In [2]:
import pandas as pd

In [3]:
# Forward slashes work on Windows and POSIX alike; the previous backslash path
# relied on invalid escape sequences ("\d", "\e") and only resolved on Windows.
df_Trends = pd.read_csv("../data/external/EcommerceTrends/ecommerce_product_dataset2.csv")

In [4]:
df_Trends.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   ProductID                          1000 non-null   int64  
 1   ProductName                        1000 non-null   object 
 2   Category                           1000 non-null   object 
 3   Price                              1000 non-null   float64
 4   Rating                             1000 non-null   float64
 5   NumReviews                         1000 non-null   int64  
 6   StockQuantity                      1000 non-null   int64  
 7   Discount                           1000 non-null   float64
 8   Sales                              1000 non-null   int64  
 9   DateAdded                          1000 non-null   object 
 10  City                               1000 non-null   object 
 11  Stock In Vast or Limited Quantity  1000 non-null   bool  

In [5]:
df_Trends.head(1)

Unnamed: 0,ProductID,ProductName,Category,Price,Rating,NumReviews,StockQuantity,Discount,Sales,DateAdded,City,Stock In Vast or Limited Quantity
0,1,Headphones,Electronics,400.31,1.7,3772,20,0.08,466,11/10/2023,Albuquerque,True


In [6]:
## Importing next dataset

In [7]:
# Forward slashes work on Windows and POSIX alike; the previous backslash path
# relied on invalid escape sequences ("\d", "\e", "\C") and only resolved on Windows.
df_customerSegmentation = pd.read_csv("../data/external/Customer Segmentation/customer_segmentation_data.csv")

In [8]:
df_customerSegmentation.head(2)

Unnamed: 0,id,age,gender,income,spending_score,membership_years,purchase_frequency,preferred_category,last_purchase_amount
0,1,38,Female,99342,90,3,24,Groceries,113.53
1,2,21,Female,78852,60,2,42,Sports,41.93


In [9]:
df_customerSegmentation.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    1000 non-null   int64  
 1   age                   1000 non-null   int64  
 2   gender                1000 non-null   object 
 3   income                1000 non-null   int64  
 4   spending_score        1000 non-null   int64  
 5   membership_years      1000 non-null   int64  
 6   purchase_frequency    1000 non-null   int64  
 7   preferred_category    1000 non-null   object 
 8   last_purchase_amount  1000 non-null   float64
dtypes: float64(1), int64(6), object(2)
memory usage: 70.4+ KB


In [10]:
## Importing next dataset

In [11]:
# Forward slashes work on Windows and POSIX alike; the previous backslash path
# relied on invalid escape sequences ("\d", "\e", "\W") and only resolved on Windows.
df_WallmartSales = pd.read_csv("../data/external/Walmart Sales/Walmart_Sales.csv")

In [12]:
df_WallmartSales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6435 entries, 0 to 6434
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Store         6435 non-null   int64  
 1   Date          6435 non-null   object 
 2   Weekly_Sales  6435 non-null   float64
 3   Holiday_Flag  6435 non-null   int64  
 4   Temperature   6435 non-null   float64
 5   Fuel_Price    6435 non-null   float64
 6   CPI           6435 non-null   float64
 7   Unemployment  6435 non-null   float64
dtypes: float64(5), int64(2), object(1)
memory usage: 402.3+ KB


In [13]:
df_WallmartSales.head(1)

Unnamed: 0,Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment
0,1,05-02-2010,1643690.9,0,42.31,2.572,211.096358,8.106


In [14]:
## Data augmentation

In [15]:
import numpy as np
from datetime import datetime, timedelta

In [16]:
def augment_trends(df, reference_date=None):
    """Add trend, velocity, restock and margin features to the product frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Product data containing the columns ``City``, ``DateAdded``, ``Sales``,
        ``NumReviews``, ``Rating``, ``StockQuantity``, ``Price`` and ``Discount``.
    reference_date : datetime-like, optional
        The "as of" date used to compute ``DaysSinceAdded``. Defaults to the
        current local time, which makes the result depend on when the cell is
        run; pass a fixed date for reproducible output.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) without ``City`` and with the
        added columns ``DaysSinceAdded``, ``TrendScore``, ``SalesVelocity``,
        ``RestockUrgency``, ``EstimatedCost`` and ``ProfitMargin``.
    """
    as_of = pd.Timestamp.now() if reference_date is None else pd.Timestamp(reference_date)

    # drop() returns a new frame, so the assignments below never touch the caller's data.
    df = df.drop(['City'], axis=1)

    df['DateAdded'] = pd.to_datetime(df['DateAdded'])
    df['DaysSinceAdded'] = (as_of - df['DateAdded']).dt.days

    # Weighted blend of popularity signals minus an age penalty, capped to 0..100.
    df['TrendScore'] = (df['Sales'] * 0.4 +
                        df['NumReviews'] * 0.3 +
                        df['Rating'] * 20 +
                        df['DaysSinceAdded'] * -0.1)
    df['TrendScore'] = df['TrendScore'].clip(0, 100)

    # clip(1) guards the divisions against zero (or negative) denominators.
    df['SalesVelocity'] = df['Sales'] / df['DaysSinceAdded'].clip(1)
    df['RestockUrgency'] = ((1 - df['StockQuantity'] / df['Sales'].clip(1)) *
                            df['SalesVelocity'] * 10)
    df['RestockUrgency'] = df['RestockUrgency'].clip(0, 10)

    # Cost is assumed to be 70% of the discounted price (i.e. a 30% margin on it).
    df['EstimatedCost'] = df['Price'] * (1 - df['Discount']) * 0.7
    # NOTE(review): margin is measured against the undiscounted list price, so it
    # grows with the discount, and a zero Price yields NaN - confirm this is intended.
    df['ProfitMargin'] = (df['Price'] - df['EstimatedCost']) / df['Price']

    return df

In [17]:
def augment_customer_segmentation(df, random_state=None):
    """Add value, grouping and loyalty features to the customer frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Customer data containing ``spending_score``, ``purchase_frequency``,
        ``membership_years``, ``income`` and ``age``.
    random_state : int, optional
        Seed for the synthetic ``LastPurchaseRecency`` column. When omitted the
        global NumPy random state is used, as before.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` (the input is not modified) with the added columns
        ``CustomerValue``, ``AgeGroup``, ``IncomeGroup``, ``LoyaltyScore`` and
        ``LastPurchaseRecency``.
    """
    # Work on a copy so re-running the cell never sees a half-augmented input.
    df = df.copy()

    df['CustomerValue'] = (df['spending_score'] * 0.4 +
                           df['purchase_frequency'] * 10 +
                           df['membership_years'] * 5 +
                           df['income'] * 0.0001)

    # Left-closed bins so the edges agree with the labels: 18 belongs to '18-30'
    # (not '<18'), 31 to '31-45', and any age above 60 falls into '60+'.
    df['AgeGroup'] = pd.cut(df['age'], bins=[0, 18, 31, 46, 61, float('inf')],
                            labels=['<18', '18-30', '31-45', '46-60', '60+'],
                            right=False)

    df['IncomeGroup'] = pd.qcut(df['income'], q=5,
                                labels=['Very Low', 'Low', 'Medium', 'High', 'Very High'])

    df['LoyaltyScore'] = df['membership_years'] * df['purchase_frequency']

    # Synthetic placeholder: uniformly random day count in 1..364 (upper bound exclusive).
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    df['LastPurchaseRecency'] = rng.randint(1, 365, size=len(df))

    return df

In [18]:
def augment_walmart_sales(df):
    """Add calendar, seasonality, rolling and economic features to the sales frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Weekly sales data containing ``Store``, ``Date`` (``dd-mm-YYYY``),
        ``Weekly_Sales``, ``CPI``, ``Unemployment`` and ``Temperature``.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) sorted by ``Store`` and ``Date``
        with the added columns ``Year``, ``Month``, ``SeasonalityIndex``,
        ``RollingAvgSales``, ``SalesGrowthRate``, ``EconomicIndex`` and
        ``TempCategory``.
    """
    # Copy first: the original wrote Date/Year/Month/SeasonalityIndex into the
    # caller's frame before sort_values() produced a separate object.
    df = df.copy()

    df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y')

    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month

    # Each week's sales relative to the mean of the same store and calendar month.
    df['SeasonalityIndex'] = df.groupby(['Store', 'Month'])['Weekly_Sales'].transform(
        lambda x: x / x.mean())

    # Rolling and growth features need chronological order within each store.
    df = df.sort_values(['Store', 'Date'])
    # The first three weeks of every store have no full 4-week window and stay NaN.
    df['RollingAvgSales'] = df.groupby('Store')['Weekly_Sales'].rolling(window=4).mean().reset_index(0, drop=True)

    # The first week of every store has no predecessor and stays NaN.
    df['SalesGrowthRate'] = df.groupby('Store')['Weekly_Sales'].pct_change()

    df['EconomicIndex'] = (df['CPI'] / df['CPI'].mean()) * (1 - df['Unemployment'] / 100)

    # Bucket the temperature; bins are right-inclusive (e.g. exactly 32 is 'Freezing').
    df['TempCategory'] = pd.cut(df['Temperature'], bins=[-float('inf'), 32, 50, 70, 90, float('inf')],
                                labels=['Freezing', 'Cold', 'Moderate', 'Warm', 'Hot'])

    return df

In [19]:
# -------------------- #

In [20]:
# Apply each augmentation function to its corresponding raw frame.
df_Trends_augmented = augment_trends(df_Trends)
df_customerSegmentation_augmented = augment_customer_segmentation(df_customerSegmentation)
df_WallmartSales_augmented = augment_walmart_sales(df_WallmartSales)

In [21]:
print("Head of df_Trends_augmented:")
df_Trends_augmented.info()
df_Trends_augmented.head(1)



Head of df_Trends_augmented:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 17 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   ProductID                          1000 non-null   int64         
 1   ProductName                        1000 non-null   object        
 2   Category                           1000 non-null   object        
 3   Price                              1000 non-null   float64       
 4   Rating                             1000 non-null   float64       
 5   NumReviews                         1000 non-null   int64         
 6   StockQuantity                      1000 non-null   int64         
 7   Discount                           1000 non-null   float64       
 8   Sales                              1000 non-null   int64         
 9   DateAdded                          1000 non-null   datetime64[ns]
 10  Stock In

Unnamed: 0,ProductID,ProductName,Category,Price,Rating,NumReviews,StockQuantity,Discount,Sales,DateAdded,Stock In Vast or Limited Quantity,DaysSinceAdded,TrendScore,SalesVelocity,RestockUrgency,EstimatedCost,ProfitMargin
0,1,Headphones,Electronics,400.31,1.7,3772,20,0.08,466,2023-11-10,True,276,100.0,1.688406,10.0,257.79964,0.356


In [22]:
print("Head of df_customerSegmentation_augmented:")
df_customerSegmentation_augmented.info()
df_customerSegmentation_augmented.head(1)

Head of df_customerSegmentation_augmented:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype   
---  ------                --------------  -----   
 0   id                    1000 non-null   int64   
 1   age                   1000 non-null   int64   
 2   gender                1000 non-null   object  
 3   income                1000 non-null   int64   
 4   spending_score        1000 non-null   int64   
 5   membership_years      1000 non-null   int64   
 6   purchase_frequency    1000 non-null   int64   
 7   preferred_category    1000 non-null   object  
 8   last_purchase_amount  1000 non-null   float64 
 9   CustomerValue         1000 non-null   float64 
 10  AgeGroup              1000 non-null   category
 11  IncomeGroup           1000 non-null   category
 12  LoyaltyScore          1000 non-null   int64   
 13  LastPurchaseRecency   1000 non-null   int32   
dtypes: category(2)

Unnamed: 0,id,age,gender,income,spending_score,membership_years,purchase_frequency,preferred_category,last_purchase_amount,CustomerValue,AgeGroup,IncomeGroup,LoyaltyScore,LastPurchaseRecency
0,1,38,Female,99342,90,3,24,Groceries,113.53,300.9342,31-45,Medium,72,352


In [23]:

print("Head of df_WallmartSales_augmented:")
df_WallmartSales_augmented.info()
df_WallmartSales_augmented.head(1)

Head of df_WallmartSales_augmented:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6435 entries, 0 to 6434
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Store             6435 non-null   int64         
 1   Date              6435 non-null   datetime64[ns]
 2   Weekly_Sales      6435 non-null   float64       
 3   Holiday_Flag      6435 non-null   int64         
 4   Temperature       6435 non-null   float64       
 5   Fuel_Price        6435 non-null   float64       
 6   CPI               6435 non-null   float64       
 7   Unemployment      6435 non-null   float64       
 8   Year              6435 non-null   int32         
 9   Month             6435 non-null   int32         
 10  SeasonalityIndex  6435 non-null   float64       
 11  RollingAvgSales   6300 non-null   float64       
 12  SalesGrowthRate   6390 non-null   float64       
 13  EconomicIndex     6435 non-null   float64 

Unnamed: 0,Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment,Year,Month,SeasonalityIndex,RollingAvgSales,SalesGrowthRate,EconomicIndex,TempCategory
0,1,2010-02-05,1643690.9,0,42.31,2.572,211.096358,8.106,2010,2,1.011227,,,1.13059,Cold


In [24]:
# ------ #

In [25]:
import random
from scipy import stats

# 1. Prompt Generation

def generate_trend_prompt(df):
    """Return a trend question about one randomly sampled product row of ``df``."""
    picked = df.sample(1)
    name = picked['ProductName'].iloc[0]
    category = picked['Category'].iloc[0]
    score = picked['TrendScore'].iloc[0]
    return (
        f"What's the current trend for {name} in the {category} category? "
        f"Its TrendScore is {score:.2f}."
    )

def generate_inventory_prompt(df):
    """Return a restocking question about one randomly sampled product row of ``df``."""
    picked = df.sample(1)
    name = picked['ProductName'].iloc[0]
    units = picked['StockQuantity'].iloc[0]
    urgency = picked['RestockUrgency'].iloc[0]
    return (
        f"Should we restock {name}? Its current stock is {units} units, "
        f"and its RestockUrgency is {urgency:.2f}."
    )

def generate_sales_prompt(df):
    """Return a sales-forecast question built from one randomly sampled row of ``df``."""
    picked = df.sample(1)
    store_id = picked['Store'].iloc[0]
    econ_index = picked['EconomicIndex'].iloc[0]
    weekly_sales = picked['Weekly_Sales'].iloc[0]
    return (
        f"Predict next week's sales for Store {store_id}. "
        f"Current EconomicIndex is {econ_index:.2f}, "
        f"and last week's sales were ${weekly_sales:,.2f}."
    )

def generate_customer_prompt(df):
    """Return a buying-pattern question built from one randomly sampled customer row."""
    picked = df.sample(1)
    age_band = picked['AgeGroup'].iloc[0]
    income_band = picked['IncomeGroup'].iloc[0]
    value = picked['CustomerValue'].iloc[0]
    return (
        f"Describe the buying patterns of the {age_band} age group "
        f"in the {income_band} income bracket. "
        f"Their average CustomerValue is {value:.2f}."
    )

def generate_prompts(df_trends, df_customers, df_sales, n=100):
    """Generate ``n`` prompts, each of a randomly chosen kind.

    Trend and inventory prompts draw from ``df_trends``, sales prompts from
    ``df_sales`` and customer prompts from ``df_customers``.
    """
    # Lazy builders keyed by prompt kind; insertion order fixes the order of the
    # choices handed to random.choice.
    builders = {
        'trend': lambda: generate_trend_prompt(df_trends),
        'inventory': lambda: generate_inventory_prompt(df_trends),
        'sales': lambda: generate_sales_prompt(df_sales),
        'customer': lambda: generate_customer_prompt(df_customers),
    }
    kinds = list(builders)
    return [builders[random.choice(kinds)]() for _ in range(n)]

# Generate 100 random prompts from the three augmented frames.
prompts = generate_prompts(df_Trends_augmented, df_customerSegmentation_augmented, df_WallmartSales_augmented, n=100)

# Preview only the first five prompts to keep the cell output short.
print("Sample Prompts:")
for prompt in prompts[:5]:
    print(prompt)

# 2. Correlation Analysis
def perform_correlation_analysis(df_trends, df_customers, df_sales, threshold=0.5):
    """Find strongly correlated pairs of numeric columns within each dataset.

    The three frames describe unrelated entities (products, customers and
    store-weeks) and share no key, so correlations are computed per frame.
    Aligning them side by side on the row index, as was done previously,
    reports meaningless cross-dataset pairs.

    Parameters
    ----------
    df_trends, df_customers, df_sales : pandas.DataFrame
        Frames to analyse; only their numeric columns are considered.
    threshold : float, default 0.5
        A pair is reported when the absolute Pearson correlation exceeds this.

    Returns
    -------
    list of tuple
        ``(column_a, column_b, correlation)`` for every strong pair, grouped by
        frame in the order the frames were given.
    """
    strong_correlations = []

    for frame in (df_trends, df_customers, df_sales):
        correlation_matrix = frame.select_dtypes(include=[np.number]).corr()
        columns = correlation_matrix.columns

        # Walk the strict lower triangle so each pair is reported exactly once.
        for i in range(len(columns)):
            for j in range(i):
                value = correlation_matrix.iloc[i, j]
                # NaN correlations (e.g. constant columns) compare False and are skipped.
                if abs(value) > threshold:
                    strong_correlations.append((columns[i], columns[j], value))

    return strong_correlations

# NOTE(review): this regenerates `prompts`, discarding the batch created earlier
# in this cell, and then prints all 100 of them - consider removing this
# duplicate block or limiting the printout.
prompts = generate_prompts(df_Trends_augmented, df_customerSegmentation_augmented, df_WallmartSales_augmented, n=100)

print("Sample Prompts:")
for prompt in prompts[:]:
    print(prompt)

# Collect the strongly correlated column pairs and list them.
correlations = perform_correlation_analysis(df_Trends_augmented, df_customerSegmentation_augmented, df_WallmartSales_augmented)

print("\nStrong Correlations:")
for col1, col2, corr in correlations:
    print(f"{col1} and {col2}: Correlation = {corr:.2f}")

# 3. Generate Prompts Based on Correlations

def generate_correlation_prompt(correlation):
    """Turn a ``(col1, col2, corr)`` tuple into a business-strategy question.

    Any coefficient that is not strictly greater than zero is described as
    "negative".
    """
    first, second, coefficient = correlation
    direction = "positive" if coefficient > 0 else "negative"
    return (
        f"There's a strong {direction} correlation ({coefficient:.2f}) "
        f"between {first} and {second}. "
        "How can we leverage this insight for business strategy?"
    )

correlation_prompts = [generate_correlation_prompt(corr) for corr in correlations[:5]]  # First 5 entries in matrix order; the list is not sorted by strength

# Show the prompts derived from the correlation pairs.
print("\nCorrelation-Based Prompts:")
for prompt in correlation_prompts:
    print(prompt)

Sample Prompts:
Should we restock Wool Socks? Its current stock is 753 units, and its RestockUrgency is 0.00.
Should we restock Fantasy Book? Its current stock is 645 units, and its RestockUrgency is 10.00.
Should we restock Cleanser? Its current stock is 94 units, and its RestockUrgency is 10.00.
Describe the buying patterns of the 18-30 age group in the Medium income bracket. Their average CustomerValue is 116.19.
Should we restock Jacket? Its current stock is 491 units, and its RestockUrgency is 10.00.
Sample Prompts:
Predict next week's sales for Store 1. Current EconomicIndex is 1.14, and last week's sales were $2,387,950.20.
Predict next week's sales for Store 40. Current EconomicIndex is 0.77, and last week's sales were $954,396.85.
Predict next week's sales for Store 35. Current EconomicIndex is 0.73, and last week's sales were $800,662.82.
Predict next week's sales for Store 41. Current EconomicIndex is 1.05, and last week's sales were $1,356,689.88.
What's the current trend f

In [26]:
# -------- #

In [27]:
import random

def generate_trend_answer(df, product_name, category, trend_score):
    """Compose the answer to a trend prompt.

    Sales and rating come from the first row of ``df`` whose ``ProductName``
    equals ``product_name``; an ``IndexError`` is raised when no row matches.
    """
    matching = df[df['ProductName'] == product_name]
    sales = matching['Sales'].values[0]
    rating = matching['Rating'].values[0]

    # Popularity tiers, checked from the highest threshold down.
    tiers = ((75, "very popular"), (50, "moderately popular"))
    trend = next((label for floor, label in tiers if trend_score > floor),
                 "not very popular")

    if trend_score > 60:
        advice = 'Consider increasing stock and promotion for this item.'
    else:
        advice = 'Monitor its performance closely.'

    return "".join([
        f"The {product_name} in the {category} category is currently {trend}. ",
        f"It has a TrendScore of {trend_score:.2f}, with sales of {sales} units ",
        f"and a customer rating of {rating:.1f} out of 5. ",
        advice,
    ])

def generate_inventory_answer(product_name, stock, restock_urgency):
    """Compose the answer to a restocking prompt from the urgency score."""
    # Recommendation tiers, checked from the highest threshold down.
    tiers = (
        (7, "Immediate restocking is strongly recommended"),
        (5, "Consider restocking in the near future"),
    )
    action = next((text for floor, text in tiers if restock_urgency > floor),
                  "Current stock levels seem adequate")

    if restock_urgency > 6:
        closing = 'Place a new order soon to avoid stockouts.'
    else:
        closing = 'Regular monitoring should suffice.'

    return "".join([
        f"For {product_name}, the current stock is {stock} units. ",
        f"With a RestockUrgency of {restock_urgency:.2f}, {action}. ",
        closing,
    ])

def generate_sales_answer(store, economic_index, last_week_sales):
    """Compose the answer to a sales-forecast prompt.

    The "prediction" is last week's sales scaled by a random factor drawn
    uniformly between 0.9 and 1.1; it is not a fitted forecast.
    """
    predicted_sales = last_week_sales * random.uniform(0.9, 1.1)

    outlook = (
        "positive" if economic_index > 1
        else "challenging" if economic_index < 1
        else "stable"
    )

    if outlook == 'challenging':
        advice = 'Consider increasing promotional activities.'
    else:
        advice = 'Maintain current strategies.'

    return "".join([
        f"For Store {store}, based on the EconomicIndex of {economic_index:.2f} ",
        f"and last week's sales of ${last_week_sales:,.2f}, ",
        f"we predict next week's sales to be approximately ${predicted_sales:,.2f}. ",
        f"The economic outlook appears {outlook}. ",
        advice,
    ])

def generate_customer_answer(age_group, income_group, customer_value):
    """Compose the answer to a customer-segment prompt.

    The stated category preference is picked at random, not derived from data.
    """
    # Value tiers, checked from the highest threshold down.
    tiers = ((75, "high-value"), (50, "medium-value"))
    value_description = next(
        (label for floor, label in tiers if customer_value > floor),
        "lower-value",
    )

    preferences = random.choice(["electronics", "clothing", "home goods", "groceries"])

    if value_description == 'high-value':
        advice = 'Focus on retention strategies and premium offerings.'
    else:
        advice = 'Consider targeted promotions to increase engagement.'

    return "".join([
        f"The {age_group} age group in the {income_group} income bracket ",
        f"represents a {value_description} customer segment with an average CustomerValue of {customer_value:.2f}. ",
        f"This group tends to prefer {preferences}. ",
        advice,
    ])

def generate_correlation_answer(col1, col2, corr):
    """Compose the answer to a correlation prompt.

    Parameters
    ----------
    col1, col2 : str
        Names of the two correlated columns.
    corr : float
        Correlation coefficient; any value that is not strictly greater than
        zero is treated as a negative relationship.

    Returns
    -------
    str
        A short explanation naming both columns and suggesting how to use the
        relationship.
    """
    # The example sentences are built as real f-strings. They used to be plain
    # string literals nested inside an f-string expression, which left the
    # placeholders "{col1}" and "{col2}" verbatim in the output.
    if corr > 0:
        relationship = "positive"
        strategy = "consider strategies that boost both simultaneously"
        example = f"if we focus on improving {col1}, we might see a corresponding increase in {col2}."
        usage = "optimize resource allocation and marketing strategies."
    else:
        relationship = "negative"
        strategy = "be cautious about actions that might negatively impact the other"
        example = f"we should monitor {col2} closely when making changes that affect {col1}."
        usage = "balance trade-offs in decision-making processes."

    return f"The strong {relationship} correlation ({corr:.2f}) between {col1} and {col2} " \
           f"suggests that these factors are closely related. To leverage this insight, {strategy}. " \
           f"For example, {example} " \
           f"This relationship could be used to {usage}"

def generate_answer(prompt, df_trends, df_customers, df_sales):
    """Parse a generated prompt and return the matching template answer.

    The prompt is matched against the sentence templates produced by the
    prompt generators (trend, restock, sales forecast, customer segment and
    correlation). The captured fields are handed to the corresponding
    ``generate_*_answer`` function.

    Parameters
    ----------
    prompt : str
        Prompt text to answer.
    df_trends : pandas.DataFrame
        Product frame, used to look up sales and rating for trend prompts.
    df_customers, df_sales : pandas.DataFrame
        Accepted for interface symmetry; not read here.

    Returns
    -------
    str
        The answer text, a fallback sentence when no template matches, or an
        "An error occurred ..." message when parsing or answering raises.
    """
    import re

    # Plain or thousands-separated decimal, optionally signed; "nan"/"inf" are
    # kept parseable because float() accepts them.
    number = r"(-?(?:\d[\d,]*(?:\.\d+)?|nan|inf))"

    def clean_float(s):
        return float(s.rstrip('.').replace(',', ''))

    try:
        # Each template is anchored on its full sentence structure. The earlier
        # keyword checks sent correlation prompts that mention TrendScore or
        # RestockUrgency to the wrong branch, and the positional split("the ")
        # parsing returned the wrong fragment for category, age group and
        # income group.
        match = re.search(
            r"trend for (.+) in the (.+?) category\? Its TrendScore is " + number,
            prompt)
        if match:
            product_name, category, trend_score = match.groups()
            return generate_trend_answer(df_trends, product_name, category,
                                         clean_float(trend_score))

        match = re.search(
            r"restock (.+?)\? Its current stock is (-?\d+) units, "
            r"and its RestockUrgency is " + number,
            prompt)
        if match:
            product_name, stock, restock_urgency = match.groups()
            return generate_inventory_answer(product_name, int(stock),
                                             clean_float(restock_urgency))

        match = re.search(
            r"[Pp]redict next week's sales for Store (\d+)\. "
            r"Current EconomicIndex is " + number +
            r", and last week's sales were \$" + number,
            prompt)
        if match:
            store, economic_index, last_week_sales = match.groups()
            return generate_sales_answer(int(store), clean_float(economic_index),
                                         clean_float(last_week_sales))

        match = re.search(
            r"buying patterns of the (.+?) age group in the (.+?) income bracket\. "
            r"Their average CustomerValue is " + number,
            prompt)
        if match:
            age_group, income_group, customer_value = match.groups()
            return generate_customer_answer(age_group, income_group,
                                            clean_float(customer_value))

        match = re.search(
            r"correlation \(" + number + r"\) between (.+?) and ([^.]+)",
            prompt)
        if match:
            corr, col1, col2 = match.groups()
            return generate_correlation_answer(col1, col2, clean_float(corr))

        return "I'm sorry, I don't have enough information to answer this prompt accurately."

    except Exception as e:
        # Deliberate best effort: one bad prompt must not abort a batch run.
        return f"An error occurred while processing this prompt: {str(e)}"

# Answer every generated prompt (random prompts followed by the correlation prompts)
# and print each prompt/answer pair.
all_prompts = prompts + correlation_prompts
for prompt in all_prompts:
    print("\nPrompt:", prompt)
    answer = generate_answer(prompt, df_Trends_augmented, df_customerSegmentation_augmented, df_WallmartSales_augmented)
    print("Answer:", answer)


Prompt: Predict next week's sales for Store 1. Current EconomicIndex is 1.14, and last week's sales were $2,387,950.20.
Answer: For Store 1, based on the EconomicIndex of 1.14 and last week's sales of $2,387,950.20, we predict next week's sales to be approximately $2,288,785.04. The economic outlook appears positive. Maintain current strategies.

Prompt: Predict next week's sales for Store 40. Current EconomicIndex is 0.77, and last week's sales were $954,396.85.
Answer: For Store 40, based on the EconomicIndex of 0.77 and last week's sales of $954,396.85, we predict next week's sales to be approximately $961,535.72. The economic outlook appears challenging. Consider increasing promotional activities.

Prompt: Predict next week's sales for Store 35. Current EconomicIndex is 0.73, and last week's sales were $800,662.82.
Answer: For Store 35, based on the EconomicIndex of 0.73 and last week's sales of $800,662.82, we predict next week's sales to be approximately $856,790.53. The economi

In [28]:
# ------- #

In [29]:
import json


# Pair every prompt with a freshly generated answer.
all_prompts = prompts + correlation_prompts
training_data = []

for prompt in all_prompts:
    answer = generate_answer(prompt, df_Trends_augmented, df_customerSegmentation_augmented, df_WallmartSales_augmented)
    training_data.append({
        'text_input': prompt,
        'output': answer
    })

print("Sample of training data:")
for item in training_data[:5]:
    print(json.dumps(item, indent=4))

# Forward slashes work on Windows and POSIX alike; the previous backslash path
# relied on invalid escape sequences ('\d', '\p', '\w').
json_path = '../data/processed/walmart_training_data1.json'
with open(json_path, 'w', encoding='utf-8') as f:
    json.dump(training_data, f, indent=4)

# Report the file that was actually written (the old message named a different file).
print(f"\nTraining data has been exported to {json_path}")

Sample of training data:
{
    "text_input": "Predict next week's sales for Store 1. Current EconomicIndex is 1.14, and last week's sales were $2,387,950.20.",
    "output": "For Store 1, based on the EconomicIndex of 1.14 and last week's sales of $2,387,950.20, we predict next week's sales to be approximately $2,343,960.33. The economic outlook appears positive. Maintain current strategies."
}
{
    "text_input": "Predict next week's sales for Store 40. Current EconomicIndex is 0.77, and last week's sales were $954,396.85.",
    "output": "For Store 40, based on the EconomicIndex of 0.77 and last week's sales of $954,396.85, we predict next week's sales to be approximately $984,201.18. The economic outlook appears challenging. Consider increasing promotional activities."
}
{
    "text_input": "Predict next week's sales for Store 35. Current EconomicIndex is 0.73, and last week's sales were $800,662.82.",
    "output": "For Store 35, based on the EconomicIndex of 0.73 and last week's s

In [30]:
# Report how many prompt/answer pairs were collected.
print(f'Total number of Input Output Pairs: {len(training_data)}')

Total unmber if Input Output Pairs: 105


In [33]:
import csv
import os

def export_to_csv(data, filename):
    """Write prompt/answer records to a CSV file with a header row.

    Parameters
    ----------
    data : iterable of dict
        Records with the keys ``text_input`` and ``output``.
    filename : str
        Destination path. Missing parent directories are created.
    """
    # dirname() is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when one is named.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    csv_fields = ['text_input', 'output']

    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_fields)
        writer.writeheader()
        writer.writerows(data)

    print(f"Training data has been exported to {filename}")

# Build the destination path portably and export the collected training pairs.
csv_filename = os.path.join('..', 'data', 'processed', 'walmart_training_data1.csv')
export_to_csv(training_data, csv_filename)

Training data has been exported to ..\data\processed\walmart_training_data1.csv
