In [1]:
import pandas as pd

# Read the three raw daily-sales exports from the 'data' folder in one pass;
# the paths are listed in the same order as the frames they are bound to.
df1, df2, df3 = map(
    pd.read_csv,
    (
        'data/daily_sales_data_0.csv',
        'data/daily_sales_data_1.csv',
        'data/daily_sales_data_2.csv',
    ),
)

In [2]:
# Function to process each dataframe
def process_morsel_data(df):
    """Reduce a raw daily-sales frame to pink-morsel sales rows.

    Keeps only the rows whose ``product`` is exactly ``'pink morsel'``,
    converts ``price`` to float (dropping the ``'$'`` when prices are stored
    as text) and computes ``sales = quantity * price``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns ``product``, ``price``, ``quantity``,
        ``date`` and ``region``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``sales``, ``date``, ``region`` in that
        order. Row index labels of the kept rows are preserved.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``df``.
    """
    required = ('product', 'price', 'quantity', 'date', 'region')
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(
            f"process_morsel_data: missing required column(s): {missing}"
        )

    # .copy() so the column assignments below never write into the caller's frame.
    pink = df.loc[df['product'] == 'pink morsel'].copy()

    price = pink['price']
    if pd.api.types.is_numeric_dtype(price):
        # The column was already parsed as numbers (no '$' in the file);
        # the .str accessor would raise AttributeError on it.
        pink['price'] = price.astype(float)
    else:
        # Text prices such as '$3.00': strip the literal dollar sign first.
        pink['price'] = price.str.replace('$', '', regex=False).astype(float)

    pink['sales'] = pink['quantity'] * pink['price']

    # Final column selection and order.
    return pink[['sales', 'date', 'region']]

# Run every raw frame through the pink-morsel pipeline, keeping the
# one-to-one pairing between dfN and dfN_processed.
df1_processed, df2_processed, df3_processed = (
    process_morsel_data(raw_frame) for raw_frame in (df1, df2, df3)
)

# Sanity check: preview the top rows of the first processed frame
print(df1_processed.head())

     sales        date region
0   1638.0  2018-02-06  north
1   1647.0  2018-02-06  south
2   1731.0  2018-02-06   east
3   1557.0  2018-02-06   west
28  1587.0  2018-02-07  north


In [3]:
# Stack the per-file results into a single frame; ignore_index=True discards
# the original row labels and renumbers the rows from 0.
combined_df = pd.concat(
    (df1_processed, df2_processed, df3_processed),
    ignore_index=True,
)

# Write the combined result to disk without the index column
combined_df.to_csv(path_or_buf='pink_morsel_sales.csv', index=False)

print("Combined data saved to pink_morsel_sales.csv")

Combined data saved to pink_morsel_sales.csv
