# 🧼 Data Cleaning for Financials.csv
This notebook strips currency formatting from the numeric columns, normalizes missing values, parses the date column, and saves a cleaned copy of the dataset ready for SQL import.

In [5]:
import pandas as pd
# Read the raw CSV and trim stray whitespace from every column header in one pass
df = pd.read_csv('../data/Financials.csv').rename(columns=str.strip)
# Show the first few rows as loaded, before any cleaning
df.head()

Unnamed: 0,Segment,Country,Product,Discount Band,Units Sold,Manufacturing Price,Sale Price,Gross Sales,Discounts,Sales,COGS,Profit,Date,Month Number,Month Name,Year
0,Government,Canada,Carretera,,"$1,618.50",$3.00,$20.00,"$32,370.00",$-,"$32,370.00","$16,185.00","$16,185.00",01/01/2014,1,January,2014
1,Government,Germany,Carretera,,"$1,321.00",$3.00,$20.00,"$26,420.00",$-,"$26,420.00","$13,210.00","$13,210.00",01/01/2014,1,January,2014
2,Midmarket,France,Carretera,,"$2,178.00",$3.00,$15.00,"$32,670.00",$-,"$32,670.00","$21,780.00","$10,890.00",01/06/2014,6,June,2014
3,Midmarket,Germany,Carretera,,$888.00,$3.00,$15.00,"$13,320.00",$-,"$13,320.00","$8,880.00","$4,440.00",01/06/2014,6,June,2014
4,Midmarket,Mexico,Carretera,,"$2,470.00",$3.00,$15.00,"$37,050.00",$-,"$37,050.00","$24,700.00","$12,350.00",01/06/2014,6,June,2014


In [6]:
# Convert the currency-formatted text columns to floats.
#
# The steps run explicitly, one after another, so each sees the output of the
# previous one (a single multi-pattern `replace` is not relied on to chain them):
#   1. drop '$', thousands separators and any whitespace
#   2. turn accounting-style negatives '(1234.50)' into '-1234.50'
#   3. treat an empty cell or a lone dash as zero
# Safe to re-run: already-numeric columns round-trip through str unchanged.
currency_cols = [
    'Units Sold', 'Manufacturing Price', 'Sale Price',
    'Gross Sales', 'Discounts', 'Sales', 'COGS', 'Profit'
]
for col in currency_cols:
    original = df[col]
    text = (
        original.astype(str)
        .str.replace(r'[$,\s]', '', regex=True)          # '$', ',' and padding
        .str.replace(r'^\((.*)\)$', r'-\1', regex=True)  # (x) -> -x
        .str.replace(r'^-?$', '0', regex=True)           # '' or '-' -> 0
    )
    numeric = pd.to_numeric(text, errors='coerce')  # anything still unparseable -> NaN

    # Surface values that coercion silently dropped instead of hiding them.
    dropped = numeric.isna() & original.notna()
    if dropped.any():
        print(f"⚠️ {col}: {int(dropped.sum())} value(s) could not be parsed and were set to NaN")

    df[col] = numeric


In [7]:
# Replace 'None' strings in Discount Band with nulls.
# An explicit mask is used instead of `replace('None', None)`: on some pandas
# versions a `None` replacement value is treated as "not given" and the cell is
# filled from the previous row instead of being nulled.
# Labels padded with whitespace (e.g. ' None ') are matched too; all other
# values are left exactly as they were.
discount_band = df['Discount Band']
is_none_label = discount_band.astype(str).str.strip().isin(['None'])
df['Discount Band'] = discount_band.mask(is_none_label)

In [8]:
# Convert Date column to datetime format.
# The raw values are day-first (DD/MM/YYYY): '01/06/2014' appears on rows whose
# Month Number is 6 (June). Without an explicit format such a string is read
# month-first (6 January) and no error is raised, so the format is pinned here.
# The dtype guard keeps the cell safe to re-run once Date is already datetime.
if not pd.api.types.is_datetime64_any_dtype(df['Date']):
    df['Date'] = pd.to_datetime(
        df['Date'].str.strip(), format='%d/%m/%Y', errors='coerce'
    )

# Sanity check: every successfully parsed date must agree with Month Number.
parsed = df['Date'].notna()
assert (df.loc[parsed, 'Date'].dt.month == df.loc[parsed, 'Month Number']).all(), \
    "Parsed Date month disagrees with 'Month Number' - check the date format"

In [9]:
# Write the cleaned frame to disk (row index omitted), then confirm where it went
df.to_csv(path_or_buf='../data/Financials_CLEAN.csv', index=False)
print("✅ Cleaned file saved to: ../data/Financials_CLEAN.csv")

✅ Cleaned file saved to: ../data/Financials_CLEAN.csv
