# Data Cleaning and Analysis

This notebook cleans the sample transaction data and performs an exploratory analysis. We load the dataset, clean it, run an initial analysis (summary statistics and transaction counts per category) to understand spending patterns, and save the cleaned data to an Excel file.

In [None]:
import pandas as pd

# Read the sample transactions CSV into a DataFrame. The path is relative,
# so it is resolved against the notebook's current working directory.
data_path = '../data/sample_transactions.csv'
transactions = pd.read_csv(filepath_or_buffer=data_path)

# Preview the first five rows as this cell's output
transactions.head(n=5)

In [None]:
# Data Cleaning

# Report the number of missing values per column *before* anything is filled,
# so the effect of the cleaning step below is visible in the cell output.
missing_values = transactions.isnull().sum()
print(missing_values)

# Build the cleaned frame in one chain and rebind the name, instead of
# mutating step by step with inplace=True:
#   * fillna: missing descriptions become the placeholder 'Unknown' and missing
#     amounts become 0; columns not listed in the dict are left untouched.
#   * assign: the date column is parsed to datetime. pd.to_datetime raises on
#     values it cannot parse (default errors='raise'), so bad dates surface
#     here rather than further down the notebook.
# Re-running this cell is safe: both steps are no-ops on already-clean data.
# NOTE(review): zero-filled amounts are included by describe() in the analysis
# cell and will pull down the mean/min -- confirm 0 is the intended stand-in.
transactions = (
    transactions
    .fillna({'description': 'Unknown', 'amount': 0})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Show the dtype of every column as the cell output to confirm the conversion
transactions.dtypes

In [None]:
# Exploratory Analysis

# Plotting libraries used by the chart at the end of this cell
import matplotlib.pyplot as plt
import seaborn as sns

# Descriptive statistics from DataFrame.describe(), printed as plain text
summary_stats = transactions.describe()
print(summary_stats)

# Bar chart of the number of transactions in each category. This counts rows
# per category value; it does not sum the amount column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=transactions, x='category', ax=ax)
ax.set_title('Transaction Count by Category')
# Tilt the x-axis category labels by 45 degrees
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Write the transactions frame to an Excel workbook next to the input data.
# index=False leaves the DataFrame's row index out of the sheet.
output_path = '../data/processed_transactions.xlsx'
transactions.to_excel(excel_writer=output_path, index=False)

# Reached only if to_excel returned without raising
print('Cleaned data saved to Excel.')