# Flaschen Depot - Exploratory Data Analysis

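This notebook walks through an exploratory data analysis (EDA) for the Flaschen Depot (bottle depot) project. It runs on sample data generated by `DataIngestion.create_sample_data`, so the plots illustrate the analysis workflow rather than findings from real depot records.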

In [None]:
# Import necessary libraries
import sys
# Put ../src on the module search path so the project package imported below
# resolves without being installed (presumably flaschen_depot lives under
# ../src relative to this notebook — TODO confirm).
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from flaschen_depot.data import DataIngestion

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's 'whitegrid' style to the plots drawn below.
sns.set_style('whitegrid')

## 1. Load Data

In [None]:
# Settings for this cell: the directory handed to DataIngestion and the
# number of sample rows to request.
RAW_DATA_DIR = '../data/raw'
N_SAMPLES = 1000

# Build the ingestion helper, then ask it for a sample dataset
data_ingestion = DataIngestion(RAW_DATA_DIR)
df = data_ingestion.create_sample_data(n_samples=N_SAMPLES)

# Report the frame's dimensions and preview its first rows
print(f"Dataset shape: {df.shape}")
df.head()

## 2. Data Overview

In [None]:
# Display basic statistics. With default arguments, describe() summarises the
# numeric columns only (count, mean, std, min, quartiles, max).
df.describe()

In [None]:
# Count missing values per column: flag each missing cell, then sum the flags
missing_mask = df.isna()
missing_mask.sum()

## 3. Data Visualization

In [None]:
# Bar chart: number of rows per bottle type
fig, ax = plt.subplots(figsize=(10, 6))
bottle_type_counts = df['bottle_type'].value_counts()
bottle_type_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of Bottle Types')
ax.set_xlabel('Bottle Type')
ax.set_ylabel('Count')
# Tilt the category labels so they stay readable
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Histogram of bottle volumes, split into 20 bins
fig, ax = plt.subplots(figsize=(10, 6))
df['volume_ml'].hist(bins=20, ax=ax)
ax.set_title('Distribution of Bottle Volumes')
ax.set_xlabel('Volume (ml)')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Heatmap of pairwise correlations between the numeric columns
numeric_cols = df.select_dtypes(include=[np.number]).columns
correlation = df[numeric_cols].corr()

fig, ax = plt.subplots(figsize=(12, 8))
# center=0 puts the midpoint of the diverging colormap at zero correlation
sns.heatmap(correlation, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

## 4. Feature Analysis

In [None]:
# Box plot of deposit amount by bottle type.
# DataFrame.boxplot(by=...) opens a figure of its own unless it is handed an
# Axes. Calling plt.figure(figsize=...) first therefore leaves an empty figure
# behind and the box plot ignores the requested size. Create the Axes
# explicitly and pass it in so the plot lands on the 12x6 figure.
fig, ax = plt.subplots(figsize=(12, 6))
df.boxplot(column='deposit_amount', by='bottle_type', ax=ax)
ax.set_title('Deposit Amount by Bottle Type')
# pandas adds an automatic "Boxplot grouped by ..." super-title; blank it out
fig.suptitle('')
ax.set_xlabel('Bottle Type')
ax.set_ylabel('Deposit Amount')
plt.show()

In [None]:
# Scatter plot of return count (x) against last return days (y);
# points are semi-transparent so overlapping points remain visible
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['return_count'], df['last_return_days'], alpha=0.5)
ax.set_title('Return Count vs Last Return Days')
ax.set_xlabel('Return Count')
ax.set_ylabel('Last Return Days')
plt.show()