# Module 2: Basic EDA Practice

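This notebook walks through basic exploratory data analysis (EDA) on **sales.csv**. As written, the first code cell generates a synthetic sales dataset for demonstration instead of reading the file.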

## 1. Setup & Load Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset
# sales = pd.read_csv('assets/data/sales.csv')
# If you have the file locally, uncomment the line above and insert the correct file path.
# For demonstration purposes, the DataFrame is created directly below.

import numpy as np 

# Seed NumPy's global generator so the synthetic data is the same on every run.
np.random.seed(0)

# Lookup tables used to build the synthetic rows.
regions = ['North', 'South', 'East', 'West']
products = ['A', 'B', 'C']
prices = {'A': 10.0, 'B': 15.0, 'C': 20.0}

# Weekly dates anchored on Mondays, from 2021-01-01 through 2021-06-30.
dates = pd.date_range(start='2021-01-01', end='2021-06-30', freq='W-MON')


def _sales_record(day, region):
    """Build one synthetic sales row for the given date and region.

    The product is drawn before the unit count; keeping that order keeps the
    sequence of random draws (and therefore the generated data) unchanged.
    """
    product = np.random.choice(products)
    units = np.random.randint(1, 21)  # upper bound is exclusive -> 1..20
    unit_price = prices[product]
    return {
        'Date': day.strftime('%Y-%m-%d'),  # stored as text, like a CSV column
        'Region': region,
        'Product': product,
        'Units Sold': units,
        'Unit Price': unit_price,
        'Revenue': units * unit_price,
    }


# One row per (date, region) pair: dates vary slowest, regions fastest.
rows = [_sales_record(day, region) for day in dates for region in regions]
sales = pd.DataFrame(rows)
sales.head()

## 2. Data Overview

In [None]:
# Print a structural overview of the frame.
sales.info()

# Summary statistics via describe() with its default settings; the bare name
# on the last line is what the cell displays.
summary_stats = sales.describe()
summary_stats

## 3. Time Series Analysis

In [None]:
# Parse the 'Date' column into datetime values (the pivot-table cell later
# uses the .dt accessor on this column).
sales['Date'] = pd.to_datetime(sales['Date'])

# Sum revenue over all rows that share the same date.
weekly_revenue = sales.groupby('Date')['Revenue'].sum()

# Draw the series on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(weekly_revenue)
ax.set_title('Total Weekly Revenue')
ax.set_xlabel('Date')
ax.set_ylabel('Revenue')
fig.tight_layout()
plt.show()

## 4. Revenue by Region

In [None]:
# Total revenue per region across the whole dataset.
region_rev = sales.groupby('Region')['Revenue'].sum()

# Bar chart of the totals; keep the returned Axes to label it directly.
ax = region_rev.plot.bar(figsize=(6, 4), title='Total Revenue by Region')
ax.set_ylabel('Revenue')
ax.figure.tight_layout()
plt.show()

## 5. Units Sold by Product

In [None]:
# Total units sold per product across the whole dataset.
prod_units = sales.groupby('Product')['Units Sold'].sum()

# Bar chart of the totals; keep the returned Axes to label it directly.
ax = prod_units.plot.bar(figsize=(6, 4), title='Units Sold by Product')
ax.set_ylabel('Units Sold')
ax.figure.tight_layout()
plt.show()

## 6. Pivot Table: Monthly Revenue by Region

In [None]:
# Derive a monthly period from 'Date'. pd.to_datetime is applied first so this
# cell also works when 'Date' still holds the original 'YYYY-MM-DD' strings
# (i.e. when the time-series section has not been run in this session); on a
# column that is already datetime it leaves the values unchanged.
sales['Month'] = pd.to_datetime(sales['Date']).dt.to_period('M')

# Rows are months, columns are regions, and each cell is the summed revenue.
pivot = sales.pivot_table(index='Month', columns='Region', values='Revenue', aggfunc='sum')
pivot

**Save your work** by downloading this notebook (*.ipynb*) when you’re done.