# Exploratory Data Analysis (EDA) with Pandas
This notebook builds a small sample sales dataset, saves it to `sales.csv`, loads it back, and performs basic EDA on it.

In [None]:
import pandas as pd

# Build the sample dataset from row-wise records, one tuple per sale,
# laid out in the same order as `sample_columns`.
sample_columns = ["product", "category", "price", "quantity", "date"]
sample_rows = [
    ("A", "Food", 10, 5, "2024-01-01"),
    ("B", "Clothes", 25, 2, "2024-01-02"),
    ("C", "Food", 12, 8, "2024-01-03"),
    ("D", "Electronics", 200, 1, "2024-01-04"),
    ("E", "Clothes", 40, 3, "2024-01-05"),
    ("F", "Food", 8, 10, "2024-01-06"),
]

# Transpose the records into a column-name -> list-of-values mapping.
data = {
    name: list(values)
    for name, values in zip(sample_columns, zip(*sample_rows))
}

df = pd.DataFrame(data)
df

In [2]:
# Write the frame to sales.csv; index=False keeps the row index out of the file.
df.to_csv(path_or_buf="sales.csv", index=False)
"sales.csv saved!"

'sales.csv saved!'

In [3]:
# Read the saved CSV back into `df` and preview its first five rows.
df = pd.read_csv("sales.csv")
df.head(5)

Unnamed: 0,product,category,price,quantity,date
0,A,Food,10,5,2024-01-01
1,B,Clothes,25,2,2024-01-02
2,C,Food,12,8,2024-01-03
3,D,Electronics,200,1,2024-01-04
4,E,Clothes,40,3,2024-01-05


In [4]:
# Basic information: print the index range, each column's non-null count and
# dtype, and the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   product   6 non-null      object
 1   category  6 non-null      object
 2   price     6 non-null      int64 
 3   quantity  6 non-null      int64 
 4   date      6 non-null      object
dtypes: int64(2), object(3)
memory usage: 372.0+ bytes


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
numeric_summary = df.describe()
numeric_summary

Unnamed: 0,price,quantity
count,6.0,6.0
mean,49.166667,4.833333
std,74.871668,3.544949
min,8.0,1.0
25%,10.5,2.25
50%,18.5,4.0
75%,36.25,7.25
max,200.0,10.0


In [6]:
# Count missing entries in each column.
df.isna().sum()

product     0
category    0
price       0
quantity    0
date        0
dtype: int64

In [7]:
# Flag rows that repeat an earlier row, then count the flagged rows.
duplicate_mask = df.duplicated()
duplicate_mask.sum()

0

In [8]:
# Add a per-row sales total: unit price multiplied by units sold.
row_totals = df["price"] * df["quantity"]
df["total"] = row_totals
df.head(5)

Unnamed: 0,product,category,price,quantity,date,total
0,A,Food,10,5,2024-01-01,50
1,B,Clothes,25,2,2024-01-02,50
2,C,Food,12,8,2024-01-03,96
3,D,Electronics,200,1,2024-01-04,200
4,E,Clothes,40,3,2024-01-05,120


In [9]:
# Sum the per-row totals within each product category.
totals_by_category = df.groupby("category")["total"]
totals_by_category.sum()

category
Clothes        170
Electronics    200
Food           226
Name: total, dtype: int64

In [10]:
# Date conversion and time series
#
# Parse the date strings and move them into the index so the frame can be
# resampled by time. The guard keeps this cell safe to re-run: once "date" has
# become the index it is no longer a column, so repeating the conversion
# unguarded would raise KeyError: 'date'. `df` still ends up indexed by date,
# exactly as before, but without inplace=True.
if "date" in df.columns:
    df = df.assign(date=pd.to_datetime(df["date"])).set_index("date")

# Sum the sales totals per calendar day ("D" = daily frequency).
df["total"].resample("D").sum()

date
2024-01-01     50
2024-01-02     50
2024-01-03     96
2024-01-04    200
2024-01-05    120
2024-01-06     80
Freq: D, Name: total, dtype: int64