In [None]:
# 📊 Analyzing Pandas DataFrames

# Once you've loaded your data into a DataFrame, you can start analyzing it using built-in Pandas methods.

# This notebook shows how to quickly explore and summarize a DataFrame.

In [2]:
import pandas as pd

# Read the sample records from the JSON file.
# `pd.read_json` already returns a DataFrame, so its result is used directly
# instead of being re-wrapped in `pd.DataFrame(...)`.
df = pd.read_json('analyzing_data.json')
df

## 🔍 Quick Overview Methods

# - `df.head()` → First 5 rows (by default)  
# - `df.tail()` → Last 5 rows (by default)  
# - `df.shape` → (rows, columns)  
# - `df.columns` → Column names  
# - `df.dtypes` → Data types  
# - `df.info()` → Full summary  

Unnamed: 0,Name,Age,Salary,Department
0,Alice,25,50000,HR
1,Bob,30,60000,IT
2,Charlie,35,65000,Finance
3,David,40,70000,IT
4,Eva,28,62000,HR


In [3]:
# Quick structural overview, emitted as plain text by a single print call;
# sep="\n" puts each item on its own line.
print(
    df.head(),     # first rows of the frame
    df.shape,      # (rows, columns)
    df.columns,    # column labels
    df.dtypes,     # data type of each column
    sep="\n",
)
# info() writes its own summary to stdout, so it is not wrapped in print().
df.info()

      Name  Age  Salary Department
0    Alice   25   50000         HR
1      Bob   30   60000         IT
2  Charlie   35   65000    Finance
3    David   40   70000         IT
4      Eva   28   62000         HR
(5, 4)
Index(['Name', 'Age', 'Salary', 'Department'], dtype='object')
Name          object
Age            int64
Salary         int64
Department    object
dtype: object
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        5 non-null      object
 1   Age         5 non-null      int64 
 2   Salary      5 non-null      int64 
 3   Department  5 non-null      object
dtypes: int64(2), object(2)
memory usage: 288.0+ bytes


In [4]:
## 📈 Statistical Summary

# Use `df.describe()` to get a statistical summary of numerical columns.
# For this DataFrame that covers `Age` and `Salary`, reporting count, mean,
# std, min, the 25%/50%/75% quartiles, and max. The text columns (`Name`,
# `Department`) do not appear in the default summary.
df.describe()

Unnamed: 0,Age,Salary
count,5.0,5.0
mean,31.6,61400.0
std,5.94138,7402.702209
min,25.0,50000.0
25%,28.0,60000.0
50%,30.0,62000.0
75%,35.0,65000.0
max,40.0,70000.0


In [5]:
## 📊 Analyzing Categorical Data

# Use `value_counts()` to see the distribution of categories.
# The result has one entry per distinct value in the column, paired with the
# number of rows holding that value, ordered from most to least frequent.

df["Department"].value_counts()

Department
HR         2
IT         2
Finance    1
Name: count, dtype: int64

In [6]:
## ➕ Basic Aggregation

# A single column can be reduced to one value, for example:

# - Mean → `df["Age"].mean()`  
# - Max → `df["Salary"].max()`  
# - Min → `df["Salary"].min()`  
# - Median → `df["Age"].median()`  

# Select each column once, then print three of the reductions with a label.
ages, salaries = df["Age"], df["Salary"]

print("Average Age:", ages.mean())
print("Highest Salary:", salaries.max())
print("Youngest Age:", ages.min())

Average Age: 31.6
Highest Salary: 70000
Youngest Age: 25
