# 🐼 Pandas Complete Cheat Sheet


This notebook covers **all major Pandas functionalities** — from data loading and manipulation to advanced analytics and visualization.
Run each cell to see examples in action.


## 1. Setup & Basic Operations

In [1]:

import pandas as pd
import numpy as np

# Report which pandas release is executing this notebook
print(pd.__version__)

# Sample employee records: one equally long list per column
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Age=[25, 30, 35, 40, 45],
    Salary=[50000, 60000, 70000, 80000, 90000],
    Department=['HR', 'IT', 'IT', 'Finance', 'HR'],
)

# Every key becomes a column; rows receive the default integer index
df = pd.DataFrame(data=data)
# A trailing bare expression is what the notebook renders as the cell output
df


1.5.3


Unnamed: 0,Name,Age,Salary,Department
0,Alice,25,50000,HR
1,Bob,30,60000,IT
2,Charlie,35,70000,IT
3,David,40,80000,Finance
4,Eva,45,90000,HR


## 2. Reading and Writing Data

In [2]:

# Each pair below writes df to a file in the working directory and reads it
# straight back. The read results are not assigned, so in a notebook only the
# final expression of the cell is displayed.

# CSV
# index=False keeps the row index out of the written file
df.to_csv('data.csv', index=False)
pd.read_csv('data.csv')

# Excel
# NOTE(review): Excel I/O relies on an optional engine package (e.g. openpyxl)
# — confirm it is installed in the target environment.
df.to_excel('data.xlsx', index=False)
pd.read_excel('data.xlsx')

# JSON
# No index=False here: the default JSON layout is used for the round trip
df.to_json('data.json')
pd.read_json('data.json')


Unnamed: 0,Name,Age,Salary,Department
0,Alice,25,50000,HR
1,Bob,30,60000,IT
2,Charlie,35,70000,IT
3,David,40,80000,Finance
4,Eva,45,90000,HR


## 3. Inspecting Data

In [3]:

# Quick ways to look at a DataFrame. In a notebook only df.info() (which
# prints) and the last expression of the cell produce visible output; the
# other results are computed and discarded.

# First rows of the frame
df.head()
# Last rows of the frame
df.tail()
# Prints the index range, per-column non-null counts, dtypes and memory usage
df.info()
# Summary statistics for the numeric columns
df.describe()
# dtype of every column
df.dtypes
# (number of rows, number of columns)
df.shape


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        5 non-null      object
 1   Age         5 non-null      int64 
 2   Salary      5 non-null      int64 
 3   Department  5 non-null      object
dtypes: int64(2), object(2)
memory usage: 288.0+ bytes


(5, 4)

## 4. Selecting Columns and Rows

In [4]:

# A single label selects one column as a Series;
# a list of labels selects several columns as a DataFrame.
df['Name']
df[['Name', 'Salary']]

# Using loc and iloc
# loc selects by label and its slices include the end label;
# iloc selects by integer position and its slices exclude the end position.
# Row whose index label is 0
df.loc[0]
# Index labels 0 through 2 (inclusive), restricted to two named columns
df.loc[0:2, ['Name', 'Age']]
# First three rows and first two columns, by position
df.iloc[0:3, 0:2]

# Conditional selection
# The comparison yields a boolean Series that is used as a row mask
df[df['Age'] > 30]


Unnamed: 0,Name,Age,Salary,Department
2,Charlie,35,70000,IT
3,David,40,80000,Finance
4,Eva,45,90000,HR


## 5. Cleaning Data

In [5]:

# Missing values
# These calls return new objects and leave df untouched; they are listed for
# reference, so their results are simply discarded here.
df.isna()
df.fillna('Unknown')
df.dropna()

# Replace values
# Assign the result back to the column rather than calling
# replace(..., inplace=True) on the df['Department'] selection: an in-place
# call on a selected column is chained assignment, which newer pandas warns
# about and which no longer updates df once Copy-on-Write is enabled.
df['Department'] = df['Department'].replace('HR', 'Human Resources')

# Remove duplicates
# duplicated() flags repeated rows; drop_duplicates() returns a frame without
# them. Neither modifies df.
df.duplicated()
df.drop_duplicates()

# Rename
# inplace=True on the frame itself renames the column on df directly
df.rename(columns={'Name': 'Employee_Name'}, inplace=True)

# Change data types
# astype returns a converted copy, so it is assigned back to the column
df['Age'] = df['Age'].astype(float)
df.dtypes


Employee_Name     object
Age              float64
Salary             int64
Department        object
dtype: object

## 6. Grouping and Aggregations

In [6]:

# Basic aggregation
# Total of the Salary column, then the average of the Age column
df['Salary'].sum()
df['Age'].mean()

# Groupby
# Mean salary within each department
df.groupby('Department')['Salary'].mean()
# Several aggregations in one pass: the dict maps each column to a function
# name or a list of names. Because a list is used for Salary, the result has
# two-level column labels such as (Salary, mean) and (Salary, max).
df.groupby('Department').agg({'Salary': ['mean', 'max'], 'Age': 'median'})


Unnamed: 0_level_0,Salary,Salary,Age
Unnamed: 0_level_1,mean,max,median
Department,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Finance,80000.0,80000,40.0
Human Resources,70000.0,90000,35.0
IT,65000.0,70000,32.5


## 7. Pivot, Melt, and Crosstab

In [9]:

# Mean salary per department. The aggregation is named by string: passing the
# NumPy callable (np.mean) raises a deprecation warning on newer pandas, while
# 'mean' selects the same built-in aggregation on every version.
pd.pivot_table(df, values='Salary', index='Department', aggfunc='mean')
# Wide -> long: Department stays as the identifier column, while Age and
# Salary are stacked into (variable, value) pairs.
pd.melt(df, id_vars=['Department'], value_vars=['Age', 'Salary'])
# pd.crosstab(df['Department'], df['Age'])


Unnamed: 0,Department,variable,value
0,Human Resources,Age,25.0
1,IT,Age,30.0
2,IT,Age,35.0
3,Finance,Age,40.0
4,Human Resources,Age,45.0
5,Human Resources,Salary,50000.0
6,IT,Salary,60000.0
7,IT,Salary,70000.0
8,Finance,Salary,80000.0
9,Human Resources,Salary,90000.0


## 8. Merge, Join, Concat

In [10]:

# Two small frames that share an 'ID' column but only partly overlap in values
df1 = pd.DataFrame({
    'ID': [1, 2, 3],
    'Name': ['A', 'B', 'C'],
})
df2 = pd.DataFrame({
    'ID': [1, 2, 4],
    'Score': [90, 80, 70],
})

# Inner join on 'ID': only IDs present in both frames survive
df1.merge(df2, on='ID', how='inner')
# Stack the frames row-wise; columns missing from one side are filled with NaN
pd.concat([df1, df2], axis=0)


Unnamed: 0,ID,Name,Score
0,1,A,
1,2,B,
2,3,C,
0,1,,90.0
1,2,,80.0
2,4,,70.0


## 9. String Operations

In [16]:

# The .str accessor applies a string method to every element of the column.
# Upper-cased version of each name
df['Employee_Name'].str.upper()
# True where the name contains 'A'; the match is case-sensitive and the
# pattern is treated as a regular expression by default
df['Employee_Name'].str.contains('A')
# Number of characters in each name
df['Employee_Name'].str.len()


0    5
1    3
2    7
3    5
4    3
Name: Employee_Name, dtype: int64

## 10. Date and Time

In [17]:

# Five consecutive daily timestamps starting at 2023-01-01
dates = pd.date_range('2023-01-01', periods=5, freq='D')
# One date per row: periods=5 matches the five rows of df
df['Date'] = dates
# The .dt accessor exposes the components of each timestamp as new columns
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
df


Unnamed: 0,Employee_Name,Age,Salary,Department,Date,Year,Month,Day
0,Alice,25.0,50000,Human Resources,2023-01-01,2023,1,1
1,Bob,30.0,60000,IT,2023-01-02,2023,1,2
2,Charlie,35.0,70000,IT,2023-01-03,2023,1,3
3,David,40.0,80000,Finance,2023-01-04,2023,1,4
4,Eva,45.0,90000,Human Resources,2023-01-05,2023,1,5


## 11. Analytics and Statistical Functions

In [18]:

# Correlation and covariance
# Pairwise matrices computed over the two numeric columns
df[['Age', 'Salary']].corr()
df[['Age', 'Salary']].cov()

# Quantiles and ranking
# Quartiles of Salary (25th, 50th and 75th percentiles)
df['Salary'].quantile([0.25, 0.5, 0.75])
# Rank of each salary within the column
df['Salary'].rank()

# Value counts and unique
# Number of rows per department, then the distinct department labels
df['Department'].value_counts()
df['Department'].unique()

# Cumulative and rolling
# Running total of Salary
df['Salary'].cumsum()
# Mean over a sliding window of 2 rows; the first row has no complete window
# and is therefore NaN
df['Salary'].rolling(2).mean()


0        NaN
1    55000.0
2    65000.0
3    75000.0
4    85000.0
Name: Salary, dtype: float64

## 12. Apply, Map, Applymap

In [None]:

# Series.apply calls the function once per element; here it derives a bonus
# of 10% of each salary and stores it as a new column
df['Bonus'] = df['Salary'].apply(lambda x: x * 0.1)
# DataFrame.applymap calls the function once per cell: numeric cells are
# divided by 10, anything else would be returned unchanged. The result is not
# assigned, so df is not modified.
# NOTE(review): applymap is deprecated from pandas 2.1 in favour of
# DataFrame.map, which does not exist in the 1.5.3 release this notebook
# reports in its first cell — switch once the pinned version moves.
df[['Age', 'Salary']].applymap(lambda x: x / 10 if isinstance(x, (int, float)) else x)


## 13. Visualization

In [None]:

import matplotlib.pyplot as plt

# Histogram of the Salary column, drawn through the pandas plotting wrapper
df['Salary'].plot(kind='hist', title='Salary Distribution')
# Render the figure before the next plot is started
plt.show()

# Scatter plot with Age on the x-axis and Salary on the y-axis
df.plot(x='Age', y='Salary', kind='scatter')
plt.show()


## 14. Performance Tips

In [None]:

# Convert to category to save memory
# Department holds only a few distinct labels repeated across rows, which is
# the case the category dtype targets
df['Department'] = df['Department'].astype('category')
# Prints the frame summary, where the new dtype and memory usage can be checked
df.info()

# Vectorized operations instead of loops
# The multiplication is applied to the whole column in one expression
df['Double_Salary'] = df['Salary'] * 2


## 15. Common Snippets and Shortcuts

In [None]:

# Sorting
# Returns a new frame ordered by Salary, highest first; the result is not
# assigned, so df keeps its current row order
df.sort_values(by='Salary', ascending=False)

# Reset index
# drop=True discards the old index instead of inserting it as a column;
# inplace=True applies the change to df itself
df.reset_index(drop=True, inplace=True)

# Copy
# Taken before 'Status' is added below, so df_copy does not get that column
df_copy = df.copy()

# Conditional assignment
# The first assignment creates the 'Status' column for rows with Age > 30;
# the second fills in the remaining rows (Age <= 30)
df.loc[df['Age'] > 30, 'Status'] = 'Senior'
df.loc[df['Age'] <= 30, 'Status'] = 'Junior'

df
