# Pandas
***
Pandas is a powerful Python library for data analysis and manipulation.
It provides two main data structures:

- DataFrame: a 2D table of rows and columns (like an Excel sheet or a SQL table)

- Series: a 1D labeled array (like a single column of a table)

Key features:

Handles missing data (e.g., NaN)

Supports data filtering, grouping, sorting, and aggregation

Easy to read/write data from CSV, Excel, JSON, SQL, etc.

Supports label-based and integer-based indexing

Integrates well with NumPy, Matplotlib, and Scikit-learn
***

In [2]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

In [4]:
pd.__version__  # Version string of the installed pandas package

'2.2.3'

In [14]:
# Build a 1D labeled Series: five integers indexed by the letters 'a'..'e'.
a = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=list('abcde'),
)
a  # Display the Series

a    10
b    20
c    30
d    40
e    50
dtype: int64

In [15]:
type(a)  # Confirms that `a` is a pandas Series object

pandas.core.series.Series

In [19]:
print(a['a'])  # Label-based lookup: the value stored under index label 'a'

10


In [22]:
a.shape  # Dimensions as a tuple; the Series has a single axis of length 5

(5,)

In [23]:
a.index  # The index labels of the Series

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [24]:
a.size  # Total number of elements in the Series

5

In [25]:
a.describe()  # Summary statistics: count, mean, std, min, quartiles and max

count     5.000000
mean     30.000000
std      15.811388
min      10.000000
25%      20.000000
50%      30.000000
75%      40.000000
max      50.000000
dtype: float64

In [26]:
a.sort_values(ascending=False)  # Sorts by value, largest first; index labels stay attached to their values

e    50
d    40
c    30
b    20
a    10
dtype: int64

In [4]:
# Build a 5-row DataFrame from a dictionary: each key becomes a column
# label and each list supplies that column's values.
frame = pd.DataFrame(
    data={
        "name": ["Sachin", "Rahul", "Bharath", "Kishore", "Dinesh"],
        "age": [25, 30, 35, 40, 45],
        "city": ["Bangalore", "Chennai", "Hyderabad", "Delhi", "Mumbai"],
    },
)
frame  # Display the table

Unnamed: 0,name,age,city
0,Sachin,25,Bangalore
1,Rahul,30,Chennai
2,Bharath,35,Hyderabad
3,Kishore,40,Delhi
4,Dinesh,45,Mumbai


In [33]:
frame.shape  # (number of rows, number of columns)

(5, 3)

In [31]:
frame.columns  # The column labels of the DataFrame

Index(['name', 'age', 'city'], dtype='object')

In [34]:
frame.size  # Total number of cells (rows * columns)

15

In [5]:
frame.to_csv('Frame.csv', index=False)  # Writes the DataFrame to Frame.csv; index=False leaves the row index out of the file

In [6]:
pd.read_csv('Frame.csv')  # Reads the CSV written above back into a DataFrame

Unnamed: 0,name,age,city
0,Sachin,25,Bangalore
1,Rahul,30,Chennai
2,Bharath,35,Hyderabad
3,Kishore,40,Delhi
4,Dinesh,45,Mumbai


In [12]:
frame.iloc[0:3, 0:2]  # Position-based slice: first 3 rows and first 2 columns (end positions are excluded)

Unnamed: 0,name,age
0,Sachin,25
1,Rahul,30
2,Bharath,35


In [None]:
frame.iloc[[0,2]] # Selects the rows at positions 0 and 2 (all columns kept)

Unnamed: 0,name,age,city
0,Sachin,25,Bangalore
2,Bharath,35,Hyderabad


In [None]:
frame.iloc[0,2] # Selects the single value at row position 0, column position 2 (the 'city' column)

'Bangalore'

In [18]:
frame['name'] # Selects the 'name' column; the result is a Series

0     Sachin
1      Rahul
2    Bharath
3    Kishore
4     Dinesh
Name: name, dtype: object

In [21]:
frame[frame['city'] == 'Hyderabad']  # Boolean mask: keeps only the rows whose 'city' equals 'Hyderabad'

Unnamed: 0,name,age,city
2,Bharath,35,Hyderabad


In [23]:
frame[frame['name'] == 'Sachin']  # Boolean mask: keeps only the rows whose 'name' equals 'Sachin'

Unnamed: 0,name,age,city
0,Sachin,25,Bangalore


In [40]:
frame[(frame['name'] == 'Sachin') & (frame['age'] > 10)]  # Logical AND: & combines the two conditions row by row; each condition is parenthesized

Unnamed: 0,name,age,city
0,Sachin,25,Bangalore


In [44]:
# frame.head() # With no argument, displays the first 5 rows of the DataFrame
frame.head(2)  # Displays only the first 2 rows

Unnamed: 0,name,age,city
0,Sachin,25,Bangalore
1,Rahul,30,Chennai


In [45]:
# frame.tail() # With no argument, displays the last 5 rows of the DataFrame
frame.tail(2)  # Displays only the last 2 rows

Unnamed: 0,name,age,city
3,Kishore,40,Delhi
4,Dinesh,45,Mumbai


In [51]:
# Cities used by the isin() row filter in the next cell.
selected_cities = ['Hyderabad', 'Chennai']

In [52]:
frame[frame['city'].isin(selected_cities)]  # Keeps the rows whose 'city' value appears in the selected_cities list

Unnamed: 0,name,age,city
1,Rahul,30,Chennai
2,Bharath,35,Hyderabad


In [50]:
selected_ages = [25, 35]  # Ages to keep
frame[frame['age'].isin(selected_ages)]  # Keeps the rows whose 'age' value appears in the selected_ages list

Unnamed: 0,name,age,city
0,Sachin,25,Bangalore
2,Bharath,35,Hyderabad


In [56]:
frame.rename(columns={'name':'Name', 'age':'Age', 'city':'City'})  # Renames the columns in the displayed result; it is not assigned back to `frame` here

Unnamed: 0,Name,Age,City
0,Sachin,25,Bangalore
1,Rahul,30,Chennai
2,Bharath,35,Hyderabad
3,Kishore,40,Delhi
4,Dinesh,45,Mumbai


In [80]:
# Plain dictionary mapping column name -> list of values; np.nan marks
# the missing entries.
n = {
    'A': [20, 35, np.nan, np.nan, 50],
    'B': [np.nan, 20, 30, np.nan, 50],
    'C': [12, 17, np.nan, np.nan, 20],
}

In [None]:
np.nan  # NumPy's NaN ("not a number") value, used here to represent a missing value

nan

In [94]:
df = pd.DataFrame(n)  # Creating a DataFrame from the dictionary: each key becomes a column
df  # Display it; the np.nan entries show up as the missing cells

Unnamed: 0,A,B,C
0,20.0,,12.0
1,35.0,20.0,17.0
2,,30.0,
3,,,
4,50.0,50.0,20.0


In [85]:
df.isnull()  # Checks for missing values: True where a cell is missing, False otherwise

Unnamed: 0,A,B,C
0,False,True,False
1,False,False,False
2,True,False,True
3,True,True,True
4,False,False,False


In [86]:
df.isnull().sum()  # Counts the missing values in each column (sums the True flags per column)

A    2
B    2
C    2
dtype: int64

In [91]:
df.dropna()  # Drops rows with any missing values; the result is not assigned, so `df` itself is unchanged

Unnamed: 0,A,B,C
1,35.0,20.0,17.0
4,50.0,50.0,20.0


In [95]:
df  # Still contains the missing values: the dropna() result above was not assigned back

Unnamed: 0,A,B,C
0,20.0,,12.0
1,35.0,20.0,17.0
2,,30.0,
3,,,
4,50.0,50.0,20.0


In [96]:
df.fillna(0)  # Replaces every missing value with 0 in the displayed result

Unnamed: 0,A,B,C
0,20.0,0.0,12.0
1,35.0,20.0,17.0
2,0.0,30.0,0.0
3,0.0,0.0,0.0
4,50.0,50.0,20.0


In [100]:
# Alternative fill strategies, left commented out; each passes a per-column
# statistic to fillna():
# df.fillna(df.max())  # Replaces missing values with the maximum value of each column
# df.fillna(df.mean())  # Replaces missing values with the mean of each column
# df.fillna(df.median())  # Replaces missing values with the median of each column
df.fillna(df.min())  # Replaces missing values with the minimum value of each column

Unnamed: 0,A,B,C
0,20.0,20.0,12.0
1,35.0,20.0,17.0
2,20.0,30.0,12.0
3,20.0,20.0,12.0
4,50.0,50.0,20.0
