Pandas: Python Data Analysis Library

Pandas is used for handling structured/tabular data.

In [1]:
import pandas as pd

In [2]:
# Series: a one-dimensional labelled array.
data = [10, 20, 30, 40]
# One label per value, in order: "a" -> 10, "b" -> 20, ...
s = pd.Series(data, index=list("abcd"))
print(s)

a    10
b    20
c    30
d    40
dtype: int64


In [3]:
# Creating a DataFrame from a dict that maps column name -> list of values.
data = dict(
    Name=["Alice", "Bob", "Charlie"],
    Age=[25, 30, 35],
    Salary=[50000, 60000, 70000],
)
df = pd.DataFrame(data)

# print() shows the plain-text representation of the frame ...
print(df)
# ... while a bare expression on the last line of a cell gets the rich display.
df

      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000


Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
1,Bob,30,60000
2,Charlie,35,70000


In [4]:
# Reading and Writing Data
# Write the frame to a CSV file; index=False leaves the row index out of the file.
df.to_csv(path_or_buf="data.csv", index=False)
# Load the same CSV file back into a new DataFrame.
df_read = pd.read_csv(filepath_or_buffer="data.csv")
print(df_read)

      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000


In [5]:
# Basic DataFrame Operations
print(df.head(2))      # first two rows
print(df.tail(2))      # last two rows
# info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.describe())   # summary statistics (count, mean, std, quartiles, ...)

    Name  Age  Salary
0  Alice   25   50000
1    Bob   30   60000
      Name  Age  Salary
1      Bob   30   60000
2  Charlie   35   70000
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   Salary  3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 204.0+ bytes
None
        Age   Salary
count   3.0      3.0
mean   30.0  60000.0
std     5.0  10000.0
min    25.0  50000.0
25%    27.5  55000.0
50%    30.0  60000.0
75%    32.5  65000.0
max    35.0  70000.0


In [6]:
# Selecting & Filtering Data

# A single column label selects a Series.
print(df.loc[:, "Age"])
# A list of column labels selects a DataFrame with just those columns.
print(df.loc[:, ["Name", "Salary"]])

# Filtering data: keep only the rows where Age > 25 (boolean mask).
print(df.loc[df["Age"] > 25])

0    25
1    30
2    35
Name: Age, dtype: int64
      Name  Salary
0    Alice   50000
1      Bob   60000
2  Charlie   70000
      Name  Age  Salary
1      Bob   30   60000
2  Charlie   35   70000


In [7]:
# Adding and Removing Columns
# New column: Bonus is 20% of Salary.
df["Bonus"] = df["Salary"].mul(0.2)
# NOTE: plain assignment does not copy. df_copy is a second name for the
# same DataFrame object as df, so changes made through one name are
# visible through the other.
df_copy = df
print(df)

      Name  Age  Salary    Bonus
0    Alice   25   50000  10000.0
1      Bob   30   60000  12000.0
2  Charlie   35   70000  14000.0


In [8]:
# Remove the Bonus column in place. df_copy was assigned from df without
# copying, so the column disappears from df as well.
df_copy.drop(columns=["Bonus"], inplace=True)
print(df)

      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000


In [9]:
# Handling Missing Data
# Option 1: replace every missing value with 0 (modifies df in place).
df.fillna(value=0, inplace=True)
print(df)

# Option 2: drop every row that contains a missing value (modifies df in place).
df.dropna(axis=0, how="any", inplace=True)
print(df)

      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000
      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000


In [10]:
# Grouping and Aggregations
# Total Salary for each distinct Age value.
grouped = df.groupby(by="Age")["Salary"].agg("sum")
print(grouped)

Age
25    50000
30    60000
35    70000
Name: Salary, dtype: int64
