## Import Pandas

In [1]:
import pandas as pd

## Series in Pandas

### Creating a simple Series

In [2]:
# Start from a plain Python list of the first five odd numbers.
data = [1, 3, 5, 7, 9]
# No index is supplied, so pandas labels the values 0..4 automatically.
series = pd.Series(data=data)
print(series)


0    1
1    3
2    5
3    7
4    9
dtype: int64


### Adding Custom Index

In [3]:
# Rebuild the Series from the same values, this time labelled 'a'..'e'
# instead of the default integer positions.
series = pd.Series(
    data,
    index=list('abcde'),
)
print(series)

a    1
b    3
c    5
d    7
e    9
dtype: int64


### Accessing Values

In [4]:
# Look up a single value by its index label ('c' -> third element).
print(series.loc['c'])

5


### Basic Operations

In [5]:
# Arithmetic with a scalar is applied to every element; the index labels
# are carried over unchanged and `series` itself is not modified.
print(series.add(2))

a     3
b     5
c     7
d     9
e    11
dtype: int64


## DataFrame in Pandas

In [6]:
# Column-oriented input: each key becomes a column name and each list
# supplies that column's values, one entry per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    Salary=[50000, 60000, 70000],
)

# Rows receive the default integer index 0..2.
df = pd.DataFrame(data=data)
print(df)


      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000


### Exploring a DataFrame

In [7]:
# Three quick inspections, printed one after another:
#   1. head()  - the leading rows (up to five; this frame only has three)
#   2. columns - the column labels
#   3. shape   - a (rows, columns) tuple
print(
    df.head(),
    df.columns,
    df.shape,
    sep='\n',
)


      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000
Index(['Name', 'Age', 'Salary'], dtype='object')
(3, 3)


### Selecting Columns

In [8]:
# A single column label yields a Series.
print(df.loc[:, 'Name'])

# A list of column labels yields a DataFrame restricted to those columns.
print(df.loc[:, ['Name', 'Age']])


0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


### Selecting Rows

In [9]:
# .loc selects by index *label*: the row labelled 0 comes back as a Series.
# .iloc selects by integer *position*: the slice covers positions 0 and 1
# (the end of the slice is exclusive) and comes back as a DataFrame.
print(
    df.loc[0],
    df.iloc[:2],
    sep='\n',
)


Name      Alice
Age          25
Salary    50000
Name: 0, dtype: object
    Name  Age  Salary
0  Alice   25   50000
1    Bob   30   60000


## Data Manipulation with Pandas

### Adding New Columns

In [10]:
# Derive Bonus as 10% of Salary. Assigning to a new column label adds the
# column to `df` itself, so every later cell sees it.
df['Bonus'] = df['Salary'].mul(0.10)
print(df)


      Name  Age  Salary   Bonus
0    Alice   25   50000  5000.0
1      Bob   30   60000  6000.0
2  Charlie   35   70000  7000.0


### Filtering Data

In [11]:
# Keep only the rows whose Age is strictly greater than 30. The comparison
# produces a boolean mask that selects the matching rows.
filtered_df = df.loc[df['Age'].gt(30)]
print(filtered_df)


      Name  Age  Salary   Bonus
2  Charlie   35   70000  7000.0


### Sorting Data

In [12]:
# Order the rows by Age, smallest first. sort_values returns a new frame;
# `df` keeps its original row order.
sorted_df = df.sort_values('Age', ascending=True)
print(sorted_df)


      Name  Age  Salary   Bonus
0    Alice   25   50000  5000.0
1      Bob   30   60000  6000.0
2  Charlie   35   70000  7000.0


### Handling Missing Data

In [13]:
# Sample data with gaps: Bob has no Age and Charlie has no Salary.
data_with_na = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, None, 35],
    'Salary': [50000, 60000, None],
}
df_with_na = pd.DataFrame(data_with_na)

# Boolean frame of the same shape: True marks a missing entry.
# (isnull is an alias of isna.)
print(df_with_na.isnull())

# Replace every missing entry with 0; the result is a new frame and
# df_with_na is left untouched.
df_filled = df_with_na.fillna(value=0)
print(df_filled)


    Name    Age  Salary
0  False  False   False
1  False   True   False
2  False  False    True
      Name   Age   Salary
0    Alice  25.0  50000.0
1      Bob   0.0  60000.0
2  Charlie  35.0      0.0


## Basic Data Analysis with Pandas

### Descriptive Statistics

In [14]:
# describe() reports count, mean, std, min, quartiles and max for each
# numeric column; the text column Name is left out of the summary.
summary_stats = df.describe()
print(summary_stats)


        Age   Salary   Bonus
count   3.0      3.0     3.0
mean   30.0  60000.0  6000.0
std     5.0  10000.0  1000.0
min    25.0  50000.0  5000.0
25%    27.5  55000.0  5500.0
50%    30.0  60000.0  6000.0
75%    32.5  65000.0  6500.0
max    35.0  70000.0  7000.0


### Grouping Data

In [16]:
# Show the current state of the frame (it now includes the Bonus column)
# before grouping it. A bare expression on the last line of a cell is
# rendered by the notebook as a table.
df

Unnamed: 0,Name,Age,Salary,Bonus
0,Alice,25,50000,5000.0
1,Bob,30,60000,6000.0
2,Charlie,35,70000,7000.0


In [17]:
# Average Salary and Bonus within each Age group. Only the numeric columns
# are aggregated (Name is text). Every Age value is unique in this data, so
# each group holds exactly one row and the means equal the original values.
grouped = df.groupby('Age')[['Salary', 'Bonus']].mean()
print(grouped)


      Salary   Bonus
Age                 
25   50000.0  5000.0
30   60000.0  6000.0
35   70000.0  7000.0


### Pivot Tables

In [18]:
# Summarise Salary per Name: one output row per distinct Name, with the
# Salary values for that Name added together.
pivot = pd.pivot_table(
    df,
    values='Salary',
    index='Name',
    aggfunc='sum',
)
print(pivot)


         Salary
Name           
Alice     50000
Bob       60000
Charlie   70000
