In [1]:
import pandas as pd
import numpy as np

In [2]:
data = {'Name': ['Sakshi Yadav', 'Santosh Shah', 'Diya Bansal', 'Riya Bansal', 'Abhishek Sharma', 'Ram Rai', 'Shyam Gupta', 'Satyam Mishera', 'Rahul Rai', 'Roshani Kumar'],
     'Age': [20, 21, 19, 19, 22, 18, 22, 21, 19, 18],
     'City': ['Indirapuram', 'Nodia', 'Nodia', 'Delhi', 'New Delhi', 'Delhi', np.nan, 'New Delhi', 'Nodia', 'Delhi'],
     'Score': [99, 98, 70, 50, 66, 30, np.nan, 55, np.nan, 87]}

In [3]:
df = pd.DataFrame(data)

In [4]:
df

Unnamed: 0,Name,Age,City,Score
0,Sakshi Yadav,20,Indirapuram,99.0
1,Santosh Shah,21,Nodia,98.0
2,Diya Bansal,19,Nodia,70.0
3,Riya Bansal,19,Delhi,50.0
4,Abhishek Sharma,22,New Delhi,66.0
5,Ram Rai,18,Delhi,30.0
6,Shyam Gupta,22,,
7,Satyam Mishera,21,New Delhi,55.0
8,Rahul Rai,19,Nodia,
9,Roshani Kumar,18,Delhi,87.0


6. Handle Missing Data
- Replace missing values in the Score column with the average score.
- Drop all rows where the City is missing.

In [5]:
df['Score'].isnull

<bound method Series.isnull of 0    99.0
1    98.0
2    70.0
3    50.0
4    66.0
5    30.0
6     NaN
7    55.0
8     NaN
9    87.0
Name: Score, dtype: float64>

In [6]:
df['Score'].mean()

np.float64(69.375)

In [7]:
df['Score'] = df['Score'].fillna(df['Score'].mean())

In [8]:
df

Unnamed: 0,Name,Age,City,Score
0,Sakshi Yadav,20,Indirapuram,99.0
1,Santosh Shah,21,Nodia,98.0
2,Diya Bansal,19,Nodia,70.0
3,Riya Bansal,19,Delhi,50.0
4,Abhishek Sharma,22,New Delhi,66.0
5,Ram Rai,18,Delhi,30.0
6,Shyam Gupta,22,,69.375
7,Satyam Mishera,21,New Delhi,55.0
8,Rahul Rai,19,Nodia,69.375
9,Roshani Kumar,18,Delhi,87.0


In [9]:
df = df.dropna(subset=['City'])

In [10]:
df

Unnamed: 0,Name,Age,City,Score
0,Sakshi Yadav,20,Indirapuram,99.0
1,Santosh Shah,21,Nodia,98.0
2,Diya Bansal,19,Nodia,70.0
3,Riya Bansal,19,Delhi,50.0
4,Abhishek Sharma,22,New Delhi,66.0
5,Ram Rai,18,Delhi,30.0
7,Satyam Mishera,21,New Delhi,55.0
8,Rahul Rai,19,Nodia,69.375
9,Roshani Kumar,18,Delhi,87.0


7. GroupBy Operations
- Group the data by City and calculate the average Score.
- Count how many people are in each city.

In [11]:
df.groupby('City')['Score'].mean()

City
Delhi          55.666667
Indirapuram    99.000000
New Delhi      60.500000
Nodia          79.125000
Name: Score, dtype: float64

In [12]:
df['City'].value_counts()

City
Nodia          3
Delhi          3
New Delhi      2
Indirapuram    1
Name: count, dtype: int64

8. Sorting and Ranking
- Sort the DataFrame by Score in descending order.
- Rank the students based on their Score.

In [13]:
df.sort_values(by='Score', ascending = False)

Unnamed: 0,Name,Age,City,Score
0,Sakshi Yadav,20,Indirapuram,99.0
1,Santosh Shah,21,Nodia,98.0
9,Roshani Kumar,18,Delhi,87.0
2,Diya Bansal,19,Nodia,70.0
8,Rahul Rai,19,Nodia,69.375
4,Abhishek Sharma,22,New Delhi,66.0
7,Satyam Mishera,21,New Delhi,55.0
3,Riya Bansal,19,Delhi,50.0
5,Ram Rai,18,Delhi,30.0


In [14]:
df['Score'].rank(ascending=False)

0    1.0
1    2.0
2    4.0
3    8.0
4    6.0
5    9.0
7    7.0
8    5.0
9    3.0
Name: Score, dtype: float64

9. String Operations
- Convert all names to uppercase.
- Extract the first letter of each city name.

In [15]:
df['Name'].str.upper()

0       SAKSHI YADAV
1       SANTOSH SHAH
2        DIYA BANSAL
3        RIYA BANSAL
4    ABHISHEK SHARMA
5            RAM RAI
7     SATYAM MISHERA
8          RAHUL RAI
9      ROSHANI KUMAR
Name: Name, dtype: object

In [16]:
df['City'].str[0]

0    I
1    N
2    N
3    D
4    N
5    D
7    N
8    N
9    D
Name: City, dtype: object

10. Date and Time
- Add a column Join_Date with dates.
- Convert it to datetime format and extract the year/month.

In [17]:
df['Join_Date'] = pd.date_range(start='2025-01-01', periods=len(df), freq='M')

  df['Join_Date'] = pd.date_range(start='2025-01-01', periods=len(df), freq='M')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Join_Date'] = pd.date_range(start='2025-01-01', periods=len(df), freq='M')


In [18]:
df

Unnamed: 0,Name,Age,City,Score,Join_Date
0,Sakshi Yadav,20,Indirapuram,99.0,2025-01-31
1,Santosh Shah,21,Nodia,98.0,2025-02-28
2,Diya Bansal,19,Nodia,70.0,2025-03-31
3,Riya Bansal,19,Delhi,50.0,2025-04-30
4,Abhishek Sharma,22,New Delhi,66.0,2025-05-31
5,Ram Rai,18,Delhi,30.0,2025-06-30
7,Satyam Mishera,21,New Delhi,55.0,2025-07-31
8,Rahul Rai,19,Nodia,69.375,2025-08-31
9,Roshani Kumar,18,Delhi,87.0,2025-09-30


In [19]:
df['Join_Date'].dt.year

0    2025
1    2025
2    2025
3    2025
4    2025
5    2025
7    2025
8    2025
9    2025
Name: Join_Date, dtype: int32

In [20]:
df['Join_Date'].dt.month

0    1
1    2
2    3
3    4
4    5
5    6
7    7
8    8
9    9
Name: Join_Date, dtype: int32