In [1]:
import pandas as pd

# Dictionary to DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'City': ['Delhi', 'Mumbai', 'Chennai', 'Kolkata']
}

df = pd.DataFrame(data)
print(df)

      Name  Age     City
0    Alice   25    Delhi
1      Bob   30   Mumbai
2  Charlie   35  Chennai
3    David   40  Kolkata


In [2]:
import pandas as pd

# Load CSV file
df = pd.read_csv("sample_students.csv")

# Show first few rows
print(df.head())


      Name  Age     City  Score
0    Alice   25    Delhi     85
1      Bob   30   Mumbai     90
2  Charlie   35  Chennai     88
3    David   40  Kolkata     76
4      Eva   22    Delhi     95


In [3]:
print(df.info())   # summary of dataset
print(df.describe())  # statistics for numeric columns
print(df.shape)   # rows, columns


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    8 non-null      object
 1   Age     8 non-null      int64 
 2   City    8 non-null      object
 3   Score   8 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 384.0+ bytes
None
             Age      Score
count   8.000000   8.000000
mean   32.125000  85.625000
std     7.661359   8.467374
min    22.000000  70.000000
25%    27.250000  82.750000
50%    31.000000  88.500000
75%    36.250000  90.500000
max    45.000000  95.000000
(8, 4)


In [4]:
print(df['Name'])               # Single column
print(df[['Name', 'Score']])    # Multiple columns

print(df.loc[0])    # Row by label
print(df.iloc[3])   # Row by index


0      Alice
1        Bob
2    Charlie
3      David
4        Eva
5      Frank
6      Grace
7     Hannah
Name: Name, dtype: object
      Name  Score
0    Alice     85
1      Bob     90
2  Charlie     88
3    David     76
4      Eva     95
5    Frank     89
6    Grace     92
7   Hannah     70
Name     Alice
Age         25
City     Delhi
Score       85
Name: 0, dtype: object
Name       David
Age           40
City     Kolkata
Score         76
Name: 3, dtype: object


In [5]:
# Students older than 30
print(df[df['Age'] > 30])

# Students from Mumbai
print(df[df['City'] == 'Mumbai'])

# Students with Score >= 90
print(df[df['Score'] >= 90])

# Multiple conditions (AND)
print(df[(df['Age'] > 30) & (df['City'] == 'Chennai')])


      Name  Age     City  Score
2  Charlie   35  Chennai     88
3    David   40  Kolkata     76
6    Grace   32  Chennai     92
7   Hannah   45  Kolkata     70
    Name  Age    City  Score
1    Bob   30  Mumbai     90
5  Frank   28  Mumbai     89
    Name  Age     City  Score
1    Bob   30   Mumbai     90
4    Eva   22    Delhi     95
6  Grace   32  Chennai     92
      Name  Age     City  Score
2  Charlie   35  Chennai     88
6    Grace   32  Chennai     92


In [6]:
# Sort by Age
print(df.sort_values(by='Age'))

# Sort by Score descending
print(df.sort_values(by='Score', ascending=False))


      Name  Age     City  Score
4      Eva   22    Delhi     95
0    Alice   25    Delhi     85
5    Frank   28   Mumbai     89
1      Bob   30   Mumbai     90
6    Grace   32  Chennai     92
2  Charlie   35  Chennai     88
3    David   40  Kolkata     76
7   Hannah   45  Kolkata     70
      Name  Age     City  Score
4      Eva   22    Delhi     95
6    Grace   32  Chennai     92
1      Bob   30   Mumbai     90
5    Frank   28   Mumbai     89
2  Charlie   35  Chennai     88
0    Alice   25    Delhi     85
3    David   40  Kolkata     76
7   Hannah   45  Kolkata     70


In [7]:
# Add new column (Grade)
df['Grade'] = ['B', 'A', 'A', 'C', 'A+', 'A', 'A', 'C']
print(df)

# Drop column
df = df.drop(columns=['Grade'])
print(df)


      Name  Age     City  Score Grade
0    Alice   25    Delhi     85     B
1      Bob   30   Mumbai     90     A
2  Charlie   35  Chennai     88     A
3    David   40  Kolkata     76     C
4      Eva   22    Delhi     95    A+
5    Frank   28   Mumbai     89     A
6    Grace   32  Chennai     92     A
7   Hannah   45  Kolkata     70     C
      Name  Age     City  Score
0    Alice   25    Delhi     85
1      Bob   30   Mumbai     90
2  Charlie   35  Chennai     88
3    David   40  Kolkata     76
4      Eva   22    Delhi     95
5    Frank   28   Mumbai     89
6    Grace   32  Chennai     92
7   Hannah   45  Kolkata     70


In [8]:
# Average score by city
print(df.groupby('City')['Score'].mean())

# Maximum age by city
print(df.groupby('City')['Age'].max())


City
Chennai    90.0
Delhi      90.0
Kolkata    73.0
Mumbai     89.5
Name: Score, dtype: float64
City
Chennai    35
Delhi      25
Kolkata    45
Mumbai     30
Name: Age, dtype: int64


In [9]:
print(df.groupby('City')['Age'].min())

City
Chennai    32
Delhi      22
Kolkata    40
Mumbai     28
Name: Age, dtype: int64


In [10]:
print(df.isnull())

    Name    Age   City  Score
0  False  False  False  False
1  False  False  False  False
2  False  False  False  False
3  False  False  False  False
4  False  False  False  False
5  False  False  False  False
6  False  False  False  False
7  False  False  False  False


In [11]:
df.loc[2,"Score"]=None

In [12]:
print(df)

      Name  Age     City  Score
0    Alice   25    Delhi   85.0
1      Bob   30   Mumbai   90.0
2  Charlie   35  Chennai    NaN
3    David   40  Kolkata   76.0
4      Eva   22    Delhi   95.0
5    Frank   28   Mumbai   89.0
6    Grace   32  Chennai   92.0
7   Hannah   45  Kolkata   70.0


In [13]:
print(df.isnull())

    Name    Age   City  Score
0  False  False  False  False
1  False  False  False  False
2  False  False  False   True
3  False  False  False  False
4  False  False  False  False
5  False  False  False  False
6  False  False  False  False
7  False  False  False  False


In [14]:
df.loc[5,"City"]=None

In [15]:
print(df)

      Name  Age     City  Score
0    Alice   25    Delhi   85.0
1      Bob   30   Mumbai   90.0
2  Charlie   35  Chennai    NaN
3    David   40  Kolkata   76.0
4      Eva   22    Delhi   95.0
5    Frank   28     None   89.0
6    Grace   32  Chennai   92.0
7   Hannah   45  Kolkata   70.0


In [16]:
print(df.isnull())

    Name    Age   City  Score
0  False  False  False  False
1  False  False  False  False
2  False  False  False   True
3  False  False  False  False
4  False  False  False  False
5  False  False   True  False
6  False  False  False  False
7  False  False  False  False


In [17]:
print(df.isnull().sum())

Name     0
Age      0
City     1
Score    1
dtype: int64


In [18]:
print(df.isnull().sum().sum())

2


In [19]:
print(df[df.isnull().any(axis=1)])

      Name  Age     City  Score
2  Charlie   35  Chennai    NaN
5    Frank   28     None   89.0


In [20]:
f=df.fillna(0)


In [21]:
print(f)

      Name  Age     City  Score
0    Alice   25    Delhi   85.0
1      Bob   30   Mumbai   90.0
2  Charlie   35  Chennai    0.0
3    David   40  Kolkata   76.0
4      Eva   22    Delhi   95.0
5    Frank   28        0   89.0
6    Grace   32  Chennai   92.0
7   Hannah   45  Kolkata   70.0
