In [1]:
import pandas as pd
import numpy as np
# Two float columns, each carrying exactly one missing entry
# (last row of 'A', first row of 'B').
nan_demo_data = {
    'A': [1.32921, -1.07082, -1.6264, 0.961538, np.nan],
    'B': [np.nan, -1.43871, 0.219565, 0.104011, 1.05774],
}
df = pd.DataFrame(nan_demo_data)

# Show the frame itself, then the same-shaped boolean frame in which
# True marks a missing value.
nan_positions = df.isna()
print("DataFrame with NaN:", df, sep="\n")
print("\nNaN positions:", nan_positions, sep="\n")
# In Colab: df.style.highlight_null('yellow')

DataFrame with NaN:
          A         B
0  1.329210       NaN
1 -1.070820 -1.438710
2 -1.626400  0.219565
3  0.961538  0.104011
4       NaN  1.057740

NaN positions:
       A      B
0  False   True
1  False  False
2  False  False
3  False  False
4   True  False


In [2]:
import pandas as pd
# Draw the sample from a seeded generator so that Restart & Run All rebuilds
# the exact same frame; an unseeded np.random.randn call produces different
# values on every run, so any printed output could never be reproduced.
rng = np.random.default_rng(42)
# 5 rows x 4 columns of standard-normal draws.
df = pd.DataFrame(rng.standard_normal((5, 4)), columns=['A', 'B', 'C', 'D'])
print("Normal DataFrame")
print(df)
# In Colab: df.style.set_properties(**{'background-color': 'black', 'color': 'yellow'})

Normal DataFrame
          A         B         C         D
0  0.407762  0.330051  0.454373  0.548603
1 -0.263623 -0.629645  0.291642  0.237752
2  0.922083  1.019883  0.048212  1.599975
3  0.365452  0.035278  1.755548  0.226460
4 -0.743750 -0.353241  1.143776 -1.178366


In [3]:
# Small frame with one gap per column, used to demonstrate missing-value detection.
df = pd.DataFrame({'A': [1, np.nan, 3], 'B': [4, 5, np.nan]})

# isna() gives a boolean frame of the same shape: True wherever a value is missing.
missing_flags = df.isna()
print("Missing values?", missing_flags, sep="\n")

Missing values?
       A      B
0  False  False
1   True  False
2  False   True


In [4]:
# Frame with gaps in both columns, used to demonstrate fillna.
df = pd.DataFrame({
    'A': [1, np.nan, 3, np.nan],
    'B': [4, 5, np.nan, 7]
})
# Print the label and the frame separately: when the frame is passed as a
# second print() argument, its header row lands on the label's line and no
# longer lines up with the data columns printed beneath it.
print("Before:")
print(df)
# fillna returns a new frame with every NaN replaced by 0; df keeps its gaps.
df_filled = df.fillna(0)
print("\nAfter fill with 0:")
print(df_filled)

Before:      A    B
0  1.0  4.0
1  NaN  5.0
2  3.0  NaN
3  NaN  7.0

After fill with 0:      A    B
0  1.0  4.0
1  0.0  5.0
2  3.0  0.0
3  0.0  7.0


In [5]:
# Mixed-type frame in which rows 0 and 3 are entirely missing and
# rows 1 and 2 each have a single gap.
df = pd.DataFrame({
    'A': [np.nan, np.nan, 70002, np.nan],
    'B': [np.nan, 270.65, 65.26, np.nan],
    'C': [np.nan, '2012-09-10', np.nan, np.nan]
})
# isna().sum(axis=1) counts the missing cells in each row; keep the rows
# where that count is at least two.
rows_with_2_nan = df[df.isna().sum(axis=1) >= 2]
# Print the label and the frame separately: when the frame is passed as a
# second print() argument, its header row lands on the label's line and no
# longer lines up with the data columns printed beneath it.
print("Rows with ≥2 NaN:")
print(rows_with_2_nan)

Rows with ≥2 NaN:     A   B    C
0 NaN NaN  NaN
3 NaN NaN  NaN


In [6]:
# Student roster: six pupils, each with a school code, a name, and an age.
roster = {
    'school': ['s001', 's002', 's003', 's001', 's002', 's004'],
    'name': ['Alberto', 'Gino', 'Ryan', 'Eesha', 'Gino', 'David'],
    'age': [12, 12, 13, 13, 14, 12],
}
df = pd.DataFrame(roster)

# Bucket the rows by school code; .groups maps each key to the row labels
# belonging to it, so listing it yields the group keys.
grouped = df.groupby('school')
school_keys = list(grouped.groups)
print("Groups:", school_keys)
print("Type:", type(grouped))

Groups: ['s001', 's002', 's003', 's004']
Type: <class 'pandas.core.groupby.generic.DataFrameGroupBy'>


In [7]:
# Per-school summary of the 'age' column: mean, minimum, and maximum.
# Relies on `df` still holding the roster with 'school' and 'age' columns.
ages_by_school = df.groupby('school')['age']
age_stats = ages_by_school.agg(['mean', 'min', 'max'])
print("Age statistics per school:", age_stats, sep="\n")

Age statistics per school:
        mean  min  max
school                
s001    12.5   12   13
s002    13.0   12   14
s003    13.0   13   13
s004    12.0   12   12


In [8]:
# Attach a class label to every row of the roster held in `df`, then group
# on the (school, class) pair.
df['class'] = ['V', 'V', 'VI', 'VI', 'V', 'VI']
grouped = df.groupby(['school', 'class'])

# Iterating a groupby yields (key, sub-frame) pairs; with two grouping
# columns the key is a tuple.
for key, members in grouped:
    print(f"\nGroup {key}:", members[['name', 'age']], sep="\n")


Group ('s001', 'V'):
      name  age
0  Alberto   12

Group ('s001', 'VI'):
    name  age
3  Eesha   13

Group ('s002', 'V'):
   name  age
1  Gino   12
4  Gino   14

Group ('s003', 'VI'):
   name  age
2  Ryan   13

Group ('s004', 'VI'):
    name  age
5  David   12


In [9]:
# Five sample records, one per row: (Year, Country, Beverage).
records = [
    (1986, 'Viet Nam', 'Wine'),
    (1986, 'Uruguay', 'Other'),
    (1985, "Cte d'Ivoire", 'Wine'),
    (1986, 'Colombia', 'Beer'),
    (1987, 'Saint Kitts', 'Beer'),
]
df = pd.DataFrame(records, columns=['Year', 'Country', 'Beverage'])

# Report the frame's dimensions and its column labels.
n_rows_cols = df.shape
column_names = list(df.columns)
print("Shape:", n_rows_cols)
print("Columns:", column_names)

Shape: (5, 3)
Columns: ['Year', 'Country', 'Beverage']


In [10]:
# Sample names to search through.
df = pd.DataFrame({
    'Name': ['John Doe', 'Jane Smith', 'Bob Johnson', 'Alice Brown']
})
# Find rows containing 'ohn'.
# regex=False matches the text literally, so a search term that happens to
# contain regex metacharacters is not reinterpreted as a pattern.
# na=False maps missing names to "no match"; without it the mask would carry
# NaN for those rows and could not be used for boolean indexing.
mask = df['Name'].str.contains('ohn', regex=False, na=False)
# Filter once and reuse the result for both the rows and their index labels.
matches = df[mask]
print("Rows with 'ohn':")
print(matches)
print("\nIndices:", matches.index.tolist())

Rows with 'ohn':
          Name
0     John Doe
2  Bob Johnson

Indices: [0, 2]
