# Experiment 01 – Pandas Fundamentals

This notebook demonstrates:
- Creating Series
- Creating DataFrames
- Loading CSV data
- Handling missing values
- Feature engineering
- Filtering operations


In [4]:
import pandas as pd

## 1. Creating Pandas Series


In [5]:
# Build a Series straight from a Python list; the printed index is the default 0, 1, 2.
a = [10, 20, 30]
s = pd.Series(data=a)
print("Simple Series:", s, sep="\n")

# Rebuild the Series from the same values, this time labelling each entry explicitly.
labels = ['x', 'y', 'z']
s = pd.Series(data=a, index=labels)
print("\nSeries with custom index:", s, sep="\n")

# The same element can be reached by integer position (.iloc), by label (.loc),
# or with plain square brackets on the label.
second_by_position = s.iloc[1]
second_by_loc = s.loc['y']
second_by_brackets = s['y']
print("\nAccess by position:", second_by_position)
print("Access by label:", second_by_loc)
print("Access by label:", second_by_brackets)

Simple Series:
0    10
1    20
2    30
dtype: int64

Series with custom index:
x    10
y    20
z    30
dtype: int64

Access by position: 20
Access by label: 20
Access by label: 20


## 2. Series from Dictionary

In [7]:
# Dictionary keys become the index labels of the resulting Series.
d = dict(d1=200, d2=300, d3=400)
s = pd.Series(data=d)
print("Series from dictionary:", s, sep="\n")

# Passing `index` alongside the dict restricts the Series to the listed keys.
wanted_keys = ['d1', 'd2']
subset = pd.Series(data=d, index=wanted_keys)
print("\nSubset of dictionary:", subset, sep="\n")

Series from dictionary:
d1    200
d2    300
d3    400
dtype: int64

Subset of dictionary:
d1    200
d2    300
dtype: int64


## 3. Creating DataFrame

In [8]:
# Each dictionary key becomes a column; the lists supply the values row by row.
data = dict(a=[10, 20, 30], b=[40, 50, 60])
df_basic = pd.DataFrame(data=data)
print("Basic DataFrame:", df_basic, sep="\n")

# With the default integer index, label 0 selects the first row as a Series.
first_row = df_basic.loc[0]
print("\nFirst Row:", first_row, sep="\n")

Basic DataFrame:
    a   b
0  10  40
1  20  50
2  30  60

First Row:
a    10
b    40
Name: 0, dtype: int64


## 4. Loading Dataset

In [9]:
# Read the student records from a CSV file located relative to the working directory.
csv_path = "data/students.csv"
df = pd.read_csv(csv_path)

# Preview the top of the table (head() returns the first five rows by default).
print("First 5 rows:")
preview = df.head()
display(preview)

# Dimensions of the table as a (rows, columns) tuple.
dataset_shape = df.shape
print("\nDataset Shape:", dataset_shape)

# Count the missing entries in every column.
missing_per_column = df.isnull().sum()
print("\nMissing Values:")
print(missing_per_column)

First 5 rows:


Unnamed: 0,Student_ID,Name,Branch,Maths,Science,English,Attendance
0,101,pavana,CSE,85.0,92.0,88.0,90
1,102,navya,IT,90.0,,75.0,85
2,103,bhavana,CSE,,80.0,82.0,88
3,104,prasanna,ECE,78.0,85.0,,92
4,105,shashank,IT,88.0,89.0,90.0,95



Dataset Shape: (7, 7)

Missing Values:
Student_ID    0
Name          0
Branch        0
Maths         1
Science       1
English       1
Attendance    0
dtype: int64


## 5. Handling Missing Values

In [10]:
# Replace every missing mark with the mean of its own subject column.
# The mean is taken before filling, so it reflects only the marks actually present.
for subject in ('Maths', 'Science', 'English'):
    subject_mean = df[subject].mean()
    df[subject] = df[subject].fillna(subject_mean)

print("After handling missing values:")
display(df)

After handling missing values:


Unnamed: 0,Student_ID,Name,Branch,Maths,Science,English,Attendance
0,101,pavana,CSE,85.0,92.0,88.0,90
1,102,navya,IT,90.0,85.0,75.0,85
2,103,bhavana,CSE,83.0,80.0,82.0,88
3,104,prasanna,ECE,78.0,85.0,82.333333,92
4,105,shashank,IT,88.0,89.0,90.0,95
5,106,krishna,CSE,65.0,70.0,68.0,75
6,107,teja,ECE,92.0,94.0,91.0,98


## 6. Feature Engineering
Creating Total, Average and Result columns. A student's Result is "Pass" when their Average is at least 50, and "Fail" otherwise.

In [11]:
def _label_result(average):
    """Return "Pass" for an average mark of 50 or more, otherwise "Fail"."""
    if average >= 50:
        return "Pass"
    return "Fail"


# Total is the sum of the three subject marks; Average spreads it over the three subjects.
subject_total = df['Maths'].add(df['Science']).add(df['English'])
df['Total'] = subject_total
df['Average'] = df['Total'].div(3)

# Derive the pass/fail label from each student's average.
df['Result'] = df['Average'].apply(_label_result)

display(df)

Unnamed: 0,Student_ID,Name,Branch,Maths,Science,English,Attendance,Total,Average,Result
0,101,pavana,CSE,85.0,92.0,88.0,90,265.0,88.333333,Pass
1,102,navya,IT,90.0,85.0,75.0,85,250.0,83.333333,Pass
2,103,bhavana,CSE,83.0,80.0,82.0,88,245.0,81.666667,Pass
3,104,prasanna,ECE,78.0,85.0,82.333333,92,245.333333,81.777778,Pass
4,105,shashank,IT,88.0,89.0,90.0,95,267.0,89.0,Pass
5,106,krishna,CSE,65.0,70.0,68.0,75,203.0,67.666667,Pass
6,107,teja,ECE,92.0,94.0,91.0,98,277.0,92.333333,Pass


## 7. Filtering Operations

In [12]:
# Each boolean mask is computed once and reused; .loc[mask, column] then picks
# the matching rows and the wanted column in a single indexing step.
high_attendance = df['Attendance'] > 90
passed = df['Result'] == "Pass"
maths_above_english = df['Maths'] > df['English']

# Students with Attendance > 90
print("Students with Attendance > 90:")
display(df.loc[high_attendance])

# Names with Attendance > 90
print("\nNames with Attendance > 90:")
print(df.loc[high_attendance, 'Name'])

# Student_ID of Passed Students
print("\nStudent_ID of Passed Students:")
print(df.loc[passed, 'Student_ID'])

# Student_ID where Maths > English
print("\nStudent_ID where Maths > English:")
print(df.loc[maths_above_english, 'Student_ID'])

Students with Attendance > 90:


Unnamed: 0,Student_ID,Name,Branch,Maths,Science,English,Attendance,Total,Average,Result
3,104,prasanna,ECE,78.0,85.0,82.333333,92,245.333333,81.777778,Pass
4,105,shashank,IT,88.0,89.0,90.0,95,267.0,89.0,Pass
6,107,teja,ECE,92.0,94.0,91.0,98,277.0,92.333333,Pass



Names with Attendance > 90:
3    prasanna
4    shashank
6        teja
Name: Name, dtype: object

Student_ID of Passed Students:
0    101
1    102
2    103
3    104
4    105
5    106
6    107
Name: Student_ID, dtype: int64

Student_ID where Maths > English:
1    102
2    103
6    107
Name: Student_ID, dtype: int64


## Conclusion

This experiment covered:
- Series and DataFrame creation
- Loading real-world CSV data
- Handling missing values using mean imputation
- Creating new features
- Filtering data based on conditions