In [1]:
# This notebook demonstrates feature extraction techniques on a student dataset.

# Import the pandas library for data manipulation.
import pandas as pd

# Read the CSV produced by the earlier feature-splitting step into `df`;
# every later cell in this notebook works on this DataFrame.
df = pd.read_csv(filepath_or_buffer="student_feature_split.csv")

# Preview the first five rows (head's default) as a quick sanity check
# of the columns and values that were loaded.
print(df.head(n=5))

   Student_id  Age  Gender      Grade Employed  Employed_flag
0           1   19    Male  1st Class      yes              1
1           2   20  Female  2nd Class       no              0
2           3   18    Male  1st Class       no              0
3           4   21  Female  2nd Class       no              0
4           5   19    Male  1st Class       no              0


In [2]:
# Ordinal encoding for 'Grade': labels are listed from best to worst and
# numbered from 1, so "1st Class" -> 1, "2nd Class" -> 2, "3rd Class" -> 3.
grade_map = {
    label: rank
    for rank, label in enumerate(("1st Class", "2nd Class", "3rd Class"), start=1)
}

# Look each grade label up in the mapping and store the result in a new column.
# NOTE: Series.map leaves NaN for any label that is not a key of grade_map.
df["Grade_numeric"] = df["Grade"].map(grade_map)

In [3]:
# Discretise 'Age' into labelled groups with pd.cut.
# Bins are closed on the right, so the intervals are
# (0, 18] -> 'Teen', (18, 20] -> 'Young Adult', (20, 100] -> 'Adult'.
age_edges = [0, 18, 20, 100]
age_labels = ['Teen', 'Young Adult', 'Adult']
df["Age_group"] = pd.cut(
    x=df["Age"],
    bins=age_edges,
    labels=age_labels,
    right=True,  # pandas default, spelled out so the boundary rule is visible
)

In [4]:
# Show each source column next to the feature derived from it
# ('Age' -> 'Age_group', 'Grade' -> 'Grade_numeric') for the first rows,
# so the binning and the ordinal encoding can be checked by eye.
preview_columns = ['Age', 'Age_group', 'Grade', 'Grade_numeric']
print(df.loc[:, preview_columns].head())

   Age    Age_group      Grade  Grade_numeric
0   19  Young Adult  1st Class              1
1   20  Young Adult  2nd Class              2
2   18         Teen  1st Class              1
3   21        Adult  2nd Class              2
4   19  Young Adult  1st Class              1


In [5]:
# Persist the DataFrame, including the newly added feature columns, as CSV.
# index=False keeps the row index out of the written file.
df.to_csv(
    path_or_buf="student_feature_extraction.csv",
    index=False,
)