In [None]:
import pandas as pd

# Load the student-marks CSV into `df` and confirm the load by reporting
# the frame's (rows, columns) shape.
file_path = "/content/Student_Marks.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

print("Dataset Loaded Successfully!")
print("Shape:", df.shape)

# Last expression of the cell: rich preview of the first five rows.
df.head(n=5)


Dataset Loaded Successfully!
Shape: (100, 3)


Unnamed: 0,number_courses,time_study,Marks
0,3,4.508,19.202
1,4,0.096,7.734
2,4,3.133,13.811
3,6,7.909,53.018
4,8,7.811,55.299


In [None]:
# Cleaning pass, expressed as a single chain:
#   1. drop exact duplicate rows,
#   2. fill missing values with the column means of the de-duplicated frame
#      (means are computed over numeric columns only).
df = (
    df.drop_duplicates()
      .pipe(lambda frame: frame.fillna(frame.mean(numeric_only=True)))
)

# Normalise header text: trim surrounding whitespace, then lower-case.
df.columns = df.columns.str.strip().str.lower()

print("After Cleaning:")
df.head(n=5)


After Cleaning:


Unnamed: 0,number_courses,time_study,marks
0,3,4.508,19.202
1,4,0.096,7.734
2,4,3.133,13.811
3,6,7.909,53.018
4,8,7.811,55.299


In [None]:

# Feature engineering on optional score columns.
#
# Both derived features depend on columns ("math score", "reading score")
# that may be absent from the loaded dataset, so each step is guarded and the
# cell reports which features were actually created instead of claiming
# success unconditionally.
PASS_MARK = 35  # minimum math score that counts as a "Pass"

engineered_features = []

# total_score needs both component scores to be present.
if {"math score", "reading score"}.issubset(df.columns):
    df["total_score"] = df["math score"] + df["reading score"]
    engineered_features.append("total_score")

if "math score" in df.columns:
    # Vectorised comparison instead of a per-row lambda. A missing (NaN)
    # score compares False and therefore maps to "Fail", as it did before.
    df["result"] = (df["math score"] >= PASS_MARK).map({True: "Pass", False: "Fail"})
    engineered_features.append("result")

if engineered_features:
    print("Feature Engineering Done!")
    print("Added columns:", engineered_features)
else:
    print("Feature Engineering Skipped: no 'math score' / 'reading score' columns found.")
df.head()


Feature Engineering Done!


Unnamed: 0,number_courses,time_study,marks
0,3,4.508,19.202
1,4,0.096,7.734
2,4,3.133,13.811
3,6,7.909,53.018
4,8,7.811,55.299


In [None]:
# Persist the current frame to CSV in the working directory; the row index
# is not written to the file.
output_path = "cleaned_student_data.csv"
df.to_csv(output_path, index=False)
print(f"Saved as {output_path}")


Saved as cleaned_student_data.csv


In [None]:
# X is an independent copy of df, used as the input to the later
# preprocessing steps.
X = df.copy()

# Split the column labels by dtype: 64-bit integer/float columns are treated
# as numerical, object-dtype columns as categorical.
numerical_cols = X.select_dtypes(include=["int64", "float64"]).columns
categorical_cols = X.select_dtypes(include="object").columns

print("Numerical Columns:", [*numerical_cols])
print("Categorical Columns:", [*categorical_cols])


Numerical Columns: ['number_courses', 'time_study', 'marks']
Categorical Columns: []


In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
# Per-dtype preprocessing: standardise the numerical columns and one-hot
# encode the categorical ones (handle_unknown="ignore" is set on the encoder).
numeric_transform = StandardScaler()
categorical_transform = OneHotEncoder(handle_unknown="ignore")

preprocess = ColumnTransformer(transformers=[
    ("num", numeric_transform, numerical_cols),
    ("cat", categorical_transform, categorical_cols),
])

# Chain preprocessing with a 2-component PCA, then fit and transform X in
# a single call.
pipeline = Pipeline(steps=[
    ("preprocess", preprocess),
    ("pca", PCA(n_components=2)),
])
processed_data = pipeline.fit_transform(X)

print("Pipeline Completed!")
print("PCA Output Shape:", processed_data.shape)


Pipeline Completed!
PCA Output Shape: (100, 2)


In [None]:
from sklearn.manifold import TSNE

# Project the PCA output to two dimensions with t-SNE.
#
# t-SNE with init='random' is stochastic: without a fixed seed every run of
# this cell yields a different embedding, so the displayed coordinates cannot
# be reproduced after Restart & Run All. Pin the seed to make reruns stable.
TSNE_SEED = 42

tsne = TSNE(
    n_components=2,
    learning_rate='auto',
    init='random',
    random_state=TSNE_SEED,
)
tsne_output = tsne.fit_transform(processed_data)

print("t-SNE Completed!")
# Preview the first five embedded points.
tsne_output[:5]


t-SNE Completed!


array([[-4.094304 , -0.6664835],
       [-3.7178166,  4.8258715],
       [-3.5110874,  1.7075298],
       [ 1.950346 , -3.854578 ],
       [ 3.0191123, -4.5963717]], dtype=float32)