In [1]:
## Importing the Library packages

import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

## Importing the dataset

In [2]:
# Read the student exam-performance CSV from its relative input directory.
csv_path = "../input/students-performance-in-exams/StudentsPerformance.csv"
data = pd.read_csv(csv_path)

# Bare trailing expression: the notebook renders the loaded frame as cell output.
data

In [3]:
## Encoding the data from categorical to numerical

encoder = LabelEncoder()


def _encode_in_place(column):
    """Replace data[column] with integer codes and return the {code: label} lookup.

    Refits the shared module-level `encoder` on the column, so after the call
    `encoder.classes_` describes this column only.
    """
    data[column] = encoder.fit_transform(data[column])
    return dict(enumerate(encoder.classes_))


# One lookup per categorical column, encoded in the same order as before so the
# encoder ends up fitted on 'test preparation course'.
gender_mapping = _encode_in_place('gender')
ethnicity_mapping = _encode_in_place('race/ethnicity')
parental_education_mapping = _encode_in_place('parental level of education')
lunch_mapping = _encode_in_place('lunch')
test_prep_mapping = _encode_in_place('test preparation course')

In [4]:
# Display the integer-code -> original-label lookup built for the 'gender' column.
gender_mapping

## Data Analysis

In [5]:
# Build a pandas-profiling report object over the whole DataFrame.
# NOTE(review): `data` was label-encoded in an earlier cell, so the categorical
# columns reach the profiler as integer codes rather than their original labels
# -- confirm that is intended.
report = ProfileReport(data)

In [6]:
# Render the profiling report inline in the notebook output.
report.to_notebook_iframe()

## Splitting the dataset

**Splitting the data into Targets and Features**

In [7]:
# Keep only the three exam-score columns; later cells use each of them both as
# a regression target and as a feature for the other two.
score_columns = ["math score", "reading score", "writing score"]
X = data[score_columns]

# Bare trailing expression so the notebook shows the selected columns.
X

In [8]:
# Pull each score out as its own target Series. This runs before X is replaced
# by its scaled version further down, so the targets keep the raw score values.
y_math, y_reading, y_writing = (
    X["math score"],
    X["reading score"],
    X["writing score"],
)

## Scaling the data

In [9]:
# Standardize every score column with scikit-learn's StandardScaler.
scaler = StandardScaler()

# fit_transform hands back a plain array by default, so the DataFrame is rebuilt
# with the original column labels AND the original index. Keeping the index means
# the scaled features stay label-aligned with the y_* Series that were sliced
# from X before scaling (for a default RangeIndex this is identical to before).
#
# NOTE(review): the scaler is fitted on all rows before the train/test split
# performed later, so held-out rows contribute to the scaling statistics.
# Consider fitting on the training rows only and applying transform() to the
# test rows.
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

In [10]:
# For each subject, the feature matrix is the other two (scaled) score columns.
# Column order is kept exactly as listed.
X_math = X.loc[:, ["reading score", "writing score"]]
X_reading = X.loc[:, ["writing score", "math score"]]
X_writing = X.loc[:, ["math score", "reading score"]]

In [11]:
# Hold out 30% of the rows for evaluation in each of the three regression tasks.
# train_test_split shuffles randomly; without a fixed random_state every run
# produces a different partition and therefore different R^2 values. Pinning the
# seed makes "Restart & Run All" reproduce the reported scores.
SPLIT_SEED = 42

X_math_train, X_math_test, y_math_train, y_math_test = train_test_split(
    X_math, y_math, train_size=0.7, random_state=SPLIT_SEED
)
X_reading_train, X_reading_test, y_reading_train, y_reading_test = train_test_split(
    X_reading, y_reading, train_size=0.7, random_state=SPLIT_SEED
)
X_writing_train, X_writing_test, y_writing_train, y_writing_test = train_test_split(
    X_writing, y_writing, train_size=0.7, random_state=SPLIT_SEED
)

## Simple Model Prediction

In [12]:
# One ordinary-least-squares model per subject, each trained on its own split.
# fit() returns the estimator itself, so construction and training are chained.
math_model = LinearRegression().fit(X_math_train, y_math_train)
reading_model = LinearRegression().fit(X_reading_train, y_reading_train)
writing_model = LinearRegression().fit(X_writing_train, y_writing_train)

# Trailing expression keeps the cell output: the fitted writing model's repr.
writing_model

In [13]:
# Score each fitted model on its held-out rows; for a regressor, score() is the
# coefficient of determination (R^2).
math_r2, reading_r2, writing_r2 = (
    math_model.score(X_math_test, y_math_test),
    reading_model.score(X_reading_test, y_reading_test),
    writing_model.score(X_writing_test, y_writing_test),
)

In [14]:
# Report the three held-out R^2 values, one per line.
print(
    f"Math R^2: {math_r2}",
    f"Reading R^2: {reading_r2}",
    f"Writing R^2: {writing_r2}",
    sep="\n",
)