In [1]:
# Colab-only step: mount Google Drive at /content/drive so the dataset path
# used later in this notebook (under /content/drive/MyDrive) can be read.
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd
import matplotlib.pyplot as plt
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import joblib
# from sklearn.metrics import mean_squared_error, r2_score

Mounted at /content/drive


In [2]:
def upload_dataset(path, feature_list):
  """Read a CSV file and split the selected columns into features and target.

  Args:
    path: Location of the CSV file, handed straight to ``pd.read_csv``.
    feature_list: Column names to keep, in order. The last name is used as
      the target column; every name before it becomes a feature column.

  Returns:
    A ``(X, y)`` tuple: ``X`` is a DataFrame with all selected columns except
    the last one, ``y`` is a Series holding the last selected column.
  """
  selected = pd.read_csv(path)[feature_list]
  # Positional slicing: everything but the final column vs. the final column.
  features, target = selected.iloc[:, :-1], selected.iloc[:, -1]
  return features, target

In [3]:
def split_dataset(X, y, test_size, val_size):
  """Split features and target into train, validation and test partitions.

  The data is split twice with ``train_test_split`` (both calls use
  ``random_state=42``): first into a training part and a held-out part, then
  the held-out part into validation and test.

  Args:
    X: Feature table.
    y: Target values aligned with ``X``.
    test_size: Size of the held-out part taken from the full data set. Note
      that this is the combined validation + test share, not the test share
      alone.
    val_size: Size of the validation partition taken from the held-out part;
      whatever is left of the held-out part becomes the test partition.

  Returns:
    ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
  """
  X_train, X_holdout, y_train, y_holdout = train_test_split(
      X, y, test_size=test_size, random_state=42)
  # Pass val_size as train_size: the first partition returned by this call is
  # the validation set, so this is the argument that actually sizes it.
  # (Passing it as test_size would size the test partition instead.)
  X_val, X_test, y_val, y_test = train_test_split(
      X_holdout, y_holdout, train_size=val_size, random_state=42)
  return X_train, X_val, X_test, y_train, y_val, y_test

In [4]:
# Load the selected columns. upload_dataset treats the last entry of
# feature_list (Exam_Score) as the target and the rest as features.
X, y = upload_dataset(
    path="/content/drive/MyDrive/25-2/StudentPerformanceFactors.csv",
    feature_list=[
        'Hours_Studied',
        'Attendance',
        'Extracurricular_Activities',
        'Sleep_Hours',
        'Tutoring_Sessions',
        'Physical_Activity',
        'Exam_Score',
    ],
)

# One-hot encode the categorical column; drop_first=True drops the indicator
# of the first category so only the remaining level(s) get a column.
X = pd.get_dummies(X, columns=['Extracurricular_Activities'], drop_first=True)

# 0.2 of the rows are held out of training; the held-out part is then split
# in half (0.5) into validation and test.
X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(X, y, 0.2, 0.5)

# Sanity check: the three partitions together cover every row exactly once.
assert len(X_train) + len(X_val) + len(X_test) == len(X)

print("Train 데이터 크기: ", len(X_train))
print("Validation 데이터 크기: ", len(X_val))
print("Test 데이터 크기: ", len(X_test))

# Plain Python ints are enough for the summary table; no tensor is needed.
data_size = [len(X_train), len(X_val), len(X_test)]
pd.DataFrame(data_size, index=["train", "val", "test"], columns=["value"])

Train 데이터 크기:  5285
Validation 데이터 크기:  661
Test 데이터 크기:  661


Unnamed: 0,value
train,5285
val,661
test,661


In [5]:
class multiregressionmodel:
  """Polynomial regression: a PolynomialFeatures expansion feeding LinearRegression.

  Attributes:
    model: The ``LinearRegression`` estimator fit on the expanded features.
    poly: The ``PolynomialFeatures`` transformer used to expand the inputs.
  """

  def __init__(self, degree: int):
    """Create an unfitted model.

    Args:
      degree: Polynomial degree passed to ``PolynomialFeatures``.
    """
    self.model = LinearRegression()
    self.poly = PolynomialFeatures(degree)

  def train(self, x, y):
    """Fit the feature expansion and the linear model on training data.

    Args:
      x: Training features (one row per sample).
      y: Training targets; a pandas Series or any array-like. It is reshaped
        into a single column before fitting.
    """
    x = self.poly.fit_transform(x)
    # np.asarray handles Series, ndarrays and lists alike (the previous
    # y.values only worked for pandas objects).
    y = np.asarray(y).reshape(-1, 1)
    self.model.fit(x, y)

  def predict(self, x):
    """Predict targets for new samples using the already-fitted expansion.

    Args:
      x: Features laid out like the data passed to ``train``.

    Returns:
      The output of ``LinearRegression.predict`` on the expanded features.
      Because the target was fit as a single column, this is expected to be
      a 2-D array with one column.
    """
    # transform only: the expansion must stay as fit in train(). Re-fitting
    # here would overwrite the transformer's state with the inference data
    # and hide any mismatch between training and prediction inputs.
    x = self.poly.transform(x)
    return self.model.predict(x)

In [6]:
# Build the degree-2 polynomial regressor and fit it on the training
# partition only (validation and test data are not touched here).
model = multiregressionmodel(degree=2)
model.train(x=X_train, y=y_train)

In [7]:
# A standalone degree-2 transformer, fit on the training features just to
# list the names of the expanded columns. It is a separate object from the
# transformer stored inside `model`.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit(X_train).transform(X_train)
print(poly.get_feature_names_out())

['1' 'Hours_Studied' 'Attendance' 'Sleep_Hours' 'Tutoring_Sessions'
 'Physical_Activity' 'Extracurricular_Activities_Yes' 'Hours_Studied^2'
 'Hours_Studied Attendance' 'Hours_Studied Sleep_Hours'
 'Hours_Studied Tutoring_Sessions' 'Hours_Studied Physical_Activity'
 'Hours_Studied Extracurricular_Activities_Yes' 'Attendance^2'
 'Attendance Sleep_Hours' 'Attendance Tutoring_Sessions'
 'Attendance Physical_Activity'
 'Attendance Extracurricular_Activities_Yes' 'Sleep_Hours^2'
 'Sleep_Hours Tutoring_Sessions' 'Sleep_Hours Physical_Activity'
 'Sleep_Hours Extracurricular_Activities_Yes' 'Tutoring_Sessions^2'
 'Tutoring_Sessions Physical_Activity'
 'Tutoring_Sessions Extracurricular_Activities_Yes' 'Physical_Activity^2'
 'Physical_Activity Extracurricular_Activities_Yes'
 'Extracurricular_Activities_Yes^2']


In [8]:
# Single source of truth for the output file so the dump target and the
# message below cannot drift apart. The path is relative, so the file lands
# in the kernel's current working directory.
MODEL_PATH = "polynomial_model.joblib"

# NOTE(review): joblib pickles the whole `model` object. Loading it later
# presumably requires the multiregressionmodel class to be defined/importable
# in the loading session - confirm before relying on this file elsewhere.
joblib.dump(model, MODEL_PATH)

print(f"Model saved to {MODEL_PATH}")

Model saved to polynomial_model.joblib
