In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.linear_model import Ridge, Lasso
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression

In [None]:
# Load the raw student-performance CSV into a DataFrame.
# NOTE(review): absolute path, presumably a Google Colab upload location — adjust when running elsewhere.
df = pd.read_csv(filepath_or_buffer="/content/student_performance_with_noise.csv")

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Keep an independent (deep) copy of the loaded frame under a second name.
data = df.copy(deep=True)

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Per-column dtype and non-null count, plus overall memory usage.
df.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for the frame's columns.
df.describe()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Separate the regression target ('final_grade') from the predictor columns.
y = df['final_grade']
X = df.drop(columns=['final_grade'])

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline: ordinary least-squares regression fitted on the training split.
model = LinearRegression().fit(X_train, y_train)
model  # last expression of the cell, so the fitted estimator is still displayed

In [None]:
# Evaluate the fitted baseline model on the held-out test split.
# The predictions are bound to `y_pred` because that is the name the metric calls
# below (and the later plotting / comparison cells) read; binding them to any other
# name raises NameError on a fresh kernel.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R² Score:", r2)

In [None]:
# Print each predictor's learned coefficient, rounded to two decimals.
for position, feature in enumerate(X.columns):
    coef = model.coef_[position]
    print(f"{feature}: {coef:.2f}")

In [None]:
# Predicted vs. actual grades on the test split.
# Points lying on the dashed red diagonal would be perfect predictions.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel("Actual Grades")
ax.set_ylabel("Predicted Grades")
ax.set_title("Actual vs Predicted Final Grades")
grade_range = [y_test.min(), y_test.max()]
ax.plot(grade_range, grade_range, 'r--')
plt.show()

In [None]:
# Compare fit quality on the training and test splits; a training R² that is much
# higher than the test R² points to overfitting.
# Both prediction sets are computed in this cell so it does not rely on a `y_pred`
# variable left over from another cell. `r2_score` is already provided by the
# notebook's import cell, so it is not re-imported here.
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

print("R² on training:", r2_train)
print("R² on test:", r2_test)

In [None]:
# Candidate regressors keyed by display name (insertion order is the report order).
# Lasso can drive coefficients to exactly zero, whereas Ridge only shrinks them.
models = {}
models["LinearRegression"] = LinearRegression()
models["Ridge"] = Ridge(alpha=1.0)
models["Lasso"] = Lasso(alpha=0.1, max_iter=10000)

In [None]:
# Fit every candidate and report train/test R² plus test MSE, i.e. the scores
# obtained after applying regularisation.
# The loop variable is deliberately not named `model`: that name holds the baseline
# estimator, and rebinding it here would make any re-run of the cells that read
# `model` silently report the last candidate instead.
for name, estimator in models.items():
    estimator.fit(X_train, y_train)

    y_train_pred = estimator.predict(X_train)
    y_test_pred = estimator.predict(X_test)

    print(f"--- {name} ---")
    print(f"R² on Training: {r2_score(y_train, y_train_pred):.4f}")
    print(f"R² on Test: {r2_score(y_test, y_test_pred):.4f}")
    print(f"MSE on Test: {mean_squared_error(y_test, y_test_pred):.4f}")
    print()

In [None]:
# Collect train/test R² and test MSE for every candidate model, then chart them.
r2_train_scores = []
r2_test_scores = []
mse_test_scores = []
model_names = []

# The loop variable is not called `model`, so the estimator bound to that name
# elsewhere in the notebook is left untouched.
for name, estimator in models.items():
    estimator.fit(X_train, y_train)
    y_train_pred = estimator.predict(X_train)
    y_test_pred = estimator.predict(X_test)

    r2_train_scores.append(r2_score(y_train, y_train_pred))
    r2_test_scores.append(r2_score(y_test, y_test_pred))
    mse_test_scores.append(mean_squared_error(y_test, y_test_pred))
    model_names.append(name)

x = np.arange(len(model_names))
width = 0.35

# R² is unitless while MSE is in squared target units, so the two metrics get
# separate panels with their own y-axes instead of sharing one scale.
fig, (ax_r2, ax_mse) = plt.subplots(1, 2, figsize=(12, 5))

ax_r2.bar(x - width / 2, r2_train_scores, width=width, label='R² Train')
ax_r2.bar(x + width / 2, r2_test_scores, width=width, label='R² Test')
ax_r2.set_xticks(x)
ax_r2.set_xticklabels(model_names)
ax_r2.set_xlabel('Model')
ax_r2.set_ylabel('R²')
ax_r2.set_title('R² (Train vs Test)')
ax_r2.legend()

ax_mse.bar(x, mse_test_scores, width=width, color='tab:green', label='MSE Test')
ax_mse.set_xticks(x)
ax_mse.set_xticklabels(model_names)
ax_mse.set_xlabel('Model')
ax_mse.set_ylabel('MSE')
ax_mse.set_title('MSE (Test)')
ax_mse.legend()

fig.suptitle('Model Performance Comparison')
fig.tight_layout()
plt.show()

In [None]:
# Standardise the features: the scaler learns its statistics from the training
# split only and the same transformation is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Regularisation strengths to try for both penalised models.
alphas = [0.001, 0.01, 0.1, 1, 10, 100]

# One entry per regulariser: (header line, label used in the report, estimator factory).
# Lasso is given a larger max_iter because, per the original author's note, it is
# more complex to fit than Ridge.
sweeps = [
    ("🔹 Lasso Results with Different Alpha:", "Lasso",
     lambda strength: Lasso(alpha=strength, max_iter=10000)),
    ("\n🔹 Ridge Results with Different Alpha:", "Ridge",
     lambda strength: Ridge(alpha=strength)),
]

# Fit on the scaled training split and report test R² for every (model, alpha) pair.
for header, label, make_model in sweeps:
    print(header)
    for alpha in alphas:
        model = make_model(alpha)
        model.fit(X_train_scaled, y_train)
        r2 = r2_score(y_test, model.predict(X_test_scaled))
        print(f"{label}(alpha={alpha}): R² Test = {r2:.4f}")

In [None]:
# 5-fold cross-validation of plain linear regression (this is the cross-validation step).
# X and y are the feature frame and target already bound in the notebook. They are
# intentionally NOT replaced with unseeded synthetic `make_regression` output: doing
# so would overwrite the real data, make the reported scores describe random numbers
# rather than the grades, and change on every run.
model = LinearRegression()
cv_scores = cross_val_score(model, X, y, cv=5, scoring='r2')
print(f"CV Scores (Linear Regression): {cv_scores}")
print(f"Mean CV Score: {cv_scores.mean()}")


In [None]:
# Candidate models for the cross-validated comparison, keyed by display name.
models = dict([
    ("Linear Regression", LinearRegression()),
    ("Ridge Regression", Ridge(alpha=1.0)),
    ("Lasso Regression", Lasso(alpha=0.01, max_iter=10000)),
])

# Score every model with 5-fold CV (the accuracy check after the overfitting
# treatment), printing the per-fold and mean R² as each one finishes.
print("Model Comparison:\n")
mean_by_model = {}
for label, estimator in models.items():
    fold_scores = cross_val_score(estimator, X, y, cv=5, scoring='r2')
    mean_by_model[label] = fold_scores.mean()
    print(f"{label} - CV Scores: {fold_scores}")
    print(f"Mean CV Score (R²): {mean_by_model[label]:.8f}\n")

# Select the winner; the strict '>' keeps the earliest model when means tie.
best_score = -np.inf
best_model_name = None
for label, mean_score in mean_by_model.items():
    if mean_score > best_score:
        best_score, best_model_name = mean_score, label

banner = "=" * 50
print(banner)
print(f"✅ Best Model: {best_model_name}")
print(f"📈 Best Mean CV Score (R²): {best_score:.8f}")
print(banner)

In [None]:
# Horizontal bar chart of mean 5-fold CV R² per model, best model highlighted.
# The scores are computed in this cell instead of being typed in by hand, so the
# chart always reflects the data currently bound to X / y and includes the
# degree-2 polynomial model that the labels refer to.
X_poly = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)
candidates = [
    ('Linear Regression', LinearRegression(), X),
    ('Polynomial Regression (deg=2)', LinearRegression(), X_poly),
    ('Ridge Regression', Ridge(alpha=1.0), X),
    ('Lasso Regression', Lasso(alpha=0.01, max_iter=10000), X),
]

# `models` (list of labels) and `mean_scores` (list of floats) keep the same
# names and list types this cell has always produced.
models = [label for label, _, _ in candidates]
mean_scores = [
    float(cross_val_score(estimator, features, y, cv=5, scoring='r2').mean())
    for _, estimator, features in candidates
]

best_index = mean_scores.index(max(mean_scores))
colors = ['skyblue' if i != best_index else 'lightgreen' for i in range(len(models))]

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.barh(models, mean_scores, color=colors)

# Zoom the x-axis onto the observed score range (derived from the data, not a
# fixed constant) and leave extra room on the right for the value labels.
lowest, highest = min(mean_scores), max(mean_scores)
span = highest - lowest
pad = span * 0.1 if span > 0 else 0.01
ax.set_xlim(lowest - pad, highest + 4 * pad)

ax.set_xlabel("Mean CV Score (R²)")
ax.set_title("Model Comparison - Mean Cross-Validation Score")
for bar, score in zip(bars, mean_scores):
    xval = bar.get_width()
    ax.text(xval + pad * 0.1, bar.get_y() + bar.get_height() / 2,
            f"{score:.8f}", va='center', fontsize=10)

fig.tight_layout()
plt.show()
