In [ ]:
import pandas as pd
# Load the student performance dataset from the working directory.
df = pd.read_csv("StudentsPerformance.csv")

# Print the DataFrame summary (df.info() writes directly to stdout).
print("--- Data Information ---")
df.info()

# Per-column null counts; a non-zero grand total means some row is incomplete.
print("\n--- Missing Values Check ---")
missing_values = df.isna().sum()
if missing_values.sum() != 0:
    print("Missing values found. Dropping rows with missing data.")
    # Drop incomplete rows in place so `df` keeps referring to the same object.
    df.dropna(inplace=True)
else:
    print("Dataset is clean. No missing values.")

print(f"Total rows after cleaning: {df.shape[0]}")

In [ ]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Single-feature regression: predict the writing score from the reading score.
# X is kept as a one-column DataFrame (double brackets) because scikit-learn
# estimators expect a 2-D feature matrix; Y is the 1-D target Series.
X = df[['reading score']]
Y = df['writing score']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Fit ordinary least squares on the training portion only.
model = LinearRegression()
model.fit(X_train, Y_train)

# Predict on the held-out rows and score those predictions.
Y_pred = model.predict(X_test)

r2 = r2_score(Y_test, Y_pred)
coefficient = model.coef_[0]  # only one feature, so only one slope
intercept = model.intercept_

print("R^2:", r2)
print("Coefficient:", coefficient)
print("Intercept:", intercept)
# Pick the sign explicitly so a negative intercept prints as "- 0.67"
# instead of the malformed "+ -0.67".
print(
    f"Regression Equation: Y = {coefficient} * X "
    f"{'+' if intercept >= 0 else '-'} {abs(intercept)}"
)

In [ ]:
# Visualise the held-out data against the fitted line.
plt.figure(figsize=(10, 6))
# Semi-transparent markers keep overlapping points readable.
plt.scatter(X_test, Y_test, alpha=0.6, label='Actual (test set)')
# Explicit colour and label so the fitted line stands out from the scatter
# points and can be identified in the legend.
plt.plot(X_test, Y_pred, color='red', linewidth=2, label='Regression line')
plt.xlabel('Reading Score')
plt.ylabel('Writing Score')
plt.title('Linear Regression Plot')
plt.legend()
plt.grid(True)
plt.show()