In [None]:
#p5
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Program 5: fit a one-feature linear regression that predicts 'math score'
# from 'reading score', report the mean squared error on a held-out split,
# and predict the math score for a reading score of 70.


def _abort(message):
    """Print ``message`` and stop the current run with exit status 1.

    Raises SystemExit directly instead of calling ``exit(1)``: the ``exit``
    helper is injected for interactive sessions and is not guaranteed to halt
    execution of the remaining statements when run inside a notebook kernel.

    Raises:
        SystemExit: always, with code 1.
    """
    print(message)
    raise SystemExit(1)


# Step 1: Load dataset
# Raw string literal: in a normal string the backslashes of this Windows path
# start escape sequences (`\C`, `\D`, `\P` are invalid ones and produce a
# warning on recent Python versions). The resulting path text is unchanged.
file_path = r"N:\CS2225 DS\Datasets\Program5_StudentsPerformance.csv"
try:
    df = pd.read_csv(file_path, encoding='latin1')
    print("Original Data:\n", df.head())
except FileNotFoundError:
    _abort(f"Error: File '{file_path}' not found. Please check the file path.")
except pd.errors.EmptyDataError:
    _abort(f"Error: File '{file_path}' is empty or invalid.")
except Exception as e:
    # Anything else raised while reading/previewing (parse errors, permissions, ...).
    _abort(f"Error reading CSV file: {e}")


# Step 2: Check if required columns exist
required_columns = ['reading score', 'math score']
if not all(col in df.columns for col in required_columns):
    _abort(f"Error: Required columns {required_columns} not found. Available columns: {list(df.columns)}")


# Step 3: Prepare data
# Both columns are known to exist after the Step 2 check, so no KeyError
# handling is needed here.
X = df[['reading score']]  # double brackets keep X two-dimensional (a DataFrame)
y = df['math score']       # target as a Series


# Step 4: Train/Test split
try:
    # Hold out 20% of the rows for evaluation; the fixed random_state makes
    # the split repeatable between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
except Exception as e:
    _abort(f"Train/test split failed: {e}")


# Step 5: Model training
try:
    model = LinearRegression()
    model.fit(X_train, y_train)
except Exception as e:
    _abort(f"Model training failed: {e}")


# Step 6: Prediction and Evaluation
try:
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)
except Exception as e:
    _abort(f"Prediction or evaluation failed: {e}")


# Step 7: Predict new value
try:
    # The model was fitted on a DataFrame with a named column, so the new
    # sample is passed with the same column name; a bare [[70]] makes
    # scikit-learn warn that X has no valid feature names.
    new_value = pd.DataFrame({'reading score': [70]})
    predicted_score = model.predict(new_value)
    print(f"Predicted Math Score for Reading Score of 70: {predicted_score[0]:.2f}")
except Exception as e:
    _abort(f"Prediction for new value failed: {e}")


Original Data:
    gender race/ethnicity parental level of education         lunch  \
0  female        group B           bachelor's degree      standard   
1  female        group C                some college      standard   
2  female        group B             master's degree      standard   
3    male        group A          associate's degree  free/reduced   
4    male        group C                some college      standard   

  test preparation course  math score  reading score  writing score  
0                    none          72             72             74  
1               completed          69             90             88  
2                    none          90             95             93  
3                    none          47             57             44  
4                    none          76             78             75  
Mean Squared Error: 77.75953982761706
Predicted Math Score for Reading Score of 70: 66.87




In [None]:
#ex5
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Exercise 5: fit a simple linear regression of exam score on study hours
# using a small in-memory dataset, evaluate it on a held-out split, print the
# fitted line's parameters, and predict the score for 9.5 hours of study.

# Inline dataset: one row per student, hours studied and the score obtained.
data = {
    'Hours': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'Scores': [22, 30, 35, 47, 50, 60, 66, 72, 80, 85, 89, 95]
}
df = pd.DataFrame(data)
print("Original Data:\n", df)

# Feature matrix (kept two-dimensional via double brackets) and target Series.
X = df[['Hours']]
y = df['Scores']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Score the model on the held-out rows only.
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nModel Evaluation:")
print("Mean Squared Error:", mse)
print("R² Score:", r2)

# With a single feature, coef_ holds exactly one slope value.
print("\nModel Coefficients:")
print("Slope (Coefficient):", model.coef_[0])
print("Intercept:", model.intercept_)

# The model was fitted on a DataFrame whose column is named 'Hours', so the
# new sample is passed with the same column name; an unnamed NumPy array makes
# scikit-learn warn that X has no valid feature names.
new_hours = pd.DataFrame({'Hours': [9.5]})
predicted_score = model.predict(new_hours)
print(f"\nPredicted Score for 9.5 study hours: {predicted_score[0]:.2f}")


Original Data:
     Hours  Scores
0       1      22
1       2      30
2       3      35
3       4      47
4       5      50
5       6      60
6       7      66
7       8      72
8       9      80
9      10      85
10     11      89
11     12      95

Model Evaluation:
Mean Squared Error: 4.05795049051737
R² Score: 0.9956901635101892

Model Coefficients:
Slope (Coefficient): 6.688547486033517
Intercept: 17.826815642458115

Predicted Score for 9.5 study hours: 81.37


