Title: Train a Linear Regression Model

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the Boston Housing dataset.
# NOTE: `sklearn.datasets.load_boston` was removed in scikit-learn 1.2, so
# importing it fails on current releases. The dataset is fetched from OpenML
# instead (this needs network access).
from sklearn.datasets import fetch_openml
boston = fetch_openml(name='boston', version=1, as_frame=True)

# OpenML may deliver some feature columns with a categorical dtype; cast
# everything to float so LinearRegression receives a purely numeric matrix.
df = boston.data.astype(float)
df['PRICE'] = boston.target.astype(float)

# Split the data into training and testing sets (80% / 20%, fixed seed so the
# reported error is reproducible across runs)
X = df.drop('PRICE', axis=1)
y = df['PRICE']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a Linear Regression model on the training split only
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = model.predict(X_test)

# Evaluate the model: mean squared error between true and predicted prices
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")



In [None]:
# Write your code here

Task 2: Predicting Car Mileage (MPG)<br>
Dataset: Use a dataset where features include horsepower, weight, and model year of cars.

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_openml
import numpy as np

# Load the Auto MPG dataset from OpenML (this needs network access)
mpg_data = fetch_openml('autoMpg', version=1)
df = pd.DataFrame(mpg_data.data, columns=mpg_data.feature_names)
df['mpg'] = mpg_data.target

# Normalise missing-value markers and coerce every column to numeric *before*
# reporting, so the count below also includes '?' placeholders and any entries
# that cannot be parsed as numbers.
df = df.replace('?', np.nan)
df = df.apply(pd.to_numeric, errors='coerce')
print("NaN values before handling:", df.isnull().sum())

# A row without a target value cannot be used for training or evaluation;
# drop it rather than inventing a target by imputation.
df = df.dropna(subset=['mpg'])

# Split the data into training and testing sets
X = df.drop('mpg', axis=1)
y = df['mpg']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing feature values with the *training* means only. Computing the
# means on the full dataset before splitting would leak test-set information
# into the model.
train_means = X_train.mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Create and train a Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")


Task 3: Predicting Students' Scores
Dataset: Use a dataset that contains study hours and corresponding student test scores.

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

# Build a small illustrative dataset: hours studied (2, 4, ..., 20) and the
# score obtained for each
data = {
    'Study Hours': list(range(2, 21, 2)),
    'Scores': [40, 50, 60, 70, 80, 85, 90, 92, 95, 98],
}
df = pd.DataFrame(data)

# Features must be 2-D (a one-column frame); the target is a plain Series
X = df[['Study Hours']]
y = df['Scores']

# Hold out 20% of the rows for evaluation, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit ordinary least squares on the training rows (fit returns the estimator)
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and report the mean squared error
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")

# Visualise the observations together with the fitted line over all rows
hours = df['Study Hours']
fitted_scores = model.predict(X)

fig, ax = plt.subplots()
ax.scatter(hours, df['Scores'], label='Actual Scores')
ax.plot(hours, fitted_scores, color='red', label='Regression Line')
ax.set_xlabel('Study Hours')
ax.set_ylabel('Scores')
ax.legend()
plt.show()
