
# Linear Regression Projects with F1 Score

This notebook contains five linear regression projects built on datasets from `sklearn`.
Each project reports an F1 score as its evaluation metric.
Note: the F1 score is a classification metric, so each project simulates a classification task: the continuous target is binarized at its median, a linear regression is fit to the resulting 0/1 labels, and its predictions are thresholded at 0.5 to obtain class labels.


## Project: Diabetes Dataset

In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import f1_score
from sklearn.datasets import load_diabetes
# Load the diabetes dataset bundled with scikit-learn.
data = load_diabetes()
X, y = data.data, data.target

# F1 needs class labels, so binarise the continuous target:
# 1 when the value is at or above the median, 0 otherwise.
threshold = np.median(y)
y_binary = np.where(y >= threshold, 1, 0)

# Hold out 20% of the rows for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    test_size=0.2,
    random_state=42,
)

# Fit ordinary least squares directly on the 0/1 labels.
model = LinearRegression().fit(X_train, y_train)

# Turn the continuous predictions into class labels by cutting at 0.5.
y_pred = model.predict(X_test)
y_pred_binary = np.where(y_pred >= 0.5, 1, 0)

# Score the thresholded predictions against the held-out labels.
f1 = f1_score(y_true=y_test, y_pred=y_pred_binary)
print("F1 Score:", f1)


## Project: California Housing Dataset

In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_california_housing
# Fetch the California housing dataset (downloaded and cached by scikit-learn
# on first use).
data = fetch_california_housing()
X = data.data
y = data.target

# Binarise the continuous target around its median so that F1 applies:
# values at or above the median become class 1, the rest class 0.
threshold = np.median(y)
is_upper_half = np.greater_equal(y, threshold)
y_binary = is_upper_half.astype(int)

# 80/20 train/test split with a fixed seed.
split = train_test_split(X, y_binary, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Least-squares fit on the 0/1 labels.
model = LinearRegression()
model.fit(X_train, y_train)

# Continuous predictions -> class labels, using 0.5 as the cut-off.
y_pred = model.predict(X_test)
y_pred_binary = np.greater_equal(y_pred, 0.5).astype(int)

# Report F1 on the held-out rows.
f1 = f1_score(y_test, y_pred_binary)
print("F1 Score:", f1)


## Project: Boston Housing Dataset

In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import f1_score
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# importing it raises ImportError. Use the loader when it still exists;
# otherwise rebuild the same arrays from the original StatLib file, which is
# the replacement recipe given in scikit-learn's deprecation notice.
# NOTE: scikit-learn dropped this dataset over ethical concerns about one of
# its features; prefer the California housing data for new work.
try:
    from sklearn.datasets import load_boston
except ImportError:
    load_boston = None

if load_boston is not None:
    data = load_boston()
else:
    from sklearn.utils import Bunch

    BOSTON_URL = "http://lib.stat.cmu.edu/datasets/boston"
    # After a 22-line header, every record spans two physical lines:
    # even rows carry the first 11 features, odd rows carry the last
    # 2 features followed by the target value.
    raw_df = pd.read_csv(BOSTON_URL, sep=r"\s+", skiprows=22, header=None)
    # Wrap in a Bunch so `data.data` / `data.target` keep working below.
    data = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )

# Prepare data
X = data.data
y = data.target

# Convert regression target to binary classification for F1 Score:
# 1 when the target is at or above the median, 0 otherwise.
threshold = np.median(y)
y_binary = (y >= threshold).astype(int)

# Split data (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

# Train model on the 0/1 labels
model = LinearRegression()
model.fit(X_train, y_train)

# Predict and threshold: continuous outputs become class labels at 0.5
y_pred = model.predict(X_test)
y_pred_binary = (y_pred >= 0.5).astype(int)

# Evaluate with F1 Score
f1 = f1_score(y_test, y_pred_binary)
print("F1 Score:", f1)


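## Project: Diabetes Dataset (variation)

**TODO:** this cell currently runs the same pipeline as the first Diabetes project (same data, split, model, and thresholds), so it reports the same F1 score. Change a setting here to make it an actual variation.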

In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import f1_score
from sklearn.datasets import load_diabetes
# Diabetes data: feature matrix and continuous target.
data = load_diabetes()
X = data.data
y = data.target

# Median split of the target gives the 0/1 labels that F1 requires
# (at or above the median -> 1).
threshold = np.median(y)
above_or_equal = y >= threshold
y_binary = above_or_equal.astype(int)

# Reserve 20% of the samples for testing; the seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary, test_size=0.2, random_state=42
)

# Linear regression fitted to the binary labels.
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions are real-valued; anything at or above 0.5 counts as class 1.
y_pred = model.predict(X_test)
predicted_positive = y_pred >= 0.5
y_pred_binary = predicted_positive.astype(int)

# F1 of the thresholded predictions on the test split.
f1 = f1_score(y_test, y_pred_binary)
print("F1 Score:", f1)


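## Project: California Housing Dataset (variation)

**TODO:** this cell currently runs the same pipeline as the first California Housing project (same data, split, model, and thresholds), so it reports the same F1 score. Change a setting here to make it an actual variation.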

In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_california_housing
# California housing data (scikit-learn downloads and caches it on first use).
data = fetch_california_housing()
X, y = data.data, data.target

# Label each sample 1 if its target is at or above the median, else 0,
# so that a classification metric can be computed.
threshold = np.median(y)
y_binary = np.where(y >= threshold, 1, 0)

# Fixed-seed 80/20 split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    random_state=42,
    test_size=0.2,
)

# Ordinary least squares on the binary labels.
model = LinearRegression().fit(X_train, y_train)

# Cut the continuous predictions at 0.5 to obtain predicted classes.
y_pred = model.predict(X_test)
y_pred_binary = np.where(y_pred >= 0.5, 1, 0)

# Evaluate on the held-out set.
f1 = f1_score(y_true=y_test, y_pred=y_pred_binary)
print("F1 Score:", f1)
