In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, root_mean_squared_error, mean_absolute_error

# Apply seaborn's default theme to every plot drawn later in the notebook.
# set_theme() already configures the plot style, so no separate
# set_style() call is needed.
sns.set_theme()

In [None]:
# Column reference for the abalone data set.
#
# Features:
#   "length"     - longest shell measurement.
#   "diameter"   - perpendicular to the length.
#   "height"     - measured with meat in the shell.
#   "whole_wt"   - whole abalone weight.
#   "shucked_wt" - the weight of abalone meat.
#   "viscera_wt" - gut-weight.
#   "shell_wt"   - the weight of the dried shell.
#
# Target:
#   "age"        - the age of the abalone.

data = pd.read_csv('data/abalone2.csv')

# Preview the first rows instead of rendering the whole frame as cell output.
data.head()

In [None]:
# Histogram of every numeric column. The trailing semicolon suppresses the
# array-of-Axes repr without rebinding `_`, which IPython uses to hold the
# previous cell's output.
data.hist(figsize=(10, 10));

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Scatter each feature column against the target in a grid of subplots.

# Column treated as the regression target.
target_column = "age"

# Every other column in the frame is plotted as a feature.
feature_columns = [col for col in data.columns if col != target_column]

# Grid layout: fixed number of columns, as many rows as needed.
cols_per_row = 3
num_plots = len(feature_columns)
# Ceiling division; keep at least one row so plt.subplots() gets a valid
# grid even when there are no feature columns.
rows = max(1, -(-num_plots // cols_per_row))

# squeeze=False always returns a 2-D array of Axes, so flatten() works for
# any grid shape (single row, single column or single cell).
fig, axes = plt.subplots(rows, cols_per_row, figsize=(16, 8), squeeze=False)
axes = axes.flatten()

# One Axes per feature: feature on x, target on y.
for ax, col in zip(axes, feature_columns):
    ax.scatter(data[col], data[target_column], alpha=0.7, color="blue")
    ax.set_xlabel(col)
    ax.set_ylabel(target_column)
    ax.set_title(f"{col} vs {target_column}")

# Drop the unused trailing Axes. Slicing by num_plots (rather than reusing
# the loop index) keeps this correct when the plotting loop never ran.
for unused_ax in axes[num_plots:]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.show()

In [None]:
# Show the column labels of the loaded frame (rendered as the cell output).
data.columns

In [16]:
# Response variable: the "age" column of the loaded frame.
target = data['age']

# Predictor columns, in the order they are handed to the model.
feature_names = [
    'length',
    'diameter',
    'height',
    'whole_wt',
    'shucked_wt',
    'viscera_wt',
    'shell_wt',
]
features = data[feature_names]

In [17]:
# Create an unfitted linear regression estimator with default settings.
lr = LinearRegression()

In [None]:
# Fit the model on the full feature frame and the target series.
# NOTE(review): no rows are held out here, so any later evaluation on
# `features`/`target` is in-sample.
lr.fit(features, target)

In [None]:
# Display the fitted coefficients; they follow the column order of the
# `features` frame passed to fit().
lr.coef_

In [None]:
# Display the feature names the estimator recorded during fit(), to read
# alongside the coefficients.
lr.feature_names_in_

In [21]:
# Predict the target for the same rows the model was fitted on.
# NOTE(review): these are in-sample predictions, not a held-out evaluation.
predicted_values = lr.predict(features)

In [None]:
# Root mean squared error between the true target and the predictions.
# NOTE(review): `predicted_values` come from the training rows, so this is a
# training-set error.
root_mean_squared_error(target, predicted_values)

In [None]:
# Coefficient of determination (R^2) of the predictions against the true target.
# NOTE(review): `predicted_values` come from the training rows, so this is a
# training-set score.
r2_score(target, predicted_values)