In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning for the rest of the session. This also hides
# deprecation notices raised by the libraries imported above.
warnings.filterwarnings("ignore")

In [None]:
# Load the dataset; the relative path is resolved against the kernel's
# current working directory.
df = pd.read_csv("cleaned_data.csv")

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Frequency of each raw weight value. The "weight_kg" column is only created
# further down (as a copy of "weight"), so on a fresh kernel it does not exist
# yet at this point; inspect the source column instead.
df["weight"].value_counts()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Copy the raw columns under names that carry their unit; the originals are
# kept here and removed in a later step.
for source_col, target_col in (("age", "age_years"), ("weight", "weight_kg")):
    df[target_col] = df[source_col]

In [None]:
# Remove the identifier column and the two raw columns that were copied to
# "age_years" / "weight_kg".
df = df.drop(columns=["cattle-id", "age", "weight"])

In [None]:
# Check the frame after the column copy and drop.
df.head()

In [None]:
def clean_age(data):
    """Lower-case an age string and remove the unit text "years".

    Only the literal substring "years" is removed; surrounding whitespace is
    left in place and the result is still a string.

    Args:
        data: Raw age value. Must be a ``str`` because ``.lower()`` is called
            on it.

    Returns:
        The lower-cased string with every occurrence of "years" removed.
    """
    return data.lower().replace("years", "")

In [None]:
def clean_weight(data):
    """Lower-case a weight string and remove the unit text "kg".

    Only the literal substring "kg" is removed; surrounding whitespace is
    left in place and the result is still a string.

    Args:
        data: Raw weight value. Must be a ``str`` because ``.lower()`` is
            called on it.

    Returns:
        The lower-cased string with every occurrence of "kg" removed.
    """
    return data.lower().replace("kg", "")

In [None]:
# Strip the unit text and convert both columns to a numeric dtype, so later
# numeric steps receive numbers rather than strings.
# astype(str) lets the cell be re-run after the columns were already
# converted: numeric values are stringified first instead of failing inside
# the cleaners' .lower() call.
df["age_years"] = pd.to_numeric(
    df["age_years"].astype(str).apply(clean_age).str.strip()
)
df["weight_kg"] = pd.to_numeric(
    df["weight_kg"].astype(str).apply(clean_weight).str.strip()
)

In [None]:
# Check the cleaned "age_years" / "weight_kg" values.
df.head()

In [None]:
# Number of rows per breed.
df["breed"].value_counts()

In [None]:
# Bar chart of how many rows belong to each breed.
breed_ax = sns.countplot(x="breed", data=df, palette="Set2")
breed_ax.set_title("Breed Distribution", fontsize=14)
breed_ax.set_xlabel("Breed", fontsize=12)
breed_ax.set_ylabel("Count", fontsize=12)
# Tilt the x tick labels by 45 degrees.
plt.xticks(rotation=45, fontsize=10)
plt.yticks(fontsize=10)
plt.show()

In [None]:
# One bar per color showing the aggregated price (seaborn's barplot uses the
# mean unless another estimator is passed).
price_ax = sns.barplot(x="color", y="price", data=df, palette="pastel")
price_ax.set_title("Average Price by Color", fontsize=14)
price_ax.set_xlabel("Color", fontsize=12)
price_ax.set_ylabel("Average Price", fontsize=12)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.show()

In [None]:
# Final look at the frame before building the model inputs.
df.head()

In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.pipeline import Pipeline

In [None]:
# Separate the target column from the feature columns.
target_col = "price"
y = df[target_col]
X = df.drop(columns=[target_col])

In [None]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Columns that are standardized vs. one-hot encoded; any other column is
# passed through unchanged.
numeric_features = ["age_years", "weight_kg"]
categorical_features = ['color', 'breed']

transformer = ColumnTransformer(
    transformers=[
        ('tnf1', StandardScaler(), numeric_features),
        # Dense output; the first level of each categorical is dropped.
        ('tnf2', OneHotEncoder(sparse_output=False, drop='first'), categorical_features),
    ],
    remainder='passthrough',
)

In [None]:
# Learn the preprocessing parameters from the training split only, then apply
# the same fitted transformer to the test split. X_train / X_test are rebound
# to the transformed output, so the raw frames are no longer available under
# these names.
X_train = transformer.fit_transform(X_train)
X_test = transformer.transform(X_test)

In [None]:
from sklearn.preprocessing import StandardScaler

# StandardScaler expects 2-D input, so reshape each target to a single column.
y_train_2d = y_train.values.reshape(-1, 1)
y_test_2d = y_test.values.reshape(-1, 1)

# Standardize the target with statistics learned from the training split only.
# y_train / y_test are rebound to the scaled (n, 1) arrays, so anything
# computed from them afterwards is in standardized units.
scaler = StandardScaler()
y_train = scaler.fit_transform(y_train_2d)
y_test = scaler.transform(y_test_2d)


In [None]:
# Ordinary least-squares regressor with default settings.
lr = LinearRegression()

In [None]:
# Fit on the transformed training features and the standardized training target.
lr.fit(X_train,y_train)

In [None]:
# Predictions are on the standardized target scale, because lr was trained on
# the scaled y_train.
y_pred = lr.predict(X_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred) in that order. R² is not
# symmetric in its arguments, so the ground truth must come first.
# y_test was standardized earlier, so MAE / MSE are in standardized units,
# not in the original price units.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

In [None]:
# Report each metric under its own label (the "mae" line previously printed
# the MSE value).
print("R2 Score",r2)
print("mae",mae)
print("mse",mse)

In [None]:
# Chain preprocessing and regression into a single estimator.
# NOTE(review): both steps are the objects already fitted in earlier cells
# (lr on pre-transformed features and a standardized target); the pipeline
# itself is not fitted in this cell — confirm how it is used afterwards.
pipeline_steps = [
    ("preprocessing", transformer),
    ("regressor", lr),
]
pipe = Pipeline(pipeline_steps)