In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load your dataset
df = pd.read_csv("cars.csv")

# Create polynomial features
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit_transform(df[['car_model', 'miles_driven', 'age']])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_poly, df['sales_price'], test_size=0.2, random_state=42)

# Train the polynomial regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Print the coefficients
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)


ValueError: could not convert string to float: 'Honda Civic'

Mean Squared Error: 84947.35932864537
Intercept: 7819.622186055067
Coefficients: [-5.53968077e-02 -3.09607384e+01 -3.40696268e-07  7.51696546e-04
  1.35537785e+00  1.16849434e+02 -8.10346003e+01  3.51527394e+01
 -6.99974095e+01]


In [9]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import numpy as np

# Load the dataset
df = pd.read_csv("cars.csv")

# Preprocess the data
X = df[['car_model', 'miles_driven', 'age']]
y = df['sales_price']

# Perform one-hot encoding for car_model
X_encoded = pd.get_dummies(X, columns=['car_model'])

# Define the desired polynomial degree
degree = 3

# Create polynomial features
poly_features = PolynomialFeatures(degree=degree)
X_poly = poly_features.fit_transform(X_encoded)

# Create and fit the polynomial regression model
model = LinearRegression()
model.fit(X_poly, y)

# Calculate RMSE using cross-validation
mse = -cross_val_score(model, X_poly, y, cv=5, scoring='neg_mean_squared_error')
rmse = np.sqrt(mse.mean())

# Print the polynomial equation and RMSE
equation = "sales_price = "
for i, coef in enumerate(model.coef_):
    equation += f" {round(coef, 2)} * X{[i]} +"
equation += f" {round(model.intercept_, 2)}"
print("Polynomial Regression Equation:", equation)
print("Root Mean Squared Error:", round(rmse, 2))

Polynomial Regression Equation: sales_price =  22.14 * X[0] + -3.09 * X[1] + -4.04 * X[2] + -106.19 * X[3] + -20.12 * X[4] + 22.14 * X[5] + 97.81 * X[6] + 6.32 * X[7] + -0.1 * X[8] + -0.03 * X[9] + 1.47 * X[10] + 1.62 * X[11] + 1.81 * X[12] + 1.6 * X[13] + 3.54 * X[14] + 16.12 * X[15] + 67.77 * X[16] + -25.51 * X[17] + 4.69 * X[18] + -39.65 * X[19] + -7.65 * X[20] + -105.71 * X[21] + -0.0 * X[22] + -0.0 * X[23] + 0.0 * X[24] + 0.0 * X[25] + -20.32 * X[26] + -0.0 * X[27] + -0.0 * X[28] + 0.0 * X[29] + 22.19 * X[30] + 0.0 * X[31] + -0.0 * X[32] + 97.52 * X[33] + 0.0 * X[34] + 6.31 * X[35] + 0.0 * X[36] + 0.0 * X[37] + 0.1 * X[38] + 0.1 * X[39] + 0.1 * X[40] + 0.1 * X[41] + 0.1 * X[42] + -0.0 * X[43] + 0.03 * X[44] + 0.03 * X[45] + 0.03 * X[46] + 0.03 * X[47] + 0.03 * X[48] + 1.71 * X[49] + 0.0 * X[50] + 0.0 * X[51] + 0.0 * X[52] + 0.0 * X[53] + 1.54 * X[54] + 0.0 * X[55] + 0.0 * X[56] + 0.0 * X[57] + 1.36 * X[58] + 0.0 * X[59] + 0.0 * X[60] + 1.56 * X[61] + 0.0 * X[62] + -0.36 * X[63] + 

In [10]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("cars.csv")

# Extract independent variables (features) and dependent variable
X = df[['miles_driven', 'age']]
y = df['sales_price']

# Convert categorical variable 'car_model' to numerical using one-hot encoding
X = pd.get_dummies(df['car_model']).join(X)

# Combine features into a single array
X_array = X.values.array()

# Define the degree of the polynomial
degree = 2

# Create polynomial features manually
def create_polynomial_features(X, degree):
    n_samples, n_features = X.shape
    X_poly = np.ones((n_samples, 1))  # Start with a column of ones for the bias term

    for d in range(1, degree + 1):
        X_poly = np.hstack((X_poly, X**d))

    return X_poly

X_poly = create_polynomial_features(X_array, degree)

# Solve for the coefficients using the normal equation
coefficients = np.linalg.inv(X_poly.T @ X_poly) @ X_poly.T @ y.values

# Extract coefficients for each term
D = coefficients[0]
C = coefficients[1:(degree+1)]
B = coefficients[(degree+1):(2*degree+1)]
A = np.zeros(degree)  # Since degree 1 terms don't exist in this example

# Print the coefficients
print("Coefficients:")
print("D =", D)
print("C =", C)
print("B =", B)
print("A =", A)

# Define the polynomial equation
def polynomial_equation(x):
    return D + np.dot(C, x) + np.dot(B, x**2)

# Predict the target variable on the test set
y_pred = [polynomial_equation(x) for x in X_array]

# Calculate the root mean squared error manually
rmse = np.sqrt(np.mean((y - y_pred)**2))

# Print the root mean squared error
print("Root Mean Squared Error:", rmse)


LinAlgError: Singular matrix

In [14]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("cars.csv")

# Extract independent variables (features) and dependent variable
X = df[['miles_driven', 'age']]
y = df['sales_price']

# Convert categorical variable 'car_model' to numerical using one-hot encoding
X = pd.get_dummies(df['car_model']).join(X)

# Standardize the features (subtract mean and divide by standard deviation)
X_mean = X.mean()
X_std = X.std()
X_scaled = (X - X_mean) / X_std

# Combine features into a single array
X_array = X_scaled.values

# Define the degree of the polynomial
degree = 2

# Create polynomial features manually
def create_polynomial_features(X, degree):
    n_samples, n_features = X.shape
    X_poly = np.ones((n_samples, 1))  # Start with a column of ones for the bias term

    for d in range(1, degree + 1):
        X_poly = np.hstack((X_poly, X**d))

    return X_poly

X_poly = create_polynomial_features(X_array, degree)

# Solve for the coefficients using the normal equation
coefficients = np.linalg.inv(X_poly.T @ X_poly) @ X_poly.T @ y.values

# Extract coefficients for each term
D = coefficients[0]
C = coefficients[1:(degree+1)]
B = coefficients[(degree+1):(2*degree+1)]
A = np.zeros(degree)  # Since degree 1 terms don't exist in this example

# Print the coefficients
print("Coefficients:")
print("D =", D)
print("C =", C)
print("B =", B)
print("A =", A)

# Define the polynomial equation
def polynomial_equation(x):
    return D + np.dot(x, C) + np.dot(x**2, B)

# Predict the target variable on the test set
y_pred = [polynomial_equation(x) for x in X_array]

# Calculate the root mean squared error manually
rmse = np.sqrt(np.mean((y - y_pred)**2))

# Print the root mean squared error
print("Root Mean Squared Error:", rmse)


Coefficients:
D = -259.1407783893079
C = [ 417.27492646 1180.65998044]
B = [3302.79918613  777.75570552]
A = [0. 0.]


ValueError: shapes (7,) and (2,) not aligned: 7 (dim 0) != 2 (dim 0)

In [15]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("cars.csv")

# Extract independent variables (features) and dependent variable
X = df[['miles_driven', 'age']]
y = df['sales_price']

# Convert categorical variable 'car_model' to numerical using one-hot encoding
X = pd.get_dummies(df['car_model']).join(X)

# Standardize the features (subtract mean and divide by standard deviation)
X_mean = X.mean()
X_std = X.std()
X_scaled = (X - X_mean) / X_std

# Combine features into a single array
X_array = X_scaled.values

# Define the degree of the polynomial
degree = 2

# Create polynomial features manually
def create_polynomial_features(X, degree):
    n_samples, n_features = X.shape
    X_poly = np.ones((n_samples, 1))  # Start with a column of ones for the bias term

    for d in range(1, degree + 1):
        X_poly = np.hstack((X_poly, X**d))

    return X_poly

X_poly = create_polynomial_features(X_array, degree)

# Solve for the coefficients using the normal equation
coefficients = np.linalg.inv(X_poly.T @ X_poly) @ X_poly.T @ y.values

# Extract coefficients for each term
D = coefficients[0]
C = coefficients[1:(degree+1)]
B = coefficients[(degree+1):(2*degree+1)]
A = np.zeros(degree)  # Since degree 1 terms don't exist in this example

# Print the coefficients
print("Coefficients:")
print("D =", D)
print("C =", C)
print("B =", B)
print("A =", A)

# Define the polynomial equation
def polynomial_equation(x):
    return D + np.dot(x, C) + np.dot(x**2, B)

# Predict the target variable on the test set
y_pred = [polynomial_equation(x) for x in X_array]

# Calculate the root mean squared error manually
rmse = np.sqrt(np.mean((y - y_pred)**2))

# Print the root mean squared error
print("Root Mean Squared Error:", rmse)



Coefficients:
D = -259.1407783893079
C = [ 417.27492646 1180.65998044]
B = [3302.79918613  777.75570552]
A = [0. 0.]


ValueError: shapes (7,) and (2,) not aligned: 7 (dim 0) != 2 (dim 0)

In [16]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("cars.csv")

# Extract independent variables (features) and dependent variable
X = df[['miles_driven', 'age']]
y = df['sales_price']

# Convert categorical variable 'car_model' to numerical using one-hot encoding
X = pd.get_dummies(df['car_model']).join(X)

# Standardize the features (subtract mean and divide by standard deviation)
X_mean = X.mean()
X_std = X.std()
X_scaled = (X - X_mean) / X_std

# Combine features into a single array
X_array = X_scaled.values

# Define the degree of the polynomial
degree = 2

# Create polynomial features manually
def create_polynomial_features(X, degree):
    n_samples, n_features = X.shape
    X_poly = np.ones((n_samples, 1))  # Start with a column of ones for the bias term

    for d in range(1, degree + 1):
        X_poly = np.hstack((X_poly, X**d))

    return X_poly

X_poly = create_polynomial_features(X_array, degree)

# Solve for the coefficients using the normal equation
coefficients = np.linalg.inv(X_poly.T @ X_poly) @ X_poly.T @ y.values

# Extract coefficients for each term
D = coefficients[0]
C = coefficients[1:(degree+1)]
B = coefficients[(degree+1):(2*degree+1)]
A = np.zeros(degree)  # Since degree 1 terms don't exist in this example

# Print the coefficients
print("Coefficients:")
print("D =", D)
print("C =", C)
print("B =", B)
print("A =", A)

# Define the polynomial equation
def polynomial_equation(x):
    return D + np.dot(x, C) + np.dot(x**2, B)

# Predict the target variable on the test set
y_pred = [polynomial_equation(x) for x in X_array]

# Calculate the root mean squared error manually
rmse = np.sqrt(np.mean((y - y_pred)**2))

# Print the root mean squared error
print("Root Mean Squared Error:", rmse)


Coefficients:
D = -259.1407783893079
C = [ 417.27492646 1180.65998044]
B = [3302.79918613  777.75570552]
A = [0. 0.]


ValueError: shapes (7,) and (2,) not aligned: 7 (dim 0) != 2 (dim 0)

In [17]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("cars.csv")

# Extract independent variables (features) and dependent variable
X = df[['miles_driven', 'age']]
y = df['sales_price']

# Convert categorical variable 'car_model' to numerical using one-hot encoding
X = pd.get_dummies(df['car_model']).join(X)

# Standardize the features (subtract mean and divide by standard deviation)
X_mean = X.mean()
X_std = X.std()
X_scaled = (X - X_mean) / X_std

# Combine features into a single array
X_array = X_scaled.values

# Define the degree of the polynomial
degree = 2

# Create polynomial features manually
def create_polynomial_features(X, degree):
    n_samples, n_features = X.shape
    X_poly = np.ones((n_samples, 1))  # Start with a column of ones for the bias term

    for d in range(1, degree + 1):
        X_poly = np.hstack((X_poly, X**d))

    return X_poly

X_poly = create_polynomial_features(X_array, degree)

# Solve for the coefficients using the normal equation
coefficients = np.linalg.inv(X_poly.T @ X_poly) @ X_poly.T @ y.values

# Extract coefficients for each term
D = coefficients[0]
C = coefficients[1:(degree+1)]
B = coefficients[(degree+1):(2*degree+1)]
A = np.zeros(degree)  # Since degree 1 terms don't exist in this example

# Print the coefficients
print("Coefficients:")
print("D =", D)
print("C =", C)
print("B =", B)
print("A =", A)

# Define the polynomial equation
def polynomial_equation(x):
    return D + np.dot(C, x[1:]) + np.dot(x[1:]**2, B)

# Predict the target variable on the test set
y_pred = [polynomial_equation(x) for x in X_poly]

# Calculate the root mean squared error manually
rmse = np.sqrt(np.mean((y - y_pred)**2))

# Print the root mean squared error
print("Root Mean Squared Error:", rmse)


Coefficients:
D = -259.1407783893079
C = [ 417.27492646 1180.65998044]
B = [3302.79918613  777.75570552]
A = [0. 0.]


ValueError: shapes (2,) and (14,) not aligned: 2 (dim 0) != 14 (dim 0)

In [18]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("cars.csv")

# Extract independent variables (features) and dependent variable
X = df[['miles_driven', 'age']]
y = df['sales_price']

# Convert categorical variable 'car_model' to numerical using one-hot encoding
X = pd.get_dummies(df['car_model']).join(X)

# Standardize the features (subtract mean and divide by standard deviation)
X_mean = X.mean()
X_std = X.std()
X_scaled = (X - X_mean) / X_std

# Combine features into a single array
X_array = X_scaled.values

# Define the degree of the polynomial
degree = 2

# Create polynomial features manually
def create_polynomial_features(X, degree):
    n_samples, n_features = X.shape
    X_poly = np.ones((n_samples, 1))  # Start with a column of ones for the bias term

    for d in range(1, degree + 1):
        X_poly = np.hstack((X_poly, X**d))

    return X_poly

X_poly = create_polynomial_features(X_array, degree)

# Solve for the coefficients using the normal equation
coefficients = np.linalg.inv(X_poly.T @ X_poly) @ X_poly.T @ y.values

# Extract coefficients for each term
D = coefficients[0]
C = coefficients[1:(degree+1)]
B = coefficients[(degree+1):(2*degree+1)]
A = np.zeros(degree)  # Since degree 1 terms don't exist in this example

# Print the coefficients
print("Coefficients:")
print("D =", D)
print("C =", C)
print("B =", B)
print("A =", A)

# Define the polynomial equation
def polynomial_equation(x):
    return D + np.dot(x[1:], C) + np.dot(x[1:]**2, B)

# Predict the target variable on the test set
y_pred = [polynomial_equation(x) for x in X_poly]

# Calculate the root mean squared error manually
rmse = np.sqrt(np.mean((y - y_pred)**2))

# Print the root mean squared error
print("Root Mean Squared Error:", rmse)


Coefficients:
D = -259.1407783893079
C = [ 417.27492646 1180.65998044]
B = [3302.79918613  777.75570552]
A = [0. 0.]


ValueError: shapes (14,) and (2,) not aligned: 14 (dim 0) != 2 (dim 0)

In [19]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("cars.csv")

# Extract independent variables (features) and dependent variable
X = df[['miles_driven', 'age']]
y = df['sales_price']

# Convert categorical variable 'car_model' to numerical using one-hot encoding
X = pd.get_dummies(df['car_model']).join(X)

# Standardize the features (subtract mean and divide by standard deviation)
X_mean = X.mean()
X_std = X.std()
X_scaled = (X - X_mean) / X_std

# Combine features into a single array
X_array = X_scaled.values

# Define the degree of the polynomial
degree = 2

# Create polynomial features manually
def create_polynomial_features(X, degree):
    n_samples, n_features = X.shape
    X_poly = np.ones((n_samples, 1))  # Start with a column of ones for the bias term

    for d in range(1, degree + 1):
        X_poly = np.hstack((X_poly, X**d))

    return X_poly

X_poly = create_polynomial_features(X_array, degree)

# Solve for the coefficients using the normal equation
coefficients = np.linalg.inv(X_poly.T @ X_poly) @ X_poly.T @ y.values

# Extract coefficients for each term
D = coefficients[0]
C = coefficients[1:(degree+1)]
B = coefficients[(degree+1):(2*degree+1)]
A = np.zeros(degree)  # Since degree 1 terms don't exist in this example

# Print the coefficients
print("Coefficients:")
print("D =", D)
print("C =", C)
print("B =", B)
print("A =", A)

# Define the polynomial equation
def polynomial_equation(x):
    return D + np.dot(x[1:(degree+1)], C) + np.dot(x[(degree+1):], B)

# Predict the target variable on the test set
y_pred = [polynomial_equation(x) for x in X_poly]

# Calculate the root mean squared error manually
rmse = np.sqrt(np.mean((y - y_pred)**2))

# Print the root mean squared error
print("Root Mean Squared Error:", rmse)


Coefficients:
D = -259.1407783893079
C = [ 417.27492646 1180.65998044]
B = [3302.79918613  777.75570552]
A = [0. 0.]


ValueError: shapes (12,) and (2,) not aligned: 12 (dim 0) != 2 (dim 0)

In [20]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("cars.csv")

# Extract independent variables (features) and dependent variable
X = df[['miles_driven', 'age']]
y = df['sales_price']

# Convert categorical variable 'car_model' to numerical using one-hot encoding
X = pd.get_dummies(df['car_model']).join(X)

# Standardize the features (subtract mean and divide by standard deviation)
X_mean = X.mean()
X_std = X.std()
X_scaled = (X - X_mean) / X_std

# Combine features into a single array
X_array = X_scaled.values

# Define the degree of the polynomial
degree = 2

# Create polynomial features manually
def create_polynomial_features(X, degree):
    n_samples, n_features = X.shape
    X_poly = np.ones((n_samples, 1))  # Start with a column of ones for the bias term

    for d in range(1, degree + 1):
        for feature in range(n_features):
            X_poly = np.hstack((X_poly, X[:, feature].reshape(-1, 1)**d))

    return X_poly

X_poly = create_polynomial_features(X_array, degree)

# Solve for the coefficients using the normal equation
coefficients = np.linalg.inv(X_poly.T @ X_poly) @ X_poly.T @ y.values

# Extract coefficients for each term
D = coefficients[0]
C = coefficients[1:(degree+1)]
B = coefficients[(degree+1):(2*degree+1)]
A = np.zeros(degree)  # Since degree 1 terms don't exist in this example

# Print the coefficients
print("Coefficients:")
print("D =", D)
print("C =", C)
print("B =", B)
print("A =", A)

# Define the polynomial equation
def polynomial_equation(x):
    return D + np.dot(x[1:(degree+1)], C) + np.dot(x[(degree+1):], B)

# Predict the target variable on the test set
y_pred = [polynomial_equation(x) for x in X_poly]

# Calculate the root mean squared error manually
rmse = np.sqrt(np.mean((y - y_pred)**2))

# Print the root mean squared error
print("Root Mean Squared Error:", rmse)


Coefficients:
D = -259.1407783893079
C = [ 417.27492646 1180.65998044]
B = [3302.79918613  777.75570552]
A = [0. 0.]


ValueError: shapes (12,) and (2,) not aligned: 12 (dim 0) != 2 (dim 0)

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Load the dataset
dataset_path = 'penguins.csv'
penguins_data = pd.read_csv(dataset_path)

# Assuming the target variable is 'body_mass_g'
X = penguins_data.drop('body_mass_g', axis=1)
y = penguins_data['body_mass_g']

# Preprocess the data if needed (handle missing values, encode categorical variables)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the decision tree regressor
regressor = DecisionTreeRegressor(random_state=42)
regressor.fit(X_train, y_train)

# Visualize the decision tree
plt.figure(figsize=(15, 10))
plot_tree(regressor, feature_names=X.columns, filled=True, rounded=True, fontsize=8)
plt.show()

# Make predictions on the test set
y_pred = regressor.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


FileNotFoundError: [Errno 2] No such file or directory: 'your_dataset_path.csv'