In [16]:
#1
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import RidgeCV, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

df = pd.read_csv("D:/ai workforce/21 august/ridge_correlated_150.csv")

X = df[["x1", "x2"]]
y = df["y"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

alphas = np.logspace(-3, 3, 50)
ridge = RidgeCV(alphas=alphas)
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)

print("Best alpha:", ridge.alpha_)
print("Coefficients:", ridge.coef_)
print("Intercept:", ridge.intercept_)
print("Rsquare:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

Best alpha: 0.0071968567300115215
Coefficients: [ 4.41273599 -3.41175106]
Intercept: 0.012383721735237843
Rsquare: 0.6538727524919414
RMSE: 0.5705925764636008


In [17]:
#2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import RidgeCV, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

df = pd.read_csv("D:/ai workforce/21 august/ridge_10feat_150.csv")

X = df[["x1", "x2"]]
y = df["y"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

alphas = np.logspace(-3, 3, 50)
ridge_cv = RidgeCV(alphas=alphas)
ridge_cv.fit(X_train, y_train)
y_pred = ridge_cv.predict(X_test)

print("Best alpha:", ridge_cv.alpha_)
print("Coefficients:", ridge_cv.coef_)
print("Intercept:", ridge_cv.intercept_)
print("Rsquare:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

Best alpha: 0.1206792640639329
Coefficients: [ 3.87623039 -1.51643541]
Intercept: -0.013726759187761978
Rsquare: 0.9808450980365264
RMSE: 0.7713843686168288


In [18]:
#3
import pandas as pd
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

df = pd.read_csv("D:/ai workforce/21 august/lasso_sparse_150.csv")

X, y = df.iloc[:, :-1], df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LassoCV(cv=5).fit(X_train, y_train)

print("Best alpha:", model.alpha_)
print("Selected features:", X.columns[model.coef_ != 0].tolist())
print("Rsquare:", r2_score(y_test, model.predict(X_test)))
print("RMSE:", np.sqrt(mean_squared_error(y_test, model.predict(X_test))))

Best alpha: 0.06662744526920758
Selected features: ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x11', 'x12', 'x13', 'x16', 'x18', 'x21', 'x23', 'x24', 'x26', 'x28', 'x29']
Rsquare: 0.9620885670252837
RMSE: 1.1860751609124554


In [19]:
#4
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoCV
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

df = pd.read_csv("D:/ai workforce/21 august/lasso_groups_150.csv")
X = df[["xA","xB","xC","xD","xE"]]
y = df["y"]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s  = scaler.transform(X_test)

lasso = LassoCV(cv=5, random_state=0).fit(X_train_s, y_train)

coef_original = lasso.coef_ / scaler.scale_
intercept_original = lasso.intercept_ - (lasso.coef_ @ (scaler.mean_ / scaler.scale_))


y_pred = lasso.predict(X_test_s)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))


print("Best alpha:", lasso.alpha_)
print("Coefficients (original scale):")
for f, c in zip(X.columns, coef_original):
    print(f"{f}: {c:.6f}")
print("Intercept (original scale):", intercept_original)
print("Test Rsquare:", r2)
print("Test RMSE:", rmse)

Best alpha: 0.006791843214729933
Coefficients (original scale):
xA: 2.019080
xB: -0.000000
xC: 1.573574
xD: 0.000000
xE: -0.145555
Intercept (original scale): -0.05612014831050652
Test Rsquare: 0.918738934016131
Test RMSE: 0.8746060348849793


In [20]:
#5
import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_squared_error


df = pd.read_csv("D:/ai workforce/21 august/poly_quadratic_150.csv")
X, y = df[["x"]], df["y"]
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.25, random_state=42)


lin = LinearRegression().fit(Xtr, ytr)
ypl = lin.predict(Xte)

print("Linear R²:", r2_score(yte, ypl))
print("Linear RMSE:", np.sqrt(mean_squared_error(yte, ypl)))


quad = Pipeline([("poly", PolynomialFeatures(2, include_bias=False)),
                 ("lin", LinearRegression())]).fit(Xtr, ytr)
ypq = quad.predict(Xte)
lr = quad.named_steps["lin"]

print("intercept:", lr.intercept_)
print("coefficients:", dict(zip(["x","x²"], lr.coef_)))
print("Rsquare:", r2_score(yte, ypq))
print("RMSE:", np.sqrt(mean_squared_error(yte, ypq)))


print("Prediction at x=1.5:", quad.predict(pd.DataFrame([[1.5]], columns=["x"]))[0])



Linear R²: 0.8538381605808614
Linear RMSE: 1.8960081155340847
intercept: 1.962255007965543
coefficients: {'x': np.float64(3.0503360329717104), 'x²': np.float64(-0.6969014507509734)}
Rsquare: 0.9698005933583901
RMSE: 0.861831566389018
Prediction at x=1.5: 4.969730793233419
