<a href="https://colab.research.google.com/github/saturn70/Python_Mercury/blob/main/Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Regression analysis

### Regression analysis is a statistical technique used to model and analyze the **relationship** between a **dependent variable** and one or more **independent variables**.

# Common regression models
### 1. Linear Regression
### 2. Polynomial Regression
### 3. Ridge Regression
### 4. Lasso Regression
### 5. ElasticNet Regression
### 6. Support Vector Regression
### 7. Decision Tree Regression
### 8. Random Forest Regression

## Import Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

## Load Data

In [None]:
# Load and prepare your data
# Reads the trees CSV directly from GitHub into a DataFrame (needs network access).
data = pd.read_csv('https://raw.githubusercontent.com/saturn70/R_Projects/main/Trees/trees.csv')
# As the last expression of the cell, this makes the notebook display the first rows.
data.head()

## Independent variables (X): Girth, Height
## Dependent variable (y): Volume

In [None]:
# Feature matrix: the Girth and Height columns, kept as a two-column DataFrame.
X = data[['Girth','Height']]
# Target: the Volume column, as a Series.
y = data['Volume']

In [None]:
# Preview the first rows of the feature matrix.
print(X.head())

In [None]:
# Preview the first values of the target.
print(y.head())

## Split the data into training and testing sets

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [None]:
# Sanity-check the split: show the first rows of each of the four pieces.
print(X_train.head())
print(X_test.head())
print(y_train.head())
print(y_test.head())

## Create a model
## **LinearRegression()**

In [None]:
# Create a linear regression model
# The estimator is unfitted at this point; fitting happens in the next cell.
model = LinearRegression()
# Prints the estimator's text representation.
print(model)

In [None]:
# Train the model on the training data
# Only the training split is used, so X_test / y_test stay unseen for evaluation.
model.fit(X_train, y_train)

In [None]:
# Make predictions on the test data
# y_pred holds one predicted Volume per row of X_test.
y_pred = model.predict(X_test)
print(y_pred)

In [None]:
# Evaluate the model
# Score the linear-regression predictions against the held-out targets.
# Each metric is printed with a label so the two numbers can be told apart,
# in the same format the Ridge / ElasticNet / SVR sections use.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared (R2) Score: {r2}')

In [None]:
# Visualize
# Actual test targets against the test features. X_test has two columns
# (Girth, Height), so matplotlib draws one line per column against y_test.
# NOTE(review): rows are in split order rather than sorted by feature value, so
# the connecting lines may zig-zag; a scatter plot may read better - confirm.
plt.plot(X_test,y_test,color='blue')
plt.show()

In [None]:
# Predicted values against the same test features, drawn in red as a separate figure.
plt.plot(X_test, y_pred, color='red', linewidth=2)
plt.show()

# Polynomial Regression
## Create a model

In [None]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [None]:
# Reminder of the feature matrix that the polynomial expansion below is built from.
print(X.head())

In [None]:
# No reshape is needed here: X is already a two-column DataFrame (Girth, Height).
# Earlier experiments, left disabled:
#X = X.reshape(10,2)
#X = np.column_stack((X1, X2))

In [None]:
# Create polynomial features (e.g., 2nd-degree polynomial)
from sklearn.preprocessing import PolynomialFeatures  # repeat of the import in the cell above
poly = PolynomialFeatures(degree=2)
# Fit the transformer on X and produce the expanded feature matrix in one step.
X_poly = poly.fit_transform(X)

In [None]:
# Fit a linear regression model to the polynomial features
# NOTE: this rebinds `model`, replacing the plain linear model fitted earlier,
# and trains on the full dataset (X_poly, y) rather than on a train/test split.
model = LinearRegression()
model.fit(X_poly, y)

In [None]:
# Generate predictions
# Build the inputs to predict on: 100 evenly spaced values from 1 to 5, laid out
# as 50 rows x 2 columns so each row supplies both features the model was fit on.
# -1 is NumPy's documented "infer this dimension" placeholder (the previous -2
# relied on undocumented handling of other negative values).
# NOTE(review): the 1-5 range may lie outside the observed Girth/Height values,
# in which case these predictions are extrapolations - confirm against the data.
X_pred = np.linspace(1, 5, 100).reshape(-1, 2)
print(X_pred)

In [None]:
# Expand the prediction inputs with the same transformer that was fitted on X,
# so the columns line up with what the model was trained on.
X_pred_poly = poly.transform(X_pred)
print(X_pred_poly)

In [None]:
# Predict with the polynomial model; this overwrites y_pred from the linear section.
y_pred = model.predict(X_pred_poly)
print(y_pred)

In [None]:
# Visualize the results
# Full dataset: target against both feature columns (one line per column).
# The label is only shown if plt.legend() is called, which this cell does not do.
plt.plot(X, y, color='blue', label='Data')
plt.show()

In [None]:
# Draw the degree-2 model's predictions over the prediction inputs,
# then label the axes, add the legend and render the figure.
plt.plot(
    X_pred,
    y_pred,
    label='Polynomial Regression (degree=2)',
    color='red',
)
plt.ylabel('Y')
plt.xlabel('X')
plt.legend()
plt.show()

# Ridge Regression

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
#alpha = 1.0  # You can adjust the alpha value to control the regularization strength
# Ridge regression with regularization strength alpha=1.
# This section makes no split of its own, so X_train / y_train are whatever the
# most recent train_test_split cell produced (the 70/30 split when the notebook
# is run top to bottom).
ridge_model = Ridge(alpha=1)
ridge_model.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with the fitted Ridge model.
y_pred = ridge_model.predict(X_test)
print(y_pred)

In [None]:
# Score the Ridge predictions against the held-out targets and report both metrics.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}', f'R-squared (R2) Score: {r2}', sep='\n')

In [None]:
# Actual test targets against the test features (one line per feature column).
# The label is only shown if plt.legend() is called, which this cell does not do.
plt.plot(X_test, y_test, color='orange', label='Actual Data')
plt.show()

In [None]:
# Ridge predictions against the test features, with axis labels and a legend.
plt.plot(
    X_test,
    y_pred,
    label='Ridge Regression',
    linewidth=1.5,
    color='red',
)
plt.ylabel('Y')
plt.xlabel('X')
plt.legend()
plt.show()

# Lasso Regression

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# Re-split for the Lasso section: 20% of the rows held out, fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Regularization strength for Lasso. It must be assigned in this cell: without
# it `alpha` is undefined when the notebook runs top to bottom, and
# Lasso(alpha=alpha) raises NameError.
alpha = 1.0  # You can adjust the alpha value to control the regularization strength
lasso_model = Lasso(alpha=alpha)
# Fit on the training split only; the trailing expression displays the estimator.
lasso_model.fit(X_train, y_train)

## Model Predict

In [None]:
# Predict the held-out rows with the fitted Lasso model.
y_pred=lasso_model.predict(X_test)
print(y_pred)

## Evaluate Model

In [None]:
# Score the Lasso predictions against the held-out targets.
# Each metric is printed with a label so the two numbers can be told apart,
# in the same format the Ridge / ElasticNet / SVR sections use.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared (R2) Score: {r2}')

In [None]:
# Actual test targets against the test features (one line per feature column).
# The label is only shown if plt.legend() is called, which this cell does not do.
plt.plot(X_test, y_test, color='blue', label='Actual Data')
plt.show()

In [None]:
# Lasso predictions against the test features, with axis labels and a legend.
plt.plot(
    X_test,
    y_pred,
    label='Lasso Regression Prediction',
    linewidth=2,
    color='red',
)
plt.ylabel('Y')
plt.xlabel('X')
plt.legend()
plt.show()

# ElasticNet regression
## ElasticNet is a linear regression technique that combines L1 (Lasso) and L2 (Ridge) regularization: it adds a penalty to the linear regression cost function that includes both the L1 and the L2 terms.

## This combination allows ElasticNet to handle multicollinearity (correlation between independent variables) while also encouraging feature selection.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Split for the ElasticNet section: 20% of the rows held out, fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
alpha = 1.0  # You can adjust the alpha value to control the regularization strength
l1_ratio = 0.5  # You can adjust the ratio between L1 and L2 regularization
# Build the estimator from the two settings above and fit it on the training split;
# the trailing fit(...) expression makes the notebook display the estimator.
elasticnet_model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
elasticnet_model.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with the fitted ElasticNet model.
y_pred = elasticnet_model.predict(X_test)
print(y_pred)

In [None]:
# Score the ElasticNet predictions against the held-out targets and report both metrics.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}', f'R-squared (R2) Score: {r2}', sep='\n')

In [None]:
# Actual test targets against the test features (one line per feature column).
# The label is only shown if plt.legend() is called, which this cell does not do.
plt.plot(X_test, y_test, color='blue', label='Actual Data')
plt.show()

In [None]:
# ElasticNet predictions against the test features, with axis labels and a legend.
plt.plot(
    X_test,
    y_pred,
    label='ElasticNet Regression Prediction',
    linewidth=2,
    color='red',
)
plt.ylabel('Y')
plt.xlabel('X')
plt.legend()
plt.show()

# Support Vector Regression

In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [None]:
# Split for the SVR section: 20% of the rows held out, fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Example with a Radial Basis Function (RBF) kernel
# Fitted on the training split only.
# NOTE(review): the features are passed in unscaled; RBF-kernel SVR is usually
# sensitive to feature scale - consider standardizing first, confirm on this data.
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.2)
svr_model.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with the fitted SVR model.
y_pred = svr_model.predict(X_test)
print(y_pred)

In [None]:
# Score the SVR predictions against the held-out targets and report both metrics.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}', f'R-squared (R2) Score: {r2}', sep='\n')

In [None]:
# Actual test targets against the test features (one line per feature column).
# The label is only shown if plt.legend() is called, which this cell does not do.
plt.plot(X_test, y_test, color='blue', label='Actual Data')
plt.show()

In [None]:
# SVR predictions against the test features, with axis labels and a legend.
plt.plot(
    X_test,
    y_pred,
    label='SVR Prediction',
    linewidth=2,
    color='red',
)
plt.ylabel('Y')
plt.xlabel('X')
plt.legend()
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Split for the decision-tree section: 20% of the rows held out, fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
decision_tree_model = DecisionTreeRegressor(random_state=0)  # You can specify other hyperparameters as well
decision_tree_model.fit(X, y)

In [None]:
y_pred = decision_tree_model.predict(X)

In [None]:
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared (R2) Score: {r2}')

In [None]:
plt.plot(X, y, color='blue', label='Actual Data')
plt.show()


In [None]:
plt.plot(X, y_pred, color='red', label='Decision Tree Regression Prediction')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()

# Random Forest Regression

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Split for the random-forest section: 20% of the rows held out, fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
#n_estimators = 100  # You can adjust the number of trees
# Forest of 100 trees with a fixed seed, fitted on the training split only.
random_forest_model = RandomForestRegressor(n_estimators=100, random_state=0)
random_forest_model.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with the fitted random forest.
y_pred = random_forest_model.predict(X_test)
print(y_pred)

In [None]:
# Score the random-forest predictions against the held-out targets and report both metrics.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}', f'R-squared (R2) Score: {r2}', sep='\n')

In [None]:
# Actual test targets against the test features (one line per feature column).
# The label is only shown if plt.legend() is called, which this cell does not do.
plt.plot(X_test, y_test, color='blue', label='Actual Data')
plt.show()