In [38]:
import pandas as pd
data = {
    'number': [10, 20, 30, 40, 50]
}

# Corresponding labels
labels = [1, 2, 3, 4, 5]

# Create DataFrame for training data (X)
X = pd.DataFrame(data)

# Create DataFrame for labels (y)
y = pd.Series(labels)

In [39]:
data

{'number': [10, 20, 30, 40, 50]}

# Regression Algorithms

# Linear Regression:

In [40]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Assuming you have 'X' as the feature matrix and 'y' as the target vector

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.2, random_state=42)

# Create and train the linear regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = lr_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Linear Regression Model Performance:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)


Linear Regression Model Performance:
Mean Squared Error: 3.75
R-squared: -0.7142857142857142


# Logistic Regression:

In [41]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Assuming you have 'X' as the feature matrix and 'y' as the binary target vector (0 or 1)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the logistic regression model
logreg_model = LogisticRegression()
logreg_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = logreg_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Logistic Regression Model Performance:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)




ValueError: pos_label=1 is not a valid label: array([2, 5], dtype=int64)

# K-Means:

In [34]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Assuming you have 'X' as the feature matrix

# Scale the features for K-Means
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create and train the K-Means model
num_clusters = 3  # You can adjust the number of clusters as needed
kmeans_model = KMeans(n_clusters=num_clusters, random_state=42)
kmeans_model.fit(X_scaled)

# Get the cluster assignments for each data point
cluster_labels = kmeans_model.labels_

# (Optional) Get the cluster centers
cluster_centers = kmeans_model.cluster_centers_

# (Optional) Get the sum of squared distances to the nearest cluster center for each point
inertia = kmeans_model.inertia_

print("K-Means Clustering Results:")
print("Cluster Labels:", cluster_labels)
print("Cluster Centers:", cluster_centers)
print("Sum of Squared Distances (Inertia):", inertia)


K-Means Clustering Results:
Cluster Labels: [1 1 2 0 0]
Cluster Centers: [[ 1.06066017 -1.06066017  1.06066017]
 [-1.06066017  1.06066017 -1.06066017]
 [ 0.          0.          0.        ]]
Sum of Squared Distances (Inertia): 1.4999999999999998


# Clasification Algorithms

# Decision Tree:

In [24]:
from sklearn.tree import DecisionTreeRegressor
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.2, random_state=42)

# Create and train the decision tree model
dt_model = DecisionTreeRegressor()
dt_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = dt_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Decision Tree Model Performance:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)


Decision Tree Model Performance:
Mean Squared Error: 0.75
R-squared: -3.0


# Random Forest:

In [25]:
from sklearn.ensemble import RandomForestRegressor
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.2, random_state=42)

# Create and train the random forest model
rf_model = RandomForestRegressor()
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Random Forest Model Performance:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)


Random Forest Model Performance:
Mean Squared Error: 0.75
R-squared: -3.0




# Support Vector Machine (SVM):

In [26]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.2, random_state=42)

# Scale the features for SVM
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the SVM model
svm_model = SVR(kernel='linear')
svm_model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = svm_model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Support Vector Machine (SVM) Model Performance:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)


Support Vector Machine (SVM) Model Performance:
Mean Squared Error: 0.75
R-squared: -3.0


# K-Nearest Neighbors (KNN):

In [27]:
from sklearn.neighbors import KNeighborsRegressor
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.2, random_state=42)

# Create and train the KNN model
knn_model = KNeighborsRegressor(n_neighbors=5)  # You can adjust the number of neighbors (k) as needed
knn_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("K-Nearest Neighbors (KNN) Model Performance:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)


ValueError: Expected n_neighbors <= n_samples,  but n_samples = 1, n_neighbors = 5

# Gradient Boosting (e.g., XGBoost):

In [30]:
from xgboost import XGBRegressor

# Create and train the XGBoost model
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)  # You can adjust hyperparameters
xgb_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = xgb_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Gradient Boosting (XGBoost) Model Performance:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)


Gradient Boosting (XGBoost) Model Performance:
Mean Squared Error: 0.7455683661428374
R-squared: -2.976364619428466


# Naive Bayes:

In [31]:
from sklearn.naive_bayes import GaussianNB

# Create and train the Naive Bayes model
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = nb_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Naive Bayes Model Performance:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)


Naive Bayes Model Performance:
Mean Squared Error: 0.75
R-squared: -3.0


  n_ij = - 0.5 * np.sum(np.log(2. * np.pi * self.sigma_[i, :]))
  (self.sigma_[i, :]), 1)
  (self.sigma_[i, :]), 1)


# Support Vector Machines (SVM) with RBF Kernel:

In [32]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

# Scale the features for SVM with RBF kernel
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the SVM model with RBF kernel
svm_model = SVR(kernel='rbf', C=1.0, gamma='scale')  # You can adjust hyperparameters C and gamma as needed
svm_model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = svm_model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Support Vector Machine (SVM) with RBF Kernel Model Performance:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)


Support Vector Machine (SVM) with RBF Kernel Model Performance:
Mean Squared Error: 0.75
R-squared: -3.0


# Principal Component Analysis (PCA):

In [35]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Assuming you have 'X' as the feature matrix

# Scale the features for PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create and apply PCA to reduce dimensionality
num_components = 2  # You can adjust the number of components as needed
pca = PCA(n_components=num_components)
X_pca = pca.fit_transform(X_scaled)

# (Optional) Get the explained variance ratio of the selected components
explained_variance_ratio = pca.explained_variance_ratio_

print("PCA Dimensionality Reduction Results:")
print("Explained Variance Ratio:", explained_variance_ratio)


PCA Dimensionality Reduction Results:
Explained Variance Ratio: [1.00000000e+00 5.54410523e-34]


# Support Vector Machines (SVM) for Multi-class Classification:

In [36]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Assuming you have 'X' as the feature matrix and 'y' as the multi-class target vector

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the SVM model for multi-class classification
svm_model = SVC(kernel='linear', C=1.0)  # You can adjust hyperparameters as needed
svm_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

print("SVM for Multi-class Classification Performance:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


SVM for Multi-class Classification Performance:
Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
