# In [None]:  (notebook cell prompt, kept as a comment so the file parses as Python)
# ============================================
# 📌 Q1: Simple Linear Regression
# ============================================
import numpy as np
from sklearn.linear_model import LinearRegression

# Toy data: one feature column (values 1..5) and one numeric target per row.
X = np.arange(1, 6).reshape(-1, 1)
y = np.array([3, 6, 7, 8, 11])

# Fit ordinary least squares; fit() returns the estimator itself,
# so construction and training can be chained.
lin_model = LinearRegression().fit(X, y)

# Predict on the same points the model was trained on.
y_pred = lin_model.predict(X)

# Report the fitted slope, the intercept and the in-sample predictions.
print("Coefficient:", lin_model.coef_)
print("Intercept:", lin_model.intercept_)
print("Predicted values:", y_pred)


# ============================================
# 📌 Q2: Logistic Regression
# ============================================
from sklearn.linear_model import LogisticRegression

# Toy data: one feature column (values 1..6); the first three rows are
# class 0 and the last three are class 1.
X = np.arange(1, 7).reshape(-1, 1)
y = np.array([0] * 3 + [1] * 3)

# Train the classifier with scikit-learn's default settings.
log_model = LogisticRegression().fit(X, y)

# Hard labels and per-class probabilities for the training points.
y_prob = log_model.predict_proba(X)
y_pred = log_model.predict(X)

print("Coefficient:", log_model.coef_)
print("Intercept:", log_model.intercept_)
print("Predicted labels:", y_pred)
print("Predicted probabilities:", y_prob)


# ============================================
# 📌 Q3: Preprocessing (Manual)
# ============================================
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

# Small mixed-type table: two numeric columns (each with one missing value),
# one categorical column, and a binary target.
data = pd.DataFrame({
    'age': [25, 30, np.nan, 40, 35],
    'salary': [50000, 60000, 55000, np.nan, 65000],
    'city': ['Delhi', 'Mumbai', 'Delhi', 'Chennai', 'Mumbai'],
    'target': [0, 1, 0, 1, 1]
})

# Separate the label from the feature columns.
y = data['target']
X = data[['age', 'salary', 'city']]

# Column groups, each handled by its own chain of transformers.
num_features = ['age', 'salary']
cat_features = ['city']

# Numeric columns: fill gaps with the column mean, then standardise.
imputer_num = SimpleImputer(strategy='mean')
scaler = StandardScaler()
X_num = scaler.fit_transform(imputer_num.fit_transform(X[num_features]))

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode. The encoder output is converted to a dense array so it can be
# stacked next to the numeric block.
imputer_cat = SimpleImputer(strategy='most_frequent')
encoder = OneHotEncoder(handle_unknown='ignore')
X_cat = encoder.fit_transform(imputer_cat.fit_transform(X[cat_features])).toarray()

# Place the numeric and encoded categorical columns side by side.
X_processed = np.concatenate([X_num, X_cat], axis=1)
print("Processed feature matrix shape:", X_processed.shape)


# ============================================
# 📌 Q4: Class Imbalance
# ============================================
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Imbalanced toy data: ten single-feature rows, seven of class 0 and
# three of class 1.
X = np.arange(1, 11).reshape(-1, 1)
y = np.array([0] * 7 + [1] * 3)

print("Class distribution:", Counter(y))

# Stratified 70/30 split so both classes appear in train and test.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# class_weight='balanced' reweights samples inversely to class frequency.
imb_model = LogisticRegression(class_weight='balanced').fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = imb_model.predict(X_test)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


# ============================================
# 📌 Q5: PCA (Dimensionality Reduction)
# ============================================
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Dataset: 50 random samples with 4 features, drawn uniformly from [0, 1).
# A seeded generator keeps the explained-variance figures and the scatter
# plot reproducible from run to run, matching the fixed random_state=42
# used by the other sections of this file.
rng = np.random.default_rng(42)
X = rng.random((50, 4))

# Project the 4-dimensional samples onto their first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Share of the total variance captured by each retained component.
print("Explained variance ratio:", pca.explained_variance_ratio_)

# Visualise the samples in the reduced 2-D space.
plt.scatter(X_pca[:, 0], X_pca[:, 1])
plt.title("PCA Projection to 2D")
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.show()


# ============================================
# 📌 Q6: Support Vector Machine (SVM)
# ============================================
from sklearn import datasets
from sklearn.svm import SVC

# Load the Iris data set and pull out the feature matrix and class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Train a support vector classifier with a linear kernel.
svm_model = SVC(kernel='linear').fit(X_train, y_train)

# Score the held-out rows and print per-class metrics.
y_pred = svm_model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))


# ============================================
# 📌 Q7: Clustering (KMeans)
# ============================================
from sklearn.cluster import KMeans

# Six 2-D points: three near (1..2, 2..3) and three near (8..9, 8..9).
X = np.array([
    [1, 2],
    [1, 3],
    [2, 2],
    [8, 8],
    [9, 9],
    [8, 9],
])

# NOTE(review): the points form two visible groups but three clusters are
# requested here - confirm that n_clusters=3 is what the exercise intends.
kmeans = KMeans(n_clusters=3, random_state=42).fit(X)

# Cluster index assigned to each point, and the fitted centroid coordinates.
labels, centers = kmeans.labels_, kmeans.cluster_centers_

print("Cluster centers:\n", centers)

# Plot the points coloured by cluster, with centroids overlaid as red crosses.
# Transposing turns the (n, 2) arrays into separate x and y sequences.
plt.scatter(*X.T, c=labels, cmap='viridis')
plt.scatter(*centers.T, c='red', marker='x', s=200, label='Centers')
plt.title("KMeans Clustering")
plt.legend()
plt.show()


# ============================================
# 📌 Q8: Decision Trees
# ============================================
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Load the Iris data set and pull out the feature matrix and class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# A shallow tree (depth <= 3) keeps the plotted diagram readable.
dt_model = DecisionTreeClassifier(max_depth=3, random_state=42)
dt_model.fit(X_train, y_train)

# Score the held-out rows and print per-class metrics.
y_pred = dt_model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))

# Draw the fitted tree. iris.target_names is a NumPy array, and some
# scikit-learn releases validate class_names as list-only and reject arrays,
# so it is converted to a plain list here (feature_names is already a list).
plt.figure(figsize=(12, 8))
plot_tree(
    dt_model,
    feature_names=iris.feature_names,
    class_names=list(iris.target_names),
    filled=True,
)
plt.title("Decision Tree (Iris Dataset)")
plt.show()