### House Price Prediction

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Toy housing dataset: one list entry per house, columns aligned by position.
data = {
    'Size_sqft': [850, 900, 1000, 1200, 1500, 1800, 2000],
    'Bedrooms': [2, 2, 3, 3, 4, 4, 5],
    'Price': [150000, 165000, 180000, 210000, 250000, 290000, 320000],
}

df = pd.DataFrame(data)
print("Dataset:\n", df)

# Split the frame into a two-column feature matrix X and an (m, 1) target column y.
X = df[['Size_sqft', 'Bedrooms']].to_numpy()
y = df['Price'].to_numpy().reshape(-1, 1)

# Standardise every feature column: subtract its mean, divide by its
# (population) standard deviation.
X_norm = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

# Prepend a column of ones so that theta[0] plays the role of the intercept.
X_b = np.hstack([np.ones((len(X_norm), 1)), X_norm])

# Gradient-descent setup: m samples, n parameters (intercept + features).
m, n = X_b.shape
theta = np.zeros((n, 1))  # start from the all-zero parameter vector
alpha = 0.1               # learning rate
iterations = 200          # number of full-batch updates
cost_history = []         # one cost value is appended per update

def compute_cost(X, y, theta):
    """Return the half mean-squared-error cost of a linear model.

    Evaluates ``(1 / (2 * m)) * sum((X @ theta - y) ** 2)``, where ``m`` is
    the number of samples.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix (including the intercept column, if one is used).
    y : array-like, shape (m,) or (m, 1)
        Observed targets.
    theta : array-like, shape (n,) or (n, 1)
        Model parameters.

    Returns
    -------
    numpy floating scalar
        The cost for the given parameters.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of targets.
    """
    # Flatten both sides before subtracting: an (m, 1) prediction minus an
    # (m,) target (or the reverse) would broadcast to an (m, m) matrix and
    # silently inflate the cost.
    predictions = np.asarray(X).dot(np.asarray(theta)).reshape(-1)
    targets = np.asarray(y).reshape(-1)
    if predictions.shape != targets.shape:
        raise ValueError(
            "X.dot(theta) produced %d predictions but y holds %d targets"
            % (predictions.size, targets.size)
        )
    m = targets.size
    error = predictions - targets
    return (1/(2*m)) * np.sum(error**2)

# Batch gradient descent: every pass uses all m samples to update theta in
# place, then records the cost of the updated parameters.
for step in range(iterations):
    gradients = (1/m) * np.dot(X_b.T, np.dot(X_b, theta) - y)
    theta -= alpha * gradients
    cost_history.append(compute_cost(X_b, y, theta))

print("\nParameters learned (theta):", np.ravel(theta))

# Learning curve: the cost recorded after each gradient-descent update.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(range(iterations), cost_history, color='blue')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost (MSE)')
ax.set_title('Cost Convergence (Gradient Descent)')
ax.grid(True)
plt.show()

# Reference model: scikit-learn's LinearRegression fitted on the raw
# (un-normalised) feature matrix.
model = LinearRegression().fit(X, y)

# In-sample predictions from both models, used by the comparison plots.
# The hand-rolled model works on the normalised design matrix X_b.
sklearn_pred = model.predict(X)
custom_pred = np.dot(X_b, theta)

# Price against floor area: observed prices as a line, both models'
# predictions as markers on top.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(df['Size_sqft'], y, color='black', linestyle='-', label='Actual Prices')
ax.scatter(df['Size_sqft'], custom_pred, color='red', label='Custom GD Predictions')
ax.scatter(df['Size_sqft'], sklearn_pred, color='green', marker='x', label='Sklearn Predictions')
ax.set(xlabel='Size (sqft)', ylabel='Price', title='House Price Predictions')
ax.legend()
ax.grid(True)
plt.show()


# Same comparison over both features: size and bedrooms on the x/y axes,
# price on the z axis.
fig, ax = plt.subplots(figsize=(8, 6), subplot_kw={'projection': '3d'})

for prices, marker_style in (
    (y, {'color': 'black', 'label': 'Actual Prices'}),
    (custom_pred, {'color': 'red', 'label': 'Custom GD Predictions'}),
    (sklearn_pred, {'color': 'green', 'marker': 'x', 'label': 'Sklearn Predictions'}),
):
    ax.scatter(df['Size_sqft'], df['Bedrooms'], prices, **marker_style)

ax.set(
    xlabel='Size (sqft)',
    ylabel='Bedrooms',
    zlabel='Price',
    title='3D Visualization of Predictions',
)
ax.legend()
plt.show()

### Email Spam Classification

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Dataset: ten hand-labelled example emails, written as (text, label) pairs
# so each message sits next to its class.
labelled_emails = [
    ("Win a lottery now", "Spam"),
    ("Limited time offer, buy now", "Spam"),
    ("Meeting at 10am tomorrow", "Not Spam"),
    ("Project deadline extended", "Not Spam"),
    ("Free money waiting for you", "Spam"),
    ("Let’s catch up for lunch", "Not Spam"),
    ("Claim your prize immediately", "Spam"),
    ("Important update about your account", "Not Spam"),
    ("Discount on all items today", "Spam"),
    ("Are we still on for the meeting?", "Not Spam"),
]

# Column-oriented view of the same pairs, in the original order.
data = {
    "Email": [text for text, label in labelled_emails],
    "Label": [label for text, label in labelled_emails],
}

df = pd.DataFrame(data)
print("Dataset:\n", df)

# Encode the labels as integers: 0 = "Not Spam", 1 = "Spam".
y = df["Label"].map({"Not Spam": 0, "Spam": 1})

# Train-test split on the raw text, done BEFORE feature extraction so the
# held-out emails play no part in building the vocabulary (fitting the
# vectorizer on every email would leak test data into preprocessing).
emails_train, emails_test, y_train, y_test = train_test_split(
    df["Email"], y, test_size=0.3, random_state=42
)

# Feature extraction: bag-of-words counts. The vocabulary is learned from
# the training emails only and then applied unchanged to the test emails.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(emails_train)
X_test = vectorizer.transform(emails_test)

# Whole corpus encoded with the same training vocabulary, so `X` remains
# defined for any code that refers to it.
X = vectorizer.transform(df["Email"])

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Confusion Matrix: rows are true classes, columns are predicted classes,
# both in the order 0 (Not Spam), 1 (Spam). `labels` pins the matrix to 2x2
# even if the small test split happens to contain only one class.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print("\nConfusion Matrix:\n", cm)

# Classification Report
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))

Dataset:
                                  Email     Label
0                    Win a lottery now      Spam
1          Limited time offer, buy now      Spam
2             Meeting at 10am tomorrow  Not Spam
3            Project deadline extended  Not Spam
4           Free money waiting for you      Spam
5             Let’s catch up for lunch  Not Spam
6         Claim your prize immediately      Spam
7  Important update about your account  Not Spam
8          Discount on all items today      Spam
9     Are we still on for the meeting?  Not Spam

Classification Report:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.50      0.67         2

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3

Accuracy: 0.6666666666666666
