In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

data = {
    "Size_sqft": [1800, 2200, 1500, 2750, 2000, 3100, 1400, 2600],
    "Bedrooms":  [3, 3, 2, 4, 3, 4, 2, 4],
    "Age_yrs":   [10, 5, 15, 2, 7, 1, 20, 4],
    "Distance_km": [5.2, 3.8, 7.5, 2.0, 4.0, 1.5, 9.0, 3.0],
    "Price": ["84,00,000", "96,00,000", "65,00,000", "1,25,00,000",
              "88,00,000", "1,40,00,000", "58,00,000", "1,18,00,000"]
}

df = pd.DataFrame(data)

df["Price"] = df["Price"].str.replace(",", "").astype(int)

X = df[["Size_sqft", "Bedrooms", "Age_yrs", "Distance_km"]]
y = df["Price"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.25, random_state=42
)

model = LinearRegression()
model.fit(X_train, y_train)

new_house = pd.DataFrame({
    "Size_sqft": [2100],
    "Bedrooms": [3],
    "Age_yrs": [5],
    "Distance_km": [4]
})

new_house_scaled = scaler.transform(new_house)
predicted_price = model.predict(new_house_scaled)

print("Predicted Price for the new house:", int(predicted_price[0]))


Predicted Price for the new house: 9638600


In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

data = {
    "Distance_km": [2.5, 5.2, 1.8, 9.0, 3.2, 7.5, 4.0, 6.8],
    "Traffic_Level(0–10)": [4, 7, 3, 8, 5, 7, 5, 6],
    "Items": [3, 5, 2, 6, 3, 8, 4, 7],
    "Weather(0=Clear,1=Rainy)": [0, 1, 0, 1, 0, 1, 1, 0],
    "Delivery_Time_min": [22, 46, 18, 69, 28, 55, 36, 48]
}

df = pd.DataFrame(data)

df["Log_Distance"] = np.log(df["Distance_km"])

X = df[[
    "Log_Distance",
    "Traffic_Level(0–10)",
    "Items",
    "Weather(0=Clear,1=Rainy)"
]]

y = df["Delivery_Time_min"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)


model = LinearRegression()
model.fit(X_train, y_train)

new_data = pd.DataFrame({
    "Log_Distance": [np.log(6)],
    "Traffic_Level(0–10)": [6],
    "Items": [4],
    "Weather(0=Clear,1=Rainy)": [1]
})

predicted_time = model.predict(new_data)

print("Predicted Delivery Time (min):", round(predicted_time[0], 2))


Predicted Delivery Time (min): 25.85


In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

data = {
    "Experience_yrs": [1, 3, 5, 7, 10, 12, 15, 18],
    "Education_Level(1–3)": [1, 2, 2, 3, 3, 3, 3, 3],
    "Skill_Score": [55, 65, 70, 75, 80, 88, 90, 92],
    "Salary": ["2,80,000", "3,80,000", "4,20,000", "6,20,000",
               "9,50,000", "12,00,000", "15,00,000", "19,00,000"]
}

df = pd.DataFrame(data)

df["Salary"] = df["Salary"].str.replace(",", "").astype(int)

df["Log_Salary"] = np.log(df["Salary"])

X = df[["Experience_yrs", "Education_Level(1–3)", "Skill_Score"]]
y = df["Log_Salary"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

model_log = LinearRegression()
model_log.fit(X_train, y_train)

pred_log = model_log.predict(X_test)
pred_salary = np.exp(pred_log)

print("Predicted Salaries after Log Transformation:")
print(pred_salary)


Predicted Salaries after Log Transformation:
[371275.26049693 999241.71547025]


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import r2_score

data = {
    "Month": ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug"],
    "Ad_Spend": ["80,000","60,000","75,000","90,000","70,000","65,000","85,000","95,000"],
    "Competitors": [3,4,3,2,4,5,3,2],
    "Seasonal_Index": [1.1,0.9,1.0,1.2,0.8,0.7,1.1,1.3],
    "Discount(%)": [10,5,12,15,5,4,14,16],
    "Sales": ["4,50,000","3,20,000","4,10,000","5,50,000",
              "3,00,000","2,80,000","5,20,000","6,00,000"]
}

df = pd.DataFrame(data)

df["Ad_Spend"] = df["Ad_Spend"].str.replace(",", "").astype(int)
df["Sales"] = df["Sales"].str.replace(",", "").astype(int)

X = df[["Ad_Spend", "Competitors", "Seasonal_Index", "Discount(%)"]]
y = df["Sales"]


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

baseline = LinearRegression()
baseline.fit(X_train, y_train)
baseline_pred = baseline.predict(X_test)
baseline_r2 = r2_score(y_test, baseline_pred)

print("Baseline R² Score:", round(baseline_r2, 3))

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(
    X_scaled, y, test_size=0.25, random_state=42
)

ridge = Ridge(alpha=1.0)
ridge.fit(X_train_s, y_train_s)
ridge_pred = ridge.predict(X_test_s)
ridge_r2 = r2_score(y_test_s, ridge_pred)

print("Ridge Regression R² Score:", round(ridge_r2, 3))

poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

X_train_p, X_test_p, y_train_p, y_test_p = train_test_split(
    X_poly, y, test_size=0.25, random_state=42
)

poly_model = LinearRegression()
poly_model.fit(X_train_p, y_train_p)
poly_pred = poly_model.predict(X_test_p)
poly_r2 = r2_score(y_test_p, poly_pred)

print("Polynomial Regression R² Score:", round(poly_r2, 3))
y_log = np.log(y)

X_train_l, X_test_l, y_train_l, y_test_l = train_test_split(
    X, y_log, test_size=0.25, random_state=42
)

log_model = LinearRegression()
log_model.fit(X_train_l, y_train_l)
log_pred = log_model.predict(X_test_l)

log_pred_actual = np.exp(log_pred)
y_test_actual = np.exp(y_test_l)

log_r2 = r2_score(y_test_actual, log_pred_actual)

print("Log-Transformed Regression R² Score:", round(log_r2, 3))


Baseline R² Score: -2.735
Ridge Regression R² Score: -0.563
Polynomial Regression R² Score: -18.062
Log-Transformed Regression R² Score: 0.891


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE

data = {
    "Income": [45000, 32000, 78000, 25000, 90000, 40000, 50000, 28000],
    "Credit_Score": [710, 660, 780, 600, 800, 640, 690, 580],
    "Loan_Amount": [120000, 80000, 200000, 50000, 300000, 70000, 110000, 60000],
    "Late_Payments": [0, 2, 0, 5, 0, 3, 1, 6],
    "Default": [0, 0, 0, 1, 0, 1, 0, 1]
}

df = pd.DataFrame(data)

X = df.drop("Default", axis=1)
y = df["Default"]


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

smote = SMOTE(random_state=42, k_neighbors=1)
X_balanced, y_balanced = smote.fit_resample(X_scaled, y)

print("Original Data Distribution:")
print(y.value_counts(), "\n")

print("After SMOTE Balancing:")
print(pd.Series(y_balanced).value_counts(), "\n")

X_train, X_test, y_train, y_test = train_test_split(
    X_balanced, y_balanced, test_size=0.3, random_state=42
)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nROC-AUC Score:", roc_auc_score(y_test, y_prob))


Original Data Distribution:
Default
0    5
1    3
Name: count, dtype: int64 

After SMOTE Balancing:
Default
0    5
1    5
Name: count, dtype: int64 


Confusion Matrix:
[[1 0]
 [0 2]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


ROC-AUC Score: 1.0


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report

data = {
    "Email_Length": [120, 200, 150, 300, 180, 90, 250, 130],
    "Num_Links": [3, 6, 2, 10, 4, 1, 9, 3],
    "Num_Spam_Words": [0, 5, 1, 8, 3, 0, 6, 1],
    "Sender_Reputation": [0.9, 0.3, 0.8, 0.2, 0.5, 1.0, 0.4, 0.7],
    "Spam": [0, 1, 0, 1, 1, 0, 1, 0]
}

df = pd.DataFrame(data)

X = df.drop("Spam", axis=1)
y = df["Spam"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

model = LogisticRegression(class_weight='balanced')
model.fit(X_train, y_train)

y_prob = model.predict_proba(X_test)[:, 1]

threshold = 0.3
y_pred = (y_prob >= threshold).astype(int)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nPrecision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Confusion Matrix:
[[2 0]
 [0 1]]

Precision: 1.0
Recall: 1.0

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

data = {
    "Glucose": [120,160,140,110,180,95,150,100],
    "BMI": [22,30,28,24,35,20,32,23],
    "Age": [25,45,40,30,50,22,48,27],
    "BloodPressure": [70,85,82,76,90,65,88,72],
    "Diabetes": [0,1,1,0,1,0,1,0]
}

df = pd.DataFrame(data)

X = df.drop("Diabetes", axis=1)
y = df["Diabetes"]

poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

scaler = StandardScaler()
X_poly_scaled = scaler.fit_transform(X_poly)

model = LogisticRegression()
model.fit(X_poly_scaled, y)

y_pred = model.predict(X_poly_scaled)

print("Confusion Matrix:")
print(confusion_matrix(y, y_pred))

print("\nClassification Report:")
print(classification_report(y, y_pred))


Confusion Matrix:
[[4 0]
 [0 4]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00         4

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



In [12]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score

data = {
    'Action':  [8, 6, 3, 2, 5, 4, 7, 3],
    'Romance': [2, 1, 7, 8, 3, 4, 2, 6],
    'Comedy':  [3, 2, 4, 5, 7, 8, 4, 2],
    'Drama':   [4, 3, 8, 7, 6, 5, 3, 9],
    'Genre_Label': ['Action','Action','Romance','Romance',
                    'Comedy','Comedy','Action','Romance']
}

df = pd.DataFrame(data)

X = df[['Action', 'Romance', 'Comedy', 'Drama']]
y = df['Genre_Label']

encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

k_values = range(1, 4)
cv_scores = []

skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y_encoded, cv=skf)
    cv_scores.append(scores.mean())

best_k = k_values[cv_scores.index(max(cv_scores))]
print("Best K =", best_k)

model = KNeighborsClassifier(n_neighbors=best_k)
model.fit(X, y_encoded)

new_movie = [[6, 3, 6, 5]]
predicted_label = encoder.inverse_transform(model.predict(new_movie))

print("Predicted Genre:", predicted_label[0])


Best K = 1
Predicted Genre: Comedy




In [13]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

data = {
    'Annual_Spend':      [45000, 30000, 65000, 20000, 85000, 25000, 40000, 70000],
    'Visit_Freq':        [12, 8, 15, 4, 20, 5, 10, 18],
    'Items_Per_Visit':   [4, 3, 5, 2, 8, 2, 3, 6],
    'Segment':           ['A','B','A','C','A','C','B','A']
}

df = pd.DataFrame(data)

X = df[['Annual_Spend', 'Visit_Freq', 'Items_Per_Visit']]
y = df['Segment']

encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.25, random_state=42
)

model = KNeighborsClassifier(
    n_neighbors=5,
    algorithm='ball_tree',
    leaf_size=30,
    weights='distance'
)

model.fit(X_train, y_train)

new_customer = [[60000, 14, 5]]
prediction = model.predict(new_customer)
predicted_segment = encoder.inverse_transform(prediction)

print("Predicted Segment:", predicted_segment[0])


Predicted Segment: A




In [14]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = {
    'Pixel_Intensity_Mean': [8900,7200,9500,6800,10200,7000,9800,6500],
    'Texture_Score':        [12,15,18,14,20,13,19,12],
    'Shape_Complexity':     [40,30,45,28,50,32,48,27],
    'Tumor':                [1,0,1,0,1,0,1,0]
}

df = pd.DataFrame(data)

X = df[['Pixel_Intensity_Mean', 'Texture_Score', 'Shape_Complexity']]
y = df['Tumor']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.25, random_state=42
)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print("Predictions:", y_pred)
print("Actual:", list(y_test))
print("Accuracy:", accuracy)

new_mri = [[9000, 14, 42]]
new_mri_scaled = scaler.transform(new_mri)

prediction = knn.predict(new_mri_scaled)
print("\nNew MRI Tumor Prediction:", "Tumor" if prediction[0] == 1 else "No Tumor")


Predictions: [0 0]
Actual: [0, 0]
Accuracy: 1.0

New MRI Tumor Prediction: Tumor


