In [None]:
#Naive Bayes using GaussianNB

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Gaussian Naive Bayes on the Social Network Ads data: load, split, scale,
# fit, report metrics, classify one new sample and draw decision regions.

df = pd.read_csv("/content/Social_Network_Ads.csv")
# Preview only the first rows instead of dumping the whole frame.
print(df.head())

# Work with plain NumPy arrays (as the other cells do) so that the scaler can
# later transform a bare Python list without a feature-name mismatch warning,
# and so that boolean masks built from the labels are purely positional.
x = df[['Age', 'EstimatedSalary']].values
y = df['Purchased'].values

# Fixed seed: without it the split, and therefore every number and plot
# below, changes on each run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training data only, then reuse it for the test data.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = GaussianNB()
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

cm = confusion_matrix(y_test, y_pred)
print(cm)

accuracy = accuracy_score(y_test, y_pred)
print(accuracy*100)

precision = precision_score(y_test, y_pred)
print(precision*100)

# A new sample must go through the same scaler as the training data.
new_input = [[30, 130000]]
new_input_scaled = scaler.transform(new_input)
new_prediction = model.predict(new_input_scaled)
print(new_prediction[0])

# Decision regions for the training and the test set, side by side.
plt.figure(figsize=(12, 5))
panels = [
    (x_train, y_train, 'Naive Bayes (Training Set)'),
    (x_test, y_test, 'Naive Bayes (Test Set)'),
]
for position, (x_set, y_set, panel_title) in enumerate(panels, start=1):
    # Grid over the (scaled) feature plane, padded by 1 on every side.
    x1, x2 = np.meshgrid(
        np.arange(x_set[:, 0].min() - 1, x_set[:, 0].max() + 1, 0.01),
        np.arange(x_set[:, 1].min() - 1, x_set[:, 1].max() + 1, 0.01)
    )

    plt.subplot(1, 2, position)
    plt.contourf(
        x1, x2,
        model.predict(np.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape),
        alpha=0.3, cmap=ListedColormap(('red', 'green'))
    )

    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(
            x_set[y_set == j, 0],
            x_set[y_set == j, 1],
            label=f'Class {j}'
        )

    plt.title(panel_title)
    # The plotted coordinates are standardized values, not raw age/salary.
    plt.xlabel('Age (scaled)')
    plt.ylabel('Estimated Salary (scaled)')
    plt.legend()

plt.tight_layout()
plt.show()



In [None]:
#Bernoulli Naive bayes

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, Binarizer
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Bernoulli Naive Bayes on the Social Network Ads data. The two numeric
# features are standardized and then thresholded at 0 before fitting.

df = pd.read_csv("/content/Social_Network_Ads.csv")

x = df[['Age', 'EstimatedSalary']].values
y = df['Purchased'].values

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Scaling statistics come from the training split only.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

binarizer = Binarizer(threshold=0.0).fit(x_train_scaled)
x_train_binarized = binarizer.transform(x_train_scaled)
x_test_binarized = binarizer.transform(x_test_scaled)

model = BernoulliNB()
model.fit(x_train_binarized, y_train)

y_pred = model.predict(x_test_binarized)

# Evaluation on the held-out split.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

precision = precision_score(y_test, y_pred)
print(f"Precision: {precision * 100:.2f}%")

# Classify a single new sample through the same scaler and binarizer.
new_input = [[30, 87000]]
new_input_scaled = scaler.transform(new_input)
new_input_binarized = binarizer.transform(new_input_scaled)

predicted_class = model.predict(new_input_binarized)
print(f"Predicted Class for input {new_input}: {predicted_class[0]}")

predicted_prob = model.predict_proba(new_input_binarized)
print(f"Class Probabilities: {predicted_prob}")

# Decision regions over the binarized feature plane: training set on the
# left, test set on the right.
panels = (
    (x_train_binarized, y_train, 'Bernoulli Naive Bayes (Training Set)'),
    (x_test_binarized, y_test, 'Bernoulli Naive Bayes (Test Set)'),
)

plt.figure(figsize=(12, 5))
for position, (x_set, y_set, panel_title) in enumerate(panels, start=1):
    first_axis = np.arange(x_set[:, 0].min() - 1, x_set[:, 0].max() + 1, 0.01)
    second_axis = np.arange(x_set[:, 1].min() - 1, x_set[:, 1].max() + 1, 0.01)
    x1, x2 = np.meshgrid(first_axis, second_axis)

    plt.subplot(1, 2, position)
    grid_points = np.array([x1.ravel(), x2.ravel()]).T
    region_labels = model.predict(grid_points).reshape(x1.shape)
    plt.contourf(
        x1, x2, region_labels,
        alpha=0.3, cmap=ListedColormap(('red', 'green'))
    )

    for j in np.unique(y_set):
        in_class = y_set == j
        plt.scatter(x_set[in_class, 0], x_set[in_class, 1], label=f'Class {j}')

    plt.title(panel_title)
    plt.xlabel('Age (Binarized)')
    plt.ylabel('Estimated Salary (Binarized)')
    plt.legend()

plt.tight_layout()
plt.show()


In [None]:
#Multinomial Naive Bayes

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Multinomial Naive Bayes on the Social Network Ads data, with both features
# min-max scaled using statistics from the training split.

df = pd.read_csv("/content/Social_Network_Ads.csv")

x = df[['Age', 'EstimatedSalary']].values
y = df['Purchased'].values

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

scaler = MinMaxScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

model = MultinomialNB()
model.fit(x_train_scaled, y_train)

y_pred = model.predict(x_test_scaled)

# Evaluation on the held-out split.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

precision = precision_score(y_test, y_pred)
print(f"Precision: {precision * 100:.2f}%")

# Classify one new sample; it goes through the same fitted scaler.
new_input = [[30, 87000]]
new_input_scaled = scaler.transform(new_input)

predicted_class = model.predict(new_input_scaled)
print(f"Predicted Class for input {new_input}: {predicted_class[0]}")

predicted_prob = model.predict_proba(new_input_scaled)
print(f"Class Probabilities: {predicted_prob}")


def plot_decision_boundary(x_set, y_set, title, model, scaler):
    """Draw a model's decision regions and the given samples on the current axes.

    Args:
        x_set: 2-D array of samples; column 0 is plotted on the x axis and
            column 1 on the y axis.
        y_set: 1-D array of class labels, one per row of ``x_set``.
        title: Title for the current axes.
        model: Fitted classifier exposing ``predict`` for two-column input.
        scaler: Not used by this function; kept so existing calls keep working.
    """
    # Grid over the feature plane, padded by 0.1 on every side.
    x1, x2 = np.meshgrid(
        np.arange(x_set[:, 0].min() - 0.1, x_set[:, 0].max() + 0.1, 0.01),
        np.arange(x_set[:, 1].min() - 0.1, x_set[:, 1].max() + 0.1, 0.01)
    )

    # Transpose the stacked (2, N) array so each row is one (x1, x2) grid
    # point. Reshaping it to (-1, 2) would pair neighbouring x1 values with
    # each other and draw the wrong regions.
    grid_points = np.array([x1.ravel(), x2.ravel()]).T
    predictions = model.predict(grid_points).reshape(x1.shape)

    plt.contourf(
        x1, x2, predictions,
        alpha=0.3, cmap=ListedColormap(('red', 'green'))
    )

    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(
            x_set[y_set == j, 0],
            x_set[y_set == j, 1],
            label=f'Class {j}'
        )

    plt.title(title)
    plt.xlabel('Age (scaled)')
    plt.ylabel('Estimated Salary (scaled)')
    plt.legend()

# One figure, two panels: training split on the left, test split on the right.
plt.figure(figsize=(12, 5))

for slot, (features, labels, heading) in enumerate(
    (
        (x_train_scaled, y_train, 'Multinomial NB (Training Set)'),
        (x_test_scaled, y_test, 'Multinomial NB (Test Set)'),
    ),
    start=1,
):
    plt.subplot(1, 2, slot)
    plot_decision_boundary(features, labels, heading, model, scaler)

plt.tight_layout()
plt.show()


In [None]:
#random forest

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap


# Random forest on the Social Network Ads data: load, split, fit, report
# metrics and classify one new sample. No scaling step is applied here.

df = pd.read_csv("/content/Social_Network_Ads.csv")

x = df[['Age', 'EstimatedSalary']].values
y = df['Purchased'].values

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Seed the forest as well as the split; otherwise the bootstrap samples and
# feature subsets differ on every run and the metrics are not reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

precision = precision_score(y_test, y_pred)
print(f"Precision: {precision * 100:.2f}%")


# The model was trained on raw values, so the new sample is passed as-is.
new_input = [[30, 100000]]

predicted_class = model.predict(new_input)
print(f"Predicted Class for input {new_input}: {predicted_class[0]}")

predicted_prob = model.predict_proba(new_input)
print(f"Class Probabilities: {predicted_prob}")

def plot_decision_boundary(x_set, y_set, title, model, resolution=300):
    """Draw a model's decision regions and the given samples on the current axes.

    Args:
        x_set: 2-D array of samples; column 0 is plotted on the x axis and
            column 1 on the y axis.
        y_set: 1-D array of class labels, one per row of ``x_set``.
        title: Title for the current axes.
        model: Fitted classifier exposing ``predict`` for two-column input.
        resolution: Number of grid points along each axis. The grid therefore
            holds ``resolution ** 2`` points whatever the units of the two
            features are.
    """
    # Sample each axis with the same number of points instead of a fixed
    # absolute step. A fixed step on a feature with a very wide numeric range
    # would produce an enormous grid that is slow to predict on.
    x_axis = np.linspace(x_set[:, 0].min() - 1, x_set[:, 0].max() + 1, resolution)
    y_axis = np.linspace(x_set[:, 1].min() - 1, x_set[:, 1].max() + 1, resolution)
    x1, x2 = np.meshgrid(x_axis, y_axis)

    predictions = model.predict(np.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape)

    cmap = ListedColormap(('red', 'green'))
    plt.contourf(x1, x2, predictions, alpha=0.3, cmap=cmap)

    for i, j in enumerate(np.unique(y_set)):
        in_class = y_set == j
        plt.scatter(
            x_set[in_class, 0],
            x_set[in_class, 1],
            # `color=` sets one colour for the whole class. Passing a single
            # RGBA tuple through `c=` is ambiguous: it is read as per-point
            # values when the class happens to contain exactly four points.
            color=cmap(i),
            label=f'Class {j}'
        )

    plt.title(title)
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()

# One figure, two panels: training split on the left, test split on the right.
plt.figure(figsize=(12, 5))

for slot, (features, labels, heading) in enumerate(
    (
        (x_train, y_train, 'Random Forest (Training Set)'),
        (x_test, y_test, 'Random Forest (Test Set)'),
    ),
    start=1,
):
    plt.subplot(1, 2, slot)
    plot_decision_boundary(features, labels, heading, model)

plt.tight_layout()
plt.show()



In [None]:
# kernel SVM

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score

# Kernel SVM on the Social Network Ads data: load, split, fit, report metrics
# and classify one new sample.

# Imported here because this cell's import block does not provide them.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("/content/Social_Network_Ads.csv")

x = df[['Age', 'EstimatedSalary']].values
y = df['Purchased'].values

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# The RBF kernel works on distances between samples, so the feature with the
# much larger numeric range would dominate it. Standardize inside a pipeline:
# the scaler is fitted on the training data only and is applied automatically
# to everything passed to predict / predict_proba.
model = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', probability=True, random_state=42),
)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

precision = precision_score(y_test, y_pred)
print(f"Precision: {precision * 100:.2f}%")


# Raw values are fine here: the pipeline scales them before the SVC sees them.
new_input = [[30, 100000]]

predicted_class = model.predict(new_input)
print(f"Predicted Class for input {new_input}: {predicted_class[0]}")

predicted_prob = model.predict_proba(new_input)
print(f"Class Probabilities: {predicted_prob}")


In [None]:
# Tennis Dataset - Decision Tree Classifier
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score
import matplotlib.pyplot as plt

# Decision tree on the play-tennis data: load, label-encode every column,
# split, fit, report metrics and draw the fitted tree.

df = pd.read_csv("/content/play_tennis.csv").drop("day", axis=1)

# Encode every column except the last one, keeping each fitted encoder so
# that later input can be mapped with the same label-to-integer mapping.
label_encoders = {}
for column in df.columns[:-1]:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

target_encoder = LabelEncoder()
df["play"] = target_encoder.fit_transform(df["play"])


X = df.iloc[:, :-1]
y = df["play"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

classifier = DecisionTreeClassifier(criterion="entropy", random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {round(accuracy * 100, 2)}%")
# zero_division=0 reports 0 without a warning when no positive class is
# predicted, which can happen on a very small test split.
precision = precision_score(y_test, y_pred, zero_division=0)
print(f"Precision: {precision * 100:.2f}%")


plt.figure(figsize=(12, 8))
# Pass plain lists: some scikit-learn releases validate these two arguments
# strictly and reject a pandas Index or a NumPy array.
plot_tree(
    classifier,
    feature_names=list(X.columns),
    class_names=list(target_encoder.classes_),
    filled=True,
)
plt.title("Decision Tree for Playing Tennis")
plt.show()

def get_user_input():
    """Ask the user for input to predict whether to play tennis.

    Prompts for outlook, temperature, humidity and wind, then encodes each
    answer with the matching entry of the module-level ``label_encoders``.

    Returns:
        A list of the four encoded values in the order outlook, temp,
        humidity, wind, or ``None`` if an answer is rejected by its encoder.
    """
    print("\nAnswer the following questions to predict whether to play tennis or not:")

    questions = (
        ('outlook', "Outlook (Sunny/Overcast/Rain): "),
        ('temp', "Temperature (Hot/Mild/Cool): "),
        ('humidity', "Humidity (High/Normal): "),
        ('wind', "Wind (Weak/Strong): "),
    )

    # Ask all four questions before validating, as before. Strip surrounding
    # whitespace so that an answer such as "sunny " is not rejected.
    answers = [
        (column, input(prompt).strip().capitalize())
        for column, prompt in questions
    ]

    encoded = []
    for column, answer in answers:
        encoder = label_encoders[column]
        try:
            encoded.append(encoder.transform([answer])[0])
        except ValueError:
            # The encoder rejects labels it was not fitted on.
            print("Invalid input! Please enter valid values.")
            valid = ", ".join(str(label) for label in encoder.classes_)
            print(f"'{answer}' is not a valid value for {column} (expected one of: {valid}).")
            return None

    return encoded

# Ask for one set of conditions and, if every answer was accepted, report
# the tree's decision for it.
user_input = get_user_input()
if user_input:
    # Same column names as the training frame.
    user_input_df = pd.DataFrame([user_input], columns=X.columns)
    prediction = classifier.predict(user_input_df)
    prediction_label = target_encoder.inverse_transform(prediction)[0]

    message = (
        "\nPrediction: Play Tennis."
        if prediction_label == "Yes"
        else "\nPrediction: Don't Play Tennis."
    )
    print(message)

In [None]:
# k-Nearest neighbours on social_network
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score


# k-nearest neighbours on the Social Network Ads data: load, split, fit,
# report metrics, classify one new sample and plot it with the training set.

# Imported here because this cell's import block does not provide them.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("/content/Social_Network_Ads.csv")
print(df.head())


X = df[['Age', 'EstimatedSalary']].values
y = df['Purchased'].values


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# KNN ranks neighbours by distance, so the feature with the much larger
# numeric range would decide almost every neighbour on its own. Standardize
# inside a pipeline: the scaler is fitted on the training data only and is
# applied automatically to anything passed to predict.
knn_model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
knn_model.fit(X_train, y_train)

y_pred = knn_model.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


# Raw values are fine here: the pipeline scales them before the classifier.
new_input = np.array([[30, 117000]])

predicted_class = knn_model.predict(new_input)
print(f"\nPrediction for input {new_input[0]}: {'Purchased' if predicted_class[0] == 1 else 'Not Purchased'}")


# The plot shows the raw (unscaled) training samples plus the new point.
plt.scatter(X_train[y_train == 0][:, 0], X_train[y_train == 0][:, 1], color='red', label='Not Purchased (0)')
plt.scatter(X_train[y_train == 1][:, 0], X_train[y_train == 1][:, 1], color='green', label='Purchased (1)')

plt.scatter(new_input[0][0], new_input[0][1], color='blue', marker='*', s=200, label='New Input (Prediction)')

plt.title('KNN Classification - Training Set')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()


In [None]:
# k-nearest neighbour to predict SUV buying
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score

# k-nearest neighbours on the SUV data: load, split, fit, report metrics,
# classify one new sample and plot it with the training set.

# Imported here because this cell's import block does not provide them.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("/content/suv_data.csv")
print(df.head())


X = df[['Age', 'EstimatedSalary']].values
y = df['Purchased'].values


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# KNN ranks neighbours by distance, so the feature with the much larger
# numeric range would decide almost every neighbour on its own. Standardize
# inside a pipeline: the scaler is fitted on the training data only and is
# applied automatically to anything passed to predict.
knn_model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
knn_model.fit(X_train, y_train)

y_pred = knn_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Raw values are fine here: the pipeline scales them before the classifier.
new_input = np.array([[30, 50000]])

predicted_class = knn_model.predict(new_input)

if predicted_class[0] == 1:
    print(f"\nPrediction for input {new_input[0]}: Will Buy SUV")
else:
    print(f"\nPrediction for input {new_input[0]}: Won't Buy SUV")

# The plot shows the raw (unscaled) training samples plus the new point.
plt.scatter(X_train[y_train == 0][:, 0], X_train[y_train == 0][:, 1], color='red', label="Won't Buy (0)")
plt.scatter(X_train[y_train == 1][:, 0], X_train[y_train == 1][:, 1], color='green', label='Will Buy (1)')

plt.scatter(new_input[0][0], new_input[0][1], color='blue', marker='*', s=200, label='New Input (Prediction)')

plt.title('KNN Classification - SUV Dataset')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
