<a href="https://colab.research.google.com/github/sangitaGIT011/Myproj/blob/main/FML_06to10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Practical 6
#On cleaned dataset, apply Naïve Bayes classification and compare its result with decision tree
#and random forest.

# ------------------------------------------------------------
# Final ML Classification Comparison
# ------------------------------------------------------------

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')

# ------------------------------------------------------------
# 1. Build the dataset
# ------------------------------------------------------------
# Iris serves as a ready-made clean dataset; any other cleaned dataset can be
# substituted as long as the target ends up in the last column.
from sklearn.datasets import load_iris
iris = load_iris()

# Feature columns first, integer class label appended as the last column
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = iris.target

# Optional quick look at the data
#print("Dataset Information:")
#print(df.head(), "\n")

# Everything except the last column is a feature; the last column is the target
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 30% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
)


def _fit_and_report(model, heading):
    """Fit *model* on the training split and print its test-set metrics.

    Reads the module-level X_train / X_test / y_train / y_test split.
    Returns a (fitted model, test predictions, test accuracy) tuple.
    """
    print(heading)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", round(accuracy, 3))
    print("Classification Report:\n", classification_report(y_test, predictions))
    return model, predictions, accuracy


# ------------------------------------------------------------
# 2. Naïve Bayes Classifier
# ------------------------------------------------------------
nb, nb_pred, nb_acc = _fit_and_report(
    GaussianNB(),
    "=== Naïve Bayes Classification ===",
)

# ------------------------------------------------------------
# 3. Decision Tree Classifier
# ------------------------------------------------------------
print("\n---------------")
dt, dt_pred, dt_acc = _fit_and_report(
    DecisionTreeClassifier(random_state=42),
    "\n=== Decision Tree Classification ===",
)

# ------------------------------------------------------------
# 4. Random Forest Classifier
# ------------------------------------------------------------
print("\n---------------")
rf, rf_pred, rf_acc = _fit_and_report(
    RandomForestClassifier(n_estimators=100, random_state=42),
    "\n=== Random Forest Classification ===",
)
print("\n---------------")


# ------------------------------------------------------------
# 5. Compare Model Accuracies
# ------------------------------------------------------------
# One row per model, in the order the models were trained above
model_names = ['Naïve Bayes', 'Decision Tree', 'Random Forest']
model_accuracies = [nb_acc, dt_acc, rf_acc]
accuracy_results = pd.DataFrame({
    'Model': model_names,
    'Accuracy': model_accuracies,
})

print("\n=== Model Accuracy Comparison ===")
print(accuracy_results)
print("\n---------------")

# ------------------------------------------------------------
# 6. Conclusion
# ------------------------------------------------------------
# idxmax gives the row label of the highest accuracy (first one on ties)
best_row_label = accuracy_results['Accuracy'].idxmax()
best_model = accuracy_results.loc[best_row_label]
print("\nBest Performing Model:")
print(best_model)


In [None]:
# Practical 7 (FML)
# Develop a code to classify spam mail with Naïve Bayes.
# ------------------------------------------------------------
# NOTE: the separator above must stay a comment. A bare run of dashes is parsed
# as a chain of unary minus operators with no operand, which is a SyntaxError
# that prevents the whole cell from running.

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# ------------------------------------------------------------
# 1. Load Dataset
# ------------------------------------------------------------
# Example dataset URL (SMS Spam Collection): tab-separated, no header row,
# first field is the label and second field is the raw message text.
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(url, sep='\t', header=None, names=['label', 'message'])

# Inspect dataset
#print("Dataset info:\n", df.info())
#print("\nFirst 5 rows:\n", df.head())

# Encode labels (ham=0, spam=1); any other label value would map to NaN
df['label_num'] = df['label'].map({'ham': 0, 'spam': 1})

# ------------------------------------------------------------
# 2. Train-Test Split
# ------------------------------------------------------------
X = df['message']
y = df['label_num']

# 25% of the messages are held out for evaluation (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# ------------------------------------------------------------
# 3. Text Preprocessing (Convert text to numerical features)
# ------------------------------------------------------------
# Convert messages to a bag-of-words matrix. The vocabulary is learned from
# the training messages only and then re-used for the test messages.
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
X_test_counts = count_vect.transform(X_test)

# Optional: TF-IDF transformation (also fitted on the training counts only)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
X_test_tfidf = tfidf_transformer.transform(X_test_counts)

# ------------------------------------------------------------
# 4. Train Naïve Bayes Classifier
# ------------------------------------------------------------
nb_model = MultinomialNB()
nb_model.fit(X_train_tfidf, y_train)

# Predict on test set
y_pred = nb_model.predict(X_test_tfidf)

# ------------------------------------------------------------
# 5. Evaluate Model
# ------------------------------------------------------------
acc = accuracy_score(y_test, y_pred)
print(f"\nAccuracy of Naïve Bayes Spam Classifier: {acc:.3f}\n")

print("Classification Report:")
# target_names follow the numeric encoding order: 0 -> Ham, 1 -> Spam
print(classification_report(y_test, y_pred, target_names=['Ham','Spam']))


# ------------------------------------------------------------
# 6. Test with new sample messages
# ------------------------------------------------------------
sample_msgs = ["Congratulations! You won a free iPhone. Click here to claim.",
               "Hi mom, can we meet tomorrow?"]
# New text must pass through the same fitted vectorizer and TF-IDF transformer
sample_counts = count_vect.transform(sample_msgs)
sample_tfidf = tfidf_transformer.transform(sample_counts)
sample_pred = nb_model.predict(sample_tfidf)

for msg, label in zip(sample_msgs, sample_pred):
    print(f"\nMessage: {msg}\nPredicted Label: {'Spam' if label==1 else 'Ham'}")


In [None]:
#Practical 8
#Estimate the accuracy of Naïve Bayes algorithm using 10-fold cross validation on the housevotes-84 data set.
# ------------------------------------------------------------
# Naïve Bayes 10-Fold Cross Validation on HouseVotes-84
# ------------------------------------------------------------

import pandas as pd
from sklearn.model_selection import cross_val_score, KFold
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder

# ------------------------------------------------------------
# 1. Load and prepare the data
# ------------------------------------------------------------
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/voting-records/house-votes-84.data"

# The raw file has no header row: the first field is the party and the
# remaining sixteen fields are the individual votes.
columns = [
    'party',
    'handicapped-infants',
    'water-project-cost-sharing',
    'adoption-of-the-budget-resolution',
    'physician-fee-freeze',
    'el-salvador-aid',
    'religious-groups-in-schools',
    'anti-satellite-test-ban',
    'aid-to-nicaraguan-contras',
    'mx-missile',
    'immigration',
    'synfuels-corporation-cutback',
    'education-spending',
    'superfund-right-to-sue',
    'crime',
    'duty-free-exports',
    'export-administration-act-south-africa',
]

# '?' marks a missing vote: parse it as NaN while reading, then discard every
# record that has at least one missing value
df = pd.read_csv(url, header=None, names=columns, na_values='?')
df.dropna(inplace=True)

# Votes become integers (y -> 1, n -> 0); every column after 'party' is a vote
vote_cols = df.columns[1:]
vote_codes = {'y': 1, 'n': 0}
for col in vote_cols:
    df[col] = df[col].map(vote_codes)

# Party labels become integers; LabelEncoder numbers the classes in sorted
# order, so democrat -> 0 and republican -> 1
le = LabelEncoder().fit(df['party'])
df['party'] = le.transform(df['party'])

# Feature matrix and target vector
X, y = df[vote_cols], df['party']

# ------------------------------------------------------------
# 2. Naïve Bayes with 10-Fold Cross Validation
# ------------------------------------------------------------
# Shuffled 10-fold split with a fixed seed so the scores are reproducible
kf = KFold(n_splits=10, shuffle=True, random_state=42)
nb = CategoricalNB()
scores = cross_val_score(estimator=nb, X=X, y=y, cv=kf)

# ------------------------------------------------------------
# 3. Display Results
# ------------------------------------------------------------
mean_accuracy = scores.mean()
accuracy_spread = scores.std()
print("10-Fold Cross Validation Accuracy Scores:\n", scores)
print("\nMean Accuracy: {:.3f}".format(mean_accuracy))
print("Standard Deviation: {:.3f}".format(accuracy_spread))


In [None]:
# Practical 9
# Develop a feed forward neural network with backpropagation function to improve a
# handwritten character recognition system


import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
# Keras names used below. They were previously missing, so the cell failed
# with NameError on `mnist` before doing any work.
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# ------------------------------------------------------------
# 1. Load and Preprocess Data
# ------------------------------------------------------------
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Normalize pixel values to [0, 1]
X_train = X_train / 255.0
X_test = X_test / 255.0

# One-hot encode labels. The width is pinned to the 10 digit classes so that
# train and test targets always have the same number of columns.
y_train_cat = to_categorical(y_train, num_classes=10)
y_test_cat = to_categorical(y_test, num_classes=10)

print("Training set shape:", X_train.shape, y_train_cat.shape)
print("Test set shape:", X_test.shape, y_test_cat.shape)

# ------------------------------------------------------------
# 2. Build Feedforward Neural Network
# ------------------------------------------------------------
model = Sequential()

# Flatten input 28x28 image to 784 vector
model.add(Flatten(input_shape=(28,28)))

# Hidden layer 1
model.add(Dense(128, activation='relu'))

# Hidden layer 2
model.add(Dense(64, activation='relu'))

# Output layer (10 classes)
model.add(Dense(10, activation='softmax'))

# Compile model; categorical cross-entropy matches the one-hot targets
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# ------------------------------------------------------------
# 3. Train Model
# ------------------------------------------------------------
# 10% of the training data is held out for validation during fitting
history = model.fit(
    X_train, y_train_cat,
    validation_split=0.1,
    epochs=15,
    batch_size=128,
    verbose=1
)

# ------------------------------------------------------------
# 4. Evaluate Model
# ------------------------------------------------------------
loss, accuracy = model.evaluate(X_test, y_test_cat)
print(f"\nTest Accuracy: {accuracy:.4f}")

# ------------------------------------------------------------
# 5. Predict & Classification Report
# ------------------------------------------------------------
# argmax turns the per-class softmax outputs back into digit labels
y_pred = np.argmax(model.predict(X_test), axis=1)

# Classification Report (uses the integer labels, not the one-hot targets)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# ------------------------------------------------------------
# 6. Test on New Sample Images
# ------------------------------------------------------------
# Wide, short canvas: five digits side by side need far more than 1x1 inch
plt.figure(figsize=(10, 2))
for i in range(5):
    plt.subplot(1,5,i+1)
    plt.imshow(X_test[i], cmap='gray')
    plt.title(f"Pred: {y_pred[i]}")
    plt.axis('off')
plt.show()


In [None]:
# Practical 10
# Develop a neural network, use learning functions and tune the parameters to reduce the mean
# square error for recognizing the face.
# ------------------------------------------------------------
# Neural Network for Face Recognition
# ------------------------------------------------------------

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
# Keras names used below. They were previously missing, so the cell failed
# with NameError on `to_categorical` when run in a fresh kernel.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# ------------------------------------------------------------
# 1. Load LFW Dataset
# ------------------------------------------------------------
# Keep only people with at least 50 images; images are downscaled by half
lfw = fetch_lfw_people(min_faces_per_person=50, resize=0.5)
X = lfw.images
y = lfw.target
target_names = lfw.target_names
n_classes = len(target_names)

print("Dataset shape:", X.shape)
print("Number of classes:", n_classes)

# Normalize pixel values to [0, 1].
# NOTE(review): depending on the scikit-learn version, fetch_lfw_people may
# already return intensities in [0, 1]; dividing again would squash the
# inputs to roughly [0, 0.004]. Only rescale data that is still on 0-255.
if X.max() > 1.0:
    X = X / 255.0

# Flatten images for feedforward network (one row per image)
X_flat = X.reshape(X.shape[0], -1)

# One-hot encode labels
y_cat = to_categorical(y, num_classes=n_classes)

# Train-test split (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X_flat, y_cat, test_size=0.2, random_state=42)

# ------------------------------------------------------------
# 2. Build Neural Network
# ------------------------------------------------------------
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(X_flat.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dense(n_classes, activation='softmax'))  # Output layer

# Compile model with MSE loss (the quantity this exercise asks to reduce)
optimizer = Adam(learning_rate=0.001)  # You can tune learning rate
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])

# ------------------------------------------------------------
# 3. Train Model
# ------------------------------------------------------------
# 10% of the training data is held out for validation during fitting
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=50,
    batch_size=32,
    verbose=1
)
# ------------------------------------------------------------
# 4. Evaluate Model
# ------------------------------------------------------------
# With loss='mean_squared_error' the reported loss is the test MSE
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {accuracy:.4f}")
print(f"Test Mean Squared Error: {loss:.4f}")

# Predict for MSE calculation manually (cross-check of the value above)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Calculated MSE: {mse:.4f}")