<a href="https://colab.research.google.com/github/sohamgeek/bmllca/blob/main/skill_1_to_8expt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Install necessary libraries
!pip install pandas numpy scikit-learn

# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

# 1. Load Dataset
file_path = "IRIS (1).csv"  # Ensure the correct file path
df = pd.read_csv(file_path)

# Display dataset preview
print("Dataset Preview:")
print(df.head())

# Check the column names
print("\nColumns in dataset:", df.columns)

# Assuming the last column is the target (species/class label)
target_column = df.columns[-1]

# 2. Data Preprocessing
# Encode target labels if they are categorical
if df[target_column].dtype == 'object':
    label_encoder = LabelEncoder()
    df[target_column] = label_encoder.fit_transform(df[target_column])

# Define Features (X) and Target (y)
X = df.iloc[:, :-1].values  # Select all columns except last
y = df[target_column].values  # Select the last column as target

# Standardizing the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# 3. Logistic Regression Model
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)
y_pred_logistic = logistic_model.predict(X_test)
print("\nLogistic Regression Accuracy:", accuracy_score(y_test, y_pred_logistic))

# 4. Naïve Bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
y_pred_nb = nb_model.predict(X_test)
print("Naïve Bayes Accuracy:", accuracy_score(y_test, y_pred_nb))

# 5. Decision Tree
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))

# 6. Random Forest
rf_model = RandomForestClassifier(n_estimators=100)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

# 7. K-Nearest Neighbors (KNN)
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)
y_pred_knn = knn_model.predict(X_test)
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))

# 8. K-Means Clustering
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(X_scaled)
print("K-Means Cluster Centers:\n", kmeans.cluster_centers_)


Dataset Preview:
   ID  Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
0   1           5.1          3.5           1.4          0.2  setosa
1   2           4.9          3.0           1.4          0.2  setosa
2   3           4.7          3.2           1.3          0.2  setosa
3   4           4.6          3.1           1.5          0.2  setosa
4   5           5.0          3.6           1.4          0.2  setosa

Columns in dataset: Index(['ID', 'Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',
       'Species'],
      dtype='object')

Logistic Regression Accuracy: 1.0
Naïve Bayes Accuracy: 1.0
Decision Tree Accuracy: 1.0
Random Forest Accuracy: 1.0
KNN Accuracy: 1.0
K-Means Cluster Centers:
 [[ 1.14317894  1.03542672 -0.07810364  1.04210818  1.09314461]
 [ 0.07560189  0.03894137 -0.73570307  0.30698645  0.21446939]
 [-1.1547262  -1.01457897  0.85326268 -1.30498732 -1.25489349]]
