<a href="https://colab.research.google.com/github/meetdarbar93/AI-ML-Internship/blob/main/Day8/Day8_sklearn_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#SVC Model

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
df = pd.read_csv(url)
df.head()

le = LabelEncoder()
df['species'] = le.fit_transform(df['species'])

X = df.drop('species',axis=1)
y = df['species']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)

svm = SVC(kernel='rbf')
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

print(accuracy_score(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))


1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



#Random Forest Classifier

In [2]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

print(accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



#visualize features importance

In [3]:
import pandas as pd
pd.DataFrame({'Feature': X.columns, 'Importance': rf.feature_importances_}).sort_values(by="Importance", ascending=False)


Unnamed: 0,Feature,Importance
2,petal_length,0.443288
3,petal_width,0.426342
0,sepal_length,0.103388
1,sepal_width,0.026982


#Apply Cross-Validation


In [4]:
from sklearn.model_selection import cross_val_score
cv_score = cross_val_score(svm, X, y, cv=5)
print(cv_score.mean())

0.9666666666666666


#Hyperparameter Tuning (GridSearchCV)

In [5]:
from sklearn.model_selection import GridSearchCV

params = {
    'kernel':['linear','rbf','poly'],
    'C':[0.1,1,10],
    'gamma':['scale','auto']
}

grid = GridSearchCV(SVC(), params, cv=5)
grid.fit(X_train, y_train)
print(grid.best_params_)
print(grid.best_score_)

{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
0.9583333333333334


#Model Comparison Table


In [6]:

data = {
    "Model": ["SVM", "Random Forest"],
    "Test Accuracy": [accuracy_score(y_test, y_pred_svm),
                      accuracy_score(y_test, y_pred_rf)],
    "Cross-Validation Score": [cv_score.mean(),
                               cross_val_score(rf, X, y, cv=5).mean()],
    "Tuned?": ["Yes", "No"],
    "Notes": ["Improved after tuning", "Strong baseline"]
}

df_compare = pd.DataFrame(data)

df_compare.to_csv("Day7_Model_Comparison.csv", index=False)

df_compare


Unnamed: 0,Model,Test Accuracy,Cross-Validation Score,Tuned?,Notes
0,SVM,1.0,0.966667,Yes,Improved after tuning
1,Random Forest,1.0,0.96,No,Strong baseline
