# **Multi Class Classification Problem**

In [117]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [118]:
import pandas as pd

In [119]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Machine Learning/DAY_7/devnagari_db_csv.zip")

In [120]:
df.shape

(92000, 1025)

In [121]:
df.columns

Index(['pixel_0000', 'pixel_0001', 'pixel_0002', 'pixel_0003', 'pixel_0004',
       'pixel_0005', 'pixel_0006', 'pixel_0007', 'pixel_0008', 'pixel_0009',
       ...
       'pixel_1015', 'pixel_1016', 'pixel_1017', 'pixel_1018', 'pixel_1019',
       'pixel_1020', 'pixel_1021', 'pixel_1022', 'pixel_1023', 'character'],
      dtype='object', length=1025)

In [122]:
selected_chars = ['character_04_gha' , 'character_14_dhaa',
                  'character_24_bha' , 'character_25_ma',
                  'character_21_pa' , 'character_22_pha']

In [123]:
df_s = df.loc[df['character'].isin(selected_chars), :]

In [124]:
df_s.shape

(12000, 1025)

In [125]:
df_s['character'].nunique()

6

In [126]:
df_s['character'].unique()

array(['character_04_gha', 'character_14_dhaa', 'character_21_pa',
       'character_22_pha', 'character_24_bha', 'character_25_ma'],
      dtype=object)

In [127]:
label_map = {'character':{'character_04_gha':0,
                          'character_14_dhaa':1,
                          'character_21_pa':2,
                          'character_22_pha':3,
                          'character_24_bha':4,
                          'character_25_ma':5}
            }

In [128]:
df_s.replace(label_map, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_s.replace(label_map, inplace=True)


In [129]:
df_s['character'].unique()

array([0, 1, 2, 3, 4, 5])

# **X and Y Split**

In [130]:
X = df_s.drop('character' , axis = 1)
y = df_s.loc[:, 'character']

In [131]:
y.unique()

array([0, 1, 2, 3, 4, 5])

In [132]:
y.value_counts()

character
0    2000
1    2000
2    2000
3    2000
4    2000
5    2000
Name: count, dtype: int64

In [133]:
from sklearn.model_selection import train_test_split

In [134]:
X_train,X_test,y_train,y_test = train_test_split(X,y,
                                                 test_size=0.3,
                                                 random_state=7,
                                                 stratify=y)

In [135]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((8400, 1024), (3600, 1024), (8400,), (3600,))

# **which classifiers support Multi class classification directly ?**

*  Decision Tree
*  Logistic Regression
*  KNN
*  RandomForestClassifier
*  GradientBoostingClassifier
*  XGBoostClassifier
*  CatBoostClassifier












In [136]:
from sklearn.linear_model import LogisticRegression

In [137]:
lr = LogisticRegression(solver = 'saga', random_state= 7)

In [138]:
lr.fit(X_train,y_train)



In [139]:
y_pred = lr.predict(X_test)

In [140]:
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report

In [141]:
precision_score(y_test,y_pred, average = 'micro')

0.8905555555555555

In [142]:
recall_score(y_test,y_pred, average = 'micro')

0.8905555555555555

In [143]:
f1_score(y_test,y_pred, average = 'micro')

0.8905555555555555

In [144]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.84      0.83      0.83       600
           1       0.98      0.97      0.98       600
           2       0.83      0.86      0.84       600
           3       0.93      0.93      0.93       600
           4       0.89      0.89      0.89       600
           5       0.88      0.87      0.88       600

    accuracy                           0.89      3600
   macro avg       0.89      0.89      0.89      3600
weighted avg       0.89      0.89      0.89      3600



# **Random Forest Classifier**

In [145]:
from sklearn.ensemble import RandomForestClassifier

In [146]:
rf = RandomForestClassifier(max_samples = 0.8,
                            oob_score = True,
                            random_state = 7)

In [147]:
rf.fit(X_train,y_train)

In [148]:
y_pred = rf.predict(X_test)

In [149]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.95      0.93      0.94       600
           1       0.99      0.98      0.99       600
           2       0.91      0.96      0.93       600
           3       0.96      0.96      0.96       600
           4       0.95      0.91      0.93       600
           5       0.92      0.94      0.93       600

    accuracy                           0.95      3600
   macro avg       0.95      0.95      0.95      3600
weighted avg       0.95      0.95      0.95      3600



In [150]:
f1_score(y_test, y_pred, average = 'micro')

0.9466666666666667

# **XGBoost**

In [151]:
import xgboost as xg

In [152]:
xgc = xg.XGBClassifier(random_state = 7)

In [153]:
xgc.fit(X_train, y_train)

In [154]:
y_pred = xgc.predict(X_test)

In [156]:
f1_score(y_test, y_pred, average = 'micro')

0.9636111111111111

In [157]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.96      0.95      0.95       600
           1       0.99      0.99      0.99       600
           2       0.95      0.97      0.96       600
           3       0.97      0.98      0.98       600
           4       0.97      0.94      0.95       600
           5       0.94      0.95      0.95       600

    accuracy                           0.96      3600
   macro avg       0.96      0.96      0.96      3600
weighted avg       0.96      0.96      0.96      3600



# **CatBoost**

In [None]:
!pip install catboost

In [None]:
import catboost

In [None]:
from catboost import CatBoostClassifier

In [None]:
cbc = CatBoostClassifier(random_state = 7)

In [None]:
cbc.fit(X_train, y_train)

In [None]:
y_pred = cbc.predict(X_test)

In [None]:
f1_score(y_test, y_pred, average = 'micro')

In [None]:
print(classification_report(y_test, y_pred))