In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

try:
    from xgboost import XGBClassifier
except ImportError:
    XGBClassifier = None

try:
    from lightgbm import LGBMClassifier
except ImportError:
    LGBMClassifier = None

def proeed_all_models(data, target):
    # Clean and preprocess
    data = data.dropna()
    x = data.drop(columns=[target])
    y = data[target]

    le = LabelEncoder()
    y = le.fit_transform(y)

    x_train, x_val, y_train, y_val = tts(x, y, test_size=0.35, random_state=42)

    # All models
    models = {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(),
        "KNN": KNeighborsClassifier(),
        "Naive Bayes": GaussianNB(),
        "Gradient Boosting": GradientBoostingClassifier(),
        "MLP (Neural Net)": MLPClassifier(max_iter=500),
        "SVM": SVC(),
    }

    if XGBClassifier:
        models["XGBoost"] = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
    if LGBMClassifier:
        models["LightGBM"] = LGBMClassifier()

    # To store results
    results = []

    # Training loop
    for name, model in models.items():
        print(f"\n🔹 Training and Evaluating: {name}")
        model.fit(x_train, y_train)

        y_train_pred = model.predict(x_train)
        y_val_pred = model.predict(x_val)

        train_acc = accuracy_score(y_train, y_train_pred)
        val_acc = accuracy_score(y_val, y_val_pred)
        f1 = f1_score(y_val, y_val_pred, average='macro')

        print(f"✅ Train Accuracy: {train_acc:.4f}")
        print(f"✅ Validation Accuracy: {val_acc:.4f}")
        print(f"🎯 F1 Score (Macro): {f1:.4f}")
        print("📊 Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))
        print("📄 Classification Report:\n", classification_report(y_val, y_val_pred, target_names=le.classes_))

        results.append({
            "Model": name,
            "Train Accuracy": train_acc,
            "Validation Accuracy": val_acc,
            "F1 Score (Macro)": f1
        })

    # Convert to DataFrame
    results_df = pd.DataFrame(results)
    return results_df


In [2]:
df = pd.read_csv(r"C:/Users/Dell/OneDrive/Desktop/Summer_Intern/all_aif_poems_new.csv")
df.head()

Unnamed: 0,author,poem_name,TOTAL_WORD_COUNT,UNIQUE_WORD_COUNT,TYPE_TOKEN_RATIO,AVG_WORD_LENGTH,STD_WORD_LENGTH,ZIPF_COEFFICIENT,HAPAX_LEGOMENA,FUNCTION_WORD_FREQ,...,debug_word_count_difference,hapax_ratio,function_word_ratio,char_diversity_ratio,rare_char_count,avg_line_length_chars,std_line_length_words,num_stanzas,compound_word_ratio,avg_words_per_sentence
0,Avvaiyaar,அமுதும்‌ அன்பும்‌!,15,15,1.0,7.6,2.184796,,15,0,...,40,1.0,0.0,0.604167,22,33.0,0.433013,1,0.733333,15.0
1,Avvaiyaar,அழகு எது,18,16,0.888889,6.166667,2.967416,0.776812,14,0,...,32,0.777778,0.0,0.5,19,34.0,0.5,1,0.777778,18.0
2,Avvaiyaar,ஆரையடா,15,15,1.0,7.0,2.129163,,15,0,...,28,1.0,0.0,0.541667,14,30.75,0.433013,1,0.6,15.0
3,Avvaiyaar,இடம்‌ எங்கே,15,15,1.0,7.733333,2.694851,,15,0,...,37,1.0,0.0,0.520833,24,34.75,0.433013,1,0.866667,15.0
4,Avvaiyaar,இன்றுபோல்‌ என்றும்‌ இரும்‌!,15,15,1.0,7.8,2.343786,,15,0,...,34,1.0,0.0,0.5625,25,35.75,0.433013,1,0.866667,15.0


In [3]:
df = df.drop(columns=["poem_name"])

In [4]:
proeed_all_models(df, "author")


🔹 Training and Evaluating: Logistic Regression
✅ Train Accuracy: 1.0000
✅ Validation Accuracy: 0.9545
🎯 F1 Score (Macro): 0.7292
📊 Confusion Matrix:
 [[ 0  0  0  2]
 [ 0 19  0  0]
 [ 0  0 12  0]
 [ 0  0  0 11]]
📄 Classification Report:
                 precision    recall  f1-score   support

     Avvaiyaar       0.00      0.00      0.00         2
        Kambar       1.00      1.00      1.00        19
Manikkavasagar       1.00      1.00      1.00        12
       kapilar       0.85      1.00      0.92        11

      accuracy                           0.95        44
     macro avg       0.71      0.75      0.73        44
  weighted avg       0.92      0.95      0.93        44


🔹 Training and Evaluating: Decision Tree
✅ Train Accuracy: 1.0000
✅ Validation Accuracy: 0.8864
🎯 F1 Score (Macro): 0.6789
📊 Confusion Matrix:
 [[ 0  0  0  2]
 [ 0 17  2  0]
 [ 0  1 11  0]
 [ 0  0  0 11]]
📄 Classification Report:
                 precision    recall  f1-score   support

     Avvaiyaar       0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


✅ Train Accuracy: 1.0000
✅ Validation Accuracy: 0.8636
🎯 F1 Score (Macro): 0.6629
📊 Confusion Matrix:
 [[ 0  0  0  2]
 [ 0 16  3  0]
 [ 0  1 11  0]
 [ 0  0  0 11]]
📄 Classification Report:
                 precision    recall  f1-score   support

     Avvaiyaar       0.00      0.00      0.00         2
        Kambar       0.94      0.84      0.89        19
Manikkavasagar       0.79      0.92      0.85        12
       kapilar       0.85      1.00      0.92        11

      accuracy                           0.86        44
     macro avg       0.64      0.69      0.66        44
  weighted avg       0.83      0.86      0.84        44


🔹 Training and Evaluating: KNN
✅ Train Accuracy: 0.8734
✅ Validation Accuracy: 0.8409
🎯 F1 Score (Macro): 0.6454
📊 Confusion Matrix:
 [[ 0  0  0  2]
 [ 0 16  3  0]
 [ 0  2 10  0]
 [ 0  0  0 11]]
📄 Classification Report:
                 precision    recall  f1-score   support

     Avvaiyaar       0.00      0.00      0.00         2
        Kambar       0.8

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


✅ Train Accuracy: 1.0000
✅ Validation Accuracy: 0.8636
🎯 F1 Score (Macro): 0.7860
📊 Confusion Matrix:
 [[ 1  0  0  1]
 [ 0 16  3  0]
 [ 0  1 11  0]
 [ 1  0  0 10]]
📄 Classification Report:
                 precision    recall  f1-score   support

     Avvaiyaar       0.50      0.50      0.50         2
        Kambar       0.94      0.84      0.89        19
Manikkavasagar       0.79      0.92      0.85        12
       kapilar       0.91      0.91      0.91        11

      accuracy                           0.86        44
     macro avg       0.78      0.79      0.79        44
  weighted avg       0.87      0.86      0.86        44


🔹 Training and Evaluating: MLP (Neural Net)
✅ Train Accuracy: 0.4557
✅ Validation Accuracy: 0.5455
🎯 F1 Score (Macro): 0.3289
📊 Confusion Matrix:
 [[ 0  0  0  2]
 [ 0 19  0  0]
 [ 0 12  0  0]
 [ 6  0  0  5]]
📄 Classification Report:
                 precision    recall  f1-score   support

     Avvaiyaar       0.00      0.00      0.00         2
        Kam

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


✅ Train Accuracy: 1.0000
✅ Validation Accuracy: 0.9318
🎯 F1 Score (Macro): 0.7124
📊 Confusion Matrix:
 [[ 0  0  0  2]
 [ 0 18  1  0]
 [ 0  0 12  0]
 [ 0  0  0 11]]
📄 Classification Report:
                 precision    recall  f1-score   support

     Avvaiyaar       0.00      0.00      0.00         2
        Kambar       1.00      0.95      0.97        19
Manikkavasagar       0.92      1.00      0.96        12
       kapilar       0.85      1.00      0.92        11

      accuracy                           0.93        44
     macro avg       0.69      0.74      0.71        44
  weighted avg       0.90      0.93      0.91        44



Unnamed: 0,Model,Train Accuracy,Validation Accuracy,F1 Score (Macro)
0,Logistic Regression,1.0,0.954545,0.729167
1,Decision Tree,1.0,0.886364,0.678896
2,Random Forest,1.0,0.863636,0.662927
3,KNN,0.873418,0.840909,0.645383
4,Naive Bayes,0.987342,0.886364,0.678896
5,Gradient Boosting,1.0,0.863636,0.786033
6,MLP (Neural Net),0.455696,0.545455,0.328889
7,SVM,0.797468,0.727273,0.548602
8,XGBoost,1.0,0.954545,0.729167
9,LightGBM,1.0,0.931818,0.71241


In [29]:
df.to_csv(r"C:/Users/Dell/OneDrive/Desktop/Summer_Intern/models_file.csv", index=False)
print(f"\nModels saved to:{"C:/Users/Dell/OneDrive/Desktop/Summer_Intern"}")


Models saved to:C:/Users/Dell/OneDrive/Desktop/Summer_Intern
