In [None]:
import os

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

try:
    # joblib is a standalone package; the sklearn.externals.joblib alias was
    # removed in scikit-learn 0.23, so prefer the direct import.
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [None]:
def train_test_split(df, test_size=0.2, random_state=42):
    """Split a DataFrame into train/test features and labels.

    Features are read from ``df['vectors']`` (converted to a plain list) and
    labels from ``df['sentiment']``.

    NOTE: this function deliberately keeps its original name, which shadows
    scikit-learn's ``train_test_split``. The scikit-learn function is therefore
    imported locally under an alias; calling the bare name here would recurse
    into this wrapper and fail with a ``TypeError``.

    Args:
        df: DataFrame with a ``vectors`` column and a ``sentiment`` column.
        test_size: Fraction (or count) of rows held out for testing;
            forwarded to scikit-learn.
        random_state: Seed forwarded to scikit-learn for a reproducible split.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        KeyError: If ``df`` lacks the ``vectors`` or ``sentiment`` column.
    """
    # Local aliased import: the module-level name `train_test_split` is this
    # wrapper itself, so the library function must be reachable another way.
    from sklearn.model_selection import train_test_split as _sk_train_test_split

    X = df['vectors'].to_list()
    y = df['sentiment']
    X_train, X_test, y_train, y_test = _sk_train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return (X_train, X_test, y_train, y_test)

In [None]:
def models_fit(X_train, y_train):
    """Tune, fit and persist one classifier per model family.

    Each estimator with a parameter grid is tuned with 5-fold ``GridSearchCV``
    (scored on accuracy) and its ``best_estimator_`` is kept. Gaussian Naive
    Bayes has no grid here, so it is fitted directly with default settings;
    it is still saved because ``models_predict_summary`` loads a file for it.

    Every resulting model is written to
    ``./BestModels/best_<name>_model.pkl`` with ``joblib.dump``.

    NOTE(review): MultinomialNB rejects negative feature values -- confirm the
    training vectors are non-negative (e.g. counts / TF-IDF) before running.

    Args:
        X_train: Training feature vectors.
        y_train: Training labels.

    Returns:
        Dict mapping model display name to the fitted best estimator.
    """
    # name -> (estimator, param_grid); a grid of None means "nothing to tune".
    candidates = {
        'Logistic Regression': (
            LogisticRegression(),
            {
                'C': [0.1, 1.0, 10.0],
                'solver': ['liblinear', 'lbfgs'],
                'max_iter': [100, 200, 300],
            },
        ),
        'Gaussian Naive Bayes': (GaussianNB(), None),
        'Multinomial Naive Bayes': (
            MultinomialNB(),
            {'alpha': [0.1, 0.5, 1.0]},
        ),
        'Decision Tree': (
            DecisionTreeClassifier(),
            {
                'max_depth': [None, 10, 20, 30],
                'min_samples_split': [2, 5, 10],
                'min_samples_leaf': [1, 2, 4],
            },
        ),
        'Random Forest': (
            RandomForestClassifier(),
            {
                'n_estimators': [100, 200, 300],
                'max_depth': [None, 10, 20, 30],
                'min_samples_split': [2, 5, 10],
                'min_samples_leaf': [1, 2, 4],
            },
        ),
        'SVC': (
            SVC(),
            {
                'C': [0.1, 1.0, 10.0],
                'kernel': ['linear', 'rbf'],
                'gamma': ['scale', 'auto'],
            },
        ),
        'KNN': (
            KNeighborsClassifier(),
            {
                'n_neighbors': [3, 5, 7],
                'weights': ['uniform', 'distance'],
                'metric': ['euclidean', 'manhattan'],
            },
        ),
    }

    # joblib.dump does not create missing parent directories.
    os.makedirs('./BestModels', exist_ok=True)

    best_models = {}
    for name, (model, param_grid) in candidates.items():
        if param_grid is None:
            # No hyper-parameters to search: fit the default estimator as-is.
            best_model = model.fit(X_train, y_train)
        else:
            grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
            grid_search.fit(X_train, y_train)
            best_model = grid_search.best_estimator_

        # Save best model to file
        model_filename = f'./BestModels/best_{name.lower().replace(" ", "_")}_model.pkl'
        joblib.dump(best_model, model_filename)

        best_models[name] = best_model
        print(f"Saved best {name} model to {model_filename}")

    print("All best models saved successfully!")
    return best_models


In [None]:
def models_predict_summary(X_test, y_test):
    """Evaluate the persisted models on the test set and export accuracies.

    For each model name, loads ``./BestModels/best_<name>_model.pkl``,
    predicts on ``X_test`` and records the accuracy against ``y_test``.
    The results are written as a two-column table (``Model``, ``Accuracy``)
    to ``../../Data/Output/model_results.xlsx``.

    Models whose file is missing are skipped with a printed message rather
    than aborting the whole summary.

    Args:
        X_test: Test feature vectors.
        y_test: True labels for ``X_test``.

    Returns:
        DataFrame with one row per evaluated model.
    """
    # A list (not a set) keeps the row order deterministic across runs.
    model_names = [
        'Logistic Regression',
        'Gaussian Naive Bayes',
        'Multinomial Naive Bayes',
        'Decision Tree',
        'Random Forest',
        'SVC',
        'KNN',
    ]

    # Load and evaluate models
    results = {}
    for name in model_names:
        model_filename = f'./BestModels/best_{name.lower().replace(" ", "_")}_model.pkl'
        if not os.path.exists(model_filename):
            print(f"Skipping {name}: {model_filename} not found")
            continue
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code; only load model files produced by a trusted source.
        model = joblib.load(model_filename)
        y_pred = model.predict(X_test)
        results[name] = accuracy_score(y_test, y_pred)

    # A dict of scalars cannot be passed to pd.DataFrame directly (pandas
    # raises "must pass an index"), so build explicit (model, accuracy) rows.
    results_df = pd.DataFrame(list(results.items()), columns=['Model', 'Accuracy'])

    # Specify the path where you want to save the Excel file
    excel_file = '../../Data/Output/model_results.xlsx'
    os.makedirs(os.path.dirname(excel_file), exist_ok=True)

    # Save DataFrame to Excel (requires an Excel writer engine such as openpyxl)
    results_df.to_excel(excel_file, index=False)

    print(f"Results saved to {excel_file}")
    return results_df
