In [None]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

def perform_ablation(model, X, y, cv, feature_count):
    """Cross-validate a model with each feature zeroed out in turn.

    For every feature index in ``range(feature_count)`` the matching column
    is set to 0 in both the training and the test part of each fold, a fresh
    clone of ``model`` is fitted on the ablated training data, and its
    accuracy on the ablated test data is recorded.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            cloned before every fit, so the object passed in is never fitted
            or otherwise modified by this function.
        X: 2-D feature matrix supporting ``X[index_array]`` row selection
            and ``[:, i] = 0`` column assignment (e.g. a NumPy array).
        y: Labels aligned with the rows of ``X``, indexable by an index array.
        cv: Splitter exposing ``split(X, y)`` that yields
            ``(train_index, test_index)`` pairs.
        feature_count: Number of leading columns of ``X`` to ablate.

    Returns:
        A list with ``feature_count`` entries; entry ``i`` is the mean
        accuracy over all folds with column ``i`` zeroed.
    """
    # Materialise the folds once. A splitter that shuffles without a fixed
    # seed yields different folds on every ``split`` call, which would make
    # the per-feature scores incomparable; reusing one set of folds also
    # avoids recomputing the split for every feature.
    folds = list(cv.split(X, y))

    ablation_scores = []

    for feature_index in range(feature_count):
        ablation_fold_scores = []

        for train_index, test_index in folds:
            # Copy before zeroing so the caller's X is never modified, even
            # if a custom splitter hands back slices (views) instead of
            # index arrays.
            ablated_X_train = X[train_index].copy()
            ablated_X_test = X[test_index].copy()
            ablated_X_train[:, feature_index] = 0
            ablated_X_test[:, feature_index] = 0

            y_train, y_test = y[train_index], y[test_index]

            # Fit an unfitted copy instead of the caller's estimator.
            # safe=False falls back to a deep copy for objects that do not
            # implement the scikit-learn ``get_params`` protocol.
            ablated_model = clone(model, safe=False)
            ablated_model.fit(ablated_X_train, y_train)

            # Evaluation
            ablation_predictions = ablated_model.predict(ablated_X_test)
            ablation_fold_scores.append(
                accuracy_score(y_test, ablation_predictions)
            )

        # Average across folds
        ablation_scores.append(np.mean(ablation_fold_scores))

    return ablation_scores

# Dataset: column 0 is not used, columns 1-21 are the features and
# column 22 is the target.
dataset = pd.read_csv('/content/SingleTweets.csv')
feature_names = dataset.columns[1:22]
X = dataset.iloc[:, 1:22].values
y = dataset.iloc[:, 22].values

# Cross-validation and data splitting.
# The fixed seed makes the 10 stratified folds reproducible between runs.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
# NOTE(review): this hold-out split is not used by the ablation loop below;
# it is kept because code outside this section may rely on these names.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Models
models = [
    LogisticRegression(random_state=42),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    SVC(kernel='linear', random_state=42),
    SVC(kernel='rbf', random_state=42),
]

# Perform ablation
for model in models:
    model_name = type(model).__name__
    # The list holds two SVC instances that differ only in their kernel, so
    # the class name alone would give both reports the same heading.
    kernel = getattr(model, 'kernel', None)
    if kernel is not None:
        model_name = f"{model_name}, kernel={kernel}"

    ablation_scores = perform_ablation(model, X, y, cv, X.shape[1])

    # Print one table per model. Rows are sorted by accuracy with the feature
    # zeroed out, highest first, so the top rows are the features whose
    # removal hurt the model the least.
    feature_importance = pd.DataFrame({'Feature': feature_names, 'Average_Accuracy': ablation_scores})
    feature_importance = feature_importance.sort_values(by='Average_Accuracy', ascending=False)
    print(f"\nFeature Importance ({model_name}):")
    print(feature_importance)
