In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score


In [7]:
# Read the citation table from the CSV file into the `data` DataFrame
data = pd.read_csv(filepath_or_buffer="41-50.csv")

In [8]:
# Build the model inputs from the raw citation table in `data`:
#   1. derive the 2022/2021 citation ratio,
#   2. bucket the ratio into three classes (0, 1, 2),
#   3. split the labelled rows into training and testing sets,
#   4. Min-Max scale the yearly citation columns using training-set statistics only.
# NOTE: this cell overwrites the citation columns of `data` with their scaled
# values, so re-run the loading cell before re-running this one; otherwise the
# ratio below would be computed from already-scaled values.

# Calculate the citation ratio (2022/2021) from the unscaled counts.
# For numeric columns a zero 2021 count gives inf (cit_2022 > 0) or NaN (0/0).
data['citation_ratio'] = data['cit_2022'] / data['cit_2021']

# Select the columns for citation data from 2017 to 2022
# NOTE(review): cit_2021 and cit_2022 are used as features although the label is
# derived from their ratio -- confirm this is intended and not target leakage.
citation_columns = ['cit_2017', 'cit_2018', 'cit_2019', 'cit_2020', 'cit_2021', 'cit_2022']

# Convert the ratio to a numerical label:
#   0 if ratio < 1.05, 1 if 1.05 <= ratio <= 1.15, 2 if ratio > 1.15 (inf included).
# A NaN ratio fails every comparison, so labelling it directly would silently put
# it in class 2. Such rows are left unlabelled and kept out of the model inputs.
has_ratio = data['citation_ratio'].notna()
labels = data.loc[has_ratio, 'citation_ratio'].apply(
    lambda ratio: 0 if ratio < 1.05 else (1 if ratio <= 1.15 else 2)
)
# Reindexing restores the full row index; unlabelled rows get a missing category.
data['category'] = labels.astype('category').reindex(data.index)

n_unlabelled = int((~has_ratio).sum())
if n_unlabelled:
    print(f"Excluded {n_unlabelled} row(s) with an undefined citation ratio")

# Split the labelled rows into training and testing sets BEFORE scaling, so the
# scaler never sees the test rows (fitting it on the full table leaks test-set
# min/max values into the training features).
features_raw = data.loc[has_ratio, citation_columns]
y = data.loc[has_ratio, 'category']
features_train, features_test, y_train, y_test = train_test_split(
    features_raw, y, test_size=0.2, random_state=42
)

# Normalize the citation data using Min-Max scaling fitted on the training rows.
# Test values outside the training range may therefore fall outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(features_train)
X_test = scaler.transform(features_test)

# Keep `data` and `X` in their normalized form for any later cells, using the
# same training-fitted scaler so they agree with X_train / X_test.
data[citation_columns] = scaler.transform(data[citation_columns])
X = data.loc[has_ratio, citation_columns].values

# Now, 'data' contains the normalized citation data and their corresponding categories

In [10]:

# Build a 50-estimator AdaBoost ensemble (seeded) and fit it on the training split
adaboost_clf = AdaBoostClassifier(random_state=42, n_estimators=50).fit(X_train, y_train)

# Predict class labels for the held-out test split
y_pred = adaboost_clf.predict(X_test)

# Report the share of correctly predicted test labels as a percentage
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 80.00%


: 